8.15.5. 05 - The Cube Swarm
A thousand cubes from one draw call. Tutorial 04 drew a single cube; this rung
fans that same 36-index cube over N_INSTANCES copies with
glDrawElementsInstanced, each copy reading its own world offset, colour and
animation phase from a second, per-instance vertex-attribute stream. It is a
faithful port of the dasVulkan instancing rung – same Lissajous swarm, same
per-instance breathing.
The headline is the vertex attribute divisor. Two attribute streams share one
VAO: the per-vertex geometry (binding 0, locations 0-1) steps once per vertex as
usual, while the per-instance data (binding 1, locations 2-4) steps once per
instance because glVertexAttribDivisor(loc, 1) is set on it. The shader sees
all five as plain @in @location inputs – there is no instancing keyword in the
shader at all; instancing lives entirely on the host side, in that divisor.
// per-vertex inputs (binding 0)
var @in @location = 0 a_pos : float3
var @in @location = 1 a_normal : float3
// per-instance inputs (binding 1) -- explicit locations so they line up with the
// host-side bind_vertex_buffer(..., 2u) offset; the divisor makes them step per instance
var @in @location = 2 a_offset : float3
var @in @location = 3 a_color : float3
var @in @location = 4 a_phase : float
// uniforms -- assigned on the host in update() once per frame.
// u_view / u_proj / u_time are read by vs_main, u_cam_pos by fs_main.
var @uniform u_view : float4x4
var @uniform u_proj : float4x4
var @uniform u_cam_pos : float3
var @uniform u_time : float
// vertex -> fragment interface: written by vs_main, read by fs_main
var @inout w_pos : float3
var @inout w_normal : float3
var @inout w_color : float3
// final fragment colour, written by fs_main
var @out f_FragColor : float4
[vertex_program]
def vs_main {
    // a_phase is scaled by 6.2832 (~2 pi) so each instance starts at its own angle;
    // the same phase angle desynchronises both the breathing and the spin
    let phase_angle = a_phase * 6.2832
    // breathing: uniform scale oscillating around 0.7 with amplitude 0.2 (0.5 .. 0.9)
    let scale = 0.7 + 0.2 * sin(u_time * 1.5 + phase_angle)
    let scaled = a_pos * scale
    // slow spin about the Y axis
    let spin = u_time * 0.25 + phase_angle
    let cs = cos(spin)
    let sn = sin(spin)
    let spun = float3(cs * scaled.x - sn * scaled.z, scaled.y, sn * scaled.x + cs * scaled.z)
    // per-instance translation places the cube in the swarm
    let world_pos = spun + a_offset
    // hand world-space position, normal and tint to the fragment stage.
    // The normal gets the same Y rotation as the position; the scale is uniform,
    // so no inverse-transpose is needed.
    w_pos = world_pos
    w_normal = float3(cs * a_normal.x - sn * a_normal.z, a_normal.y, sn * a_normal.x + cs * a_normal.z)
    w_color = a_color
    gl_Position = u_proj * u_view * float4(world_pos, 1.0)
}
[fragment_program]
def fs_main {
    // interpolated normal is re-normalised before use
    let normal = normalize(w_normal)
    let to_eye = normalize(u_cam_pos - w_pos)
    // fixed directional key light
    let light_dir = normalize(float3(0.5, 1.0, 0.3))
    // diffuse term scaled to 0.7 on top of a 0.3 ambient floor
    let n_dot_l = max(dot(normal, light_dir), 0.0)
    let diffuse = n_dot_l * 0.7 + 0.3
    // rim term: grows as the surface turns away from the viewer
    let facing = max(dot(normal, to_eye), 0.0)
    let rim = pow(1.0 - facing, 2.5)
    let rim_tint = float3(0.25, 0.35, 0.55) * rim * 0.4
    f_FragColor = float4(w_color * diffuse + rim_tint, 1.0)
}
// GL object names, all filled in by create_gl_objects():
// the linked shader program, the vertex array object, the per-vertex cube buffer,
// the per-instance buffer and the index buffer.
var program : uint
var vao : uint
var vbo : uint
var inst_vbo : uint
var ebo : uint
// GLFW window handle, created in init() and destroyed in shutdown()
var window : GLFWwindow?
// animation clock: update() advances it by 1/60 per call and feeds it to u_time
var time : float = 0.0
// 1000 small cubes -- comfortably visible at 640x480 with a ~6-unit swarm radius
// and the camera orbiting at distance 14.
let N_INSTANCES = 1000
// per-vertex row: position + face normal. Wired to the shader inputs by
// bind_vertex_buffer(null, type<Vertex>) in create_gl_objects while vbo is bound.
[vertex_buffer]
struct Vertex {
xyz : float3
normal : float3
}
// per-instance row: world offset + RGB tint + animation phase. All-float, so the
// [vertex_buffer] macro emits three GL_FLOAT attributes (3 + 3 + 1 components).
// Rows are produced by gen_instances; field order matches shader inputs
// a_offset / a_color / a_phase.
[vertex_buffer]
struct Instance {
// translation added to the rotated cube in vs_main
offset : float3
// tint passed through to the fragment stage
color : float3
// gen_instances fills this with i / n, i.e. a value in [0,1)
phase : float
}
// faceted unit cube (half-extent 0.5), per-face normals, 24 verts / 36 indices --
// same winding as tutorial 04, which renders correctly under GL_CULL_FACE GL_BACK.
// Four consecutive vertices form one face; faces are listed in the order
// +Z, +X, +Y, -X, -Y, -Z, and every vertex of a face carries that face's normal.
let vertices = [Vertex(
xyz=float3(0.5, 0.5, 0.5), normal=float3(0, 0, 1)), Vertex(
xyz=float3(-0.5, 0.5, 0.5), normal=float3(0, 0, 1)), Vertex(
xyz=float3(-0.5, -0.5, 0.5), normal=float3(0, 0, 1)), Vertex(
xyz=float3(0.5, -0.5, 0.5), normal=float3(0, 0, 1)), Vertex(
xyz=float3(0.5, 0.5, 0.5), normal=float3(1, 0, 0)), Vertex(
xyz=float3(0.5, -0.5, 0.5), normal=float3(1, 0, 0)), Vertex(
xyz=float3(0.5, -0.5, -0.5), normal=float3(1, 0, 0)), Vertex(
xyz=float3(0.5, 0.5, -0.5), normal=float3(1, 0, 0)), Vertex(
xyz=float3(0.5, 0.5, 0.5), normal=float3(0, 1, 0)), Vertex(
xyz=float3(0.5, 0.5, -0.5), normal=float3(0, 1, 0)), Vertex(
xyz=float3(-0.5, 0.5, -0.5), normal=float3(0, 1, 0)), Vertex(
xyz=float3(-0.5, 0.5, 0.5), normal=float3(0, 1, 0)), Vertex(
xyz=float3(-0.5, 0.5, 0.5), normal=float3(-1, 0, 0)), Vertex(
xyz=float3(-0.5, 0.5, -0.5), normal=float3(-1, 0, 0)), Vertex(
xyz=float3(-0.5, -0.5, -0.5), normal=float3(-1, 0, 0)), Vertex(
xyz=float3(-0.5, -0.5, 0.5), normal=float3(-1, 0, 0)), Vertex(
xyz=float3(-0.5, -0.5, -0.5), normal=float3(0, -1, 0)), Vertex(
xyz=float3(0.5, -0.5, -0.5), normal=float3(0, -1, 0)), Vertex(
xyz=float3(0.5, -0.5, 0.5), normal=float3(0, -1, 0)), Vertex(
xyz=float3(-0.5, -0.5, 0.5), normal=float3(0, -1, 0)), Vertex(
xyz=float3(0.5, -0.5, -0.5), normal=float3(0, 0, -1)), Vertex(
xyz=float3(-0.5, -0.5, -0.5), normal=float3(0, 0, -1)), Vertex(
xyz=float3(-0.5, 0.5, -0.5), normal=float3(0, 0, -1)), Vertex(
xyz=float3(0.5, 0.5, -0.5), normal=float3(0, 0, -1)
)];
// Index list for the cube: one row per face, each quad (a, b, c, d) split into the
// triangles (a, b, c) and (c, d, a). 6 faces x 6 indices = 36, the count passed to
// glDrawElementsInstanced in update(), where they are drawn as GL_UNSIGNED_INT.
let indices = fixed_array<int>(
0, 1, 2, 2, 3, 0,
4, 5, 6, 6, 7, 4,
8, 9, 10, 10, 11, 8,
12, 13, 14, 14, 15, 12,
16, 17, 18, 18, 19, 16,
20, 21, 22, 22, 23, 20)
// Produce the per-instance table: n rows of (offset, colour, phase).
//   offset -- a point on a Lissajous-style 3D curve: the azimuth winds 7 times and the
//             elevation 3 times while the radius pulses 5 times between 3.5 and 6.5,
//             so the swarm fills a balanced volume with no obvious axis alignment
//   colour -- three cosine channels 2 pi / 3 apart, cycling the hue circle three times
//   phase  -- i / n, sweeping [0,1), which desynchronises the per-cube animation
//             in the vertex shader
// Returns the table by move; the caller owns it.
def gen_instances(n : int) : array<Instance> {
    var swarm : array<Instance>
    resize(swarm, n)
    for (i in range(n)) {
        // normalised position of this instance along the curve
        let fi = float(i) / float(n)
        let turn = fi * 2.0 * PI
        let azimuth = turn * 7.0
        let elevation = turn * 3.0
        let radius = 5.0 + 1.5 * sin(turn * 5.0)
        // shared horizontal factor for x and z
        let squash = cos(elevation * 0.5)
        let x = radius * cos(azimuth) * squash
        let y = radius * sin(elevation)
        let z = radius * sin(azimuth) * squash
        // 6 pi over the whole swarm == three full hue cycles
        let hue = fi * 6.0 * PI
        let red = 0.5 + 0.5 * cos(hue)
        let green = 0.5 + 0.5 * cos(hue + 2.0944) // + 2 pi / 3
        let blue = 0.5 + 0.5 * cos(hue + 4.1888) // + 4 pi / 3
        swarm[i] = Instance(offset = float3(x, y, z), color = float3(red, green, blue), phase = fi)
    }
    return <- swarm
}
// Create every GL object the tutorial draws with: the shader program, one VAO,
// and the three buffers (cube vertices, per-instance rows, cube indices).
// Must run with a current GL context; all attribute and index bindings are
// recorded while the VAO is bound.
def create_gl_objects {
    program = create_shader_program(@@vs_main, @@fs_main)
    // CPU-side instance table; only needed until it has been uploaded below
    var swarm <- gen_instances(N_INSTANCES)
    glGenVertexArrays(1, safe_addr(vao))
    glBindVertexArray(vao)
    // stream 0: per-vertex cube geometry -> locations 0,1, default divisor 0
    glGenBuffers(1, safe_addr(vbo))
    glBindBuffer(GL_ARRAY_BUFFER, vbo)
    glBufferData(GL_ARRAY_BUFFER, vertices, GL_STATIC_DRAW)
    bind_vertex_buffer(null, type<Vertex>)
    // stream 1: per-instance rows -> locations 2,3,4. The attribute pointers are set
    // while inst_vbo is the bound array buffer, so both streams coexist in the VAO.
    glGenBuffers(1, safe_addr(inst_vbo))
    glBindBuffer(GL_ARRAY_BUFFER, inst_vbo)
    glBufferData(GL_ARRAY_BUFFER, swarm, GL_STATIC_DRAW)
    delete swarm
    bind_vertex_buffer(null, type<Instance>, 2u)
    // divisor 1 on each instance attribute: advance once per instance, not per vertex
    for (loc in range(2, 5)) {
        glVertexAttribDivisor(uint(loc), 1u)
    }
    // index buffer for the 36-index cube
    glGenBuffers(1, safe_addr(ebo))
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo)
    glBufferData(GL_ELEMENT_ARRAY_BUFFER, indices, GL_STATIC_DRAW)
}
// Lifecycle step 1: bring up GLFW, open a 640x480 window with an OpenGL 3.3
// context, make that context current and build the GL objects.
// Panics if GLFW cannot be initialised or the window cannot be created.
[export]
def init {
    let glfw_ok = glfwInit() != 0
    if (!glfw_ok) {
        panic("can't init glfw")
    }
    glfwInitOpenGL(3, 3)
    window = glfwCreateWindow(640, 480, "OpenGL - 05 cube swarm", null, null)
    if (window == null) {
        panic("can't create window")
    }
    // the context must be current before any GL call in create_gl_objects
    glfwMakeContextCurrent(window)
    create_gl_objects()
}
// Lifecycle step 2: advance the animation clock by one 1/60 s step, refresh the
// camera/time uniforms, and draw the whole swarm with a single instanced call.
// Returns true while the window should stay open, false once a close was requested.
[export]
def update : bool {
    time += 1.0 / 60.0
    let t = time
    var display_w, display_h : int
    glfwGetFramebufferSize(window, display_w, display_h)
    // Clamp BOTH dimensions for the aspect ratio: a 0x0 framebuffer (e.g. a minimized
    // window) would otherwise give aspect == 0 (or a division by zero), and the
    // projection matrix built from it would not be finite.
    let w = max(display_w, 1)
    let h = max(display_h, 1)
    let aspect = float(w) / float(h)
    // camera orbits the swarm with a gentle vertical sway to show depth
    let camera_angle = t * 0.3
    let camera_r = 14.0
    let cam_pos = float3(camera_r * cos(camera_angle), sin(camera_angle * 2.0) * 2.0, camera_r * sin(camera_angle))
    u_view = look_at_rh(cam_pos, float3(0, 0, 0), float3(0, 1, 0))
    u_proj = perspective_rh_opengl(60.0 * PI / 180.0, aspect, 0.1, 60.0)
    u_cam_pos = cam_pos
    u_time = t
    // the viewport uses the real framebuffer size; only the aspect ratio is clamped
    glViewport(0, 0, display_w, display_h)
    glClearColor(0.02, 0.0, 0.04, 1.0)
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
    glEnable(GL_DEPTH_TEST)
    glDepthFunc(GL_LEQUAL)
    glEnable(GL_CULL_FACE)
    glCullFace(GL_BACK)
    glUseProgram(program)
    // push the uniform globals assigned above into the bound program
    vs_main_bind_uniform(program)
    fs_main_bind_uniform(program)
    glBindVertexArray(vao)
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo)
    // the instancing call: 36 indices fanned over N_INSTANCES instances
    glDrawElementsInstanced(GL_TRIANGLES, 36, GL_UNSIGNED_INT, null, N_INSTANCES)
    glfwPollEvents()
    glfwSwapBuffers(window)
    return glfwWindowShouldClose(window) == 0
}
// Lifecycle step 3: tear down in reverse order of init() -- destroy the window
// first, then shut GLFW down.
[export]
def shutdown {
glfwDestroyWindow(window)
glfwTerminate()
}
// Desktop entry point: init once, call update() until it reports that the window
// should close, then shut down. On the web this is never called -- the run path
// drives the three lifecycle functions directly and persists the Context across frames.
[export]
def main {
    init()
    var running = true
    while (running) {
        running = update()
    }
    shutdown()
}
8.15.5.1. Two attribute streams, one VAO
The cube geometry (24 vertices, pos + normal) goes into vbo and is wired by
bind_vertex_buffer(null, type<Vertex>) to locations 0-1. The instance buffer is
built once at init – gen_instances samples a 3D Lissajous curve into
N_INSTANCES rows of (offset, colour, phase) – uploaded into inst_vbo,
and wired by bind_vertex_buffer(null, type<Instance>, 2u). The 2u is the
macro’s vindex argument: it offsets the generated attribute locations to 2-4 so
the two structs do not collide. Each glVertexAttribPointer the macro emits
records whichever array buffer is bound at that moment into the VAO, so the two
streams coexist; the three glVertexAttribDivisor(loc, 1) calls then mark the
instance attributes to advance once per instance instead of once per vertex.
Explicit @location = N on the shader inputs lowers to
layout(location = N) in ... in GLSL, pinning each input to the exact location
the host binds – important here, where the locations are split across two structs.
8.15.5.2. Animated on the GPU
The instance buffer never changes after init. The swarm’s motion is entirely in the
vertex shader: each cube breathes (0.7 + 0.2 * sin) and spins about Y, both
keyed off u_time and the cube’s own a_phase so the thousand cubes are
desynchronised. Only the camera uniforms (u_view, u_proj, u_cam_pos) and
u_time change per frame – one small uniform update drives a thousand
independently-animating cubes.
8.15.5.3. Run it
Locally, in a window:
daslang tutorials/opengl/05_instancing/05_instancing.das
In the browser, it runs live in the daslang playground – the same .das, lowered
to WebGL2, a thousand cubes swarming from one instanced draw on your GPU.