Operations 16 min read

How to Trace Go Goroutine State Changes with eBPF Uprobes and Ring Buffers

This article explains how to monitor Go goroutine state transitions without modifying source code by attaching eBPF uprobes to the runtime.casgstatus function, defining a custom data struct, using a ring buffer to deliver events, and processing them in a Go user‑space program.

BirdNest Tech Talk
BirdNest Tech Talk
BirdNest Tech Talk
How to Trace Go Goroutine State Changes with eBPF Uprobes and Ring Buffers

Goal

Track the state transitions of Go goroutines at runtime without recompiling or modifying the original binary.

Kernel‑space probe

A custom header goroutine.h defines the runtime enum and a struct for passing event data:

#ifndef EBPF_EXAMPLE_GOROUTINE_H
#define EBPF_EXAMPLE_GOROUTINE_H

/*
 * Goroutine status values passed to runtime.casgstatus.
 * The numbering is spelled out explicitly because the Go consumer declares
 * the same states in the same order and decodes them as raw integers.
 */
enum goroutine_state {
  IDLE = 0,
  RUNNABLE = 1,
  RUNNING = 2,
  SYSCALL = 3,
  WAITING = 4,
  MORIBUND_UNUSED = 5,
  DEAD = 6,
  ENQUEUE_UNUSED = 7,
  COPYSTACK = 8,
  PREEMPTED = 9,
};

/*
 * One goroutine state-change event, as written into the ring buffer.
 * The Go consumer decodes the raw bytes into GoroutineExecuteData, so the
 * field order and widths here must stay in step with that struct.
 */
struct goroutine_execute_data {
  /* State the goroutine is leaving (second argument of casgstatus). */
  enum goroutine_state old_state;
  /* State the goroutine is entering (third argument of casgstatus). */
  enum goroutine_state new_state;
  /* Goroutine ID read from the g struct at GOID_OFFSET. */
  u64 goid;
  /* Lower 32 bits of bpf_get_current_pid_tgid(). */
  u32 pid;
  /* Upper 32 bits of bpf_get_current_pid_tgid(). */
  u32 tgid;
};

#endif

The eBPF program includes the kernel headers, the custom header, declares a volatile constant pid_target for optional PID filtering, and creates a ring‑buffer map rb (256 KB capacity). The probe is attached to runtime.casgstatus, the runtime function through which ordinary goroutine state transitions are made.

#include <vmlinux.h>
#include "goroutine.h"
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

// Optional process filter. The user-space loader is meant to overwrite this
// constant from its -pid flag; the default of 0 disables filtering.
const volatile int pid_target = 0;
// Byte offset of the goid field inside the runtime's g struct. This is an
// internal detail of the Go runtime and must match the Go version the target
// binary was built with.
#define GOID_OFFSET 0x98

// Ring buffer used to hand goroutine_execute_data records to user space.
// max_entries is the buffer size in bytes (256 * 1024).
struct {
  __uint(type, BPF_MAP_TYPE_RINGBUF);
  __uint(max_entries, 256 * 1024);
} rb SEC(".maps");

// Uprobe handler for runtime.casgstatus in /opt/ddns-go/ddns-go.
//
// For every call it emits one goroutine_execute_data record to the rb ring
// buffer and mirrors the same values to the trace pipe via bpf_printk.
// Calls are dropped silently when the process filter does not match, when the
// goroutine ID cannot be read, or when the ring buffer has no free space.
// Always returns 0.
SEC("uprobe//opt/ddns-go/ddns-go:runtime.casgstatus")
int uprobe_runtime_casgstatus(struct pt_regs *ctx) {
  // The helper packs two IDs into one value: the upper half is the
  // thread-group ID (the process ID as user space sees it), the lower half is
  // the ID of the calling thread.
  u64 ids = bpf_get_current_pid_tgid();
  u32 tgid = ids >> 32;
  u32 tid = ids;

  // pid_target == 0 means "trace every process".
  if (pid_target && pid_target != tgid)
    return 0;

  // Go's register ABI on amd64 passes the first three integer arguments in
  // AX, BX and CX: the g pointer, the old status and the new status.
  void *g = (void *)ctx->ax;
  u32 from = (u32)ctx->bx;
  u32 to = (u32)ctx->cx;

  // The goroutine ID lives in user memory inside the g struct.
  u64 goid;
  if (bpf_probe_read_user(&goid, sizeof(goid), g + GOID_OFFSET) != 0)
    return 0;

  struct goroutine_execute_data *ev = bpf_ringbuf_reserve(&rb, sizeof(*ev), 0);
  if (!ev)
    return 0;

  ev->old_state = from;
  ev->new_state = to;
  ev->goid = goid;
  ev->pid = tid;
  ev->tgid = tgid;
  bpf_printk("pgid:%d, pid:%d, goid:%lu, old:%d, new:%d", tgid, tid, goid, from, to);
  bpf_ringbuf_submit(ev, 0);
  return 0;
}

// License declaration, emitted into the "license" section of the object file.
char LICENSE[] SEC("license") = "GPL";

The probe extracts the three arguments from the CPU registers (ax, bx, cx), reads the goroutine ID from the known offset, optionally filters by thread‑group ID (TGID, i.e. the process ID as seen from user space), reserves a slot in the ring buffer, fills the goroutine_execute_data structure, prints a debug message, and submits the record to user space.

User‑space loader and consumer

The Go program performs the following steps:

Remove the default memory‑lock limit with rlimit.RemoveMemlock().

Load the compiled eBPF object via loadBpf().

Rewrite the constant pid_target based on the -pid flag.

Open the target executable (/opt/ddns-go/ddns-go) with link.OpenExecutable and attach a uprobe to runtime.casgstatus.

Create a ring‑buffer reader for the map rb.

Spawn a goroutine that continuously reads events, decodes them with binary.Read (little‑endian), and prints a human‑readable line.

package main

import (
    "bytes"
    "encoding/binary"
    "flag"
    "fmt"
    "log"
    "os"
    "os/signal"
    "syscall"

    "github.com/cilium/ebpf/link"
    "github.com/cilium/ebpf/ringbuf"
    "github.com/cilium/ebpf/rlimit"
)

// GoroutineState is a goroutine status value as delivered by the eBPF probe.
type GoroutineState uint32

// Known goroutine states, declared in the same order as enum goroutine_state
// in goroutine.h so the raw integers from the ring buffer map onto them.
const (
    IDLE GoroutineState = iota
    RUNNABLE
    RUNNING
    SYSCALL
    WAITING
    MORIBUND_UNUSED
    DEAD
    ENQUEUE_UNUSED
    COPYSTACK
    PREEMPTED
)

// goroutineStateNames holds the display name of every known state, indexed by
// the state's numeric value.
var goroutineStateNames = [...]string{
    IDLE:            "IDLE",
    RUNNABLE:        "RUNNABLE",
    RUNNING:         "RUNNING",
    SYSCALL:         "SYSCALL",
    WAITING:         "WAITING",
    MORIBUND_UNUSED: "MORIBUND_UNUSED",
    DEAD:            "DEAD",
    ENQUEUE_UNUSED:  "ENQUEUE_UNUSED",
    COPYSTACK:       "COPYSTACK",
    PREEMPTED:       "PREEMPTED",
}

// String returns the name of the state, or "UNKNOWN(n)" for a value that is
// not one of the declared constants.
func (s GoroutineState) String() string {
    if s < GoroutineState(len(goroutineStateNames)) {
        return goroutineStateNames[s]
    }
    return fmt.Sprintf("UNKNOWN(%d)", uint32(s))
}

// GoroutineExecuteData is the Go view of struct goroutine_execute_data from
// goroutine.h. Records are decoded from the ring buffer with binary.Read, so
// the field order and widths must stay identical to the C struct.
type GoroutineExecuteData struct {
    // OldState is the state the goroutine is leaving.
    OldState GoroutineState
    // NewState is the state the goroutine is entering.
    NewState GoroutineState
    // Goid is the goroutine ID read by the probe.
    Goid     uint64
    // Pid is the lower half of the kernel's combined pid/tgid value.
    Pid      uint32
    // Tgid is the upper half of the kernel's combined pid/tgid value.
    Tgid     uint32
}

func main() {
    var pidTarget int
    flag.IntVar(&pidTarget, "pid", 0, "Target PID for filtering")
    flag.Parse()

    if err := rlimit.RemoveMemlock(); err != nil {
        log.Fatal(err)
    }

    spec, err := loadBpf()
    if err != nil {
        log.Fatalf("loading BPF program: %v", err)
    }

    objs := bpfObjects{}
    if err := spec.LoadAndAssign(&objs, nil); err != nil {
        log.Fatalf("loading objects: %v", err)
    }
    defer objs.Close()

    ex, err := link.OpenExecutable("/opt/ddns-go/ddns-go")
    if err != nil {
        log.Fatalf("opening executable: %v", err)
    }

    if err := spec.RewriteConstants(map[string]interface{}{ "pid_target": uint32(pidTarget) }); err != nil {
        log.Fatalf("rewriting constants: %v", err)
    }

    up, err := ex.Uprobe("runtime.casgstatus", objs.UprobeRuntimeCasgstatus, nil)
    if err != nil {
        log.Fatalf("creating uprobe: %v", err)
    }
    defer up.Close()

    rb, err := ringbuf.NewReader(objs.Rb)
    if err != nil {
        log.Fatalf("opening ringbuf reader: %v", err)
    }
    defer rb.Close()

    sig := make(chan os.Signal, 1)
    signal.Notify(sig, syscall.SIGINT, syscall.SIGTERM)
    fmt.Println("Listening for events…")

    go func() {
        for {
            rec, err := rb.Read()
            if err != nil {
                log.Printf("reading from ringbuf: %v", err)
                return
            }
            var ev GoroutineExecuteData
            if err := binary.Read(bytes.NewReader(rec.RawSample), binary.LittleEndian, &ev); err != nil {
                log.Printf("parsing event: %v", err)
                return
            }
            fmt.Printf("TGID:%d PID:%d GoID:%d Old:%s New:%s
", ev.Tgid, ev.Pid, ev.Goid, ev.OldState.String(), ev.NewState.String())
        }
    }()

    <-sig
    fmt.Println("Exiting…")
}

Why uprobe is suitable

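Uprobes can be attached to any user‑space instruction, unlike static kernel tracepoints. By targeting runtime.casgstatus, the function the runtime uses for ordinary goroutine state transitions, the probe captures those transitions without source‑level instrumentation. Transitions that the runtime performs through its other status helpers (for example the scan‑bit and preemption variants) do not pass through casgstatus and are therefore not observed by this probe.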

Register layout for the probe

On AMD64 the Go ABI passes the first three integer arguments in RAX, RBX, and RCX. A small test program that calls runtime.casgstatus confirms the mapping of gp, old, and new to those registers.

register layout
register layout

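PID vs TGID filtering

The probe compares pid_target against the thread‑group ID (TGID), which is the upper 32 bits of the value returned by bpf_get_current_pid_tgid() and corresponds to the process ID seen from user space; the lower 32 bits hold the kernel's per‑thread ID. Because all threads of a Go process share the same TGID, filtering by TGID allows monitoring the entire process while ignoring unrelated processes.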

Limitations

The technique depends on internal details of the Go runtime: the existence and signature of runtime.casgstatus and the offset of the goid field inside the goroutine struct. These internals may change across Go versions, so the eBPF program must be rebuilt and validated against the exact runtime version of the target binary.

References

runtime2.go#L37 – https://github.com/golang/go/blob/097b7162adeab8aad0095303aff8a045bbbfa6e0/src/runtime/runtime2.go#L37

Go ABI specification – https://go.googlesource.com/go/+/refs/heads/dev.regabi/src/cmd/compile/internal-abi.md

Original Source

Signed-in readers can open the original source through BestHub's protected redirect.

Sign in to view source
Republication Notice

This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contact admin@besthub.dev and we will review it promptly.

Observability · Go · eBPF · uprobe · kernel tracing · Ring Buffer · goroutine tracing
BirdNest Tech Talk
Written by

BirdNest Tech Talk

Author of the rpcx microservice framework, original book author, and chair of Baidu's Go CMC committee.

0 followers
Reader feedback

How this landed with the community

Sign in to like

Rate this article

Was this worth your time?

Sign in to rate
Discussion

0 Comments

Thoughtful readers leave field notes, pushback, and hard-won operational detail here.