Backend Development 21 min read

Implementing a Service Registry and Discovery in Go: Design, Code, and Practices

This article explains why service registration and discovery are needed in microservice architectures, outlines the design principles of a registry, provides Go data structures and functions for registering, fetching, renewing, and evicting services, discusses self‑protection mechanisms, and shows how to expose the registry via a Gin‑based HTTP API.

High Availability Architecture
High Availability Architecture
High Availability Architecture
Implementing a Service Registry and Discovery in Go: Design, Code, and Practices

Microservice architectures require a reliable way for services to locate each other. Hard‑coding hostnames or using static DNS becomes unmanageable as services scale, leading to the need for automated service registration and discovery.

Why introduce service registration and discovery

When services are split, manual configuration creates bottlenecks: adding new services, handling instance failures, and updating configurations are error‑prone. A registry allows providers to announce themselves and consumers to query the registry for up‑to‑date addresses.

Registry implementation principles

The registry must support basic functions: register a service, deregister (cancel) a service, fetch service information, renew (heartbeat) a service, and evict stale instances.

Data structures

// Registry is the in-memory store of all registered applications.
type Registry struct {
    // apps maps the key produced by getKey(appid, env) to its Application.
    apps map[string]*Application
    // lock guards every read and write of the apps map.
    lock sync.RWMutex
}
// Application is the value stored in Registry.apps; Register creates one via
// NewApplication(instance.AppId) and adds instances to it.
type Application struct {
    appid           string
    // instances holds the registered instances. Presumably keyed by hostname,
    // since Cancel and Renew address an instance by hostname — TODO confirm.
    instances       map[string]*Instance
    // NOTE(review): looks like the time of the most recent change to this
    // application; the code that maintains it is not shown here — confirm.
    latestTimestamp int64
    // NOTE(review): presumably guards instances and latestTimestamp — confirm
    // against the Application methods.
    lock            sync.RWMutex
}
// Instance describes one registered service instance. It is serialized to
// JSON using the field tags below.
//
// NewInstance copies Env, AppId, Hostname, Addrs, Version and Status from the
// registration request and initializes every timestamp field to the same
// time.Now().UnixNano() value.
type Instance struct {
    Env            string `json:"env"`
    AppId          string `json:"appid"`
    Hostname       string `json:"hostname"`
    Addrs          []string `json:"addrs"`
    Version        string `json:"version"`
    // Status is passed through from the request; its value set is defined elsewhere.
    Status         uint32 `json:"status"`
    RegTimestamp   int64  `json:"reg_timestamp"`
    UpTimestamp    int64  `json:"up_timestamp"`
    // RenewTimestamp is what evict compares against configs.InstanceExpireDuration.
    RenewTimestamp int64  `json:"renew_timestamp"`
    DirtyTimestamp int64  `json:"dirty_timestamp"`
    LatestTimestamp int64 `json:"latest_timestamp"`
}

Core operations

Register

// NewRegistry returns an empty Registry whose application map is already
// allocated, so it can be written to immediately.
func NewRegistry() *Registry {
    reg := new(Registry)
    reg.apps = make(map[string]*Application)
    return reg
}

// NewInstance builds an Instance from a registration request. The descriptive
// fields are copied from req, and every timestamp field is set to the same
// creation time in Unix nanoseconds.
func NewInstance(req *RequestRegister) *Instance {
    createdAt := time.Now().UnixNano()

    ins := &Instance{
        Env:      req.Env,
        AppId:    req.AppId,
        Hostname: req.Hostname,
        Addrs:    req.Addrs,
        Version:  req.Version,
        Status:   req.Status,
    }

    // All lifecycle timestamps start out identical.
    ins.RegTimestamp = createdAt
    ins.UpTimestamp = createdAt
    ins.RenewTimestamp = createdAt
    ins.DirtyTimestamp = createdAt
    ins.LatestTimestamp = createdAt
    return ins
}

// Register adds instance to the application identified by its AppId and Env,
// creating the application when it does not exist yet, and returns that
// application. The returned error is always nil.
//
// The lookup, creation, AddInstance call and map store all run under the
// registry write lock. Doing the lookup under a read lock and the store under
// a separate write lock lets two concurrent first registrations for the same
// key each create their own Application; the later store then overwrites the
// earlier one and silently drops its instance.
func (r *Registry) Register(instance *Instance, latestTimestamp int64) (*Application, error) {
    key := getKey(instance.AppId, instance.Env)

    r.lock.Lock()
    app, ok := r.apps[key]
    if !ok {
        app = NewApplication(instance.AppId)
    }
    _, isNew := app.AddInstance(instance, latestTimestamp)
    // Store unconditionally, as before, so the mapping is (re)established even
    // when the application already existed.
    r.apps[key] = app
    r.lock.Unlock()

    if isNew {
        // new instance logic
    }
    return app, nil
}

Fetch (service discovery)

// Fetch looks up the application for (appid, env) and returns the result of
// its GetInstance(status, latestTime) call. It returns errcode.NotFound when
// no such application is registered.
func (r *Registry) Fetch(env, appid string, status uint32, latestTime int64) (*FetchData, error) {
    if target, found := r.getApplication(appid, env); found {
        return target.GetInstance(status, latestTime)
    }
    return nil, errcode.NotFound
}

Cancel (service deregistration)

// Cancel deregisters the instance with the given hostname from the application
// for (appid, env) and returns the removed instance. It returns
// errcode.NotFound when either the application or the instance is unknown.
// When the application reports no remaining instances, its entry is deleted
// from the registry.
func (r *Registry) Cancel(env, appid, hostname string, latestTimestamp int64) (*Instance, error) {
    target, found := r.getApplication(appid, env)
    if !found {
        return nil, errcode.NotFound
    }

    removed, cancelled, remaining := target.Cancel(hostname, latestTimestamp)
    switch {
    case !cancelled:
        return nil, errcode.NotFound
    case remaining == 0:
        // Last instance gone: drop the now-empty application entry.
        r.lock.Lock()
        delete(r.apps, getKey(appid, env))
        r.lock.Unlock()
    }
    return removed, nil
}

Renew (heartbeat)

// Renew forwards a heartbeat for hostname to the application for (appid, env)
// and returns whatever that application's Renew returns. It returns
// errcode.NotFound when the application is not registered.
func (r *Registry) Renew(env, appid, hostname string) (*Instance, error) {
    target, exists := r.getApplication(appid, env)
    if exists {
        return target.Renew(hostname)
    }
    return nil, errcode.NotFound
}

Eviction of stale instances

// evictTask runs evict once per configs.CheckEvictInterval tick. It never
// returns, so callers are expected to run it in its own goroutine.
func (r *Registry) evictTask() {
    for range time.Tick(configs.CheckEvictInterval) {
        r.evict()
    }
}

// evict scans every instance of every application and collects those whose
// last renewal is older than configs.InstanceExpireDuration. The step that
// actually removes the collected instances is elided in this listing (see the
// trailing comments).
func (r *Registry) evict() {
    // Read the clock once so all instances are compared against the same instant.
    now := time.Now().UnixNano()
    var expired []*Instance
    for _, app := range r.getAllApplications() {
        for _, ins := range app.GetAllInstances() {
            // NewInstance sets RenewTimestamp from UnixNano, so this difference
            // is in nanoseconds; presumably InstanceExpireDuration is a
            // time.Duration (also nanoseconds) — confirm.
            if now-ins.RenewTimestamp > int64(configs.InstanceExpireDuration) {
                expired = append(expired, ins)
            }
        }
    }
    // random eviction using Knuth‑shuffle
    // ...
}

Self‑protection mechanism

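To avoid mass eviction during temporary network glitches, the registry tracks how many renewals it expects (needRenewCount) and how many it actually receives (renewCount). If the number of renewals received in the last check window falls below a threshold (by default 85% of the expected count), the registry assumes the problem lies in the network rather than in the instances and suppresses eviction.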

// Guard holds the counters used by the self-protection check.
type Guard struct {
    // renewCount is incremented atomically by incrCount.
    renewCount      int64
    // lastRenewCount is the value selfProtectStatus compares against threshold.
    // NOTE(review): presumably a snapshot of renewCount from the previous
    // window; the code that sets it is not shown here — confirm.
    lastRenewCount  int64
    // needRenewCount is increased by incrNeed by
    // configs.CheckEvictInterval / configs.RenewInterval per call.
    needRenewCount  int64
    // threshold is needRenewCount * configs.SelfProtectThreshold, truncated to
    // an integer; it is recomputed by incrNeed.
    threshold       int64
    // lock is held by incrNeed while it updates needRenewCount and threshold.
    lock            sync.RWMutex
}

// incrNeed raises the expected renewal count by the number of renewals one
// instance should send per eviction check
// (configs.CheckEvictInterval / configs.RenewInterval) and recomputes the
// self-protection threshold from the new total.
func (gd *Guard) incrNeed() {
    gd.lock.Lock()
    defer gd.lock.Unlock()
    gd.needRenewCount += int64(configs.CheckEvictInterval / configs.RenewInterval)
    // selfProtectStatus reads threshold with atomic.LoadInt64 and does not take
    // gd.lock, so the write must be atomic too; a plain store here is a data race.
    atomic.StoreInt64(&gd.threshold, int64(float64(gd.needRenewCount)*configs.SelfProtectThreshold))
}

// incrCount records one received renewal by atomically adding 1 to renewCount.
func (gd *Guard) incrCount() {
    const step int64 = 1
    atomic.AddInt64(&gd.renewCount, step)
}

// selfProtectStatus reports whether lastRenewCount is strictly below the
// current threshold. Both values are read atomically.
func (gd *Guard) selfProtectStatus() bool {
    renewed := atomic.LoadInt64(&gd.lastRenewCount)
    floor := atomic.LoadInt64(&gd.threshold)
    return renewed < floor
}

Exposing the registry via HTTP (Gin)

// RegisterHandler is the Gin handler for service registration. It binds the
// JSON request body, builds an Instance from it, and registers that instance
// with the global registry.
//
// Every response is sent with HTTP status 200; the outcome is carried in the
// "code" field of the JSON body: errcode.ParamError.Code() for a malformed
// request, 500 when registration fails, and 200 on success.
func RegisterHandler(c *gin.Context) {
    var req model.RequestRegister
    if err := c.ShouldBindJSON(&req); err != nil {
        c.JSON(http.StatusOK, gin.H{"code": errcode.ParamError.Code(), "message": errcode.ParamError.Error()})
        return
    }
    instance := model.NewInstance(&req)
    // Report a registration failure to the client instead of discarding the
    // error and claiming success.
    if _, err := global.Discovery.Registry.Register(instance, req.LatestTimestamp); err != nil {
        c.JSON(http.StatusOK, gin.H{"code": http.StatusInternalServerError, "message": err.Error()})
        return
    }
    c.JSON(http.StatusOK, gin.H{"code": 200, "message": "", "data": ""})
}

The main function loads configuration, creates a global discovery object, starts the Gin router, and adds graceful shutdown handling.

// main loads the configuration named by the -c flag, creates the global
// discovery object, serves the HTTP API, and shuts the server down gracefully
// on SIGINT or SIGTERM, allowing in-flight requests up to five seconds.
func main() {
    c := flag.String("c", "", "config file path")
    flag.Parse()
    cfg, err := configs.LoadConfig(*c)
    if err != nil {
        log.Fatal(err)
    }
    global.Discovery = model.NewDiscovery(cfg)
    router := api.InitRouter()
    srv := &http.Server{Addr: cfg.HttpServer, Handler: router}
    go func() {
        // ListenAndServe returns http.ErrServerClosed after Shutdown; anything
        // else (e.g. the port is already in use) is a real startup failure.
        if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
            log.Fatalf("listen: %v", err)
        }
    }()

    // graceful shutdown
    // The channel must be buffered: signal.Notify does not block when sending,
    // so a signal that arrives before the receive below would be dropped.
    quit := make(chan os.Signal, 1)
    signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
    <-quit

    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer cancel()
    if err := srv.Shutdown(ctx); err != nil {
        // Not log.Fatal: that would skip the deferred cancel.
        log.Printf("server shutdown: %v", err)
    }
}

Project structure

api – HTTP routes and handlers

cmd – entry point (main)

configs – global configuration and constants

global – global variables (Discovery)

model – core registry models and logic

Conclusion and future work

The article delivers a single‑node service registry written in Go, covering registration, discovery, heartbeat renewal, eviction, and self‑protection. To achieve high availability, the next step is to run the registry as a cluster whose nodes replicate their state to one another (e.g., through a consensus protocol such as Raft, or a gossip protocol) so that registries stay consistent across nodes.

microservices · backend development · golang · service discovery · Registry · self-protection
High Availability Architecture
Written by

High Availability Architecture

Official account for High Availability Architecture.

0 followers
Reader feedback

How this landed with the community

login Sign in to like

Rate this article

Was this worth your time?

Sign in to rate
Discussion

0 Comments

Thoughtful readers leave field notes, pushback, and hard-won operational detail here.