Build a High‑Performance Go + Playwright Browser Automation Framework
Learn how to create a production‑grade, high‑throughput browser automation service in Go using Playwright, featuring browser‑context pooling, proxy rotation, task scheduling with watchdogs, Prometheus metrics, and a WebUI, enabling thousands of concurrent tasks, robust monitoring, and easy scalability.
Project Background
Traditional browser automation launches a new browser per task, which causes high memory consumption. It provides no monitoring for blocked or crashed tasks, makes proxy and context management cumbersome, and lacks built‑in task timeout and retry mechanisms.
Solution Overview
A production‑grade framework built with Go and Playwright offers:
BrowserContext pooling for reuse
Proxy rotation and management
TaskRunner with scheduling and watchdog monitoring
Prometheus metrics collection
Web UI for visual management
Core Implementations
BrowserContext Pool
// BrowserPool owns a set of browser instances plus one ContextPool per
// browser, and hands contexts out in round-robin order.
type BrowserPool struct {
	// factory creates a new browser instance.
	factory func() (playwright.Browser, error)

	// mu protects the shared fields below.
	mu sync.RWMutex
	// browsers holds every browser instance in the pool.
	browsers []playwright.Browser
	// contexts holds one context pool per browser, at the matching index.
	contexts []*ContextPool
	// idx is the round-robin cursor.
	idx int
}
// NewBrowserPool creates a pool of `initial` browsers. Each browser is built
// by factory and paired with a context pool of capacity 5.
//
// If factory fails, every browser created so far is closed before the error
// is returned, so a partially built pool does not leak browser instances.
// An `initial` of zero or less yields an empty pool.
func NewBrowserPool(factory func() (playwright.Browser, error), initial int) (*BrowserPool, error) {
	p := &BrowserPool{factory: factory}
	for i := 0; i < initial; i++ {
		b, err := factory()
		if err != nil {
			// Best-effort cleanup: the creation error is the one the caller
			// needs, so close failures are only logged.
			for j, created := range p.browsers {
				if cerr := created.Close(); cerr != nil {
					log.Printf("closing browser %d after failed pool creation: %v", j, cerr)
				}
			}
			return nil, fmt.Errorf("failed to create browser %d: %w", i, err)
		}
		p.browsers = append(p.browsers, b)
		// each browser gets a context pool with capacity 5
		p.contexts = append(p.contexts, NewContextPool(b, 5))
	}
	return p, nil
}
// RentContext returns a browser context using round‑robin selection
func (p *BrowserPool) RentContext() (playwright.BrowserContext, int, error) {
p.mu.RLock()
if len(p.browsers) == 0 {
p.mu.RUnlock()
return nil, -1, errors.New("no browsers available")
}
i := p.idx % len(p.browsers)
cp := p.contexts[i]
p.mu.RUnlock()
ctx, err := cp.Rent()
if err != nil {
return nil, -1, fmt.Errorf("failed to rent context from browser %d: %w", i, err)
}
p.mu.Lock()
p.idx = (p.idx + 1) % len(p.browsers)
p.mu.Unlock()
return ctx, i, nil
}TaskRunner
// executeTask runs a single task with retry logic
func (tr *TaskRunner) executeTask(t task, workerID int) {
attempts := 0
startTime := time.Now()
for attempts < tr.MaxRetries {
attempts++
ctxObj, idx, err := tr.BrowserPool.RentContext()
if err != nil {
log.Printf("worker[%d] rent context error: %v", workerID, err)
TaskRetries.Inc()
continue
}
ctx, cancel := context.WithTimeout(context.Background(), tr.TaskTimeout)
page, err := ctxObj.NewPage()
if err != nil {
cancel()
tr.BrowserPool.ReturnContext(CtxRef{BrowserIndex: idx, Ctx: ctxObj})
log.Printf("worker[%d] create page error: %v", workerID, err)
TaskRetries.Inc()
continue
}
done := make(chan struct{})
go func() {
select {
case <-done:
return
case <-time.After(tr.HardTimeout):
log.Printf("worker[%d] hard timeout, force close page", workerID)
page.Close()
cancel()
}
}()
err = t(ctx, page)
close(done)
page.Close()
cancel()
tr.BrowserPool.ReturnContext(CtxRef{BrowserIndex: idx, Ctx: ctxObj})
duration := time.Since(startTime)
TaskDuration.Observe(duration.Seconds())
if err == nil {
log.Printf("worker[%d] task completed successfully after %d attempts, took %v", workerID, attempts, duration)
return
}
log.Printf("worker[%d] task attempt %d failed: %v", workerID, attempts, err)
if attempts < tr.MaxRetries {
TaskRetries.Inc()
backoff := time.Duration(rand.Intn(500)) * time.Millisecond
time.Sleep(backoff)
}
}
duration := time.Since(startTime)
log.Printf("worker[%d] task failed after %d attempts, took %v", workerID, attempts, duration)
}ProxyPool
// ProxyPool hands out HTTP proxy addresses in round-robin order.
// Pick is safe to call from multiple goroutines.
type ProxyPool struct {
	mu      sync.Mutex // guards idx
	proxies []string   // proxy list, copied from the constructor argument
	idx     int        // index of the next proxy to return
}

// NewProxyPool returns a pool over a copy of proxies, so later changes to the
// caller's slice do not affect the pool. A nil or empty slice yields a pool
// whose Pick always returns "".
func NewProxyPool(proxies []string) *ProxyPool {
	return &ProxyPool{proxies: append([]string(nil), proxies...)}
}

// Pick returns the next proxy in round-robin order, or "" when the pool has
// no proxies. The cursor wraps at the list length instead of growing without
// bound.
func (p *ProxyPool) Pick() string {
	p.mu.Lock()
	defer p.mu.Unlock()

	n := len(p.proxies)
	if n == 0 {
		return ""
	}
	proxy := p.proxies[p.idx%n]
	p.idx = (p.idx + 1) % n
	return proxy
}

// BrowserPool API (Health & Status Endpoints)
// Liveness probe: always replies 200 with {"status": "ok"}.
r.GET("/health", func(c *gin.Context) {
	c.JSON(http.StatusOK, gin.H{"status": "ok"})
})

// Reports pp.Count() under the "available_proxies" key.
r.GET("/proxy_count", func(c *gin.Context) {
	available := pp.Count()
	c.JSON(http.StatusOK, gin.H{"available_proxies": available})
})

// Reports the number of browsers and, for each context pool, its index,
// size and available count.
r.GET("/browser_pool_status", func(c *gin.Context) {
	// snapshot copies everything needed while holding the read lock, so the
	// response is written after the lock has been released.
	snapshot := func() (int, []map[string]interface{}) {
		bp.mu.RLock()
		defer bp.mu.RUnlock()

		pools := make([]map[string]interface{}, 0, len(bp.contexts))
		for n, pool := range bp.contexts {
			pools = append(pools, map[string]interface{}{
				"index":     n,
				"size":      pool.Size(),
				"available": pool.Available(),
			})
		}
		return len(bp.browsers), pools
	}

	total, pools := snapshot()
	c.JSON(http.StatusOK, gin.H{"browser_count": total, "context_pools": pools})
})

Prometheus Metrics
// TaskDuration is the histogram exported as automation_task_duration_seconds.
// NOTE(review): no registration with a Prometheus registry is visible in this
// snippet; confirm it is registered elsewhere, otherwise the series will not
// appear on /metrics.
var TaskDuration = prometheus.NewHistogram(
	prometheus.HistogramOpts{
		Help: "Task duration histogram",
		Name: "automation_task_duration_seconds",
	},
)
func StartMetricsServer(addr string) {
http.Handle("/metrics", promhttp.Handler())
http.ListenAndServe(addr, nil)
}Demo Task
// ExampleTask shows a simple automation flow
func ExampleTask(ctx context.Context, page playwright.Page) error {
if err := page.SetViewportSize(1920, 1080); err != nil { return err }
if _, err := page.Goto("https://www.baidu.com", playwright.PageGotoOptions{WaitUntil: playwright.WaitUntilStateNetworkidle}); err != nil { return err }
time.Sleep(2 * time.Second)
if _, err := page.Screenshot(playwright.PageScreenshotOptions{Path: playwright.String("example.png")}); err != nil { return err }
title, err := page.Title()
if err != nil { return err }
log.Printf("Page title: %s", title)
return nil
}Running the Example
Initialize modules with `go mod tidy`, then start the service with `go run .`. Launch the Prometheus and Grafana containers with `docker-compose up -d`, and enqueue a task with `curl -X POST http://localhost:8080/enqueue`. Metrics are exposed at /metrics and can be visualized in Grafana.
Source Code
GitHub: https://github.com/louis-xie-programmer/playwright-go-crossbrowser-template
Gitee: https://gitee.com/louis_xie/playwright-go-crossbrowser-template
References
Playwright documentation: https://playwright.dev/
Medium article on robust browser pools: https://medium.com/@devcriston/building-a-robust-browser-pool-for-web-automation-with-playwright-2c750eb0a8e7
Architecture Diagram
Signed-in readers can open the original source through BestHub's protected redirect.
This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contact us and we will review it promptly.
Code Wrench
Focuses on code debugging, performance optimization, and real-world engineering, sharing efficient development tips and pitfall guides. We break down technical challenges in a down-to-earth style, helping you craft handy tools so every line of code becomes a problem‑solving weapon. 🔧💻
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
