Operations 28 min read

Why TCP’s Three‑Way Handshake Matters: Deep Dive into States, Tuning, and Real‑World Pitfalls

This article explains the TCP three‑way handshake in depth, covering the state machine, kernel‑level packet analysis, performance tuning, security hardening, real‑world case studies such as SYN‑Flood mitigation and TIME_WAIT overload, and provides complete C and Python examples, monitoring metrics, troubleshooting steps, and backup procedures for production environments.

Raymond Ops
Raymond Ops
Raymond Ops
Why TCP’s Three‑Way Handshake Matters: Deep Dive into States, Tuning, and Real‑World Pitfalls

TCP Three‑Way Handshake Overview

The TCP three‑way handshake establishes a reliable connection by exchanging three packets:

Client (CLOSED)                Server (LISTEN)
    |                               |
    | SYN, seq=x                    |
    |------------------------------>|
    |                               | (SYN_RCVD)
    |               SYN+ACK, seq=y, ack=x+1 |
    |<------------------------------|
    | ACK, seq=x+1, ack=y+1          |
    |------------------------------>|

Key fields:

SYN: SYN=1, ACK=0, seq=ISN_client

SYN+ACK: SYN=1, ACK=1, seq=ISN_server, ack=ISN_client+1

ACK: SYN=0, ACK=1, seq=ISN_client+1, ack=ISN_server+1

State Machine

Typical state transitions for a client and a server are:

Client: CLOSED → SYN_SENT → ESTABLISHED
Server: LISTEN → SYN_RCVD → ESTABLISHED

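After data exchange, the side that closes first (the active closer) moves through FIN_WAIT_1 → FIN_WAIT_2 → TIME_WAIT → CLOSED, while its peer moves through CLOSE_WAIT → LAST_ACK → CLOSED. The TIME_WAIT state lasts for 2 MSL (maximum segment lifetime) so that the final ACK can be retransmitted if it is lost and so that delayed segments from the old connection expire before the same address/port pair is reused.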

Kernel Parameters for Handshake Tuning

# /etc/sysctl.d/99-tcp-tuning.conf
# Comments are kept on their own lines: sysctl.conf(5) only documents
# lines that start with "#" or ";" as comments.

# half‑open (SYN) queue size
net.ipv4.tcp_max_syn_backlog = 8192
# upper bound for the listen() backlog
net.core.somaxconn = 4096
# client SYN retries
net.ipv4.tcp_syn_retries = 3
# server SYN+ACK retries
net.ipv4.tcp_synack_retries = 2
# enable SYN‑Cookies (DDoS protection)
net.ipv4.tcp_syncookies = 1
# allow reuse of TIME_WAIT sockets for outgoing (client side) connections
net.ipv4.tcp_tw_reuse = 1
# tcp_tw_recycle was removed in Linux 4.12; on newer kernels the key does not
# exist and loading it makes sysctl report an error. Uncomment only on older
# kernels, and keep it disabled there.
# net.ipv4.tcp_tw_recycle = 0
# FIN_WAIT_2 timeout (seconds)
net.ipv4.tcp_fin_timeout = 30
# enable TCP Fast Open for both client (1) and server (2) roles
net.ipv4.tcp_fastopen = 3

Apply the settings with:

sudo sysctl -p /etc/sysctl.d/99-tcp-tuning.conf

C Example: Simple TCP Server

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <errno.h>

#define PORT 8080
#define BACKLOG 128

/*
 * Minimal blocking TCP server.
 *
 * Listens on PORT with a backlog of BACKLOG, accepts one connection at a
 * time, prints whatever the client sent in its first read, answers with
 * "ACK\n" and closes the connection.
 *
 * Exits with EXIT_FAILURE if the listening socket cannot be created, bound
 * or put into the listening state. Per-connection errors are reported and
 * the server keeps accepting.
 */
int main(void) {
    int listen_fd = socket(AF_INET, SOCK_STREAM, 0);
    if (listen_fd < 0) { perror("socket failed"); exit(EXIT_FAILURE); }

    /* Socket options are tuning only: report a failure but keep going. */
    int reuse = 1;
    if (setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) < 0) {
        perror("setsockopt(SO_REUSEADDR) failed");
    }
#ifdef TCP_DEFER_ACCEPT
    int defer = 5; // TCP_DEFER_ACCEPT 5 s
    if (setsockopt(listen_fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &defer, sizeof(defer)) < 0) {
        perror("setsockopt(TCP_DEFER_ACCEPT) failed");
    }
#endif

    struct sockaddr_in server_addr = {0};
    server_addr.sin_family = AF_INET;
    server_addr.sin_addr.s_addr = htonl(INADDR_ANY);
    server_addr.sin_port = htons(PORT);

    /* Without these checks a busy port would only show up later as a confusing accept() error. */
    if (bind(listen_fd, (struct sockaddr *)&server_addr, sizeof(server_addr)) < 0) {
        perror("bind failed");
        close(listen_fd);
        exit(EXIT_FAILURE);
    }
    if (listen(listen_fd, BACKLOG) < 0) {
        perror("listen failed");
        close(listen_fd);
        exit(EXIT_FAILURE);
    }
    printf("Server listening on port %d (backlog=%d)\n", PORT, BACKLOG);

    while (1) {
        struct sockaddr_in client_addr;
        socklen_t client_len = sizeof(client_addr);
        int conn_fd = accept(listen_fd, (struct sockaddr *)&client_addr, &client_len);
        if (conn_fd < 0) { perror("accept failed"); continue; }
        printf("Connection from %s:%d\n", inet_ntoa(client_addr.sin_addr), ntohs(client_addr.sin_port));

        char buffer[1024];
        ssize_t n = read(conn_fd, buffer, sizeof(buffer) - 1);
        if (n < 0) {
            perror("read failed");
        } else if (n > 0) {
            buffer[n] = '\0';
            printf("Received: %s\n", buffer);
            /* MSG_NOSIGNAL: a peer that already closed must not kill the server with SIGPIPE. */
            static const char reply[] = "ACK\n";
            if (send(conn_fd, reply, sizeof(reply) - 1, MSG_NOSIGNAL) < 0) {
                perror("send failed");
            }
        }
        close(conn_fd);
    }

    /* Not reached: the accept loop above never terminates. */
    close(listen_fd);
    return 0;
}

C Example: Simple TCP Client with Timing

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <errno.h>
#include <sys/time.h>

#define SERVER_IP "127.0.0.1"
#define SERVER_PORT 8080

/*
 * Minimal TCP client that times the handshake.
 *
 * Connects to SERVER_IP:SERVER_PORT, prints how long connect() took in
 * milliseconds, sends one greeting and prints the server's reply.
 *
 * Returns 0 on success and EXIT_FAILURE when the address is invalid or a
 * socket call fails.
 */
int main(void) {
    int sock_fd = socket(AF_INET, SOCK_STREAM, 0);
    if (sock_fd < 0) { perror("socket failed"); exit(EXIT_FAILURE); }

    /* Both options are latency tweaks; failing to set them is reported but not fatal. */
    int nodelay = 1;
    if (setsockopt(sock_fd, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay)) < 0) {
        perror("setsockopt(TCP_NODELAY) failed");
    }
#ifdef TCP_QUICKACK
    int quickack = 1;
    if (setsockopt(sock_fd, IPPROTO_TCP, TCP_QUICKACK, &quickack, sizeof(quickack)) < 0) {
        perror("setsockopt(TCP_QUICKACK) failed");
    }
#endif

    struct sockaddr_in server_addr = {0};
    server_addr.sin_family = AF_INET;
    server_addr.sin_port = htons(SERVER_PORT);
    /* inet_pton returns 1 only when the text was a valid IPv4 address. */
    if (inet_pton(AF_INET, SERVER_IP, &server_addr.sin_addr) != 1) {
        fprintf(stderr, "invalid server address: %s\n", SERVER_IP);
        close(sock_fd);
        exit(EXIT_FAILURE);
    }

    /* A blocking connect() returns once the handshake has completed, so this interval is the handshake time. */
    struct timeval start, end;
    gettimeofday(&start, NULL);
    if (connect(sock_fd, (struct sockaddr *)&server_addr, sizeof(server_addr)) < 0) {
        perror("connect failed");
        close(sock_fd);
        exit(EXIT_FAILURE);
    }
    gettimeofday(&end, NULL);
    double elapsed = (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0;
    printf("Connection established in %.2f ms\n", elapsed);

    int rc = 0;
    const char *msg = "Hello, TCP!";
    if (write(sock_fd, msg, strlen(msg)) < 0) {
        perror("write failed");
        rc = EXIT_FAILURE;
    } else {
        char buffer[1024];
        ssize_t n = read(sock_fd, buffer, sizeof(buffer) - 1);
        if (n < 0) {
            perror("read failed");
            rc = EXIT_FAILURE;
        } else if (n > 0) {
            buffer[n] = '\0';
            printf("Server response: %s\n", buffer);
        }
    }

    close(sock_fd);
    return rc;
}

Python TCP Fast Open Example

import socket

# Server side (requires Linux kernel >= 3.7)
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server.setsockopt(socket.IPPROTO_TCP, socket.TCP_FASTOPEN, 5)  # queue length for pending TFO requests
server.bind(('0.0.0.0', 8080))
server.listen(128)

# Client side – send data together with SYN using MSG_FASTOPEN.
# sendto() on an unconnected TCP socket performs the connect and the first
# send in one call; HTTP header lines must end with CRLF.
client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
    client.sendto(b'GET / HTTP/1.1\r\nHost: example.com\r\n\r\n',
                  socket.MSG_FASTOPEN,
                  ('127.0.0.1', 8080))
finally:
    # Release both sockets even if the send fails.
    client.close()
    server.close()

SYN Flood Mitigation

Typical production steps:

Enable SYN Cookies:

sudo sysctl -w net.ipv4.tcp_syncookies=1

Enlarge the half‑open queue:

sudo sysctl -w net.ipv4.tcp_max_syn_backlog=16384

Reduce SYN+ACK retries to free resources quickly:

sudo sysctl -w net.ipv4.tcp_synack_retries=1

Rate‑limit new SYN packets with iptables:

# Accept new connection attempts (SYN) up to 10 per second, with a burst of 20.
# NOTE(review): `-m limit` keeps one shared bucket for this rule, so the 10/s
# budget applies to all sources combined — confirm it fits the host's normal
# rate of new connections before using it in production.
sudo iptables -A INPUT -p tcp --syn -m limit --limit 10/s --limit-burst 20 -j ACCEPT
# Any SYN that was not accepted by the rule above is dropped here.
sudo iptables -A INPUT -p tcp --syn -j DROP

TIME_WAIT Exhaustion Handling

High‑frequency short connections can fill the TIME_WAIT table, causing "Cannot assign requested address" errors. Mitigation strategies:

Enable reuse on the client side: sudo sysctl -w net.ipv4.tcp_tw_reuse=1

Make sure timestamps are enabled (required for reuse): sudo sysctl -w net.ipv4.tcp_timestamps=1

Reduce FIN_WAIT_2 timeout: sudo sysctl -w net.ipv4.tcp_fin_timeout=15

Use connection pooling at the application layer (e.g., requests.adapters.HTTPAdapter in Python) to keep long‑lived sockets.

Performance Monitoring (Prometheus / Node Exporter)

# Example queries
node_netstat_Tcp_CurrEstab                # ESTABLISHED connections
node_netstat_Tcp_AttemptFails             # connection attempts that failed
rate(node_netstat_Tcp_AttemptFails[5m]) / rate(node_netstat_Tcp_ActiveOpens[5m]) > 0.01   # failure‑rate alert
node_netstat_TcpExt_TCPSynRetrans        # SYN retransmissions
node_netstat_TcpExt_ListenOverflows       # listen queue overflows
# CurrEstab has no "state" label; the TIME_WAIT count comes from the sockstat collector
node_sockstat_TCP_tw > 30000              # TIME_WAIT overload

Troubleshooting Workflow

Measure handshake latency:

time nc -zv 192.168.1.100 80

Capture packets with tcpdump and verify SYN/SYN+ACK/ACK sequence:

# Filter on the peer and port rather than on the SYN flag: the third handshake
# packet carries only ACK, so a SYN-only filter would never capture it.
sudo tcpdump -i any -nn 'host 192.168.1.100 and tcp port 80' -w handshake.pcap

Check kernel logs for TCP errors:

sudo dmesg | grep -i tcp
sudo journalctl -k | grep -i tcp

Inspect socket states with ss:

ss -tanp state close-wait   # detect lingering CLOSE_WAIT sockets
ss -Htan state time-wait | wc -l   # count TIME_WAIT sockets (-H drops the header line so it is not counted)

Use strace to trace system calls of the server or client if needed.

Backup and Restore Script

#!/bin/bash
# backup_tcp_config.sh
# Saves the current TCP-related sysctl values, the iptables rule set and the
# nginx configuration into dated files under BACKUP_DIR, then removes backup
# files older than 30 days.
BACKUP_DIR="/data/backups/tcp_config"
DATE=$(date +%Y%m%d)
mkdir -p "$BACKUP_DIR"
# Backup sysctl TCP parameters (the dot is escaped so it matches only a literal ".")
sysctl -a | grep -E 'tcp|net\.core' > "$BACKUP_DIR/sysctl_$DATE.conf"
# Backup iptables rules
sudo iptables-save > "$BACKUP_DIR/iptables_$DATE.rules"
# Backup application config (example: nginx)
tar -czf "$BACKUP_DIR/nginx_$DATE.tar.gz" /etc/nginx/
# Cleanup old backups (keep 30 days) – cover every file type written above,
# otherwise the .rules and .tar.gz files would accumulate forever
find "$BACKUP_DIR" -type f \( -name "sysctl_*.conf" -o -name "iptables_*.rules" -o -name "nginx_*.tar.gz" \) -mtime +30 -delete

Restore steps:

Reload sysctl parameters:

sudo sysctl -p /data/backups/tcp_config/sysctl_20240115.conf

Restore firewall rules:

sudo iptables-restore < /data/backups/tcp_config/iptables_20240115.rules

Verify connectivity:

nc -zv localhost 80
ab -n 10000 -c 100 http://localhost/

Best‑Practice Checklist

Size tcp_max_syn_backlog and somaxconn according to expected concurrent connections.

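Keep tcp_syncookies enabled (value 1): cookies are only sent once the SYN queue overflows, so connections established under normal load keep their negotiated TCP options; only connections accepted through a cookie during an attack may lose some of them.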

Turn on tcp_fastopen for latency‑sensitive services.

Prefer long‑lived connections or connection pools to reduce handshake overhead.

Monitor the key metrics listed above and set alerts for failure‑rate, SYN retransmissions, listen‑queue overflow, and TIME_WAIT count.

Tags: Performance, TCP, Linux, Networking
Raymond Ops
Written by

Raymond Ops

Linux ops automation, cloud-native, Kubernetes, SRE, DevOps, Python, Golang and related tech discussions.

0 followers
Reader feedback

How this landed with the community

Sign in to like

Rate this article

Was this worth your time?

Sign in to rate
Discussion

0 Comments

Thoughtful readers leave field notes, pushback, and hard-won operational detail here.