What Really Happens Inside the Linux Kernel During a TCP Three‑Way Handshake?
This article dives deep into the Linux kernel implementation of the TCP three‑way handshake, explaining the roles of listen, connect, SYN/ACK processing, queue management, timers, and accept, while providing concrete code snippets and diagrams to help backend engineers master the underlying mechanics.
1. Server listen
Before a server can accept connections it must call listen. The kernel allocates and initializes both the half‑connection (syn‑backlog) and full‑connection queues, calculates their lengths, and reserves memory for them.
int reqsk_queue_alloc(struct request_sock_queue *queue, unsigned int nr_table_entries) {
size_t lopt_size = sizeof(struct listen_sock);
struct listen_sock *lopt;
// calculate half‑connection queue length
nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
// allocate memory for half‑connection queue
lopt_size += nr_table_entries * sizeof(struct request_sock *);
if (lopt_size > PAGE_SIZE)
lopt = vzalloc(lopt_size);
else
lopt = kzalloc(lopt_size, GFP_KERNEL);
// initialise full‑connection queue head
queue->rskq_accept_head = NULL;
// set half‑connection queue parameters
lopt->nr_table_entries = nr_table_entries;
queue->listen_opt = lopt;
...
}
The half‑connection queue is stored as a hash table for fast lookup, while the full‑connection queue head is initially NULL.
2. Client connect
The client creates a socket and calls connect, which sets the socket state to TCP_SYN_SENT, selects a local port, builds a SYN packet and sends it, then starts a retransmission timer.
// file: net/ipv4/tcp_ipv4.c
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) {
// set socket state to SYN_SENT
tcp_set_state(sk, TCP_SYN_SENT);
// dynamically choose a port
err = inet_hash_connect(&tcp_death_row, sk);
// build and send SYN packet
err = tcp_connect(sk);
} // file: net/ipv4/tcp_output.c
int tcp_connect(struct sock *sk) {
tcp_connect_init(sk);
// allocate skb and construct SYN packet
...
// add to send queue
tcp_connect_queue_skb(sk, buff);
// actually transmit SYN
err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
// start retransmission timer (initial timeout 1 s in 3.10 kernel)
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
}
3. Server responds to SYN
Incoming packets are processed by tcp_v4_do_rcv. When the listening socket receives a SYN, it searches the half‑connection queue via tcp_v4_hnd_req. For a first SYN no matching request exists there yet, so the lookup falls through and the request proceeds to tcp_v4_conn_request.
// file: net/ipv4/tcp_ipv4.c
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) {
// drop if half‑connection queue is full and syncookies are disabled
if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
if (!want_cookie)
goto drop;
}
// drop if full‑connection queue is full and there are young ACKs
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
NET_INC_STATS_BH(sock_net(sk), LIN...);
goto drop;
}
// allocate request_sock object
req = inet_reqsk_alloc(&tcp_request_sock_ops);
// construct SYN+ACK packet
skb_synack = tcp_make_synack(sk, dst, req,
fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
if (likely(!do_fastopen)) {
// send SYN+ACK
err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
ireq->rmt_addr, ireq->opt);
// add to half‑connection queue and start timer
inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
} else {
...
}
return 0;
}
young_ack is a counter kept with the half‑connection queue: it counts requests whose SYN has arrived, whose SYN+ACK has not yet been retransmitted, and whose three‑way handshake has not yet completed.
If the half‑connection queue is full and syncookies are not enabled, the SYN is dropped. The SYN is also dropped when the full‑connection queue is full and more than one young request is pending.
4. Client processes SYN‑ACK
When the client receives the SYN‑ACK, tcp_rcv_state_process dispatches to the TCP_SYN_SENT case, invoking tcp_rcv_synsent_state_process.
// file: net/ipv4/tcp_input.c
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th, unsigned int len) {
...
tcp_ack(sk, skb, FLAG_SLOWPATH);
// finish connection
tcp_finish_connect(sk, skb);
if (sk->sk_write_pending || icsk->icsk_accept_queue.rskq_defer_accept ||
icsk->icsk_ack.pingpong)
// delayed ACK handling
...
else {
tcp_send_ack(sk);
}
} // file: net/ipv4/tcp_input.c
void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) {
// set socket state to ESTABLISHED
tcp_set_state(sk, TCP_ESTABLISHED);
// initialise congestion control
tcp_init_congestion_control(sk);
// start keep‑alive timer if needed
if (sock_flag(sk, SOCK_KEEPOPEN))
inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
} // file: net/ipv4/tcp_output.c
void tcp_send_ack(struct sock *sk) {
// allocate and build ACK packet
buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
...
// transmit ACK
tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
}
The client clears the retransmission timer, sets its state to ESTABLISHED, starts the keep‑alive timer if SOCK_KEEPOPEN is set on the socket, and sends the final ACK.
5. Server processes ACK
The ACK also arrives at tcp_v4_do_rcv on the listening socket. Because this is the third step of the handshake, the half‑connection queue already holds the request created when the SYN was received. The kernel looks up that matching request, creates a child socket, removes the request from the half‑connection queue, and adds it to the full‑connection queue.
// file: net/ipv4/tcp_minisocks.c
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req, struct request_sock **prev,
bool fastopen) {
// create child socket
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
// clean half‑connection queue
inet_csk_reqsk_queue_unlink(sk, req, prev);
inet_csk_reqsk_queue_removed(sk, req);
// add to full‑connection queue
inet_csk_reqsk_queue_add(sk, req, child);
return child;
}
Finally, the server sets the new socket's state to TCP_ESTABLISHED.
// file: net/ipv4/tcp_input.c
case TCP_SYN_RECV:
tcp_set_state(sk, TCP_ESTABLISHED);
...
6. Server accept
The accept system call retrieves the first socket from the full‑connection queue.
// file: net/ipv4/inet_connection_sock.c
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) {
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
req = reqsk_queue_remove(queue);
newsk = req->sk;
return newsk;
}
The removal simply returns the head of the queue and updates the head/tail pointers.
Summary of the handshake internals
Server listen calculates and allocates memory for full/half‑connection queues.
Client connect sets state to TCP_SYN_SENT, chooses a port, sends SYN and starts a retransmission timer.
Server receives SYN, checks queue limits, possibly drops the request, otherwise creates a request_sock, sends SYN‑ACK, adds the request to the half‑connection queue and starts a timer.
Client processes SYN‑ACK, clears the retransmission timer, moves to ESTABLISHED, starts the keep‑alive timer if keep‑alive is enabled on the socket, and sends the final ACK.
Server processes ACK, creates a child socket, removes the half‑connection entry, adds the request to the full‑connection queue, and sets the child socket's state to ESTABLISHED.
accept pulls the established socket from the full‑connection queue and returns it to the application.
If a packet is lost or is dropped because a queue is full, the sender retries: the client retransmits the SYN according to tcp_syn_retries and the server retransmits the SYN‑ACK according to tcp_synack_retries, with exponential back‑off (1 s, 2 s, 4 s … in kernel 3.10; older kernels used a 3 s initial timeout).
Signed-in readers can open the original source through BestHub's protected redirect.
This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contact us and we will review it promptly.
Liangxu Linux
Liangxu, a self‑taught IT professional now working as a Linux development engineer at a Fortune 500 multinational, shares extensive Linux knowledge—fundamentals, applications, tools, plus Git, databases, Raspberry Pi, etc. (Reply “Linux” to receive essential resources.)
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
