ACK的pingpong模式,用于TCP两端的套接口为交互模式时,数据在两个方向交叉发送,所以pingpong模式可减少单独ACK报文的发送。
由于ACK的pingpong模式与quick模式互斥,应用层可通过setsockopt系统调用的TCP_QUICKACK选项来开启和关闭pingpong模式,即如果应用层开启了快速ACK模式,将清除pingpong模式,反之,如果关闭了快速ACK模式,将开启pingpong模式。需要注意的是,如果设置setsockopt中TCP_QUICKACK选项的值为非零偶数的话,如2,内核将关闭pingpong模式,并且在连接处于ESTABLISHED或CLOSE_WAIT状态且已有ACK待发送时,检查是否需要发送ACK报文(tcp_cleanup_rbuf),之后将重新开启pingpong模式。相当于quick模式仅开启一次。
/*
 * Abridged excerpt of do_tcp_setsockopt(): only the TCP_QUICKACK case is shown.
 * NOTE(review): `val` and `icsk` are used here but declared in elided code, and
 * the enclosing else/switch/function braces are not closed in this excerpt.
 */
static int do_tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen)
{
switch (optname) {
case TCP_QUICKACK:
/* val == 0: switch ACK pingpong mode on. */
if (!val) {
icsk->icsk_ack.pingpong = 1;
} else {
/* Non-zero val: switch ACK pingpong mode off. */
icsk->icsk_ack.pingpong = 0;
/* Only for ESTABLISHED / CLOSE_WAIT sockets that already have an ACK scheduled. */
if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && inet_csk_ack_scheduled(sk)) {
/* Flag the pending ACK as pushed, then call tcp_cleanup_rbuf() with copied = 1. */
icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
tcp_cleanup_rbuf(sk, 1);
/* Even val: pingpong mode is switched back on right after that single call. */
if (!(val & 1))
icsk->icsk_ack.pingpong = 1;
}
如果当前数据报文的发送时间与最近一次接收到对端数据的时间之差值,小于ACK超时时间ATO(Ack TimeOut),意味着本地可在如此短的时间内回复对端数据,不需要单独的ACK确认报文,开启ACK的pingpong模式。
/*
 * Accounting done after a packet has been sent:
 *  - raises CA_EVENT_TX_START when no packets were in flight,
 *  - records the send time in tp->lsndtime,
 *  - switches ACK pingpong mode on when this send happens less than
 *    icsk_ack.ato after the last receive time (icsk_ack.lrcvtime).
 */
static void tcp_event_data_sent(struct tcp_sock *tp, struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const u32 sent_at = tcp_jiffies32;
	u32 since_last_rcv;

	/* Statement order is kept: the event is raised before lsndtime is updated. */
	if (!tcp_packets_in_flight(tp))
		tcp_ca_event(sk, CA_EVENT_TX_START);

	tp->lsndtime = sent_at;

	/* Unsigned 32-bit difference, so the comparison tolerates counter wrap. */
	since_last_rcv = sent_at - icsk->icsk_ack.lrcvtime;
	if (since_last_rcv < icsk->icsk_ack.ato)
		icsk->icsk_ack.pingpong = 1;
}
在接收到对端发送的FIN报文之后,如果本端的TCP套接口状态处于TCP_SYN_RECV或者TCP_ESTABLISHED状态,开启ACK的pingpong模式,因为此时对端已经不再发送任何数据,对FIN的ACK虽已被调度(inet_csk_schedule_ack),但不需要立即单独发送,可以延迟并随本端后续发送的报文一起捎带。
/*
 * Abridged excerpt of tcp_fin(): handling of a received FIN.
 * NOTE(review): the function body continues past this excerpt (its closing
 * brace is not shown) and `tp` is not used in the visible part.
 */
void tcp_fin(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
/* Schedule an ACK, set RCV_SHUTDOWN in sk_shutdown and set the SOCK_DONE flag. */
inet_csk_schedule_ack(sk);
sk->sk_shutdown |= RCV_SHUTDOWN;
sock_set_flag(sk, SOCK_DONE);
switch (sk->sk_state) {
case TCP_SYN_RECV:
case TCP_ESTABLISHED:
tcp_set_state(sk, TCP_CLOSE_WAIT); /* Move to CLOSE_WAIT */
/* Switch ACK pingpong mode on for the socket. */
inet_csk(sk)->icsk_ack.pingpong = 1;
break;
}
前节已经说明ACK的quick模式与pingpong模式互斥,所以通过setsockopt开启quick模式时,将退出pingpong模式;或者在内核中开启quick模式时,pingpong模式将关闭,见函数tcp_enter_quickack_mode。quick模式的启动可参考:https://blog.csdn.net/sinat_20184565/article/details/90085616。在接收到重传报文、乱序报文等的情况下进入quick模式,以便快速回复对端ACK。
/*
 * Enter quick-ACK mode for @sk: calls tcp_incr_quickack(), then switches
 * ACK pingpong mode off and resets the ACK timeout (ato) to TCP_ATO_MIN.
 */
static void tcp_enter_quickack_mode(struct sock *sk)
{
	struct inet_connection_sock *conn = inet_csk(sk);

	tcp_incr_quickack(sk);

	/* Quick-ACK and pingpong are mutually exclusive; restart from the minimum ato. */
	conn->icsk_ack.ato = TCP_ATO_MIN;
	conn->icsk_ack.pingpong = 0;
}
在延迟ACK定时器的超时处理函数中,如果检查到套接口开启了pingpong模式,将执行关闭。可见在ACK超时之前,本地并没有发送任何数据到对端,表明套接口可能并非交互式应用。
/*
 * Delayed-ACK timer expiry handling (abridged excerpt).
 *
 * Clears ICSK_ACK_TIMER from the pending flags. If an ACK is still scheduled,
 * the ACK timeout is adjusted and the ACK is sent:
 *  - pingpong on:  leave pingpong mode and reset ato to TCP_ATO_MIN;
 *  - pingpong off: double ato, capped at icsk_rto.
 */
void tcp_delack_timer_handler(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;

	/* Nothing to acknowledge: done. */
	if (!inet_csk_ack_scheduled(sk))
		return;

	if (icsk->icsk_ack.pingpong) {
		/* Delayed ACK missed: leave pingpong mode and deflate ATO. */
		icsk->icsk_ack.pingpong = 0;
		icsk->icsk_ack.ato = TCP_ATO_MIN;
	} else {
		/* Delayed ACK missed: inflate ATO. */
		icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
	}

	tcp_mstamp_refresh(tcp_sk(sk));
	tcp_send_ack(sk);
}
在TCP三次握手过程中,当客户端套接口接收到服务端回复的SYN+ACK报文后,如果客户端套接口设置了ACK的pingpong模式,表明马上会有数据发送,将延后ACK的回复,等待和数据一起发送。
/*
 * Abridged excerpt of tcp_rcv_synsent_state_process(): shows only how the ACK
 * that answers a segment with the ACK bit set is either deferred or sent at once.
 * NOTE(review): heavily shortened — `tp` is unused, the `discard` label has no
 * visible goto, and no value is returned when th->ack is clear; the elided
 * code presumably covers these. Confirm against the full function.
 */
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
if (th->ack) {
/* Defer the ACK when a write is pending, defer-accept is set, or pingpong mode is on. */
if (sk->sk_write_pending || icsk->icsk_accept_queue.rskq_defer_accept || icsk->icsk_ack.pingpong) {
/*
 * Schedule the ACK, enter quick-ACK mode (tcp_enter_quickack_mode() in this
 * file clears pingpong and resets ato), and arm the ICSK_TIME_DACK timer
 * with TCP_DELACK_MAX.
 */
inet_csk_schedule_ack(sk);
tcp_enter_quickack_mode(sk);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX);
discard:
/* Drop the segment and return 0. */
tcp_drop(sk, skb);
return 0;
} else {
/* None of the deferral conditions hold: send the ACK right away. */
tcp_send_ack(sk);
}
return -1;
}
}
在应用层读取套接口的数据之后,如果ACK的pending状态为ICSK_ACK_PUSHED,并且pingpong模式处于关闭状态,才有可能执行一次ACK发送操作。在以上的setsockopt函数中,如果开启quick模式,内核调用函数tcp_cleanup_rbuf(sk, 1),传入的copied参数为1,所以只要套接口的接收缓存sk_rmem_alloc为空(读取完所有数据),内核将执行一次ACK发送。
/*
 * Abridged excerpt of tcp_cleanup_rbuf(): decides whether an ACK should be
 * sent now (time_to_ack).
 *
 * @sk:     socket being processed
 * @copied: compared against 0 below; the TCP_QUICKACK setsockopt path in this
 *          file passes 1.
 *
 * NOTE(review): the function continues past this excerpt (its closing brace
 * and the use of time_to_ack are not shown); `skb` is unused in the visible part.
 */
static void tcp_cleanup_rbuf(struct sock *sk, int copied)
{
struct tcp_sock *tp = tcp_sk(sk);
bool time_to_ack = false;
struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
/* Only consider sending when an ACK is already scheduled. */
if (inet_csk_ack_scheduled(sk)) {
const struct inet_connection_sock *icsk = inet_csk(sk);
/* Delayed ACKs frequently hit locked sockets during bulk receive. */
/*
 * ACK now if any of these holds:
 *  - icsk_ack.blocked is set;
 *  - rcv_nxt - rcv_wup exceeds icsk_ack.rcv_mss;
 *  - copied > 0, sk_rmem_alloc is zero, and either ICSK_ACK_PUSHED2 is
 *    pending or ICSK_ACK_PUSHED is pending with pingpong mode off.
 */
if (icsk->icsk_ack.blocked ||
tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||
/*
* If this read emptied read buffer, we send ACK, if connection is not bidirectional, user drained
* receive buffer and there was a small segment in queue.
*/
(copied > 0 &&
((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && !icsk->icsk_ack.pingpong)) &&
!atomic_read(&sk->sk_rmem_alloc)))
time_to_ack = true;
}
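在套接口进入延迟ACK模式时,如果ATO大于TCP_DELACK_MIN,ATO的时间上限默认为HZ/2(500毫秒);如果套接口的ACK处于pingpong模式(或者pending中设置了ICSK_ACK_PUSHED标志),则将ATO时间上限设置为TCP_DELACK_MAX(200毫秒),在此期限内尽可能等待本地发送数据,避免单独的ACK报文发送。如果已有RTT估计值,上限还会进一步限制为不超过该RTT(但不小于TCP_DELACK_MIN)。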
/*
 * Arm the delayed-ACK timer for @sk (abridged excerpt).
 *
 * Starts from icsk_ack.ato. When ato exceeds TCP_DELACK_MIN it is clamped to
 * max_ato, which is:
 *  - HZ / 2 by default,
 *  - TCP_DELACK_MAX when pingpong mode is on or ICSK_ACK_PUSHED is pending,
 *  - further lowered to the smoothed RTT (floored at TCP_DELACK_MIN) when an
 *    RTT estimate exists and is smaller.
 * The resulting expiry is stored in icsk_ack.timeout and the delack timer is
 * (re)armed with ICSK_ACK_SCHED | ICSK_ACK_TIMER set in the pending flags.
 *
 * NOTE(review): this excerpt appears shortened relative to the full kernel
 * function (no handling of an already-pending timer is shown) — confirm
 * against the upstream source before reusing.
 */
void tcp_send_delayed_ack(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	int ato = icsk->icsk_ack.ato;
	unsigned long timeout;	/* absolute expiry in jiffies; was used undeclared */

	if (ato > TCP_DELACK_MIN) {
		const struct tcp_sock *tp = tcp_sk(sk);
		int max_ato = HZ / 2;

		if (icsk->icsk_ack.pingpong || (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
			max_ato = TCP_DELACK_MAX;

		/* If an RTT estimate is known, use it to bound the delayed ACK. */
		if (tp->srtt_us) {
			/* srtt_us is shifted right by 3 before conversion to jiffies. */
			int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3), TCP_DELACK_MIN);

			if (rtt < max_ato)
				max_ato = rtt;
		}

		ato = min(ato, max_ato);
	}

	timeout = jiffies + ato;

	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
	icsk->icsk_ack.timeout = timeout;
	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}
内核版本 4.15