默认情况下sysctl_tcp_recovery的值为1(TCP_RACK_LOSS_DETECTION),也可通过PROC文件:/proc/sys/net/ipv4/tcp_recovery进行修改,如果设置了标志位TCP_RACK_NO_DUPTHRESH,表明RACK不使用重复ACK阈值(DupAck Threshold),默认未设置此标志。
/* Bit flags carried in the tcp_recovery sysctl (sysctl_tcp_recovery).
 * They are tested with a bitwise AND, so several may be set at once.
 */
#define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */
#define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */
#define TCP_RACK_NO_DUPTHRESH 0x4 /* Do not use DUPACK threshold in RACK */
$ cat /proc/sys/net/ipv4/tcp_recovery
1
如下tcp_rack_reo_wnd函数,如果当前套接口没有观察到乱序发生,并且当前处于拥塞恢复状态(TCP_CA_Recovery)或者拥塞丢失状态(TCP_CA_Loss),返回值为零的乱序窗口,以便尽快地确认丢失报文,触发重传。反之,如果拥塞状态不处于以上两种状态,但同样未观察到乱序,RACK未设置TCP_RACK_NO_DUPTHRESH标志(即仍然使用DUPTHRESH),并且SACK确认的报文数量达到或超过了乱序等级(sacked_out >= reordering),很有可能发生了丢包,接下来可能会进入TCP_CA_Recovery或者TCP_CA_Loss状态,也返回值为零的乱序窗口,以便RACK作出快速响应。
否则,函数tcp_rack_reo_wnd返回正常的乱序窗口:最小RTT的四分之一(min_rtt/4)乘以reo_wnd_steps,并且不超过平滑RTT(srtt_us >> 3)。
/* Compute the reordering window RACK waits before declaring a packet lost.
 *
 * Returns 0 when no reordering has been seen on this socket (tp->reord_seen
 * is clear) and either
 *   - the congestion-avoidance state is TCP_CA_Recovery or higher, or
 *   - sacked_out has reached tp->reordering and TCP_RACK_NO_DUPTHRESH is
 *     not set in sysctl_tcp_recovery.
 * Otherwise returns (min_rtt / 4) * rack.reo_wnd_steps, capped at
 * tp->srtt_us >> 3.
 *
 * NOTE: abridged excerpt - the closing brace of the function is not shown.
 */
static u32 tcp_rack_reo_wnd(const struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
if (!tp->reord_seen) {
/* If reordering has not been observed, be aggressive during
* the recovery or starting the recovery by DUPACK threshold.
*/
if (inet_csk(sk)->icsk_ca_state >= TCP_CA_Recovery)
return 0;
/* DUPACK-threshold shortcut: only taken while the NO_DUPTHRESH bit is clear. */
if (tp->sacked_out >= tp->reordering &&
!(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH))
return 0;
}
/* To be more reordering resilient, allow min_rtt/4 settling delay.
* Use min_rtt instead of the smoothed RTT because reordering is
* often a path property and less related to queuing or delayed ACKs.
* Upon receiving DSACKs, linearly increase the window up to the
* smoothed RTT.
*/
return min((tcp_min_rtt(tp) >> 2) * tp->rack.reo_wnd_steps, tp->srtt_us >> 3);
如下丢失报文检查函数,如果当前报文的发送时间戳加上最近测量的RTT和乱序窗口时长,不大于当前TCP时间(即剩余时间remaining <= 0),即认为此报文已经丢失。当以上函数tcp_rack_reo_wnd返回的乱序窗口时长为0时,报文更容易被认定为丢失。
/* Walk tp->tsorted_sent_queue and mark as lost every skb whose remaining
 * time, as computed by tcp_rack_skb_timeout() with the current reordering
 * window, is zero or negative.
 *
 * @sk:          socket being processed
 * @reo_timeout: out parameter; cleared to 0 on entry. The code that may
 *               set it to a non-zero value is elided from this excerpt.
 *
 * NOTE: abridged excerpt - local declarations (tp, skb, n, reo_wnd,
 * remaining), part of the loop body and the closing braces are not shown.
 */
static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
{
*reo_timeout = 0;
/* A window of 0 makes the check below depend on the recent RTT only. */
reo_wnd = tcp_rack_reo_wnd(sk);
/* The _safe iterator is needed because entries are unlinked inside the loop. */
list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue, tcp_tsorted_anchor) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
...
/* A packet is lost if it has not been s/acked beyond
* the recent RTT plus the reordering window.
*/
remaining = tcp_rack_skb_timeout(tp, skb, reo_wnd);
if (remaining <= 0) {
tcp_mark_skb_lost(sk, skb);
/* Unlink the skb from the time-sorted queue once it is marked lost. */
list_del_init(&skb->tcp_tsorted_anchor);
如下tcp_rack_mark_lost函数,根据以上tcp_rack_detect_loss函数返回的timeout超时时间值,设置ICSK_TIME_REO_TIMEOUT定时器。
/* Run RACK loss detection and, when it reports a non-zero timeout, arm the
 * ICSK_TIME_REO_TIMEOUT timer for it.
 *
 * NOTE: abridged excerpt - the start of the body (including the declaration
 * of timeout) and the closing braces are not shown.
 */
void tcp_rack_mark_lost(struct sock *sk)
{
...
tcp_rack_detect_loss(sk, &timeout);
if (timeout) {
/* Convert from microseconds to jiffies and pad by TCP_TIMEOUT_MIN. */
timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
timeout, inet_csk(sk)->icsk_rto);
如下定时器到期处理函数,如果tcp_rack_detect_loss函数标记了新的丢失报文(在途报文数量发生了变化),并且当前不处于TCP_CA_Recovery状态,则先进入该状态;随后函数tcp_xmit_retransmit_queue将立即进行重传。
/* Reordering-timer expiry handler: re-run RACK loss detection and, if the
 * number of packets in flight changed as a result, enter Recovery (unless
 * already there) and retransmit.
 *
 * NOTE: abridged excerpt - the closing braces of the outer if and of the
 * function are not shown.
 */
void tcp_rack_reo_timeout(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 timeout, prior_inflight;
/* Snapshot the in-flight count so a change after detection can be noticed. */
prior_inflight = tcp_packets_in_flight(tp);
tcp_rack_detect_loss(sk, &timeout);
if (prior_inflight != tcp_packets_in_flight(tp)) {
if (inet_csk(sk)->icsk_ca_state != TCP_CA_Recovery) {
tcp_enter_recovery(sk, false);
/* Reduce cwnd here only when the CA module has no cong_control hook. */
if (!inet_csk(sk)->icsk_ca_ops->cong_control)
tcp_cwnd_reduction(sk, 1, 0);
}
tcp_xmit_retransmit_queue(sk);
除了以上的乱序超时处理函数之外,在函数tcp_identify_packet_loss中,也调用tcp_rack_mark_lost标记丢失报文。
/* Identify lost packets while processing an ACK.
 *
 * Does nothing when the retransmit queue is empty. On the RACK path it
 * calls tcp_rack_mark_lost() and sets FLAG_LOST_RETRANS in *ack_flag if
 * tp->retrans_out decreased across that call.
 *
 * @sk:       socket being processed
 * @ack_flag: in/out ACK flag word; only ever OR-ed into in the code shown
 *
 * NOTE: abridged excerpt - the Reno branch body and the closing braces are
 * not shown.
 */
static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
{
struct tcp_sock *tp = tcp_sk(sk);
if (tcp_rtx_queue_empty(sk))
return;
if (unlikely(tcp_is_reno(tp))) {
...
} else if (tcp_is_rack(sk)) {
/* Remember retrans_out so a drop caused by loss marking can be detected. */
u32 prior_retrans = tp->retrans_out;
tcp_rack_mark_lost(sk);
if (prior_retrans > tp->retrans_out)
*ack_flag |= FLAG_LOST_RETRANS;
以上函数的调用位于ACK处理函数tcp_fastretrans_alert中,对应于最开始的函数tcp_rack_reo_wnd,如果套接口位于TCP_CA_Recovery或者TCP_CA_Loss状态,标记丢失报文尽快重传。否则套接口处于其他的拥塞状态,tcp_time_to_recover函数将尽快判断进入TCP_CA_Recovery状态。
/* ACK-time state machine step, dispatched on icsk->icsk_ca_state.
 *
 * In every branch shown, tcp_identify_packet_loss() is called. In the
 * default branch, tcp_time_to_recover() then decides whether to enter
 * Recovery; if not, tcp_try_to_open() is called and the function returns.
 * On the path that reaches the end, *rexmit is set to REXMIT_LOST.
 *
 * NOTE: heavily abridged excerpt - icsk, flag and fast_rexmit are declared
 * in elided code (presumably flag is a local copy of *ack_flag - confirm
 * against the full source), and the function's closing brace is not shown.
 */
static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
int num_dupack, int *ack_flag, int *rexmit)
{
switch (icsk->icsk_ca_state) {
case TCP_CA_Recovery:
...
tcp_identify_packet_loss(sk, ack_flag);
break;
case TCP_CA_Loss:
tcp_process_loss(sk, flag, num_dupack, rexmit);
tcp_identify_packet_loss(sk, ack_flag);
...
/* fall through */
default:
...
tcp_identify_packet_loss(sk, ack_flag);
/* Not yet time to recover: take the tcp_try_to_open() path and stop here. */
if (!tcp_time_to_recover(sk, flag)) {
tcp_try_to_open(sk, flag);
return;
}
...
/* Otherwise enter Recovery state */
tcp_enter_recovery(sk, (flag & FLAG_ECE));
fast_rexmit = 1;
}
...
*rexmit = REXMIT_LOST;
如果在tcp_rack_detect_loss函数中标记了丢失报文,lost_out不为零,tcp_time_to_recover函数返回true,随后进入TCP_CA_Recovery拥塞状态,重传丢失报文(REXMIT_LOST)。
/* Decide whether the connection should enter Recovery now.
 *
 * Returns true as soon as tp->lost_out is non-zero, i.e. at least one
 * packet has already been marked lost.
 *
 * NOTE: abridged excerpt - only the first check is shown; the rest of the
 * function, including its final return, is elided.
 */
static bool tcp_time_to_recover(struct sock *sk, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
/* Trick#1: The loss is proven. */
if (tp->lost_out)
return true;
内核版本 5.0