问题：

不遵守TCP保持活动参数

林承悦

2023-03-14

我正在自己的 Linux 机器上试验 TCP keep-alive，并编写了下面这个小型服务器：

#include <cerrno>
#include <cstring>
#include <iostream>

#include <arpa/inet.h>  // inet_ntop
#include <netdb.h>          // addrinfo stuff
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <unistd.h>

using namespace std;

typedef int SOCKET;

/*
 * Minimal TCP server for observing keep-alive behaviour.
 *
 * Listens on 10.6.186.24:58080, accepts a single connection, enables
 * SO_KEEPALIVE on it, prints the keep-alive parameters that the accepted
 * socket reports, and then reads (and discards) data until the peer closes
 * the connection or recv() reports an error.
 *
 * Returns 0 when the peer closes the connection cleanly, -1 on any failure.
 */
int main(int argc, char *argv [])
{
    struct sockaddr_in sockaddr_IPv4;
    memset(&sockaddr_IPv4, 0, sizeof(sockaddr_IPv4));
    sockaddr_IPv4.sin_family = AF_INET;
    sockaddr_IPv4.sin_port = htons(58080);

    if (inet_pton(AF_INET, "10.6.186.24", &sockaddr_IPv4.sin_addr) != 1)
        return -1;

    SOCKET serverSock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (serverSock == -1)
    {
        cout << "Failed to setup listening socket!\n";
        return -1;
    }

    // Stop here on failure: calling accept() on a socket that is not
    // listening can only fail as well.
    if (bind(serverSock, (sockaddr*)&sockaddr_IPv4, sizeof(sockaddr_IPv4)) != 0 ||
        listen(serverSock, SOMAXCONN) != 0)
    {
        cout << "Failed to setup listening socket!\n";
        close(serverSock);
        return -1;
    }

    SOCKET clientSock = accept(serverSock, 0, 0);
    if (clientSock == -1)
    {
        close(serverSock);
        return -1;
    }

    // Enable keep-alive on the client socket
    const int nVal = 1;
    if (setsockopt(clientSock, SOL_SOCKET, SO_KEEPALIVE, &nVal, sizeof(nVal)) < 0)
    {
        cout << "Failed to set keep-alive!\n";
        close(clientSock);
        close(serverSock);
        return -1;
    }

    // Get the keep-alive options that will be used on the client socket
    int nProbes = 0, nTime = 0, nInterval = 0;
    socklen_t nOptLen = sizeof(int);
    bool bError = false;

    if (getsockopt(clientSock, IPPROTO_TCP, TCP_KEEPIDLE, &nTime, &nOptLen) < 0) { bError = true; }

    // getsockopt() overwrites the length argument, so reset it before each call.
    nOptLen = sizeof(int);
    if (getsockopt(clientSock, IPPROTO_TCP, TCP_KEEPCNT, &nProbes, &nOptLen) < 0) { bError = true; }

    nOptLen = sizeof(int);
    if (getsockopt(clientSock, IPPROTO_TCP, TCP_KEEPINTVL, &nInterval, &nOptLen) < 0) { bError = true; }

    // Check for failure before printing, so that values which were never
    // retrieved are not reported as if they were real settings.
    if (bError)
    {
        cout << "Failed to get keep-alive options!\n";
        close(clientSock);
        close(serverSock);
        return -1;
    }

    cout << "Keep alive settings are: time: " << nTime << ", interval: " << nInterval << ", number of probes: " << nProbes << "\n";

    // Drain the connection. recv() returns 0 on orderly shutdown and -1 on
    // error; the loop must end in both cases, otherwise an error return
    // would make it spin forever.
    int nResult = 0;
    char buf[128];
    for (;;)
    {
        ssize_t nRead = recv(clientSock, buf, sizeof(buf), 0);
        if (nRead > 0)
            continue;           // data received and discarded
        if (nRead == 0)
            break;              // peer closed the connection
        if (errno == EINTR)
            continue;           // interrupted by a signal, retry

        cout << "recv failed: " << strerror(errno) << "\n";
        nResult = -1;
        break;
    }

    close(clientSock);
    close(serverSock);
    return nResult;
}

然后我将系统范围的TCP保持活动状态设置调整为：

# cat /proc/sys/net/ipv4/tcp_keepalive_time
20
# cat /proc/sys/net/ipv4/tcp_keepalive_intvl
30

然后我从Windows连接到服务器，并运行Wireshark跟踪以查看保持活动的数据包。下图显示了结果。

# cat /proc/sys/net/ipv4/tcp_keepalive_time
30
# cat /proc/sys/net/ipv4/tcp_keepalive_intvl
20

公良凯

2023-03-14

粗略地说，它的工作方式是每 tcp_keepalive_time 秒发送一条 keepalive 消息。如果没有收到 ACK，则每隔 tcp_keepalive_intvl 秒再探测一次。如果在 tcp_keepalive_probes 次探测之后仍未收到 ACK，连接将被中止。因此，连接在没有响应的情况下最多经过

    tcp_keepalive_time + tcp_keepalive_probes * tcp_keepalive_intvl

秒后就会被中止。请参阅内核文档。

我们可以使用 netcat-keepalive 轻松地观察这一过程。它是 netcat 的一个版本，允许我们设置 TCP keepalive 参数（sysctl 中的 keepalive 参数只是默认值，可以在 tcp_sock 结构中按套接字覆盖）。

    $ ./nckl-linux -K -O 5 -I 1 -P 4 -l 8888 >/dev/null &

    $ sudo iptables -A OUTPUT -p tcp --dport 8888 \
    >   --tcp-flags SYN,ACK,RST,FIN ACK \
    >   -m statistic --mode random --probability 0.5 \
    >   -j DROP

现在让我们用普通的 netcat（它将使用 sysctl 的 keepalive 值）连接并观察：

    $ nc localhost 8888

以下是捕获：

如您所见，它在接收ack后等待5秒，然后再发送另一条keepalive消息。如果它在1秒内没有收到ACK，它将发送另一个探测，如果它在4个探测后没有收到ACK，它将中止连接。这正是keepalive的工作原理。

 if (sock_flag(sk, SOCK_KEEPOPEN))
        inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));

接下来，让我们看看定时器在tcp_keepalive_timer中是如何处理的：

  elapsed = keepalive_time_elapsed(tp);

  if (elapsed >= keepalive_time_when(tp)) {
          /* If the TCP_USER_TIMEOUT option is enabled, use that
           * to determine when to timeout instead.
           */
          if ((icsk->icsk_user_timeout != 0 &&
              elapsed >= icsk->icsk_user_timeout &&
              icsk->icsk_probes_out > 0) ||
              (icsk->icsk_user_timeout == 0 &&
              icsk->icsk_probes_out >= keepalive_probes(tp))) {
                  tcp_send_active_reset(sk, GFP_ATOMIC);
                  tcp_write_err(sk);
                  goto out;
          }
          if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
                  icsk->icsk_probes_out++;
                  elapsed = keepalive_intvl_when(tp);
          } else {
                  /* If keepalive was lost due to local congestion,
                   * try harder.
                   */
                  elapsed = TCP_RESOURCE_PROBE_INTERVAL;
          }
  } else {
          /* It is tp->rcv_tstamp + keepalive_time_when(tp) */
          elapsed = keepalive_time_when(tp) - elapsed;
  }

  sk_mem_reclaim(sk);

resched:
  inet_csk_reset_keepalive_timer (sk, elapsed);
  goto out;

当 keepalive_time_when 大于 keepalive_intvl_when 时，此代码按预期工作。然而，当情况相反时，就会出现你所观察到的行为。

当初始计时器（在 TCP 连接建立时设置）在 1 秒后到期时，我们会不断重设计时器，直到 elapsed 大于或等于 keepalive_time_when。此时，我们将发送一个探测，并将计时器设置为 keepalive_intvl_when，即 5 秒。当这个计时器到期时，如果在最近 1 秒（keepalive_time_when）内没有收到任何数据，我们将再发送一个探测，然后再次将计时器设置为 keepalive_intvl_when，5 秒后再次唤醒，依此类推。

不遵守TCP保持活动参数

共有1个答案

相关问答

相关文章

相关阅读

相关工具

相关文档