当前位置: 首页 > 工具软件 > ulogd > 使用案例 >

Linux内核netfilter子系统ulogd项目性能调优记录

傅乐湛
2023-12-01
  1. 使用ULOGD打SYSLOG到SYSLOG-NG,当NFLOG拿到的数据包在6K左右时,CPU有两个核心占了15%左右

  2. 使用修改过的ULOGD直接打TCP,CPU只有一个核心占15%左右(修改版在https://github.com/InsZVA/ulogd-with-syslog-tcp)

  3. 使用perf top查看,

   8.89%  libc-2.12.so                  [.] vfprintf                                                                                                                                         
   3.49%  libc-2.12.so                  [.] _IO_default_xsputn                                                                                                                               
   2.10%  [kernel]                      [k] find_busiest_group                                                                                                                               
   1.94%  libc-2.12.so                  [.] _IO_vfscanf                                                                                                                                      
   1.70%  libc-2.12.so                  [.] __strchrnul                                                                                                                                      
   1.66%  libpython2.7.so.1.0           [.] PyEval_EvalFrameEx                                                                                                                               
   1.64%  libc-2.12.so                  [.] _itoa_word                                                                                                                                       
   1.45%  [kernel]                      [k] schedule                                                                                                                                         
   1.13%  [kernel]                      [k] ixgbe_clean_rx_irq                                                                                                                               
   1.08%  ulogd                         [.] ulogd_propagate_results                                                                                                                          
   1.00%  [kernel]                      [k] _spin_lock                                                                                                                                       
   0.98%  [kernel]                      [k] ipt_do_table                                                                                                                                     
   0.97%  [kernel]                      [k] net_rx_action                                                                                                                                    
   0.83%  [kernel]                      [k] memcpy                                                                                                                                           
   0.76%  [kernel]                      [k] _spin_unlock_irqrestore                                                                                                                          
   0.72%  [kernel]                      [k] menu_select                                                                                                                                      
   0.71%  libc-2.12.so                  [.] _IO_str_init_static_internal                                                                                                                     
   0.70%  [kernel]                      [k] irq_entries_start                                                                                                                                
   0.65%  libc-2.12.so                  [.] _IO_vsprintf                                                                                                                                     
   0.59%  [kernel]                      [k] hrtimer_interrupt                                                                                                                                
   0.57%  [kernel]                      [k] __hrtimer_start_range_ns                                                                                                                         
   0.55%  [kernel]                      [k] ixgbe_poll                                                                                                                                       
   0.54%  [kernel]                      [k] rb_erase                                                                                                                                         
   0.54%  libc-2.12.so                  [.] _IO_old_init                                                                                                                                     
   0.53%  [kernel]                      [k] find_next_bit                                                                                                                                    
   0.53%  [kernel]                      [k] getnstimeofday                                                                                                                                   
   0.49%  [kernel]                      [k] _spin_lock_irqsave                                                                                                                               
   0.47%  [kernel]                      [k] ktime_get_real                                                                                                                                   
   0.47%  [kernel]                      [k] tick_nohz_stop_sched_tick                                                                                                                        
   0.46%  libpython2.7.so.1.0           [.] lookdict_string                                                                                                                                  
   0.46%  [kernel]                      [k] nf_conntrack_in                                                                                                                                  
   0.45%  traffic_server                [.] NetHandler::mainNetEvent                                                                                                                         
   0.45%  [kernel]                      [k] native_read_tsc                                                                                                                                  
   0.45%  [kernel]                      [k] kfree                                                                                                                                            
   0.43%  libc-2.12.so                  [.] sprintf                                                                                                                                          
   0.42%  [kernel]                      [k] __audit_syscall_exit                                                                                                                             
   0.42%  [kernel]                      [k] __switch_to                                                                                                                                      
   0.40%  [kernel]                      [k] ip_route_input                                                                                                                                   
   0.40%  [kernel]                      [k] __remove_hrtimer                                                                                                                                 
   0.38%  [kernel]                      [k] cpumask_next_and                                                                                                                                 
   0.37%  libc-2.12.so                  [.] __offtime                                                                                                                                        
   0.37%  libc-2.12.so                  [.] free                 

大量CPU消耗在标准输入输出库函数中,那么看来应该是解析字符串的问题了。

stack=log3:NFLOG,base1:BASE,ifi1:IFINDEX,ip2str1:IP2STR,print1:PRINTPKT,sys1:SYSLOGTCP

写了个代码测sprintf的性能:

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#define NULL 0

#define TIME_ELAPSED(codeToTime) do{ \
    struct timeval beginTime, endTime; \
    gettimeofday(&beginTime, NULL); \
    {codeToTime;} \
    gettimeofday(&endTime, NULL); \
    long secTime  = endTime.tv_sec - beginTime.tv_sec; \
    long usecTime = endTime.tv_usec - beginTime.tv_usec; \
    printf("[%s(%d)]Elapsed Time: SecTime = %lds, UsecTime = %ldus!\n", __FILE__, __LINE__, secTime, usecTime); \
}while(0)

uint16_t randu16() {
    return (uint16_t)rand();
}

uint32_t randu32() {
    return (uint32_t)rand();
}

char* f_puts(char* buf, const char* data) {
    while((*buf++ = *data++));
    return --buf;
}

char* f_putn(char* buf, int n) {
    while (n) {
        int digit = n % 10;
        *buf++ = (char)('0' + digit);
        n /= 10;
    }
    return --buf;
}

int optimized() {
    char buf[1024];
    char* buf_cur = buf;
    buf_cur = f_puts(buf_cur, "PROTO=TCP SPT=");

    buf_cur = f_putn(buf_cur, randu16());
    buf_cur = f_puts(buf_cur, " DPT=");
    buf_cur = f_putn(buf_cur, randu16());

    buf_cur = f_puts(buf_cur, " SEQ=");
    buf_cur = f_putn(buf_cur, randu32());
    buf_cur = f_puts(buf_cur, " ACK=");
    buf_cur = f_putn(buf_cur, randu32());

    buf_cur = f_puts(buf_cur, " WINDOW=");
    buf_cur = f_putn(buf_cur, randu16());

    buf_cur = f_puts(buf_cur, " URG ACK PSH RST SYN FIN URGP=");
    buf_cur = f_putn(buf_cur, randu16());
    buf_cur = f_puts(buf_cur, " ");

    return (int)(buf_cur - buf);
}

int work() {
    char buf[1024];
    char* buf_cur = buf;
    buf_cur += sprintf(buf_cur, "PROTO=TCP ");

    buf_cur += sprintf(buf_cur, "SPT=%u DPT=%u ",
                       randu16(),
                       randu16());
    /* FIXME: config */
    buf_cur += sprintf(buf_cur, "SEQ=%u ACK=%u ",
                       randu32(),
                       randu32());

    buf_cur += sprintf(buf_cur, "WINDOW=%u ",
                       randu16());

    buf_cur += sprintf(buf_cur, "URG ");

    buf_cur += sprintf(buf_cur, "ACK ");

    buf_cur += sprintf(buf_cur, "PSH ");

    buf_cur += sprintf(buf_cur, "RST ");

    buf_cur += sprintf(buf_cur, "SYN ");

    buf_cur += sprintf(buf_cur, "FIN ");

    buf_cur += sprintf(buf_cur, "URGP=%u ",
                       randu16());
    return (int)(buf_cur - buf);
}

int main() {
    TIME_ELAPSED(
        for (int i = 0; i < 1000; i++)
                work();
    );
    TIME_ELAPSED(
            for (int i = 0; i < 1000; i++)
                optimized();
    );
    return 0;
}

结果:

[main.c(99)]Elapsed Time: SecTime = 0s, UsecTime = 1158us!
[main.c(103)]Elapsed Time: SecTime = 0s, UsecTime = 159us!

注意:

千万当心itoa、sprinf等库函数,性能可能低的可怕。

 类似资料: