安装
基于ubuntu16.04安装参考博客: https://blog.csdn.net/weixin_43455581/article/details/103899362
https://blog.csdn.net/hbxiang200/article/details/86244772
Cyclictest 测试
带RT-linux测试结果:
yaru@yaru-NUC:~$ sudo cyclictest -t 4 -p 80 -n
[sudo] password for yaru:
# /dev/cpu_dma_latency set to 0us
policy: fifo: loadavg: 0.91 0.68 0.57 1/491 12130
T: 0 (12124) P:80 I:1000 C: 79481 Min: 4 Act: 11 Avg: 13 Max: 46
T: 1 (12125) P:80 I:1500 C: 52987 Min: 4 Act: 10 Avg: 13 Max: 62
T: 2 (12126) P:80 I:2000 C: 39740 Min: 4 Act: 9 Avg: 12 Max: 59
T: 3 (12127) P:80 I:2500 C: 31792 Min: 4 Act: 11 Avg: 11 Max: 43
不带RT-linux测试结果:
yaru@yaru-NUC:~$ sudo cyclictest -t 4 -p 80 -n
[sudo] password for yaru:
# /dev/cpu_dma_latency set to 0us
policy: fifo: loadavg: 0.03 0.20 0.13 1/393 2328
T: 0 ( 2298) P:80 I:1000 C: 248093 Min: 2 Act: 9 Avg: 9 Max: 100
T: 1 ( 2299) P:80 I:1500 C: 165395 Min: 3 Act: 9 Avg: 9 Max: 48
T: 2 ( 2300) P:80 I:2000 C: 124046 Min: 3 Act: 9 Avg: 8 Max: 108
T: 3 ( 2301) P:80 I:2500 C: 99237 Min: 2 Act: 9 Avg: 9 Max: 90
cyclictest各个命令参数的使用
cyclictest: option requires an argument -- 'h'
cyclictest V 0.93
Usage:
cyclictest <options>
-a [CPUSET] --affinity Run thread #N on processor #N, if possible, or if CPUSET
given, pin threads to that set of processors in round-
robin order. E.g. -a 2 pins all threads to CPU 2,
but -a 3-5,0 -t 5 will run the first and fifth
threads on CPU (0),thread #2 on CPU 3, thread #3
on CPU 4, and thread #5 on CPU 5.在N#处理器上运行N#线程
-A USEC --aligned=USEC align thread wakeups to a specific offset
-b USEC --breaktrace=USEC send break trace command when latency > USEC#当延时大于USEC指定的值时,发送停止跟踪命令。USEC,单位为微秒
-B --preemptirqs both preempt and irqsoff tracing (used with -b)#和 -b一起使用。preempt(抢占)和 irqsoff同时跟踪。cyclictest -b 100 -B
-c CLOCK --clock=CLOCK select clock
0 = CLOCK_MONOTONIC (default)
1 = CLOCK_REALTIME#选择时钟 cyclictest -c 1
-C --context context switch tracing (used with -b)#上下文切换跟踪(和-b一起使用)
-d DIST --distance=DIST distance of thread intervals in us default=500#线程间隔(默认为500)
-D --duration=t specify a length for the test run
default is in seconds, but 'm', 'h', or 'd' maybe added
to modify value to minutes, hours or days#指定要测试多长时间。默认单位是秒,但是也可以指定m(分),h(小时),d(天)
--latency=PM_QOS write PM_QOS to /dev/cpu_dma_latency
-E --event event tracing (used with -b)#事件跟踪,和 -b一起使用
-f --ftrace function trace (when -b is active)#函数跟踪(-b 为激活的)
-F --fifo=<path> create a named pipe at path and write stats to it
-h --histogram=US dump a latency histogram to stdout after the run
(with same priority about many threads)
US is the max time to be be tracked in microseconds#运行结束后在标准输出上输出延迟直方图(多个线程使用相同的优先级),US为要跟踪的最大延迟时间(单位为微秒)。
-H --histofall=US same as -h except with an additional summary column
-i INTV --interval=INTV base interval of thread in us default=1000#基本线程间隔,默认为1000us
-I --irqsoff Irqsoff tracing (used with -b)#中断请求关闭跟踪
-l LOOPS --loops=LOOPS number of loops: default=0(endless)#循环的个数,默认为0(无穷个)
--laptop Save battery when running cyclictest
This will give you poorer realtime results
but will not drain your battery so quickly
-m --mlockall lock current and future memory allocations#锁定当前和将来的内存分配
-M --refresh_on_max delay updating the screen until a new max latency is hit#推迟屏幕刷新,直到出现新的最大延迟时才更新
-n --nanosleep use clock_nanosleep
--notrace suppress tracing
-N --nsecs print results in ns instead of us (default us)#以ns(纳秒)为单位打印结果,而不是us(默认是us)
-o RED --oscope=RED oscilloscope mode, reduce verbose output by RED#示波器模式,按RED减少冗长的输出
-O TOPT --traceopt=TOPT trace option
-p PRIO --priority=PRIO priority of highest prio thread#最高优先级线程的优先级
-P --preemptoff Preempt off tracing (used with -b)
-q --quiet print only a summary on exit#退出时只打印概要内容
--priospread spread priority levels starting at specified value
-r --relative use relative timer instead of absolute#使用相对时间而非绝对时间
-R --resolution check clock resolution, calling clock_gettime() many
times. list of clock_gettime() values will be
reported with -X
--secaligned [USEC] align thread wakeups to the next full second,
and apply the optional offset
-s --system use sys_nanosleep and sys_setitimer#使用 sys_nanosleep 和 sys_setitimer
-S --smp Standard SMP testing: options -a -t -n and
same priority of all threads
-t --threads one thread per available processor#每个可用的处理器一个线程
-t [NUM] --threads=NUM number of threads:
without NUM, threads = max_cpus
without -t default = 1
-T TRACE --tracer=TRACER set tracing function
configured tracers: hwlat blk mmiotrace function_graph wakeup_dl wakeup_rt wakeup function nop
-u --unbuffered force unbuffered output for live processing#强制使用无缓冲输出,便于实时处理
-U --numa Standard NUMA testing (similar to SMP option)
thread data structures allocated from local node
-v --verbose output values on stdout for statistics
format: n:c:v n=tasknum c=count v=value in us#把统计数据输出到标准输出,格式为 n:c:v,n=任务编号 c=计数 v=数值(单位:us)
-w --wakeup task wakeup tracing (used with -b)#任务唤醒跟踪(和 -b 一起使用)
-W --wakeuprt rt task wakeup tracing (used with -b)#实时任务唤醒跟踪
--dbg_cyclictest print info useful for debugging cyclictest
--policy=POLI policy of realtime thread, POLI may be fifo(default) or rr
format: --policy=fifo(default) or --policy=rr#实时线程的调度策略,可以是 fifo(默认) 或者 rr 格式为: --policy=fifo 或者 --policy=rr
cyclictest代码分析
下载cyclictest,可以选择最新的版本
https://git.kernel.org/pub/scm/utils/rt-tests/rt-tests.git/
下载后打开rt-tests/src/cyclictest/cyclictest.c
从main()函数入口 line 2009
int main(int argc, char **argv)
{
sigset_t sigset;
int signum = SIGALRM;
int mode;
int cpu;
int max_cpus = sysconf(_SC_NPROCESSORS_ONLN);/*获取本机的CPU数*/
int i, ret = -1;
int status;
process_options(argc, argv, max_cpus);/*解析输入参数*/
....
status = pthread_create(&stat->thread, &attr, timerthread, par);/*创建线程*/
....
}
从main()里面可以看到主要定义了一些变量,然后通过pthread_create()函数创建线程来测试,并通过void *timerthread(void *param)函数来具体测试和记录:
/*创建线程*/
status = pthread_create(&stat->thread, &attr, timerthread, par);
if (status)
fatal("failed to create thread %d: %s\n", i, strerror(status));
创建线程后执行void *timerthread(void *param);函数
static void *timerthread(void *param)
{
struct thread_param *par = param;
struct sched_param schedp;
struct sigevent sigev;
sigset_t sigset;
timer_t timer;
struct timespec now, next, interval, stop;
struct itimerval itimer;
struct itimerspec tspec;
struct thread_stat *stat = par->stats;
int stopped = 0;
cpu_set_t mask;
pthread_t thread;
unsigned long smi_now, smi_old = 0;
memset(&stop, 0, sizeof(stop));
/* if we're running in numa mode, set our memory node */
if (par->node != -1)
rt_numa_set_numa_run_on_node(par->node, par->cpu);
if (par->cpu != -1) {
CPU_ZERO(&mask);
CPU_SET(par->cpu, &mask);
thread = pthread_self();
if (pthread_setaffinity_np(thread, sizeof(mask), &mask) != 0)
warn("Could not set CPU affinity to CPU #%d\n",
par->cpu);
}
....
}
此函数中首先获取线程当前的时间
/* Get current time */
if (aligned || secaligned) {
pthread_barrier_wait(&globalt_barr);
if (par->tnum == 0) {
clock_gettime(par->clock, &globalt);
if (secaligned) {
/* Ensure that the thread start timestamp is not
in the past */
if (globalt.tv_nsec > 900000000)
globalt.tv_sec += 2;
else
globalt.tv_sec++;
globalt.tv_nsec = 0;
}
}
pthread_barrier_wait(&align_barr);
now = globalt;
if (offset) {
if (aligned)
now.tv_nsec += offset * par->tnum;
else
now.tv_nsec += offset;
tsnorm(&now);
}
} else
clock_gettime(par->clock, &now);
然后将值赋给next
/*当前时间值加上间隔时间赋给下次循环的时间值next*/
next = now;
next.tv_sec += interval.tv_sec;
next.tv_nsec += interval.tv_nsec;
tsnorm(&next);
一个完整的线程迭代
while (!shutdown) {
uint64_t diff;
unsigned long diff_smi = 0;
int sigs, ret;
/* Wait for next period */
switch (par->mode) {
case MODE_CYCLIC:
case MODE_SYS_ITIMER:
if (sigwait(&sigset, &sigs) < 0)
goto out;
break;
case MODE_CLOCK_NANOSLEEP:
if (par->timermode == TIMER_ABSTIME) {
ret = clock_nanosleep(par->clock, TIMER_ABSTIME,
&next, NULL);
if (ret != 0) {
if (ret != EINTR)
warn("clock_nanosleep failed. errno: %d\n", errno);
goto out;
}
....
}
如果设置了最大循环次数max_cycles,则当已完成的循环次数stat->cycles达到该值时跳出循环;否则线程迭代会一直执行下去
if (par->max_cycles && par->max_cycles == stat->cycles)
break;
在line:807中,休眠结束后获取实际的唤醒时间now
ret = clock_gettime(par->clock, &now);
if (ret != 0) {
if (ret != EINTR)
warn("clock_gettime() failed. errno: %d\n",
errno);
goto out;
}
然后计算实际唤醒时间now与期望唤醒时间next之间的差值,即本次循环的延时diff
/*实际休眠结束时间now与期望休眠结束时间next之间的差值*/
if (use_nsecs)
diff = calcdiff_ns(now, next);
else
diff = calcdiff(now, next);
根据diff更新最小延时和最大延时,并累加延时用于计算平均值
if (diff < stat->min)/*假如延时比min 小,将min 改为这个更小的延时值diff*/
stat->min = diff;
if (diff > stat->max) {/*假如延时比max 大,将max 改为这个更大的延时值diff*/
stat->max = diff;
if (refresh_on_max)
pthread_cond_signal(&refresh_on_max_cond);
}
stat->avg += (double) diff;/*累加延时,用于之后计算平均延时*/
循环次数计数+1,并计算下一次的期望唤醒时间next,然后进入下次循环
stat->cycles++;
next.tv_sec += interval.tv_sec;
next.tv_nsec += interval.tv_nsec;
if (par->mode == MODE_CYCLIC) {
int overrun_count = timer_getoverrun(timer);
next.tv_sec += overrun_count * interval.tv_sec;
next.tv_nsec += overrun_count * interval.tv_nsec;
}
tsnorm(&next);
总结