CELF(The Consumer Electronics Linux Forum)论坛
CELF已经提出了一整套针对消费类电子产品所使用的嵌入式Linux的启动优化方案
CELF(CE Linux Forum)向内核提交了一个“Preset LPJ”补丁。该补丁通过预设LPJ值来减少Linux内核启动过程中校准loops_per_jiffy所需花费的时间。这个补丁其实就是增加了一个内核启动参数,使得用户可以在内核启动时预设lpj,从而无须再进行耗时的校准计算。
正常启动一次后,记录下内核信息中“Calibrating delay”一行里括号内的lpj数值,之后就可以在启动参数中以下面的形式强制指定LPJ值了:
lpj=9600700
CELF Developer Wiki
ShortIDEDelays - 缩短IDE探测时长(我的应用场景中不包含硬盘,所以用不上)
KernelXIP - 直接在ROM或Flash中运行内核(考虑到兼容性因素,未采用)
IDENoProbe - 跳过未连接设备的IDE口
OptimizeRCScripts - 优化initrd中的linuxrc脚本(我采用了BusyBox更简洁的linuxrc)
CELF Developer Wiki
CELF已有专项的优化方案:“RTCNoSync”和“PresetLPJ”。
前者通过屏蔽启动过程中所进行的RTC时钟同步或者将这一过程放到启动后进行(视具体应用对时钟精度的需求而定),实现起来比较容易,但需要为内核打补丁。似乎CELF目前的工作仅仅是去掉了该过程,而没有实现所提到的“延后”处理RTC时钟的同步。考虑到这个原因,我的方案中暂时没有引入这一优化(毕竟它所带来的时间漂移已经达到了“秒”级),继续关注中。
后者是通过在启动参数中强制指定LPJ值而跳过实际的计算过程,这是基于LPJ值在硬件条件不变的情况下不会变化的考虑。所以在正常启动一次后,记录下内核信息中“Calibrating delay”一行里括号内的lpj数值,之后就可以在启动参数中以下面的形式强制指定LPJ值了:
lpj=9600700
上面分析结果中的 4、5 两项都是SMP初始化的一部分,因此不在CELF研究的范畴(或许将来会有采用多核的MP4出现?……),只能自力更生了。研究了一下SMP的初始化代码,发现“Migration Cost”其实也可以像“Calibrating Delay”采用预置的方式跳过校准时间。方法类似,最后在内核启动参数中增加:
migration_cost=4000,4000
//
init/main.c 中的 start_kernel() 函数会调用下面的 calibrate_delay() 函数
/*
* This is the number of bits of precision for the loops_per_jiffy. Each
* bit takes on average 1.5/HZ seconds. This (like the original) is a little
* better than 1%
* For the boot cpu we can skip the delay calibration and assign it a value
* calculated based on the timer frequency.
* For the rest of the CPUs we cannot assume that the timer frequency is same as
* the cpu frequency, hence do the calibration for those.
*/
/*
 * calibrate_delay() - establish loops_per_jiffy.
 *
 * The value is taken from the first source that applies:
 *   1. preset_lpj, when it is non-zero;
 *   2. lpj_fine, when it is non-zero and no earlier call has completed;
 *   3. calibrate_delay_direct(), when it returns non-zero;
 *   4. a busy-loop measurement of __delay() against the jiffies counter.
 *
 * The "Calibrating delay ..." / BogoMIPS summary is printed only by the first
 * call.  The "##########" trace line of the branch taken and the trailing
 * "loops_per_jiffy = ..." line are printed on every call.
 */
void __cpuinit calibrate_delay(void)
{
	unsigned long ticks, loopbit;
	int lps_precision = LPS_PREC;
	static bool printed;	/* true once a call has run to completion */

	if (preset_lpj) {
		pr_info("##########calibrate.c calibrate_delay 00\n");
		loops_per_jiffy = preset_lpj;
		if (!printed)
			pr_info("Calibrating delay loop (skipped) "
				"preset value.. ");
	} else if ((!printed) && lpj_fine) {
		pr_info("##########calibrate.c calibrate_delay 11\n");
		loops_per_jiffy = lpj_fine;
		pr_info("Calibrating delay loop (skipped), "
			"value calculated using timer frequency.. ");
	} else if ((loops_per_jiffy = calibrate_delay_direct()) != 0) {
		pr_info("##########calibrate.c calibrate_delay 22");
		if (!printed)
			pr_info("Calibrating delay using timer "
				"specific routine.. ");
	} else {
		pr_info("##########calibrate.c calibrate_delay 33");
		loops_per_jiffy = (1<<12);
		if (!printed)
			pr_info("Calibrating delay loop... ");

		/*
		 * Coarse search: keep doubling loops_per_jiffy until one
		 * __delay() of that length spans at least one jiffy.  The
		 * loop also ends if the shift overflows to zero.
		 */
		while ((loops_per_jiffy <<= 1) != 0) {
			/* wait for "start of" clock tick */
			ticks = jiffies;
			while (ticks == jiffies)
				/* nothing */;
			/* Go .. */
			ticks = jiffies;
			__delay(loops_per_jiffy);
			ticks = jiffies - ticks;
			if (ticks)
				break;
		}

		/*
		 * Do a binary approximation to get loops_per_jiffy set to
		 * equal one clock (up to lps_precision bits)
		 */
		loops_per_jiffy >>= 1;
		loopbit = loops_per_jiffy;
		while (lps_precision-- && (loopbit >>= 1)) {
			/* Tentatively set the next lower bit ... */
			loops_per_jiffy |= loopbit;
			ticks = jiffies;
			while (ticks == jiffies)
				/* nothing */;
			ticks = jiffies;
			__delay(loops_per_jiffy);
			/* ... and drop it again if the delay overshot. */
			if (jiffies != ticks)	/* longer than 1 tick */
				loops_per_jiffy &= ~loopbit;
		}
	}

	if (!printed)
		pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n",
			loops_per_jiffy/(500000/HZ),
			(loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy);

	/* loops_per_jiffy is printed with %lu above, so use %lu here too. */
	pr_info("loops_per_jiffy = %lu \n", loops_per_jiffy);
	printed = true;
}
/
//串口输出信息如下
Console: colour dummy device 80x30
##########calibrate.c calibrate_delay 33
Calibrating delay loop... 226.09 BogoMIPS (lpj=1130496)
loops_per_jiffy = 1130496
pid_max: default: 32768 minimum: 301
Security Framework initialized
Mount-cache hash table entries: 512
CPU: Testing write buffer coherency: ok
regulator: core version 0.5
///
/*
 * calibrate_delay_direct() - estimate the timer rate per jiffy using
 * read_current_timer().
 *
 * Makes MAX_DIRECT_CALIBRATION_RETRIES attempts.  Each attempt brackets the
 * start and the end of a DELAY_CALIBRATION_TICKS-long window with timer
 * reads, derives an upper and a lower bound for the rate, and is accepted
 * only when the two bounds are less than 12.5% apart.
 *
 * Returns the average of the accepted upper bounds, or 0 when
 * read_current_timer() reports failure on the initial probe or when no
 * attempt was accepted.
 */
static unsigned long __cpuinit calibrate_delay_direct(void)
{
	unsigned long pre_start, start, post_start;
	unsigned long pre_end, end, post_end;
	unsigned long start_jiffies;
	unsigned long timer_rate_min, timer_rate_max;
	unsigned long good_timer_sum = 0;
	unsigned long good_timer_count = 0;
	int i;

	/* Probe once; a negative return means the timer cannot be used. */
	if (read_current_timer(&pre_start) < 0 )
		return 0;

	/*
	 * A simple loop like
	 *	while ( jiffies < start_jiffies+1)
	 *		start = read_current_timer();
	 * will not do. As we don't really know whether jiffy switch
	 * happened first or timer_value was read first. And some asynchronous
	 * event can happen between these two events introducing errors in lpj.
	 *
	 * So, we do
	 * 1. pre_start
	 * 2. check jiffy switch
	 * 3. start
	 * 4. post_start
	 *
	 * Note, we don't know anything about order of 2 and 3.
	 * Now, by looking at post_start and pre_start difference, we can
	 * check whether any asynchronous event happened or not
	 */
	for (i = 0; i < MAX_DIRECT_CALIBRATION_RETRIES; i++) {
		pre_start = 0;
		read_current_timer(&start);
		start_jiffies = jiffies;
		/*
		 * Compare through a signed difference so the loop still
		 * terminates correctly if jiffies wraps around while we
		 * wait; a plain "<=" would misjudge the order in that case.
		 */
		while ((long)((start_jiffies + 1) - jiffies) >= 0) {
			pre_start = start;
			read_current_timer(&start);
		}
		read_current_timer(&post_start);

		pre_end = 0;
		end = post_start;
		/* Same wrap-safe comparison for the end of the window. */
		while ((long)((start_jiffies + 1 + DELAY_CALIBRATION_TICKS) -
			      jiffies) >= 0) {
			pre_end = end;
			read_current_timer(&end);
		}
		read_current_timer(&post_end);

		/* Widest and narrowest plausible span of the window. */
		timer_rate_max = (post_end - pre_start) /
					DELAY_CALIBRATION_TICKS;
		timer_rate_min = (pre_end - post_start) /
					DELAY_CALIBRATION_TICKS;

		/*
		 * If the upper limit and lower limit of the timer_rate is
		 * >= 12.5% apart, redo calibration.
		 */
		if (pre_start != 0 && pre_end != 0 &&
		    (timer_rate_max - timer_rate_min) < (timer_rate_max >> 3)) {
			good_timer_count++;
			good_timer_sum += timer_rate_max;
		}
	}

	if (good_timer_count)
		return (good_timer_sum/good_timer_count);

	printk(KERN_WARNING "calibrate_delay_direct() failed to get a good "
	       "estimate for loops_per_jiffy.\nProbably due to long platform interrupts. Consider using \"lpj=\" boot option.\n");
	/*
	 * The "lpj=" boot option mentioned in the message above is presumably
	 * the kernel command-line boot parameter.
	 */
	return 0;
}
#else
/*
 * Stub compiled in the #else arm of the surrounding conditional: it always
 * returns 0, which calibrate_delay() treats as "no direct estimate".
 */
static unsigned long __cpuinit calibrate_delay_direct(void)
{
	return 0;
}
#endif