crash cdump_1.elf vmlinux
其中cdump_1.elf为保存在SD卡中的dump文件,vmlinux为发生crash的image对应的带符号表的内核文件,两者必须来自同一次编译,否则符号无法正确解析。
crash 7.0.0
Copyright (C) 2002-2013 Red Hat, Inc.
Copyright (C) 2004, 2005, 2006, 2010 IBM Corporation
Copyright (C) 1999-2006 Hewlett-Packard Co
Copyright (C) 2005, 2006, 2011, 2012 Fujitsu Limited
Copyright (C) 2006, 2007 VA Linux Systems Japan K.K.
Copyright (C) 2005, 2011 NEC Corporation
Copyright (C) 1999, 2002, 2007 Silicon Graphics, Inc.
Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc.
This program is free software, covered by the GNU General Public License,
and you are welcome to change it and/or distribute copies of it under
certain conditions. Enter "help copying" to see the conditions.
This program has absolutely no warranty. Enter "help warranty" for details.
GNU gdb (GDB) 7.6
Copyright (C) 2013 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law. Type "show copying"
and "show warranty" for details.
This GDB was configured as "--host=i686-pc-linux-gnu --target=arm-elf-linux"...
KERNEL: vmlinux
DUMPFILE: cdump_1
CPUS: 2
DATE: Fri Aug 13 07:32:22 1909
UPTIME: 00:00:09
LOAD AVERAGE: 0.16, 0.03, 0.01
TASKS: 94
NODENAME: localhost
RELEASE: 3.4.0-g0359334-dirty
VERSION: #67 SMP Mon Jun 24 17:09:09 CST 2013
MACHINE: armv7l (unknown Mhz)
MEMORY: 792 MB
PANIC: "Internal error: Oops: 805 [#1] SMP ARM" (check log for details)
PID: 1176
COMMAND: "sh"
TASK: ee268120 [THREAD_INFO: eda90000]
CPU: 1
STATE: TASK_RUNNING (PANIC)
crash>
crash> log /*输出内核log*/
crash> log | grep -10 "PC is"
init: waitpid returned pid 1418, status = 00007f00
init: process 'bootanim', pid 1418 exited
_________ioctl=fbio_waitforvsync
SysRq : Trigger a crash
Unable to handle kernel NULL pointer dereference at virtual address 00000000
pgd = edaec000
[00000000] *pgd=00000000
Internal error: Oops: 805 [#1] SMP ARM
Modules linked in:
CPU: 1 Tainted: G W (3.4.0-g0359334-dirty #67)
PC is at sysrq_handle_crash+0x38/0x48
LR is at l2x0_cache_sync+0x34/0x3c
pc : [<c01dd9a0>] lr : [<c0019ab4>] psr: 60000093
sp : eda91ec8 ip : eda91eb0 fp : eda91ed4
r10: ed8acb2c r9 : eda90000 r8 : 00000000
r7 : 60000013 r6 : 00000063 r5 : 00000004 r4 : c05db038
r3 : 00000000 r2 : 00000001 r1 : a0000093 r0 : c05f54d8
Flags: nZCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment user
Control: 10c53c7d Table: adaec04a DAC: 00000015
PC: 0xc01dd920:
从上可知PC is at sysrq_handle_crash+0x38/0x48
crash> dis sysrq_handle_crash
0xc01dd968 <sysrq_handle_crash>: mov r12, sp
0xc01dd96c <sysrq_handle_crash+0x4>: push {r11, r12, lr, pc}
0xc01dd970 <sysrq_handle_crash+0x8>: sub r11, r12, #4
0xc01dd974 <sysrq_handle_crash+0xc>: ldr r3, [pc, #44] ; 0xc01dd9a8 <sysrq_handle_crash+0x40>
0xc01dd978 <sysrq_handle_crash+0x10>: mov r2, #1
0xc01dd97c <sysrq_handle_crash+0x14>: str r2, [r3]
0xc01dd980 <sysrq_handle_crash+0x18>: dsb sy
0xc01dd984 <sysrq_handle_crash+0x1c>: ldr r3, [pc, #32] ; 0xc01dd9ac <sysrq_handle_crash+0x44>
0xc01dd988 <sysrq_handle_crash+0x20>: ldr r3, [r3, #24]
0xc01dd98c <sysrq_handle_crash+0x24>: cmp r3, #0
0xc01dd990 <sysrq_handle_crash+0x28>: beq 0xc01dd998 <sysrq_handle_crash+48>
0xc01dd994 <sysrq_handle_crash+0x2c>: blx r3
0xc01dd998 <sysrq_handle_crash+0x30>: mov r3, #0
0xc01dd99c <sysrq_handle_crash+0x34>: mov r2, #1
0xc01dd9a0 <sysrq_handle_crash+0x38>: strb r2, [r3]
0xc01dd9a4 <sysrq_handle_crash+0x3c>: ldm sp, {r11, sp, pc}
0xc01dd9a8 <sysrq_handle_crash+0x40>: subsgt r5, pc, r8, lsr #17
0xc01dd9ac <sysrq_handle_crash+0x44>: ldrhgt r10, [r12], #-232 ; 0xffffff18
可以看到sysrq_handle_crash+0x38/0x48对应的代码是strb r2, [r3]
此时r3的内容为0,r2的内容为1.
strb r2, [r3]的意思是把1写入地址0处,这样就发生了
Unable to handle kernel NULL pointer dereference at virtual address 00000000。
在 ps 命令的输出中,行首的 > 表示当前正在某个CPU上运行的 process。
> 0 0 0 c05ce538 RU 0.0 0 0 [swapper/0]
> 1176 1 1 ee268120 RU 0.1 816 460 sh
crash 到底发生在哪个 process上,亦即发生在哪个CPU上?
crash> log | grep CPU
CPU: 1 Tainted: G W (3.4.0-g0359334-dirty #67)
CPU 0 will stop doing anything useful since another CPU has crashed
从上可知在CPU1上发生了 crash,即:
> 1176 1 1 ee268120 RU 0.1 816 460 sh
得到进程1176的 backtrace。
crash> set 1176
PID: 1176
COMMAND: "sh"
TASK: ee268120 [THREAD_INFO: eda90000]
CPU: 1
STATE: TASK_RUNNING (PANIC)
crash> bt
PID: 1176 TASK: ee268120 CPU: 1 COMMAND: "sh"
#0 [<c01dd96c>] (sysrq_handle_crash) from [<c01de0ac>]
#1 [<c01de004>] (__handle_sysrq) from [<c01de188>]
#2 [<c01de15c>] (write_sysrq_trigger) from [<c00f2468>]
#3 [<c00f23d0>] (proc_reg_write) from [<c00ad368>]
#4 [<c00ad2b4>] (vfs_write) from [<c00ad4b8>]
#5 [<c00ad478>] (sys_write) from [<c000df00>]
pc : [<400f7d94>] lr : [<400ab257>] psr: 20000010
sp : be8d78e0 ip : 400c0f38 fp : 400c5404
r10: 4056163c r9 : 00000000 r8 : 40561664
r7 : 00000004 r6 : 00000001 r5 : 00000002 r4 : 00000003
r3 : ffffffff r2 : 00000002 r1 : 40561664 r0 : 00000001
Flags: nzCv IRQs on FIQs on Mode USER_32 ISA ARM
crash> dis -l sysrq_handle_crash
/home/wenshuai/code/3.4kernel/linux_kernel/drivers/tty/sysrq.c: 127
0xc01dd968 <sysrq_handle_crash>: mov r12, sp
0xc01dd96c <sysrq_handle_crash+0x4>: push {r11, r12, lr, pc}
0xc01dd970 <sysrq_handle_crash+0x8>: sub r11, r12, #4
/home/wenshuai/code/3.4kernel/linux_kernel/drivers/tty/sysrq.c: 130
0xc01dd974 <sysrq_handle_crash+0xc>: ldr r3, [pc, #44] ; 0xc01dd9a8 <sysrq_handle_crash+0x40>
0xc01dd978 <sysrq_handle_crash+0x10>: mov r2, #1
0xc01dd97c <sysrq_handle_crash+0x14>: str r2, [r3]
/home/wenshuai/code/3.4kernel/linux_kernel/drivers/tty/sysrq.c: 131
0xc01dd980 <sysrq_handle_crash+0x18>: dsb sy
/home/wenshuai/code/3.4kernel/linux_kernel/arch/arm/include/asm/outercache.h: 114
0xc01dd984 <sysrq_handle_crash+0x1c>: ldr r3, [pc, #32] ; 0xc01dd9ac <sysrq_handle_crash+0x44>
0xc01dd988 <sysrq_handle_crash+0x20>: ldr r3, [r3, #24]
0xc01dd98c <sysrq_handle_crash+0x24>: cmp r3, #0
0xc01dd990 <sysrq_handle_crash+0x28>: beq 0xc01dd998 <sysrq_handle_crash+48>
/home/wenshuai/code/3.4kernel/linux_kernel/arch/arm/include/asm/outercache.h: 115
0xc01dd994 <sysrq_handle_crash+0x2c>: blx r3
/home/wenshuai/code/3.4kernel/linux_kernel/drivers/tty/sysrq.c: 132
0xc01dd998 <sysrq_handle_crash+0x30>: mov r3, #0
0xc01dd99c <sysrq_handle_crash+0x34>: mov r2, #1
0xc01dd9a0 <sysrq_handle_crash+0x38>: strb r2, [r3]
/home/wenshuai/code/3.4kernel/linux_kernel/drivers/tty/sysrq.c: 133
0xc01dd9a4 <sysrq_handle_crash+0x3c>: ldm sp, {r11, sp, pc}
0xc01dd9a8 <sysrq_handle_crash+0x40>: subsgt r5, pc, r8, lsr #17
0xc01dd9ac <sysrq_handle_crash+0x44>: ldrhgt r10, [r12], #-232 ; 0xffffff18
我们可以知道函数所在的文件是drivers/tty/sysrq.c,即:
/*
 * Deliberately crashes the kernel by storing through a NULL pointer.
 * The parameter `key` is not referenced anywhere in this body.
 */
static void sysrq_handle_crash(int key)
{
/* Left NULL on purpose: the store at the end of the function is meant to fault. */
char *killer = NULL;
panic_on_oops = 1; /* force panic */
/* Issued before the faulting store; in the dis -l listing, sysrq.c:131 maps to "dsb sy" plus the outercache.h call. */
wmb();
/* Writes 1 to address 0: this is the "strb r2, [r3]" at sysrq_handle_crash+0x38 reported as the PC in the oops. */
*killer = 1;
}
/*如何看一个全局变量的值*/
crash> panic_on_oops
panic_on_oops = $9 = 0x1
/*如何看变量的地址*/
crash> sym panic_on_oops
c05f58a8 (B) panic_on_oops
/*已知变量的地址,如何读取它的值*/
crash> rd c05f58a8
c05f58a8: 00000001
crash> dis sysrq_handle_crash
0xc01dd968 <sysrq_handle_crash>: mov r12, sp