本文以OpenSBI 0.9版本为参考进行讲解
以fw_jump为例,从fw_jump.elf.ldS看到入口函数就是ENTRY(_start), _start定义在fw_base.S里面。
主要的启动流程都在fw_base.S里面,另外的三个文件(fw_jump.S、fw_dynamic.S、fw_payload.S)主要是提供一些回调函数,不同类型的fw对这些回调函数有各自不同的实现。
3.2 初始化的具体流程和代码
下面分析具体的代码:
/*
 * Values written to the _boot_status word by the boot hart so that the
 * other harts can follow its progress:
 *   BOOT_STATUS_RELOCATE_DONE  - stored once the relocation copy is over
 *   BOOT_STATUS_BOOT_HART_DONE - stored once the boot hart has finished its
 *                                one-time setup (just before _start_warm)
 */
#define BOOT_STATUS_RELOCATE_DONE 1
#define BOOT_STATUS_BOOT_HART_DONE 2
/*
 * MOV_3R - copy three registers.
 * Each destination __dN receives the value of the matching source __sN.
 * The copies are issued in order (0, 1, 2); it is used to park a0-a2 in
 * saved registers around a call and to bring them back afterwards.
 */
.macro MOV_3R __d0, __s0, __d1, __s1, __d2, __s2
	mv	\__d0, \__s0
	mv	\__d1, \__s1
	mv	\__d2, \__s2
.endm
/*
 * MOV_5R - copy five registers.
 * Each destination __dN receives the value of the matching source __sN.
 * The copies are issued in order (0 .. 4).
 */
.macro MOV_5R __d0, __s0, __d1, __s1, __d2, __s2, __d3, __s3, __d4, __s4
	mv	\__d0, \__s0
	mv	\__d1, \__s1
	mv	\__d2, \__s2
	mv	\__d3, \__s3
	mv	\__d4, \__s4
.endm
/*
 * If __start_reg <= __check_reg and __check_reg < __end_reg then
 * jump to __jump_lable, otherwise fall through past the macro.
 * Both comparisons are signed (blt / bge).
 */
.macro BRANGE __start_reg, __end_reg, __check_reg, __jump_lable /* range check: jump when __check_reg is inside [__start_reg, __end_reg) */
blt \__check_reg, \__start_reg, 999f
bge \__check_reg, \__end_reg, 999f
j \__jump_lable
999:
.endm
.section .entry, "ax", %progbits /* per the original annotation, the linker script places this section at the very start of the image */
.align 3
.globl _start
.globl _start_warm
_start:
/* Find preferred boot HART id */
MOV_3R s0, a0, s1, a1, s2, a2 /* park a0-a2 in s0-s2 so they survive the fw_boot_hart call below */
/*
 * fw_boot_hart is not defined in this file. According to the original
 * annotation it is supplied per firmware type (fw_jump.S, fw_dynamic.S,
 * fw_payload.S): fw_jump and fw_payload simply return -1, while
 * fw_dynamic derives the answer from the struct fw_dynamic_info passed
 * in a2 (a fixed boot hart for one info version, -1 for the other).
 * NOTE(review): confirm against those files.
 */
call fw_boot_hart /* result in a0: preferred boot hart id, or -1 for "none" */
add a6, a0, zero /* a6 = preferred boot hart id */
MOV_3R a0, s0, a1, s1, a2, s2 /* bring back the original a0-a2 */
li a7, -1
beq a6, a7, _try_lottery /* no preferred boot hart: choose one by lottery */
/* Jump to relocation wait loop if we are not boot hart */
/*
 * A boot hart was specified. a6 holds it and a0 is compared against it
 * as this hart's id (presumably handed over by the previous boot stage;
 * the annotation points at the reset vector in QEMU's hw/riscv/boot.c).
 * A hart whose id differs goes off to wait for the relocation to end.
 */
bne a0, a6, _wait_relocate_copy_done
_try_lottery:
	/* Jump to relocation wait loop if we don't get relocation lottery */
	/*
	 * Several harts may arrive here. amoadd.w hands back the previous
	 * value of _relocate_lottery, so only the first hart to get here
	 * reads 0 and carries on to relocate the image; every other hart
	 * reads a non-zero value and waits for the copy to finish.
	 */
	la	a6, _relocate_lottery
	li	a7, 1
	amoadd.w a6, a7, (a6)
	bnez	a6, _wait_relocate_copy_done

	/* Save load address */
	/*
	 * t0 = address of the _load_start word, which this file emits as
	 *
	 *     _load_start:
	 *         RISCV_PTR _fw_start
	 *
	 * i.e. it initially holds the link-time value of _fw_start.
	 * NOTE(review): the original annotation says RISCV_PTR is .dword and
	 * that _fw_start comes from fw_base.ldS (FW_TEXT_START); neither is
	 * visible here.
	 */
	la	t0, _load_start
	/*
	 * t1 = run-time address of _start. Per the annotation _start sits at
	 * the very beginning of the binary for all three firmware types, so
	 * this is the address the image was actually loaded at.
	 */
	la	t1, _start
	REG_S	t1, 0(t0)	/* _load_start = actual load address (annotation: REG_S is sd) */
/* Relocate if load address != link address */
/* The copy moves the image from the address it is running at to the address it was linked for. */
_relocate:
la t0, _link_start /* word holding _fw_start */
REG_L t0, 0(t0) /* t0 = link start address (annotation: REG_L is ld) */
la t1, _link_end /* word holding _fw_reloc_end */
REG_L t1, 0(t1) /* t1 = link end address */
la t2, _load_start /* word that now holds the run-time address of _start */
REG_L t2, 0(t2) /* t2 = load start address */
sub t3, t1, t0 /* t3 = size of the region to relocate */
add t3, t3, t2 /* t3 = load end address */
/*
 * From here on: t0 = link start, t1 = link end,
 * t2 = load start, t3 = load end.
 */
beq t0, t2, _relocate_done /* load address == link address: nothing to copy */
la t4, _relocate_done
sub t4, t4, t2 /* offset of _relocate_done from the load start */
add t4, t4, t0 /* t4 = address of _relocate_done inside the relocated image */
blt t2, t0, _relocate_copy_to_upper /* loaded below the link address: copy towards higher addresses */
/*
 * Copy the image downwards (load address above link address).
 * On entry: t0 = link start, t1 = link end, t2 = load start,
 * t4 = relocated address of _relocate_done.
 */
_relocate_copy_to_lower:
ble t1, t2, _relocate_copy_to_lower_loop /* link end <= load start: source and destination do not overlap */
/*
 * The ranges overlap. Give up (park in _start_hang) if anything the copy
 * still depends on lies inside [load start, link end), or if the load
 * start itself lies inside the relocation code.
 */
la t3, _relocate_lottery
BRANGE t2, t1, t3, _start_hang /* _relocate_lottery inside [load start, link end) */
la t3, _boot_status
BRANGE t2, t1, t3, _start_hang /* _boot_status inside [load start, link end) */
la t3, _relocate
la t5, _relocate_done
BRANGE t2, t1, t3, _start_hang /* _relocate inside [load start, link end) */
BRANGE t2, t1, t5, _start_hang /* _relocate_done inside [load start, link end) */
BRANGE t3, t5, t2, _start_hang /* load start inside [_relocate, _relocate_done) */
_relocate_copy_to_lower_loop: /* copy one pointer-sized word per iteration, lowest address first */
REG_L t3, 0(t2)
REG_S t3, 0(t0) /* word at the load address (t2) goes to the link address (t0) */
add t0, t0, __SIZEOF_POINTER__
add t2, t2, __SIZEOF_POINTER__
blt t0, t1, _relocate_copy_to_lower_loop /* keep going until the destination reaches the link end */
jr t4 /* continue at _relocate_done in the relocated image */
/*
 * Copy the image upwards (load address below link address).
 * On entry: t0 = link start, t1 = link end, t2 = load start,
 * t3 = load end, t4 = relocated address of _relocate_done.
 */
_relocate_copy_to_upper:
ble t3, t0, _relocate_copy_to_upper_loop /* load end <= link start: source and destination do not overlap */
/*
 * The ranges overlap. Same safety checks as the copy-to-lower path, this
 * time against [link start, load end); t2 is reused as a temporary.
 */
la t2, _relocate_lottery
BRANGE t0, t3, t2, _start_hang /* _relocate_lottery inside [link start, load end) */
la t2, _boot_status
BRANGE t0, t3, t2, _start_hang /* _boot_status inside [link start, load end) */
la t2, _relocate
la t5, _relocate_done
BRANGE t0, t3, t2, _start_hang /* _relocate inside [link start, load end) */
BRANGE t0, t3, t5, _start_hang /* _relocate_done inside [link start, load end) */
BRANGE t2, t5, t0, _start_hang /* link start inside [_relocate, _relocate_done) */
_relocate_copy_to_upper_loop:
add t3, t3, -__SIZEOF_POINTER__ /* unlike the copy-to-lower loop, start at the end and walk downwards */
add t1, t1, -__SIZEOF_POINTER__
REG_L t2, 0(t3) /* word at the load address (t3) ... */
REG_S t2, 0(t1) /* ... goes to the link address (t1) */
blt t0, t1, _relocate_copy_to_upper_loop /* keep going until the destination reaches the link start */
jr t4 /* continue at _relocate_done in the relocated image */
/*
 * Harts that do not perform the relocation end up here and wait until
 * the relocating hart has published BOOT_STATUS_RELOCATE_DONE.
 */
_wait_relocate_copy_done:
la t0, _start
la t1, _link_start
REG_L t1, 0(t1)
beq t0, t1, _wait_for_boot_hart /* _start is already at the link address, so no relocation takes place: go straight to _wait_for_boot_hart */
la t2, _boot_status
la t3, _wait_for_boot_hart
sub t3, t3, t0 /* offset of _wait_for_boot_hart from _start */
add t3, t3, t1 /* t3 = address of _wait_for_boot_hart inside the relocated image */
1:
/* waiting for relocate copy done (_boot_status == 1) */
li t4, BOOT_STATUS_RELOCATE_DONE /* == 1*/
REG_L t5, 0(t2)
/* Reduce the bus traffic so that boot hart may proceed faster */
nop
nop
nop
/*
 * t5 = current value of _boot_status (read through the load-address
 * copy), t4 = BOOT_STATUS_RELOCATE_DONE. _boot_status starts out as 0;
 * the relocating hart stores BOOT_STATUS_RELOCATE_DONE into it once the
 * copy is over, which ends this loop.
 */
bgt t4, t5, 1b /* still below BOOT_STATUS_RELOCATE_DONE: copy not finished, poll again */
jr t3 /* jump to the relocated _wait_for_boot_hart, which in turn polls for BOOT_STATUS_BOOT_HART_DONE */
_relocate_done: /* relocation finished (or not needed): publish the status, then start the boot hart's one-time setup */
/*
 * Mark relocate copy done
 * Use _boot_status copy relative to the load address
 */
la t0, _boot_status
la t1, _link_start
REG_L t1, 0(t1)
la t2, _load_start
REG_L t2, 0(t2)
sub t0, t0, t1 /* offset of _boot_status from the link start */
add t0, t0, t2 /* t0 = _boot_status inside the image at the load address */
li t1, BOOT_STATUS_RELOCATE_DONE /* value to publish */
REG_S t1, 0(t0)
fence rw, rw
/* At this point we are running from link address */
/*
 * What follows is executed by the boot hart only. The other harts sit in
 * _wait_for_boot_hart until the boot hart has finished this common
 * setup, and then continue from _start_warm.
 *
 * NOTE(review): the original annotation leaves open why the others have
 * to wait for the boot hart to finish completely.
 */
/* Reset all registers for boot HART */
li ra, 0
call _reset_regs
/* Zero-out BSS */
la s4, _bss_start
la s5, _bss_end
_bss_zero:
REG_S zero, (s4) /* clear one pointer-sized word per iteration */
add s4, s4, __SIZEOF_POINTER__
blt s4, s5, _bss_zero
/* Setup temporary trap handler */
la s4, _start_hang
csrw CSR_MTVEC, s4
/* Setup temporary stack */
la s4, _fw_end
li s5, (SBI_SCRATCH_SIZE * 2) /* annotation: SBI_SCRATCH_SIZE is 4 KiB -- TODO confirm */
add sp, s4, s5 /* sp = _fw_end + 2 * SBI_SCRATCH_SIZE; the stack grows down from there */
/* Allow main firmware to save info */
MOV_5R s0, a0, s1, a1, s2, a2, s3, a3, s4, a4
/*
 * fw_save_info is defined outside this file. Per the annotation it lets
 * the firmware record what the previous boot stage passed in, and only
 * fw_dynamic actually hands parameters on through a data structure.
 * NOTE(review): confirm.
 */
call fw_save_info
MOV_5R a0, s0, a1, s1, a2, s2, a3, s3, a4, s4
/* A device tree can be built into the firmware image */
#ifdef FW_FDT_PATH
/* Override previous arg1 */
la a1, fw_fdt_bin
#endif
/*
 * Initialize platform
 * Note: The a0 to a4 registers passed to the
 * firmware are parameters to this function.
 */
MOV_5R s0, a0, s1, a1, s2, a2, s3, a3, s4, a4 /* keep a0-a4 across the fw_platform_init call */
call fw_platform_init /* the weak default in this file just returns a1; a platform may supply its own */
add t0, a0, zero /* t0 = value returned by fw_platform_init */
MOV_5R a0, s0, a1, s1, a2, s2, a3, s3, a4, s4
add a1, t0, zero /* a1 (arg1) is replaced by that return value */
/* Preload HART details
 * s7 -> HART Count
 * s8 -> HART Stack Size
 */
la a4, platform
#if __riscv_xlen == 64
/*
 * Both values are 32-bit fields read from the platform object at fixed
 * offsets. NOTE(review): the annotation states that platform is a
 * struct sbi_platform (e.g. platform/nuclei/demosoc/platform.c) and
 * gives the offsets as 0x50 (hart_count) and 0x54 (hart stack size);
 * none of that is visible in this file.
 */
lwu s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4) /* s7 = number of harts */
lwu s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4) /* s8 = per-hart stack size */
#else
lw s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4)
lw s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4)
#endif
/* Setup scratch space for all the HARTs*/
la tp, _fw_end
mul a5, s7, s8 /* hart count * per-hart stack size = total stack area */
add tp, tp, a5
/* Keep a copy of tp */
add t3, tp, zero
/* Counter */
li t2, 1
/* hartid 0 is mandated by ISA */
li t1, 0
/*
 * _scratch_init - fill in the scratch area of every hart, one hart index
 * per loop iteration.
 *
 * Live registers on entry and across iterations:
 *   t1 = current hart index (starts at 0)      t2 = 1 (index increment)
 *   t3 = _fw_end + hart count * hart stack size (top of all hart stacks)
 *   s7 = hart count                            s8 = per-hart stack size
 *   a0-a2 = arguments from the previous boot stage; they are parked in
 *           s0-s2 around every call and restored right after it.
 *
 * For index i the scratch area is the top SBI_SCRATCH_SIZE bytes of that
 * hart's stack region:
 *   tp = t3 - i * s8 - SBI_SCRATCH_SIZE
 * The rest of the region below tp is the hart's stack (_start_warm sets
 * sp = tp). The areas are laid out by hart index, not by hart id;
 * _start_warm maps the hart id to its index before locating its area.
 * NOTE(review): the original annotation uses 8 KiB per hart with a 4 KiB
 * scratch area as its example; the real sizes come from the platform.
 */
_scratch_init:
	add	tp, t3, zero		/* start from the top of all hart stacks */
	mul	a5, s8, t1		/* index * per-hart stack size */
	sub	tp, tp, a5		/* top of this hart's stack region */
	li	a5, SBI_SCRATCH_SIZE
	sub	tp, tp, a5		/* tp = base of this hart's scratch area */

	/* Initialize scratch space */
	/* Store fw_start and fw_size in scratch space */
	la	a4, _fw_start
	la	a5, _fw_end
	mul	t0, s7, s8
	add	a5, a5, t0
	sub	a5, a5, a4		/* fw_size = image plus all hart stacks */
	REG_S	a4, SBI_SCRATCH_FW_START_OFFSET(tp)
	REG_S	a5, SBI_SCRATCH_FW_SIZE_OFFSET(tp)

	/*
	 * fw_next_arg1, fw_next_addr, fw_next_mode and fw_options are not
	 * defined in this file. Per the original annotation each firmware
	 * type (fw_jump.S, fw_dynamic.S, fw_payload.S) provides its own
	 * version; every one returns its result in a0.
	 */

	/* Store next arg1 in scratch space */
	MOV_3R	s0, a0, s1, a1, s2, a2
	call	fw_next_arg1
	REG_S	a0, SBI_SCRATCH_NEXT_ARG1_OFFSET(tp)	/* annotation: FW_PAYLOAD_FDT_ADDR for fw_payload -- TODO confirm */
	/*
	 * This restore must stay a live instruction: without it the save
	 * below would overwrite s0 with the fw_next_arg1 return value and
	 * the caller's a0 would be lost.
	 */
	MOV_3R	a0, s0, a1, s1, a2, s2

	/* Store next address in scratch space */
	MOV_3R	s0, a0, s1, a1, s2, a2
	call	fw_next_addr
	REG_S	a0, SBI_SCRATCH_NEXT_ADDR_OFFSET(tp)	/* annotation: address of the payload -- TODO confirm */
	MOV_3R	a0, s0, a1, s1, a2, s2

	/* Store next mode in scratch space */
	MOV_3R	s0, a0, s1, a1, s2, a2
	call	fw_next_mode
	REG_S	a0, SBI_SCRATCH_NEXT_MODE_OFFSET(tp)	/* annotation: PRV_S, i.e. the next stage (u-boot / linux) runs in S-mode -- TODO confirm */
	MOV_3R	a0, s0, a1, s1, a2, s2

	/* Store warm_boot address in scratch space */
	la	a4, _start_warm
	REG_S	a4, SBI_SCRATCH_WARMBOOT_ADDR_OFFSET(tp)
	/* Store platform address in scratch space */
	la	a4, platform
	REG_S	a4, SBI_SCRATCH_PLATFORM_ADDR_OFFSET(tp)
	/* Store hartid-to-scratch function address in scratch space */
	la	a4, _hartid_to_scratch
	REG_S	a4, SBI_SCRATCH_HARTID_TO_SCRATCH_OFFSET(tp)
	/* Store trap-exit function address in scratch space */
	la	a4, _trap_exit
	REG_S	a4, SBI_SCRATCH_TRAP_EXIT_OFFSET(tp)
	/* Clear tmp0 in scratch space */
	REG_S	zero, SBI_SCRATCH_TMP0_OFFSET(tp)

	/* Store firmware options in scratch space */
	MOV_3R	s0, a0, s1, a1, s2, a2
#ifdef FW_OPTIONS
	li	a0, FW_OPTIONS
#else
	call	fw_options
#endif
	REG_S	a0, SBI_SCRATCH_OPTIONS_OFFSET(tp)
	MOV_3R	a0, s0, a1, s1, a2, s2

	/* Move to next scratch space */
	add	t1, t1, t2		/* next hart index (t2 holds 1) */
	blt	t1, s7, _scratch_init	/* loop until every index below the hart count is done */
/*
 * Relocate Flatened Device Tree (FDT)
 * source FDT address = previous arg1
 * destination FDT address = next arg1
 *
 * Note: We will preserve a0 and a1 passed by
 * previous booting stage.
 */
beqz a1, _fdt_reloc_done /* a1 is the source FDT address (fw_fdt_bin when FW_FDT_PATH is set); zero presumably means there is no FDT to move */
/* Mask values in a3 and a4 */
li a3, ~(__SIZEOF_POINTER__ - 1)
li a4, 0xff
/* t1 = destination FDT start address */
MOV_3R s0, a0, s1, a1, s2, a2
call fw_next_arg1 /* defined outside this file; returns the next-stage arg1 in a0 */
add t1, a0, zero /* t1 = address the FDT has to be moved to */
MOV_3R a0, s0, a1, s1, a2, s2
beqz t1, _fdt_reloc_done /* destination is zero: skip the copy (NOTE(review): what zero means depends on fw_next_arg1) */
beq t1, a1, _fdt_reloc_done /* source and destination are the same: nothing to copy */
and t1, t1, a3 /* align the destination down to pointer size */
/* t0 = source FDT start address */
add t0, a1, zero
and t0, t0, a3
/* t2 = source FDT size in big-endian */
#if __riscv_xlen == 64
lwu t2, 4(t0)
#else
lw t2, 4(t0)
#endif
/* The 32-bit word read from offset 4 of the FDT is big-endian; the steps below byte-swap it into t2 */
/* t3 = bit[15:8] of FDT size */
add t3, t2, zero
srli t3, t3, 16
and t3, t3, a4
slli t3, t3, 8
/* t4 = bit[23:16] of FDT size */
add t4, t2, zero
srli t4, t4, 8
and t4, t4, a4
slli t4, t4, 16
/* t5 = bit[31:24] of FDT size */
add t5, t2, zero
and t5, t5, a4
slli t5, t5, 24
/* t2 = bit[7:0] of FDT size */
srli t2, t2, 24
and t2, t2, a4
/* t2 = FDT size in little-endian */
or t2, t2, t3
or t2, t2, t4
or t2, t2, t5
/* t2 = destination FDT end address */
add t2, t1, t2 /* destination start (t1) + size (t2) */
/* FDT copy loop */
ble t2, t1, _fdt_reloc_done
_fdt_reloc_again:
REG_L t3, 0(t0) /* read one pointer-sized word from the source FDT (t0, derived from a1) ... */
REG_S t3, 0(t1) /* ... and write it to the destination (t1) */
add t0, t0, __SIZEOF_POINTER__
add t1, t1, __SIZEOF_POINTER__
blt t1, t2, _fdt_reloc_again
_fdt_reloc_done:
/* mark boot hart done so that the waiting harts may proceed */
li t0, BOOT_STATUS_BOOT_HART_DONE
la t1, _boot_status
REG_S t0, 0(t1)
fence rw, rw
j _start_warm
/* waiting for boot hart to be done (_boot_status == 2) */
_wait_for_boot_hart: /* the non-boot harts spin here until _boot_status == BOOT_STATUS_BOOT_HART_DONE, then fall through into _start_warm */
li t0, BOOT_STATUS_BOOT_HART_DONE
la t1, _boot_status
REG_L t1, 0(t1) /* t1 = current value of _boot_status */
/* Reduce the bus traffic so that boot hart may proceed faster */
nop
nop
nop
bne t0, t1, _wait_for_boot_hart
/*
 * _start_warm - per-hart initialisation, run by every hart.
 *
 * The boot hart jumps here after its one-time setup; the other harts fall
 * in from _wait_for_boot_hart. The address is also stored in each scratch
 * area as the warm-boot entry. The hart locates its scratch area, points
 * mscratch, tp and sp at it, installs the trap handler and calls sbi_init
 * with the scratch address in a0. sbi_init is not expected to return.
 */
_start_warm:
	/* Reset all registers for non-boot HARTs */
	li	ra, 0
	call	_reset_regs

	/* Disable and clear all interrupts */
	csrw	CSR_MIE, zero
	csrw	CSR_MIP, zero

	/* Find HART count and HART stack size */
	/* All three values are read from the platform object. */
	la	a4, platform
#if __riscv_xlen == 64
	lwu	s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4)		/* s7 = number of harts */
	lwu	s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4)	/* s8 = per-hart stack size */
#else
	lw	s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4)
	lw	s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4)
#endif
	REG_L	s9, SBI_PLATFORM_HART_INDEX2ID_OFFSET(a4)	/* s9 = index-to-hartid table, or 0 */

	/* Find HART id */
	csrr	s6, CSR_MHARTID

	/* Find HART index */
	/*
	 * With no table (s9 == 0) the hart id is used as the index directly.
	 * Otherwise walk the table of 32-bit hart ids until the entry equal
	 * to this hart's id is found; its position is the index.
	 */
	beqz	s9, 3f
	li	a4, 0			/* candidate index */
1:
#if __riscv_xlen == 64
	lwu	a5, (s9)		/* hart id stored for the candidate index */
#else
	lw	a5, (s9)
#endif
	beq	a5, s6, 2f		/* match: a4 is this hart's index */
	add	s9, s9, 4		/* next table entry */
	add	a4, a4, 1
	blt	a4, s7, 1b
	li	a4, -1			/* table exhausted: mark "not found" */
2:	add	s6, a4, zero		/* s6 = hart index from here on */
	/*
	 * Unsigned compare on purpose: the "not found" marker -1 (and any
	 * hart id with the top bit set) must count as out of range. A signed
	 * bge would let -1 through and the hart would continue with a
	 * scratch pointer outside the area reserved by the boot hart.
	 */
3:	bgeu	s6, s7, _start_hang

	/* Find the scratch space based on HART index */
	/* Same formula as the boot hart's _scratch_init loop:
	 * tp = _fw_end + count * stack size - index * stack size - SBI_SCRATCH_SIZE
	 */
	la	tp, _fw_end
	mul	a5, s7, s8
	add	tp, tp, a5
	mul	a5, s8, s6
	sub	tp, tp, a5
	li	a5, SBI_SCRATCH_SIZE
	sub	tp, tp, a5

	/* update the mscratch */
	csrw	CSR_MSCRATCH, tp	/* mscratch = base of this hart's scratch area */

	/* Setup stack */
	add	sp, tp, zero		/* the stack grows down from the scratch base */

	/* Setup trap handler */
	la	a4, _trap_handler
#if __riscv_xlen == 32
	/* On RV32, use the variant that also saves mstatusH when misa reports the H extension */
	csrr	a5, CSR_MISA
	srli	a5, a5, ('H' - 'A')
	andi	a5, a5, 0x1
	beq	a5, zero, _skip_trap_handler_rv32_hyp
	la	a4, _trap_handler_rv32_hyp
_skip_trap_handler_rv32_hyp:
#endif
	csrw	CSR_MTVEC, a4

#if __riscv_xlen == 32
	/* Override trap exit for H-extension */
	csrr	a5, CSR_MISA
	srli	a5, a5, ('H' - 'A')
	andi	a5, a5, 0x1
	beq	a5, zero, _skip_trap_exit_rv32_hyp
	la	a4, _trap_exit_rv32_hyp
	csrr	a5, CSR_MSCRATCH
	REG_S	a4, SBI_SCRATCH_TRAP_EXIT_OFFSET(a5)
_skip_trap_exit_rv32_hyp:
#endif

	/* Initialize SBI runtime */
	csrr	a0, CSR_MSCRATCH	/* a0 = scratch address, the argument of sbi_init */
	call	sbi_init		/* assembly setup ends here; annotation: sbi_init lives in lib/sbi/sbi_init.c */

	/* We don't expect to reach here hence just hang */
	j	_start_hang
/*
 * Boot-time state words, one RISCV_PTR each; in effect the global
 * variables of this file. They are emitted in the section that is
 * current at this point (the .entry section opened before _start).
 */
.align 3
/* Lottery counter: bumped with amoadd.w; the hart that reads back 0 does the relocation. */
_relocate_lottery:
RISCV_PTR 0
/* Boot progress: 0, then BOOT_STATUS_RELOCATE_DONE, then BOOT_STATUS_BOOT_HART_DONE. */
_boot_status:
RISCV_PTR 0
/* Initialised to _fw_start; overwritten at run time with the actual address of _start. */
_load_start:
RISCV_PTR _fw_start
/* Link-time start address of the image. */
_link_start:
RISCV_PTR _fw_start
/* Link-time end address of the region that gets relocated. */
_link_end:
RISCV_PTR _fw_reloc_end
.section .entry, "ax", %progbits
.align 3
.globl _hartid_to_scratch
_hartid_to_scratch:
/*
* a0 -> HART ID (passed by caller)
* a1 -> HART Index (passed by caller)
* t0 -> HART Stack Size
* t1 -> HART Stack End
* t2 -> Temporary
*/
la t2, platform
#if __riscv_xlen == 64
lwu t0, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(t2)
lwu t2, SBI_PLATFORM_HART_COUNT_OFFSET(t2)
#else
lw t0, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(t2)
lw t2, SBI_PLATFORM_HART_COUNT_OFFSET(t2)
#endif
sub t2, t2, a1
mul t2, t2, t0
la t1, _fw_end
add t1, t1, t2
li t2, SBI_SCRATCH_SIZE
sub a0, t1, t2
ret
.section .entry, "ax", %progbits
.align 3
.globl _start_hang
_start_hang:
wfi
j _start_hang
/*
 * fw_platform_init - default platform hook.
 * Declared weak so that a platform can replace it with its own version.
 * This default does nothing except hand back arg1: a0 = a1.
 */
	.section .entry, "ax", %progbits
	.align 3
	.globl fw_platform_init
	.weak fw_platform_init
fw_platform_init:
	mv	a0, a1
	ret
/*
 * TRAP_SAVE_AND_SETUP_SP_T0 - first step of trap entry.
 * Selects the exception stack, stores the interrupted SP and T0 in the
 * trap register frame on that stack and leaves SP pointing at the frame.
 * TP and MSCRATCH are swapped at the start and swapped back at the end,
 * so TP holds the scratch pointer only inside this macro.
 */
.macro TRAP_SAVE_AND_SETUP_SP_T0
/* Swap TP and MSCRATCH */
csrrw tp, CSR_MSCRATCH, tp
/* Save T0 in scratch space */
REG_S t0, SBI_SCRATCH_TMP0_OFFSET(tp)
/*
 * Set T0 to appropriate exception stack
 *
 * Came_From_M_Mode = ((MSTATUS.MPP < PRV_M) ? 1 : 0) - 1;
 * Exception_Stack = TP ^ (Came_From_M_Mode & (SP ^ TP))
 *
 * Came_From_M_Mode = 0 ==> Exception_Stack = TP
 * Came_From_M_Mode = -1 ==> Exception_Stack = SP
 */
csrr t0, CSR_MSTATUS
srl t0, t0, MSTATUS_MPP_SHIFT
and t0, t0, PRV_M
slti t0, t0, PRV_M
add t0, t0, -1 /* t0 = Came_From_M_Mode (0 or -1) */
xor sp, sp, tp /* sp = SP ^ TP, temporarily */
and t0, t0, sp
xor sp, sp, tp /* sp is back to its original value */
xor t0, tp, t0 /* t0 = Exception_Stack */
/* Save original SP on exception stack */
REG_S sp, (SBI_TRAP_REGS_OFFSET(sp) - SBI_TRAP_REGS_SIZE)(t0)
/* Set SP to exception stack and make room for trap registers */
add sp, t0, -(SBI_TRAP_REGS_SIZE)
/* Restore T0 from scratch space */
REG_L t0, SBI_SCRATCH_TMP0_OFFSET(tp)
/* Save T0 on stack */
REG_S t0, SBI_TRAP_REGS_OFFSET(t0)(sp)
/* Swap TP and MSCRATCH */
csrrw tp, CSR_MSCRATCH, tp
.endm
/*
 * TRAP_SAVE_MEPC_MSTATUS - store MEPC and MSTATUS in the trap register
 * frame at SP, using T0 as a temporary.
 * have_mstatush: when non-zero MSTATUSH is stored as well; when zero the
 * mstatusH slot of the frame is cleared instead.
 */
.macro TRAP_SAVE_MEPC_MSTATUS have_mstatush
/* Save MEPC and MSTATUS CSRs */
csrr t0, CSR_MEPC
REG_S t0, SBI_TRAP_REGS_OFFSET(mepc)(sp)
csrr t0, CSR_MSTATUS
REG_S t0, SBI_TRAP_REGS_OFFSET(mstatus)(sp)
.if \have_mstatush
csrr t0, CSR_MSTATUSH
REG_S t0, SBI_TRAP_REGS_OFFSET(mstatusH)(sp)
.else
REG_S zero, SBI_TRAP_REGS_OFFSET(mstatusH)(sp)
.endif
.endm
/*
 * TRAP_SAVE_GENERAL_REGS_EXCEPT_SP_T0 - store every general register
 * other than SP and T0 in its slot of the trap register frame at SP.
 * The zero register's slot is written too (with zero).
 */
.macro TRAP_SAVE_GENERAL_REGS_EXCEPT_SP_T0
/* Save all general registers except SP and T0 */
REG_S zero, SBI_TRAP_REGS_OFFSET(zero)(sp)
REG_S ra, SBI_TRAP_REGS_OFFSET(ra)(sp)
REG_S gp, SBI_TRAP_REGS_OFFSET(gp)(sp)
REG_S tp, SBI_TRAP_REGS_OFFSET(tp)(sp)
REG_S t1, SBI_TRAP_REGS_OFFSET(t1)(sp)
REG_S t2, SBI_TRAP_REGS_OFFSET(t2)(sp)
REG_S s0, SBI_TRAP_REGS_OFFSET(s0)(sp)
REG_S s1, SBI_TRAP_REGS_OFFSET(s1)(sp)
REG_S a0, SBI_TRAP_REGS_OFFSET(a0)(sp)
REG_S a1, SBI_TRAP_REGS_OFFSET(a1)(sp)
REG_S a2, SBI_TRAP_REGS_OFFSET(a2)(sp)
REG_S a3, SBI_TRAP_REGS_OFFSET(a3)(sp)
REG_S a4, SBI_TRAP_REGS_OFFSET(a4)(sp)
REG_S a5, SBI_TRAP_REGS_OFFSET(a5)(sp)
REG_S a6, SBI_TRAP_REGS_OFFSET(a6)(sp)
REG_S a7, SBI_TRAP_REGS_OFFSET(a7)(sp)
REG_S s2, SBI_TRAP_REGS_OFFSET(s2)(sp)
REG_S s3, SBI_TRAP_REGS_OFFSET(s3)(sp)
REG_S s4, SBI_TRAP_REGS_OFFSET(s4)(sp)
REG_S s5, SBI_TRAP_REGS_OFFSET(s5)(sp)
REG_S s6, SBI_TRAP_REGS_OFFSET(s6)(sp)
REG_S s7, SBI_TRAP_REGS_OFFSET(s7)(sp)
REG_S s8, SBI_TRAP_REGS_OFFSET(s8)(sp)
REG_S s9, SBI_TRAP_REGS_OFFSET(s9)(sp)
REG_S s10, SBI_TRAP_REGS_OFFSET(s10)(sp)
REG_S s11, SBI_TRAP_REGS_OFFSET(s11)(sp)
REG_S t3, SBI_TRAP_REGS_OFFSET(t3)(sp)
REG_S t4, SBI_TRAP_REGS_OFFSET(t4)(sp)
REG_S t5, SBI_TRAP_REGS_OFFSET(t5)(sp)
REG_S t6, SBI_TRAP_REGS_OFFSET(t6)(sp)
.endm
/*
 * TRAP_CALL_C_ROUTINE - hand the trap over to C code.
 * sbi_trap_handler is called with a0 = SP, i.e. the address of the trap
 * register frame that the save macros have just filled in.
 */
.macro TRAP_CALL_C_ROUTINE
	mv	a0, sp
	call	sbi_trap_handler
.endm
/*
 * TRAP_RESTORE_GENERAL_REGS_EXCEPT_SP_T0 - reload every general register
 * other than SP and T0 from the trap register frame at SP.
 */
.macro TRAP_RESTORE_GENERAL_REGS_EXCEPT_SP_T0
/* Restore all general registers except SP and T0 */
REG_L ra, SBI_TRAP_REGS_OFFSET(ra)(sp)
REG_L gp, SBI_TRAP_REGS_OFFSET(gp)(sp)
REG_L tp, SBI_TRAP_REGS_OFFSET(tp)(sp)
REG_L t1, SBI_TRAP_REGS_OFFSET(t1)(sp)
REG_L t2, SBI_TRAP_REGS_OFFSET(t2)(sp)
REG_L s0, SBI_TRAP_REGS_OFFSET(s0)(sp)
REG_L s1, SBI_TRAP_REGS_OFFSET(s1)(sp)
REG_L a0, SBI_TRAP_REGS_OFFSET(a0)(sp)
REG_L a1, SBI_TRAP_REGS_OFFSET(a1)(sp)
REG_L a2, SBI_TRAP_REGS_OFFSET(a2)(sp)
REG_L a3, SBI_TRAP_REGS_OFFSET(a3)(sp)
REG_L a4, SBI_TRAP_REGS_OFFSET(a4)(sp)
REG_L a5, SBI_TRAP_REGS_OFFSET(a5)(sp)
REG_L a6, SBI_TRAP_REGS_OFFSET(a6)(sp)
REG_L a7, SBI_TRAP_REGS_OFFSET(a7)(sp)
REG_L s2, SBI_TRAP_REGS_OFFSET(s2)(sp)
REG_L s3, SBI_TRAP_REGS_OFFSET(s3)(sp)
REG_L s4, SBI_TRAP_REGS_OFFSET(s4)(sp)
REG_L s5, SBI_TRAP_REGS_OFFSET(s5)(sp)
REG_L s6, SBI_TRAP_REGS_OFFSET(s6)(sp)
REG_L s7, SBI_TRAP_REGS_OFFSET(s7)(sp)
REG_L s8, SBI_TRAP_REGS_OFFSET(s8)(sp)
REG_L s9, SBI_TRAP_REGS_OFFSET(s9)(sp)
REG_L s10, SBI_TRAP_REGS_OFFSET(s10)(sp)
REG_L s11, SBI_TRAP_REGS_OFFSET(s11)(sp)
REG_L t3, SBI_TRAP_REGS_OFFSET(t3)(sp)
REG_L t4, SBI_TRAP_REGS_OFFSET(t4)(sp)
REG_L t5, SBI_TRAP_REGS_OFFSET(t5)(sp)
REG_L t6, SBI_TRAP_REGS_OFFSET(t6)(sp)
.endm
/*
 * TRAP_RESTORE_MEPC_MSTATUS - write MEPC and MSTATUS back from the trap
 * register frame at SP, using T0 as a temporary.
 * have_mstatush: when non-zero MSTATUSH is written back as well.
 */
.macro TRAP_RESTORE_MEPC_MSTATUS have_mstatush
/* Restore MEPC and MSTATUS CSRs */
REG_L t0, SBI_TRAP_REGS_OFFSET(mepc)(sp)
csrw CSR_MEPC, t0
REG_L t0, SBI_TRAP_REGS_OFFSET(mstatus)(sp)
csrw CSR_MSTATUS, t0
.if \have_mstatush
REG_L t0, SBI_TRAP_REGS_OFFSET(mstatusH)(sp)
csrw CSR_MSTATUSH, t0
.endif
.endm
/*
 * TRAP_RESTORE_SP_T0 - last step of trap exit: reload T0 and then SP
 * from the trap register frame. SP goes last because it is the base
 * register of both loads.
 */
.macro TRAP_RESTORE_SP_T0
/* Restore T0 */
REG_L t0, SBI_TRAP_REGS_OFFSET(t0)(sp)
/* Restore SP */
REG_L sp, SBI_TRAP_REGS_OFFSET(sp)(sp)
.endm
.section .entry, "ax", %progbits
.align 3
.globl _trap_handler
/*
 * _trap_handler - trap entry point that _start_warm installs in MTVEC.
 * Saves the interrupted context into a trap register frame, calls
 * sbi_trap_handler with the frame address, restores the context from the
 * frame and returns with mret. MSTATUSH is not touched (have_mstatush = 0).
 */
_trap_handler:
TRAP_SAVE_AND_SETUP_SP_T0
TRAP_SAVE_MEPC_MSTATUS 0
TRAP_SAVE_GENERAL_REGS_EXCEPT_SP_T0
TRAP_CALL_C_ROUTINE /* C-level handler for the interrupt / exception */
TRAP_RESTORE_GENERAL_REGS_EXCEPT_SP_T0
TRAP_RESTORE_MEPC_MSTATUS 0
TRAP_RESTORE_SP_T0
mret
.section .entry, "ax", %progbits
.align 3
.globl _trap_exit
/*
 * _trap_exit - leave M-mode through a prepared trap register frame.
 * a0 holds the frame address: SP is pointed at it, all registers, MEPC
 * and MSTATUS are loaded from it and mret is executed. The address of
 * this routine is stored in every scratch area by the boot hart.
 */
_trap_exit:
add sp, a0, zero
TRAP_RESTORE_GENERAL_REGS_EXCEPT_SP_T0
TRAP_RESTORE_MEPC_MSTATUS 0
TRAP_RESTORE_SP_T0
mret
#if __riscv_xlen == 32
/*
 * RV32 variants of _trap_handler and _trap_exit that also save and
 * restore MSTATUSH (have_mstatush = 1). _start_warm selects them when
 * the 'H' bit is set in MISA.
 */
.section .entry, "ax", %progbits
.align 3
.globl _trap_handler_rv32_hyp
_trap_handler_rv32_hyp:
TRAP_SAVE_AND_SETUP_SP_T0
TRAP_SAVE_MEPC_MSTATUS 1
TRAP_SAVE_GENERAL_REGS_EXCEPT_SP_T0
TRAP_CALL_C_ROUTINE
TRAP_RESTORE_GENERAL_REGS_EXCEPT_SP_T0
TRAP_RESTORE_MEPC_MSTATUS 1
TRAP_RESTORE_SP_T0
mret
.section .entry, "ax", %progbits
.align 3
.globl _trap_exit_rv32_hyp
_trap_exit_rv32_hyp:
add sp, a0, zero /* a0 = address of the trap register frame to return through */
TRAP_RESTORE_GENERAL_REGS_EXCEPT_SP_T0
TRAP_RESTORE_MEPC_MSTATUS 1
TRAP_RESTORE_SP_T0
mret
#endif
.section .entry, "ax", %progbits
.align 3
.globl _reset_regs
_reset_regs:
/* flush the instruction cache */
fence.i
/* Reset all registers except ra, a0, a1 and a2 */
li sp, 0
li gp, 0
li tp, 0
li t0, 0
li t1, 0
li t2, 0
li s0, 0
li s1, 0
li a3, 0
li a4, 0
li a5, 0
li a6, 0
li a7, 0
li s2, 0
li s3, 0
li s4, 0
li s5, 0
li s6, 0
li s7, 0
li s8, 0
li s9, 0
li s10, 0
li s11, 0
li t3, 0
li t4, 0
li t5, 0
li t6, 0
csrw CSR_MSCRATCH, 0
ret
#ifdef FW_FDT_PATH
/*
 * Built-in device tree: the file named by FW_FDT_PATH is embedded in
 * .rodata under the symbol fw_fdt_bin, which the boot path loads into a1
 * in place of the previous stage's arg1. When FW_FDT_PADDING is defined,
 * that many zero bytes are appended after the blob.
 */
.section .rodata
.align 4
.globl fw_fdt_bin
fw_fdt_bin:
.incbin FW_FDT_PATH
#ifdef FW_FDT_PADDING
.fill FW_FDT_PADDING, 1, 0
#endif
#endif