OpenSBI简介-4

管翼

2023-12-01

本文以OpenSBI 0.9版本为参考进行讲解

现在分析sbi_init源码，进入sbi_init时，scratch是唯一的参数。在进入sbi_init前，每个cpu会把_trap_handler的地址设置到mtvec里面。

sbi_init函数
/**
 * sbi_init() - Per-HART entry point into the C-level initialization.
 * @scratch: scratch area of the calling HART (the only argument).
 *
 * The HART is parked in sbi_hart_hang() when its id is outside the hart
 * mask, when the platform reports it as invalid, or when
 * scratch->next_mode is none of PRV_M / PRV_S / PRV_U.
 *
 * Among the HARTs that support the requested next privilege mode, the
 * first one to swap coldboot_lottery from 0 to 1 becomes the coldboot
 * ("boot") HART and runs init_coldboot(); it is not necessarily HART 0.
 * Every other HART runs init_warmboot().
 */
void __noreturn sbi_init(struct sbi_scratch *scratch)
{
	u32 hartid = current_hartid();
	const struct sbi_platform *plat = sbi_platform_ptr(scratch);
	bool next_mode_supported = FALSE;
	bool coldboot;

	/* A hartid beyond the mask, or one the platform rejects, never boots. */
	if ((hartid >= SBI_HARTMASK_MAX_BITS) ||
	    sbi_platform_hart_invalid(plat, hartid))
		sbi_hart_hang();

	/* Can this HART actually enter the privilege mode of the next stage? */
	if (scratch->next_mode == PRV_M) {
		next_mode_supported = TRUE;
	} else if (scratch->next_mode == PRV_S) {
		/* This is the path the walkthrough notes for "jump" firmware. */
		if (misa_extension('S'))
			next_mode_supported = TRUE;
	} else if (scratch->next_mode == PRV_U) {
		if (misa_extension('U'))
			next_mode_supported = TRUE;
	} else {
		sbi_hart_hang();
	}

	/*
	 * Only a HART supporting the privilege mode named in
	 * scratch->next_mode may become the coldboot HART, because that
	 * HART jumps straight into the next booting stage.
	 *
	 * The winner is picked by lottery: atomic_xchg() stores 1 and
	 * hands back the previous value, so only the first caller sees 0.
	 * The exchange is skipped entirely for HARTs that lack the mode.
	 */
	coldboot = (next_mode_supported &&
		    atomic_xchg(&coldboot_lottery, 1) == 0) ? TRUE : FALSE;

	if (!coldboot)
		/* Secondary HART: sets up only its own per-HART state. */
		init_warmboot(scratch, hartid);
	else
		/* Boot HART: also performs the global, one-time setup. */
		init_coldboot(scratch, hartid);
}

init_coldboot(scratch, hartid);
/**
 * init_coldboot() - Global one-time initialization run by the boot HART.
 * @scratch: scratch area of the boot HART.
 * @hartid:  id of the boot HART.
 *
 * Every step that reports an error parks the HART in sbi_hart_hang();
 * steps that run after the console is initialized print a diagnostic
 * first. The function does not return: it finishes by calling
 * sbi_hart_switch_mode() to enter the next boot stage.
 */
static void __noreturn init_coldboot(struct sbi_scratch *scratch, u32 hartid)
{
	int rc;
	unsigned long *init_count;
	const struct sbi_platform *plat = sbi_platform_ptr(scratch);

	/* Note: This has to be first thing in coldboot init sequence */
	/*
	 * Builds the hartid -> scratch table and records the last hartid
	 * that has a scratch area (see section 2.1).
	 */
	rc = sbi_scratch_init(scratch);
	if (rc)
		sbi_hart_hang();

	/* Note: This has to be second thing in coldboot init sequence */
	/*
	 * Domain setup. According to the walkthrough this registers the
	 * memory regions (the firmware's own memory plus a catch-all
	 * region) that sbi_hart_pmp_configure() later protects.
	 */
	rc = sbi_domain_init(scratch, hartid);
	if (rc)
		sbi_hart_hang();

	/*
	 * Reserve one pointer-sized slot in the scratch extra space for the
	 * per-HART init counter (see section 2.2). The allocator zeroes the
	 * slot; init_warmboot() hangs if this offset is still 0.
	 */
	init_count_offset = sbi_scratch_alloc_offset(__SIZEOF_POINTER__,
						     "INIT_COUNT");
	if (!init_count_offset)
		sbi_hart_hang();

	/*
	 * HSM (hart state management) setup (see section 2.3): allocates
	 * the per-HART state data, marks this HART SBI_HART_STARTING and
	 * every other HART SBI_HART_STOPPED.
	 */
	rc = sbi_hsm_init(scratch, hartid, TRUE);
	if (rc)
		sbi_hart_hang();

	/*
	 * Platform-specific early hook. Per the walkthrough this calls the
	 * early_init callback supplied by the platform's platform.c.
	 */
	rc = sbi_platform_early_init(plat, TRUE);
	if (rc)
		sbi_hart_hang();

	/*
	 * Detects HART features, initializes floating-point state and
	 * delegates part of the traps to S-mode (see section 2.4).
	 */
	rc = sbi_hart_init(scratch, TRUE);
	if (rc)
		sbi_hart_hang();

	/* Console setup; sbi_printf() is used for diagnostics from here on. */
	rc = sbi_console_init(scratch);
	if (rc)
		sbi_hart_hang();

	/* First output: the OpenSBI version banner and logo. */
	sbi_boot_print_banner(scratch);

	/*
	 * Interrupt controller setup. Per the walkthrough, the boot HART
	 * picks the irqchip driver via the FDT, and each HART then runs
	 * that driver's warm_init.
	 */
	rc = sbi_platform_irqchip_init(plat, TRUE);
	if (rc) {
		sbi_printf("%s: platform irqchip init failed (error %d)\n",
			   __func__, rc);
		sbi_hart_hang();
	}

	/*
	 * Inter-processor (software) interrupt setup. Per the walkthrough
	 * this initializes the global ipi_ops_array whose process callbacks
	 * handle incoming IPIs.
	 */
	rc = sbi_ipi_init(scratch, TRUE);
	if (rc) {
		sbi_printf("%s: ipi init failed (error %d)\n", __func__, rc);
		sbi_hart_hang();
	}

	/*
	 * Remote-fence support. Per the walkthrough the boot HART
	 * allocates tlb_sync, tlb_fifo and tlb_fifo_mem for every HART and
	 * registers the TLB IPI operations.
	 */
	rc = sbi_tlb_init(scratch, TRUE);
	if (rc) {
		sbi_printf("%s: tlb init failed (error %d)\n", __func__, rc);
		sbi_hart_hang();
	}

	/*
	 * Timer setup through the platform's (or the generic FDT) timer
	 * driver. Per the walkthrough, the CLINT warm init ends by setting
	 * the compare value to -1.
	 */
	rc = sbi_timer_init(scratch, TRUE);
	if (rc) {
		sbi_printf("%s: timer init failed (error %d)\n", __func__, rc);
		sbi_hart_hang();
	}

	/*
	 * Registers the SBI extension handlers (time, rfence, ipi, base,
	 * hsm, srst, ...). Per the walkthrough they are kept in a list that
	 * is searched by the extension id a supervisor passes in a7.
	 */
	rc = sbi_ecall_init();
	if (rc) {
		sbi_printf("%s: ecall init failed (error %d)\n", __func__, rc);
		sbi_hart_hang();
	}

	/* Prints platform, firmware and SBI details. */
	sbi_boot_print_general(scratch);

	/*
	 * Note: Finalize domains after HSM initialization so that we
	 * can startup non-root domains.
	 * Note: Finalize domains before HART PMP configuration so
	 * that we use correct domain for configuring PMP.
	 */
	rc = sbi_domain_finalize(scratch, hartid);
	if (rc) {
		sbi_printf("%s: domain finalize failed (error %d)\n",
			   __func__, rc);
		sbi_hart_hang();
	}

	/* Prints the domain details. */
	sbi_boot_print_domains(scratch);

	/*
	 * Programs PMP so the memory regions registered during domain
	 * setup get their access permissions enforced.
	 */
	rc = sbi_hart_pmp_configure(scratch);
	if (rc) {
		sbi_printf("%s: PMP configure failed (error %d)\n",
			   __func__, rc);
		sbi_hart_hang();
	}

	/*
	 * Note: Platform final initialization should be last so that
	 * it sees correct domain assignment and PMP configuration.
	 *
	 * Per the walkthrough, the platform's final_init mostly checks and
	 * fixes up the FDT on the boot HART, e.g. disabling CPU nodes that
	 * OpenSBI found unusable.
	 */
	rc = sbi_platform_final_init(plat, TRUE);
	if (rc) {
		sbi_printf("%s: platform final init failed (error %d)\n",
			   __func__, rc);
		sbi_hart_hang();
	}

	/* Prints the boot HART details. */
	sbi_boot_print_hart(scratch, hartid);

	/*
	 * Releases the secondary HARTs waiting in wait_for_coldboot(). Per
	 * the walkthrough they then block again in sbi_hsm_hart_wait()
	 * until they are explicitly started.
	 */
	wake_coldboot_harts(scratch, hartid);

	/* Count this HART's completed initialization. */
	init_count = sbi_scratch_offset_ptr(scratch, init_count_offset);
	(*init_count)++;

	/* HSM state check before leaving the firmware. */
	sbi_hsm_prepare_next_jump(scratch, hartid);

	/*
	 * Hand over to the next boot stage (see section 2.5): the hartid
	 * is passed as arg0 (a0) and scratch->next_arg1 as arg1 (a1, the
	 * FDT address per the walkthrough); execution resumes at
	 * scratch->next_addr in scratch->next_mode.
	 */
	sbi_hart_switch_mode(hartid, scratch->next_arg1, scratch->next_addr,
			     scratch->next_mode, FALSE);
}

2.1
/*
* 该函数仅有冷启动的cpu执行，枚举可能的hartid, 有hart_index2id就从映射表里返回，没有就直接返回hartid。
* 根据前面的内存布局建立了hartid -> struct scratch的映射，记录最后一个合法的hartid为last_hartid_having_scratch，
* 以后每次枚举hartid枚举到last_hartid_having_scratch即可，也就是记录了最大的core的index，从0开始。
*/
int sbi_scratch_init(struct sbi_scratch *scratch)
{
u32 i;
const struct sbi_platform *plat = sbi_platform_ptr(scratch);

	for (i = 0; i < SBI_HARTMASK_MAX_BITS; i++) {
		if (sbi_platform_hart_invalid(plat, i))
			continue;
		hartid_to_scratch_table[i] =
			((hartid2scratch)scratch->hartid_to_scratch)(i,
					sbi_platform_hart_index(plat, i));
		if (hartid_to_scratch_table[i])
			last_hartid_having_scratch = i;
	}

	return 0;
}

2.2
/*
 * sbi_scratch_alloc_offset() - Reserve bytes in every HART's scratch area.
 * @size:  number of bytes wanted; rounded up to a multiple of the pointer
 *         size.
 * @owner: name of the requester (not used by this function body).
 *
 * A single offset, extra_offset, marks the start of the unallocated part
 * of the scratch area. Each allocation advances it by the rounded size
 * under extra_lock; nothing is ever freed. The same offset applies to
 * every HART's scratch, so one variable is enough.
 *
 * Returns the offset of the new region, or 0 when @size is 0 or the
 * request does not fit within SBI_SCRATCH_SIZE. On success the region is
 * zeroed in the scratch area of every HART that has one.
 */
unsigned long sbi_scratch_alloc_offset(unsigned long size, const char *owner)
{
	u32 i;
	void *ptr;
	unsigned long ret = 0;
	struct sbi_scratch *rscratch;

	/*
	 * We have a simple brain-dead allocator which never expects
	 * anything to be free-ed hence it keeps incrementing the
	 * next allocation offset until it runs-out of space.
	 *
	 * In future, we will have more sophisticated allocator which
	 * will allow us to re-claim free-ed space.
	 */

	if (!size)
		return 0;

	/* Round up so every allocation stays pointer-aligned. */
	if (size & (__SIZEOF_POINTER__ - 1))
		size = (size & ~(__SIZEOF_POINTER__ - 1)) + __SIZEOF_POINTER__;

	spin_lock(&extra_lock);

	if (SBI_SCRATCH_SIZE < (extra_offset + size))
		goto done;

	ret = extra_offset;
	extra_offset += size;

done:
	spin_unlock(&extra_lock);

	if (ret) {
		/*
		 * sbi_scratch_last_hartid() is the last hartid that has a
		 * scratch area, so the bound is inclusive; a strict '<'
		 * would leave that HART's region uncleared.
		 */
		for (i = 0; i <= sbi_scratch_last_hartid(); i++) {
			rscratch = sbi_hartid_to_scratch(i);
			if (!rscratch)
				continue;
			ptr = sbi_scratch_offset_ptr(rscratch, ret);
			sbi_memset(ptr, 0, size);
		}
	}

	return ret;
}

2.3
/*
 * sbi_hsm_init() - Initialize hart state management for the calling HART.
 * @scratch:   scratch area of the calling HART.
 * @hartid:    id of the calling HART.
 * @cold_boot: TRUE on the boot HART, FALSE on every other HART.
 *
 * Cold boot: allocates the per-HART state data in the scratch area, then
 * marks the calling HART SBI_HART_STARTING and every other HART that has a
 * scratch area SBI_HART_STOPPED. Returns SBI_ENOMEM when the allocation
 * fails.
 *
 * Warm boot: calls sbi_hsm_hart_wait(). Per the walkthrough, the HART
 * idles there until its state becomes SBI_HART_STARTING.
 *
 * Returns 0 on success.
 */
int sbi_hsm_init(struct sbi_scratch *scratch, u32 hartid, bool cold_boot)
{
	u32 id;
	struct sbi_scratch *other;
	struct sbi_hsm_data *hdata;

	if (!cold_boot) {
		sbi_hsm_hart_wait(scratch, hartid);
		return 0;
	}

	/* One allocation reserves the same offset in every HART's scratch. */
	hart_data_offset = sbi_scratch_alloc_offset(sizeof(*hdata),
						    "HART_DATA");
	if (!hart_data_offset)
		return SBI_ENOMEM;

	/* Initialize hart state data for every hart */
	for (id = 0; id <= sbi_scratch_last_hartid(); id++) {
		other = sbi_hartid_to_scratch(id);
		if (other) {
			hdata = sbi_scratch_offset_ptr(other,
						       hart_data_offset);
			/* Only the calling (boot) HART starts out running. */
			if (id == hartid) {
				ATOMIC_INIT(&hdata->state, SBI_HART_STARTING);
			} else {
				ATOMIC_INIT(&hdata->state, SBI_HART_STOPPED);
			}
		}
	}

	return 0;
}

2.4
/*
 * sbi_hart_init() - Per-HART feature detection and CSR setup.
 * @scratch:   scratch area of the calling HART.
 * @cold_boot: TRUE on the boot HART, FALSE on every other HART.
 *
 * Returns 0 on success, SBI_ENOMEM when the feature storage cannot be
 * allocated (cold boot only), or the error reported by fp_init() or
 * delegate_traps().
 */
int sbi_hart_init(struct sbi_scratch *scratch, bool cold_boot)
{
	int err;

	if (cold_boot) {
		/* With the H extension, switch to the hypervisor-aware trap stub. */
		if (misa_extension('H'))
			sbi_hart_expected_trap = &__sbi_expected_trap_hext;

		/* Reserve per-HART storage for the detected features. */
		hart_features_offset =
			sbi_scratch_alloc_offset(sizeof(struct hart_features),
						 "HART_FEATURES");
		if (!hart_features_offset)
			return SBI_ENOMEM;
	}

	/*
	 * Per the walkthrough: probes features by accessing the matching
	 * CSR (CSR_PMPADDR0, CSR_SCOUNTEREN, CSR_MCOUNTEREN, CSR_TIME) and
	 * checking whether it traps; results go into the storage above.
	 */
	hart_detect_features(scratch);

	/*
	 * Per the walkthrough: initializes mstatus, mie, satp and similar
	 * CSRs and enables the counters usable from U/S-mode.
	 */
	mstatus_init(scratch);

	/* Floating-point register setup. */
	err = fp_init(scratch);
	if (err)
		return err;

	/*
	 * Per the walkthrough: delegates part of the interrupts and
	 * exceptions to S-mode via MIDELEG / MEDELEG.
	 */
	return delegate_traps(scratch);
}

2.5
void attribute((noreturn))
sbi_hart_switch_mode(unsigned long arg0, unsigned long arg1,
unsigned long next_addr, unsigned long next_mode,
bool next_virt)
{
#if __riscv_xlen == 32
unsigned long val, valH;
#else
unsigned long val;
#endif

	/* check 下一阶段的mode，支持M S U三种mode */
	switch (next_mode) {
	case PRV_M:
		break;
	case PRV_S:
		if (!misa_extension('S'))
			sbi_hart_hang();
		break;
	case PRV_U:
		if (!misa_extension('U'))
			sbi_hart_hang();
		break;
	default:
		sbi_hart_hang();
	}

	val = csr_read(CSR_MSTATUS);
	val = INSERT_FIELD(val, MSTATUS_MPP, next_mode);
	val = INSERT_FIELD(val, MSTATUS_MPIE, 0);
#if __riscv_xlen == 32
	if (misa_extension('H')) {
		valH = csr_read(CSR_MSTATUSH);
		if (next_virt)
			valH = INSERT_FIELD(valH, MSTATUSH_MPV, 1);
		else
			valH = INSERT_FIELD(valH, MSTATUSH_MPV, 0);
		csr_write(CSR_MSTATUSH, valH);
	}
#else
	if (misa_extension('H')) {
		if (next_virt)
			val = INSERT_FIELD(val, MSTATUS_MPV, 1);
		else
			val = INSERT_FIELD(val, MSTATUS_MPV, 0);
	}
#endif
	csr_write(CSR_MSTATUS, val);  //Machine status register.
	csr_write(CSR_MEPC, next_addr);  //Machine exception program counter.

	if (next_mode == PRV_S) {
		csr_write(CSR_STVEC, next_addr);  //Supervisor trap handler base address
		csr_write(CSR_SSCRATCH, 0);
		csr_write(CSR_SIE, 0); //Supervisor interrupt-enable register
		csr_write(CSR_SATP, 0);
	} else if (next_mode == PRV_U) {
		if (misa_extension('N')) {
			csr_write(CSR_UTVEC, next_addr);
			csr_write(CSR_USCRATCH, 0);
			csr_write(CSR_UIE, 0);
		}
	}

	// 主core的打印：sbi_hart_switch_mode: mstatus:0x8000000000006800 next_add:0xa0200000 next_mode:1 arg0=0x3 arg1=0xa8000000 
	sbi_printf("%s: mstatus:0x%lx next_add:0x%lx next_mode:%lx arg0=0x%lx arg1=0x%lx \n", __func__, val, next_addr, next_mode, arg0, arg1);

	/* Check whether SMP present via mcfg_info and L2 enabled if yes, flush L2 cache */
	val = csr_read(0xFC2);
	if ((val & 0x10800) == 0x10800) { // IREGION and SMP Present
		val = ((csr_read(0x7F7) >> 10) << 10); // read csr mirgb_info and get iregion base address
		val = val + 0x40000; // get smp cluster cache base address
		/* Flush L1 Cache */
		__asm__ __volatile__("fence");
		__asm__ __volatile__("fence.i");
		/* Flush L2 Cache */
		*(volatile unsigned int *)(val + 0x14) = 0x7; // flush and invalid l2 cache lines
	} else {
		/* Flush L1 Cache */
		__asm__ __volatile__("fence");
		__asm__ __volatile__("fence.i");
	}

	register unsigned long a0 asm("a0") = arg0;
	register unsigned long a1 asm("a1") = arg1;
	/*
	 * 机器模式异常返回，将pc设置为CSR_MEPC里面的地址，也就是next_addr，下一级boot的入口地址，
	 * 将特权级设置成CSR_MSTATUS里面的mode，也就是next_mode，uboot/kernel设置的是S mode。
	 */
	__asm__ __volatile__("mret" : : "r"(a0), "r"(a1));  
	__builtin_unreachable();  //通知编译器CPU运行时永远不会到达这一行，这样就可以提前防止很多编译报错。
}

init_warmboot(scratch, hartid);
warmboot调用的函数coldboot都调用过，具体的含义可以参考coldboot。
/**
 * init_warmboot() - Per-HART initialization for every non-boot HART.
 * @scratch: scratch area of the calling HART.
 * @hartid:  id of the calling HART.
 *
 * Runs the per-HART (FALSE / warm) variant of the steps the boot HART
 * performed in init_coldboot(). Any failing step parks the HART in
 * sbi_hart_hang(). Does not return: it ends in sbi_hart_switch_mode().
 */
static void __noreturn init_warmboot(struct sbi_scratch *scratch, u32 hartid)
{
	unsigned long *init_count;
	const struct sbi_platform *plat = sbi_platform_ptr(scratch);

	/* Block until the boot HART has finished its cold boot. */
	wait_for_coldboot(scratch, hartid);

	/* The boot HART must have allocated the init counter slot by now. */
	if (!init_count_offset)
		sbi_hart_hang();

	/*
	 * Per the walkthrough, the HART idles inside sbi_hsm_hart_wait()
	 * (reached through sbi_hsm_init) until its state becomes
	 * SBI_HART_STARTING. That normally happens when the next stage,
	 * usually the kernel bringing up its other CPUs, issues the HSM
	 * HART_START ecall (SBI_EXT_HSM_HART_START): the handler registered
	 * by sbi_ecall_init() calls sbi_hsm_hart_start(), which sets the
	 * state with atomic_cmpxchg() and sends an IPI to this HART.
	 */
	if (sbi_hsm_init(scratch, hartid, FALSE))
		sbi_hart_hang();

	if (sbi_platform_early_init(plat, FALSE))
		sbi_hart_hang();

	if (sbi_hart_init(scratch, FALSE))
		sbi_hart_hang();

	if (sbi_platform_irqchip_init(plat, FALSE))
		sbi_hart_hang();

	if (sbi_ipi_init(scratch, FALSE))
		sbi_hart_hang();

	if (sbi_tlb_init(scratch, FALSE))
		sbi_hart_hang();

	if (sbi_timer_init(scratch, FALSE))
		sbi_hart_hang();

	if (sbi_hart_pmp_configure(scratch))
		sbi_hart_hang();

	if (sbi_platform_final_init(plat, FALSE))
		sbi_hart_hang();

	/* Count this HART's completed initialization. */
	init_count = sbi_scratch_offset_ptr(scratch, init_count_offset);
	*init_count += 1;

	sbi_hsm_prepare_next_jump(scratch, hartid);

	/* Enter the next stage with the hartid in a0 and next_arg1 in a1. */
	sbi_hart_switch_mode(hartid, scratch->next_arg1,
			     scratch->next_addr,
			     scratch->next_mode, FALSE);
}

OpenSBI简介-4

相关阅读

相关文章

相关问答

相关文档