pcie bar资源的读取和remap
慕学海
2023-12-01
在pci_setup_device->pci_read_bases 中会读取bios 中配好的bar资源。
pci_read_bases(dev, 6, PCI_ROM_ADDRESS);可见bar资源最多有6个,spec中也是这么规定的.
static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
{
unsigned int pos, reg;
if (dev->non_compliant_bars)
return;
for (pos = 0; pos < howmany; pos++) {
struct resource *res = &dev->resource[pos];
reg = PCI_BASE_ADDRESS_0 + (pos << 2);
pos += __pci_read_base(dev, pci_bar_unknown, res, reg);
}
}
采用循环的方式调用__pci_read_base,第一个bar的地址是PCI_BASE_ADDRESS_0(0x10),随后调用
int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
struct resource *res, unsigned int pos)
{
u32 l, sz, mask;
u64 l64, sz64, mask64;
u16 orig_cmd;
struct pci_bus_region region, inverted_region;
mask = type ? PCI_ROM_ADDRESS_MASK : ~0;
/* No printks while decoding is disabled! */
if (!dev->mmio_always_on) {
pci_read_config_word(dev, PCI_COMMAND, &orig_cmd);
if (orig_cmd & PCI_COMMAND_DECODE_ENABLE) {
pci_write_config_word(dev, PCI_COMMAND,
orig_cmd & ~PCI_COMMAND_DECODE_ENABLE);
}
}
res->name = pci_name(dev);
//读取resource的flag
pci_read_config_dword(dev, pos, &l);
pci_write_config_dword(dev, pos, l | mask);
pci_read_config_dword(dev, pos, &sz);
pci_write_config_dword(dev, pos, l);
/*
* All bits set in sz means the device isn't working properly.
* If the BAR isn't implemented, all bits must be 0. If it's a
* memory BAR or a ROM, bit 0 must be clear; if it's an io BAR, bit
* 1 must be clear.
*/
if (sz == 0xffffffff)
sz = 0;
/*
* I don't know how l can have all bits set. Copied from old code.
* Maybe it fixes a bug on some ancient platform.
*/
if (l == 0xffffffff)
l = 0;
if (type == pci_bar_unknown) {
res->flags = decode_bar(dev, l);
res->flags |= IORESOURCE_SIZEALIGN;
//根据flag判断是io还是mem
if (res->flags & IORESOURCE_IO) {
l64 = l & PCI_BASE_ADDRESS_IO_MASK;
sz64 = sz & PCI_BASE_ADDRESS_IO_MASK;
mask64 = PCI_BASE_ADDRESS_IO_MASK & (u32)IO_SPACE_LIMIT;
} else {
l64 = l & PCI_BASE_ADDRESS_MEM_MASK;
sz64 = sz & PCI_BASE_ADDRESS_MEM_MASK;
mask64 = (u32)PCI_BASE_ADDRESS_MEM_MASK;
}
} else {
res->flags |= (l & IORESOURCE_ROM_ENABLE);
l64 = l & PCI_ROM_ADDRESS_MASK;
sz64 = sz & PCI_ROM_ADDRESS_MASK;
mask64 = (u32)PCI_ROM_ADDRESS_MASK;
}
//如果是64 bit的话,就要继续读取高32bit
if (res->flags & IORESOURCE_MEM_64) {
pci_read_config_dword(dev, pos + 4, &l);
pci_write_config_dword(dev, pos + 4, ~0);
pci_read_config_dword(dev, pos + 4, &sz);
pci_write_config_dword(dev, pos + 4, l);
l64 |= ((u64)l << 32);
sz64 |= ((u64)sz << 32);
mask64 |= ((u64)~0 << 32);
}
if (!dev->mmio_always_on && (orig_cmd & PCI_COMMAND_DECODE_ENABLE))
pci_write_config_word(dev, PCI_COMMAND, orig_cmd);
if (!sz64)
goto fail;
sz64 = pci_size(l64, sz64, mask64);
if (!sz64) {
dev_info(&dev->dev, FW_BUG "reg 0x%x: invalid BAR (can't size)\n",
pos);
goto fail;
}
//合法性检测
if (res->flags & IORESOURCE_MEM_64) {
if ((sizeof(pci_bus_addr_t) < 8 || sizeof(resource_size_t) < 8)
&& sz64 > 0x100000000ULL) {
res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED;
res->start = 0;
res->end = 0;
dev_err(&dev->dev, "reg 0x%x: can't handle BAR larger than 4GB (size %#010llx)\n",
pos, (unsigned long long)sz64);
goto out;
}
if ((sizeof(pci_bus_addr_t) < 8) && l) {
/* Above 32-bit boundary; try to reallocate */
res->flags |= IORESOURCE_UNSET;
res->start = 0;
res->end = sz64;
dev_info(&dev->dev, "reg 0x%x: can't handle BAR above 4GB (bus address %#010llx)\n",
pos, (unsigned long long)l64);
goto out;
}
}
//给region 赋值
region.start = l64;
region.end = l64 + sz64;
pcibios_bus_to_resource(dev->bus, res, ®ion);
pcibios_resource_to_bus(dev->bus, &inverted_region, res);
/*
* If "A" is a BAR value (a bus address), "bus_to_resource(A)" is
* the corresponding resource address (the physical address used by
* the CPU. Converting that resource address back to a bus address
* should yield the original BAR value:
*
* resource_to_bus(bus_to_resource(A)) == A
*
* If it doesn't, CPU accesses to "bus_to_resource(A)" will not
* be claimed by the device.
*/
if (inverted_region.start != region.start) {
res->flags |= IORESOURCE_UNSET;
res->start = 0;
res->end = region.end - region.start;
dev_info(&dev->dev, "reg 0x%x: initial BAR value %#010llx invalid\n",
pos, (unsigned long long)region.start);
}
goto out;
fail:
res->flags = 0;
out:
if (res->flags)
dev_printk(KERN_DEBUG, &dev->dev, "reg 0x%x: %pR\n", pos, res);
return (res->flags & IORESOURCE_MEM_64) ? 1 : 0;
}
在
void pcibios_bus_to_resource(struct pci_bus *bus, struct resource *res,
struct pci_bus_region *region)
{
struct pci_host_bridge *bridge = pci_find_host_bridge(bus);
struct resource_entry *window;
resource_size_t offset = 0;
resource_list_for_each_entry(window, &bridge->windows) {
struct pci_bus_region bus_region;
if (resource_type(res) != resource_type(window->res))
continue;
bus_region.start = window->res->start - window->offset;
bus_region.end = window->res->end - window->offset;
if (region_contains(&bus_region, region)) {
offset = window->offset;
break;
}
}
res->start = region->start + offset;
res->end = region->end + offset;
}
中会将region 专程resource,并通过region_contains判断当前的region是否和已有的region是否有重叠.这个时候对res的更新其实是更新到dev->resource中,这是因为在pci_read_bases 中有如下的赋值:
struct resource *res = &dev->resource[pos];
从pcibios_bus_to_resource 中得到resource资源后就调用pcibios_resource_to_bus 先判断当前的resouce是否和已经存在的resource相冲突,如果相冲突的话则会新建一个pci_bus_region *region,并让
region->start = res->start - offset;
region->end = res->end - offset;
void pcibios_resource_to_bus(struct pci_bus *bus, struct pci_bus_region *region,
struct resource *res)
{
struct pci_host_bridge *bridge = pci_find_host_bridge(bus);
struct resource_entry *window;
resource_size_t offset = 0;
resource_list_for_each_entry(window, &bridge->windows) {
if (resource_contains(window->res, res)) {
offset = window->offset;
break;
}
}
region->start = res->start - offset;
region->end = res->end - offset;
}
最终在__pci_read_base 中会判断如果在pcibios_resource_to_bus 中如果offset 不为0,则下面的函数返回true
static inline bool resource_contains(struct resource *r1, struct resource *r2)
{
if (resource_type(r1) != resource_type(r2))
return false;
if (r1->flags & IORESOURCE_UNSET || r2->flags & IORESOURCE_UNSET)
return false;
return r1->start <= r2->start && r1->end >= r2->end;
}
最常见的case就是r1->start <= r2->start && r1->end >= r2->end,也就是这个dev的resource已经被其他dev 使用了,从pcibios_resource_to_bus 返回后就到了__pci_read_base,如果返回true,则下面的条件就不成里
if (inverted_region.start != region.start) {
res->flags |= IORESOURCE_UNSET;
res->start = 0;
res->end = region.end - region.start;
dev_info(&dev->dev, "reg 0x%x: initial BAR value %#010llx invalid\n",
pos, (unsigned long long)region.start);
}
然后就会重新更新res的start和end。
这里其实只是申明resource,device要使用这些resouce,这里的地址都是物理的,必须要在自己的probe函数中调用ioremap 来将这些地址map成虚拟的kernel才可以使用.
例如:在pci/host/pci-aardvark.c 中的probe函数就调用devm_ioremap_resource 来remap这些资源
static int advk_pcie_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct advk_pcie *pcie;
struct resource *res;
struct pci_bus *bus, *child;
struct msi_controller *msi;
struct device_node *msi_node;
int ret, irq;
pcie = devm_kzalloc(dev, sizeof(struct advk_pcie), GFP_KERNEL);
if (!pcie)
return -ENOMEM;
pcie->pdev = pdev;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
pcie->base = devm_ioremap_resource(dev, res);
if (IS_ERR(pcie->base))
return PTR_ERR(pcie->base);
}