From d367c5c6f2b56bcad0365042402aadaa5e743050 Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Thu, 21 May 2026 10:08:19 -0700 Subject: [PATCH 1/6] lib: linux: preserve device-open errors The Linux bus open path may try more than one backend driver for a device. When a backend finds the device but fails while opening it, the common open loop currently discards that errno and returns -ENODEV after all drivers have been tried. Keep the first useful backend open error, preferring non-ENODEV failures over a plain miss. This preserves the existing not-found result while letting callers see real failures such as UIO map population errors. Signed-off-by: Ben Levinsky --- lib/system/linux/device.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/lib/system/linux/device.c b/lib/system/linux/device.c index fcd423e06..f7b56b795 100644 --- a/lib/system/linux/device.c +++ b/lib/system/linux/device.c @@ -428,7 +428,8 @@ static int metal_linux_dev_open(struct metal_bus *bus, struct linux_bus *lbus = to_linux_bus(bus); struct linux_device *ldev = NULL; struct linux_driver *ldrv; - int error; + int error = -ENODEV; + int ret; ldev = malloc(sizeof(*ldev)); if (!ldev) @@ -448,9 +449,18 @@ static int metal_linux_dev_open(struct metal_bus *bus, ldev->device.bus = bus; /* Try and open the device. */ - error = ldrv->dev_open(lbus, ldev); - if (error) { - ldrv->dev_close(lbus, ldev); + ret = ldrv->dev_open(lbus, ldev); + if (ret) { + /* + * Preserve the first useful errno while still allowing + * clean backend misses to try the same device. 
+ */ + if (ldrv->dev_close) + ldrv->dev_close(lbus, ldev); + if (error == -ENODEV) + error = ret; + if (ret != -ENODEV) + goto out; continue; } @@ -461,9 +471,10 @@ static int metal_linux_dev_open(struct metal_bus *bus, return 0; } +out: free(ldev); - return -ENODEV; + return error; } static void metal_linux_dev_close(struct metal_bus *bus, @@ -668,4 +679,3 @@ int metal_linux_get_device_property(struct metal_device *device, status = close(fd); return status < 0 ? -errno : 0; } - From e4b5c2b298f5a543a37f3096c3ba09378c6d6e36 Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Wed, 20 May 2026 14:06:34 -0700 Subject: [PATCH 2/6] lib: linux: fix UIO mmap offset handling UIO map offsets identify the usable resource start inside the page-aligned mapping exposed by sysfs. The Linux backend previously exposed and unmapped the adjusted virtual address directly. Keep the raw mmap base and length for close, expose the usable virtual address as raw mapping plus offset, and derive the libmetal physical base and size from the usable portion of the UIO map. Use the sysfs map size as the mmap length. For an unaligned resource, UIO already reports a page-aligned address and a full mmap length, so adding the offset to that length can over-map the resource and fail. Reject offsets outside the system page size, reject offsets beyond the map size, and report overflow before attempting to mmap the region. 
Signed-off-by: Ben Levinsky --- lib/system/linux/device.c | 244 ++++++++++++++++++++++++++++++++++---- 1 file changed, 219 insertions(+), 25 deletions(-) diff --git a/lib/system/linux/device.c b/lib/system/linux/device.c index f7b56b795..a4eac73f7 100644 --- a/lib/system/linux/device.c +++ b/lib/system/linux/device.c @@ -14,6 +14,8 @@ #include #include +#include + #include "irq.h" #define MAX_DRIVERS 64 @@ -59,12 +61,34 @@ struct linux_device { char dev_path[PATH_MAX]; char cls_path[PATH_MAX]; metal_phys_addr_t region_phys[METAL_MAX_DEVICE_REGIONS]; + void *region_map_raw[METAL_MAX_DEVICE_REGIONS]; + size_t region_map_len[METAL_MAX_DEVICE_REGIONS]; struct linux_driver *ldrv; struct sysfs_device *sdev; struct sysfs_attribute *override; int fd; }; +/** + * @internal + * + * @brief UIO map attributes and derived libmetal region information. + * + * UIO sysfs reports a full mmap() extent plus a separate offset to the + * usable resource. This structure keeps those inputs together while converting + * them into the libmetal physical address, mmap length, and exported region + * size. + */ +struct metal_uio_map_info { + const char *dev_name; + metal_phys_addr_t map_addr; + unsigned long map_size; + unsigned long offset; + metal_phys_addr_t *phys; + size_t *map_len; + size_t *region_size; +}; + static struct linux_bus *to_linux_bus(struct metal_bus *bus) { return metal_container_of(bus, struct linux_bus, bus); @@ -100,6 +124,94 @@ static int metal_uio_read_map_attr(struct linux_device *ldev, return 0; } +/** + * @internal + * + * @brief Validate the sysfs map offset before applying it to the mmap() base. + * + * The Linux UIO ABI exposes one mmap slot per page-sized index, so the + * per-map offset must remain within a single host page. + * + * The offset is applied inside one page returned by mmap(). Larger offsets + * cannot be represented by adjusting the returned mapping. + * + * @param[in] dev_name Device name used for error reporting; may be NULL. 
+ * @param[in] offset Offset to validate, in bytes. + * @return 0 on success, or -EINVAL if offset >= the host page size. + */ +static int metal_linux_uio_validate_offset(const char *dev_name, + unsigned long offset) +{ + const unsigned long page_size = (unsigned long)getpagesize(); + + if (offset >= page_size) { + metal_log(METAL_LOG_ERROR, + "device %s has invalid UIO offset 0x%lx (page size 0x%lx)\n", + dev_name ? dev_name : "", offset, page_size); + return -EINVAL; + } + + return 0; +} + +/** + * @internal + * + * @brief Translate UIO sysfs map attributes into libmetal map information. + * + * This fills in the values required by libmetal: the mmap() length used for + * cleanup, the usable physical start address, and the usable I/O region size + * after skipping the map offset. + * + * @param[in,out] info Pointer to the map information structure to populate. + * @return 0 on success, or a negative error code on failure. + */ +static int metal_linux_uio_map_info(struct metal_uio_map_info *info) +{ + int result; + + if (!info || !info->phys || !info->map_len || !info->region_size) + return -EINVAL; + + result = metal_linux_uio_validate_offset(info->dev_name, info->offset); + if (result) + return result; + + if (info->offset >= info->map_size) { + metal_log(METAL_LOG_ERROR, + "device %s has invalid UIO size 0x%lx for offset 0x%lx\n", + info->dev_name ? info->dev_name : "", + info->map_size, info->offset); + return -EINVAL; + } + + if (info->map_size > SIZE_MAX) { + metal_log(METAL_LOG_ERROR, + "device %s UIO size 0x%lx overflows size_t\n", + info->dev_name ? info->dev_name : "", + info->map_size); + return -EOVERFLOW; + } + + if (info->map_addr + info->offset < info->map_addr) { + metal_log(METAL_LOG_ERROR, + "device %s UIO physical address overflow (addr=0x%lx offset=0x%lx)\n", + info->dev_name ? info->dev_name : "", + (unsigned long)info->map_addr, info->offset); + return -EOVERFLOW; + } + + /* + * mmap() uses the full page-aligned map. 
libmetal clients see only the + * usable resource that starts at offset bytes into that mapping. + */ + *info->phys = info->map_addr + info->offset; + *info->map_len = (size_t)info->map_size; + *info->region_size = (size_t)(info->map_size - info->offset); + + return 0; +} + static int metal_uio_dev_bind(struct linux_device *ldev, struct linux_driver *ldrv) { @@ -155,11 +267,15 @@ static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) { char *instance, path[SYSFS_PATH_MAX]; struct linux_driver *ldrv = ldev->ldrv; - unsigned long *phys, offset = 0, size = 0; + unsigned long offset = 0, size = 0; + metal_phys_addr_t addr = 0, *phys; struct metal_io_region *io; + struct metal_uio_map_info map_info; + size_t map_len, region_size; struct dlist *dlist; int result, i; - void *virt; + unsigned int j; + void *raw, *virt; int irq_info; @@ -177,35 +293,48 @@ static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) result = metal_uio_dev_bind(ldev, ldrv); if (result) - return result; + goto fail; result = snprintf(path, sizeof(path), "%s/uio", ldev->sdev->path); - if (result >= (int)sizeof(path)) - return -EOVERFLOW; + if (result < 0 || result >= (int)sizeof(path)) { + result = -EOVERFLOW; + goto fail; + } dlist = sysfs_open_directory_list(path); if (!dlist) { metal_log(METAL_LOG_ERROR, "failed to scan class path %s\n", path); - return -errno; + result = -errno; + goto fail; } dlist_for_each_data(dlist, instance, char) { result = snprintf(ldev->cls_path, sizeof(ldev->cls_path), "%s/%s", path, instance); - if (result >= (int)sizeof(ldev->cls_path)) - return -EOVERFLOW; + if (result < 0 || result >= (int)sizeof(ldev->cls_path)) { + result = -EOVERFLOW; + goto close_list; + } result = snprintf(ldev->dev_path, sizeof(ldev->dev_path), "/dev/%s", instance); - if (result >= (int)sizeof(ldev->dev_path)) - return -EOVERFLOW; + if (result < 0 || result >= (int)sizeof(ldev->dev_path)) { + result = -EOVERFLOW; + goto close_list; + } break; 
} + result = 0; + +close_list: sysfs_close_list(dlist); + if (result) + goto fail; if (sysfs_path_is_dir(ldev->cls_path) != 0) { metal_log(METAL_LOG_ERROR, "invalid device class path %s\n", ldev->cls_path); - return -ENODEV; + result = -ENODEV; + goto fail; } i = 0; @@ -218,34 +347,72 @@ static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) if (i >= 1000) { metal_log(METAL_LOG_ERROR, "failed to open file %s, timeout.\n", ldev->dev_path); - return -ENODEV; + result = -ENODEV; + goto fail; } result = metal_open(ldev->dev_path, 0); if (result < 0) { metal_log(METAL_LOG_ERROR, "failed to open device %s\n", ldev->dev_path, strerror(-result)); - return result; + goto fail; } ldev->fd = result; metal_log(METAL_LOG_DEBUG, "opened %s:%s as %s\n", lbus->bus_name, ldev->dev_name, ldev->dev_path); - for (i = 0, result = 0; !result && i < METAL_MAX_DEVICE_REGIONS; i++) { + for (i = 0; i < METAL_MAX_DEVICE_REGIONS; i++) { phys = &ldev->region_phys[ldev->device.num_regions]; + result = metal_uio_read_map_attr(ldev, i, "offset", &offset); + /* + * A missing offset for the next map marks the end of the UIO + * map list. Other read errors are real open failures. + */ + if (result == -ENOENT) + break; + if (result) + goto fail; result = (result ? result : - metal_uio_read_map_attr(ldev, i, "offset", &offset)); - result = (result ? result : - metal_uio_read_map_attr(ldev, i, "addr", phys)); + metal_uio_read_map_attr(ldev, i, "addr", &addr)); result = (result ? result : metal_uio_read_map_attr(ldev, i, "size", &size)); - result = (result ? result : - metal_map(ldev->fd, i * getpagesize(), size, 0, 0, &virt)); - if (!result) { - io = &ldev->device.regions[ldev->device.num_regions]; - metal_io_init(io, virt, phys, size, -1, 0, NULL); - ldev->device.num_regions++; + if (result) + goto fail; + /* + * UIO sysfs reports addr/size/offset separately. 
Convert them + * before mmap() so the raw mapping and exposed region stay in + * sync for both normal access and close-time unmap. + */ + map_info.dev_name = ldev->dev_name; + map_info.map_addr = addr; + map_info.map_size = size; + map_info.offset = offset; + map_info.phys = phys; + map_info.map_len = &map_len; + map_info.region_size = &region_size; + result = metal_linux_uio_map_info(&map_info); + if (result) + goto fail; + result = metal_map(ldev->fd, i * getpagesize(), map_len, 0, 0, + &raw); + if (result) { + metal_log(METAL_LOG_ERROR, + "failed to mmap device %s map%u (len=0x%zx offset=0x%lx): %s\n", + ldev->dev_name, i, map_len, + (unsigned long)i * (unsigned long)getpagesize(), + strerror(-result)); + goto fail; } + virt = (void *)((char *)raw + offset); + /* + * Keep the raw mapping for munmap(); expose the adjusted + * address as the usable libmetal I/O region. + */ + io = &ldev->device.regions[ldev->device.num_regions]; + metal_io_init(io, virt, phys, region_size, -1, 0, NULL); + ldev->region_map_raw[ldev->device.num_regions] = raw; + ldev->region_map_len[ldev->device.num_regions] = map_len; + ldev->device.num_regions++; } irq_info = 1; @@ -262,6 +429,31 @@ static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) } return 0; + +fail: + for (j = 0; j < ldev->device.num_regions; j++) { + metal_unmap(ldev->region_map_raw[j], + ldev->region_map_len[j]); + ldev->region_map_raw[j] = NULL; + ldev->region_map_len[j] = 0; + } + ldev->device.num_regions = 0; + ldev->device.irq_num = 0; + ldev->device.irq_info = (void *)-1; + if (ldev->override) { + sysfs_write_attribute(ldev->override, "", 1); + ldev->override = NULL; + } + if (ldev->sdev) { + sysfs_close_device(ldev->sdev); + ldev->sdev = NULL; + } + if (ldev->fd >= 0) { + close(ldev->fd); + ldev->fd = -1; + } + + return result; } static void metal_uio_dev_close(struct linux_bus *lbus, @@ -271,8 +463,10 @@ static void metal_uio_dev_close(struct linux_bus *lbus, unsigned int i; for (i = 0; i < 
ldev->device.num_regions; i++) { - metal_unmap(ldev->device.regions[i].virt, - ldev->device.regions[i].size); + metal_unmap(ldev->region_map_raw[i], + ldev->region_map_len[i]); + ldev->region_map_raw[i] = NULL; + ldev->region_map_len[i] = 0; } if (ldev->override) { sysfs_write_attribute(ldev->override, "", 1); From 0679571c83d5055271dbe68ad4e7ae4396e66569 Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Wed, 20 May 2026 14:07:17 -0700 Subject: [PATCH 3/6] lib: linux: clear UIO IRQ bookkeeping on close A UIO-backed device registers its file descriptor with the Linux IRQ controller so interrupt handling can find the owning metal device. Closing the device must clear that association before closing the fd. Add an internal unregister helper that detaches the device pointer after the IRQ consumer has disabled the IRQ. Keep IRQ handler and enable-state teardown owned by the standard IRQ disable and unregister paths. Signed-off-by: Ben Levinsky --- lib/system/linux/device.c | 10 +++++++ lib/system/linux/irq.c | 58 +++++++++++++++++++++++++++++++++++++-- lib/system/linux/irq.h | 27 ++++++++++++++++++ 3 files changed, 93 insertions(+), 2 deletions(-) diff --git a/lib/system/linux/device.c b/lib/system/linux/device.c index a4eac73f7..7c9ae95a6 100644 --- a/lib/system/linux/device.c +++ b/lib/system/linux/device.c @@ -462,12 +462,21 @@ static void metal_uio_dev_close(struct linux_bus *lbus, (void)lbus; unsigned int i; + if (ldev->fd >= 0) { + /* + * Disable first so unregister only removes device bookkeeping; + * IRQ handler teardown remains in the generic IRQ path. 
+ */ + metal_irq_disable(ldev->fd); + metal_linux_irq_unregister_dev(ldev->fd); + } for (i = 0; i < ldev->device.num_regions; i++) { metal_unmap(ldev->region_map_raw[i], ldev->region_map_len[i]); ldev->region_map_raw[i] = NULL; ldev->region_map_len[i] = 0; } + ldev->device.num_regions = 0; if (ldev->override) { sysfs_write_attribute(ldev->override, "", 1); ldev->override = NULL; @@ -478,6 +487,7 @@ static void metal_uio_dev_close(struct linux_bus *lbus, } if (ldev->fd >= 0) { close(ldev->fd); + ldev->fd = -1; } } diff --git a/lib/system/linux/irq.c b/lib/system/linux/irq.c index 5d84ee015..3a83e1bd7 100644 --- a/lib/system/linux/irq.c +++ b/lib/system/linux/irq.c @@ -186,8 +186,8 @@ static void *metal_linux_irq_handling(void *args) int fd; fd = pfds[i].fd; - dev = irqs_devs[fd]; metal_mutex_acquire(&irq_lock); + dev = irqs_devs[fd]; if (metal_irq_handle(&irqs[fd], fd) == METAL_IRQ_HANDLED) irq_handled = 1; @@ -266,10 +266,64 @@ void metal_linux_irq_shutdown(void) void metal_linux_irq_register_dev(struct metal_device *dev, int irq) { - if (irq > MAX_IRQS) { + if (irq < 0 || irq >= MAX_IRQS) { metal_log(METAL_LOG_ERROR, "Failed to register device to irq %d\n", irq); return; } + metal_mutex_acquire(&irq_lock); irqs_devs[irq] = dev; + metal_mutex_release(&irq_lock); +} + +int metal_linux_irq_unregister_dev(int irq) +{ + int offset; + + if (irq < linux_irq_cntr.irq_base || + irq >= linux_irq_cntr.irq_base + linux_irq_cntr.irq_num) { + metal_log(METAL_LOG_ERROR, + "Failed to unregister device from irq %d\n", irq); + return -EINVAL; + } + + offset = irq - linux_irq_cntr.irq_base; + metal_mutex_acquire(&irq_lock); + /* + * Unregister only detaches the device association. The IRQ handler and + * enabled state remain owned by metal_irq_disable()/unregister(). 
+ */ + if (metal_bitmap_is_bit_set(irqs_enabled, offset)) { + metal_mutex_release(&irq_lock); + return -EINVAL; + } + irqs_devs[irq] = NULL; + metal_mutex_release(&irq_lock); + + return 0; +} + +struct metal_device *metal_linux_irq_get_dev(int irq) +{ + struct metal_device *dev; + + if (irq < linux_irq_cntr.irq_base || + irq >= linux_irq_cntr.irq_base + linux_irq_cntr.irq_num) + return NULL; + + metal_mutex_acquire(&irq_lock); + dev = irqs_devs[irq]; + metal_mutex_release(&irq_lock); + + return dev; +} + +int metal_linux_irq_is_enabled(int irq) +{ + if (irq < linux_irq_cntr.irq_base || + irq >= linux_irq_cntr.irq_base + linux_irq_cntr.irq_num) + return 0; + + return metal_bitmap_is_bit_set(irqs_enabled, + irq - linux_irq_cntr.irq_base); } diff --git a/lib/system/linux/irq.h b/lib/system/linux/irq.h index ff02b7e48..b6dedb719 100644 --- a/lib/system/linux/irq.h +++ b/lib/system/linux/irq.h @@ -29,6 +29,33 @@ */ void metal_linux_irq_register_dev(struct metal_device *dev, int irq); +/** + * @brief Unregister the metal device associated with a Linux IRQ. + * + * Metal Linux internal function to clear device bookkeeping for an IRQ. The + * IRQ consumer must disable the IRQ before unregistering the device. + * + * @param[in] irq interrupt id + * @return 0 on success, or -errno on error. + */ +int metal_linux_irq_unregister_dev(int irq); + +/** + * @brief Get the metal device associated with a Linux IRQ. + * + * @param[in] irq interrupt id + * @return Registered metal device, or NULL if none is registered. + */ +struct metal_device *metal_linux_irq_get_dev(int irq); + +/** + * @brief Check whether a Linux IRQ is enabled. + * + * @param[in] irq interrupt id + * @return 1 if the IRQ is enabled, or 0 otherwise. 
+ */ +int metal_linux_irq_is_enabled(int irq); + #endif /* METAL_INTERNAL */ #define __METAL_LINUX_IRQ__H__ From 4015471f56efd059894ee86bdeb1291fafccc71f Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Mon, 8 Jun 2026 09:47:50 -0700 Subject: [PATCH 4/6] lib: linux: factor common UIO populate path Split the UIO open flow into two stages. The parent-bus path still opens the platform or PCI sysfs device, binds it to the selected UIO driver, finds the child UIO class device, and records the resolved class and /dev paths. Move the common stage into metal_uio_populate(). That helper waits for the /dev/uioX node, opens it, reads each UIO map, maps the full mmap extent, exposes the usable region after the sysfs offset, and registers IRQ bookkeeping when the UIO fd supports interrupts. Keep close-time cleanup unchanged by storing the raw mmap address and length alongside the adjusted libmetal I/O region. On populate failure, unmap any regions mapped so far and close the UIO fd locally before the generic open path releases parent sysfs and driver override state. Also make local error paths close the temporary UIO child list before returning. Signed-off-by: Ben Levinsky --- lib/system/linux/device.c | 259 ++++++++++++++++++++++++++------------ 1 file changed, 182 insertions(+), 77 deletions(-) diff --git a/lib/system/linux/device.c b/lib/system/linux/device.c index 7c9ae95a6..fa0accb40 100644 --- a/lib/system/linux/device.c +++ b/lib/system/linux/device.c @@ -59,7 +59,13 @@ struct linux_device { struct metal_device device; char dev_name[PATH_MAX]; char dev_path[PATH_MAX]; + /* + * UIO sysfs class directory, such as /sys/class/uio/uio0. UIO map + * attributes are read relative to this path. 
+ */ char cls_path[PATH_MAX]; + char uio_name[PATH_MAX]; + char uio_dev_name[PATH_MAX]; metal_phys_addr_t region_phys[METAL_MAX_DEVICE_REGIONS]; void *region_map_raw[METAL_MAX_DEVICE_REGIONS]; size_t region_map_len[METAL_MAX_DEVICE_REGIONS]; @@ -124,6 +130,40 @@ static int metal_uio_read_map_attr(struct linux_device *ldev, return 0; } +/** + * @internal + * + * @brief Read a string-valued UIO sysfs attribute. + * + * The value is explicitly terminated so callers can compare it as a C string. + * + * @param[in] path Path to the UIO sysfs attribute. + * @param[out] value Buffer that receives the attribute value. + * @param[in] len Size of value in bytes. + * @return 0 on success, or a negative error code on failure. + */ +static int metal_uio_read_str_attr(const char *path, char *value, size_t len) +{ + struct sysfs_attribute *attr; + int result = 0; + + if (!value || !len) + return -EINVAL; + + attr = sysfs_open_attribute(path); + if (!attr || sysfs_read_attribute(attr) != 0) { + result = -errno; + goto close_attr; + } + + strncpy(value, attr->value, len - 1); + value[len - 1] = '\0'; + +close_attr: + sysfs_close_attribute(attr); + return result; +} + /** * @internal * @@ -263,81 +303,30 @@ static int metal_uio_dev_bind(struct linux_device *ldev, return 0; } -static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) +/** + * @internal + * + * @brief Populate common UIO device state from resolved UIO paths. + * + * Both parent-bus opens and class-name opens share the same mmap, IRQ + * registration, DMA, and close-time cleanup rules. + * + * @param[in] lbus Linux bus used for diagnostics. + * @param[in,out] ldev Linux device with cls_path and dev_path already set. + * @return 0 on success, or a negative error code on failure. 
+ */ +static int metal_uio_populate(struct linux_bus *lbus, struct linux_device *ldev) { - char *instance, path[SYSFS_PATH_MAX]; - struct linux_driver *ldrv = ldev->ldrv; unsigned long offset = 0, size = 0; metal_phys_addr_t addr = 0, *phys; struct metal_io_region *io; struct metal_uio_map_info map_info; size_t map_len, region_size; - struct dlist *dlist; - int result, i; + int result, i = 0; unsigned int j; void *raw, *virt; int irq_info; - - ldev->fd = -1; - ldev->device.irq_info = (void *)-1; - - ldev->sdev = sysfs_open_device(lbus->bus_name, ldev->dev_name); - if (!ldev->sdev) { - metal_log(METAL_LOG_ERROR, "device %s:%s not found\n", - lbus->bus_name, ldev->dev_name); - return -ENODEV; - } - metal_log(METAL_LOG_DEBUG, "opened sysfs device %s:%s\n", - lbus->bus_name, ldev->dev_name); - - result = metal_uio_dev_bind(ldev, ldrv); - if (result) - goto fail; - - result = snprintf(path, sizeof(path), "%s/uio", ldev->sdev->path); - if (result < 0 || result >= (int)sizeof(path)) { - result = -EOVERFLOW; - goto fail; - } - dlist = sysfs_open_directory_list(path); - if (!dlist) { - metal_log(METAL_LOG_ERROR, "failed to scan class path %s\n", - path); - result = -errno; - goto fail; - } - - dlist_for_each_data(dlist, instance, char) { - result = snprintf(ldev->cls_path, sizeof(ldev->cls_path), - "%s/%s", path, instance); - if (result < 0 || result >= (int)sizeof(ldev->cls_path)) { - result = -EOVERFLOW; - goto close_list; - } - result = snprintf(ldev->dev_path, sizeof(ldev->dev_path), - "/dev/%s", instance); - if (result < 0 || result >= (int)sizeof(ldev->dev_path)) { - result = -EOVERFLOW; - goto close_list; - } - break; - } - result = 0; - -close_list: - sysfs_close_list(dlist); - if (result) - goto fail; - - if (sysfs_path_is_dir(ldev->cls_path) != 0) { - metal_log(METAL_LOG_ERROR, "invalid device class path %s\n", - ldev->cls_path); - result = -ENODEV; - goto fail; - } - - i = 0; do { if (!access(ldev->dev_path, F_OK)) break; @@ -347,14 +336,13 @@ static int 
metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) if (i >= 1000) { metal_log(METAL_LOG_ERROR, "failed to open file %s, timeout.\n", ldev->dev_path); - result = -ENODEV; - goto fail; + return -ENODEV; } result = metal_open(ldev->dev_path, 0); if (result < 0) { - metal_log(METAL_LOG_ERROR, "failed to open device %s\n", + metal_log(METAL_LOG_ERROR, "failed to open device %s: %s\n", ldev->dev_path, strerror(-result)); - goto fail; + return result; } ldev->fd = result; @@ -440,14 +428,6 @@ static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) ldev->device.num_regions = 0; ldev->device.irq_num = 0; ldev->device.irq_info = (void *)-1; - if (ldev->override) { - sysfs_write_attribute(ldev->override, "", 1); - ldev->override = NULL; - } - if (ldev->sdev) { - sysfs_close_device(ldev->sdev); - ldev->sdev = NULL; - } if (ldev->fd >= 0) { close(ldev->fd); ldev->fd = -1; @@ -456,6 +436,131 @@ static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) return result; } +static void metal_uio_dev_close(struct linux_bus *lbus, + struct linux_device *ldev); + +/** + * @internal + * + * @brief Open a parent-bus device that exposes a UIO child. + * + * This path binds the parent device to a UIO driver before using the common + * UIO populate logic. + * + * @param[in] lbus Linux bus containing the parent device. + * @param[in,out] ldev Linux device to open. + * @return 0 on success, or a negative error code on failure. 
+ */ +static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) +{ + char *instance, path[SYSFS_PATH_MAX]; + struct linux_driver *ldrv = ldev->ldrv; + struct dlist *dlist; + int result; + + ldev->fd = -1; + ldev->device.irq_info = (void *)-1; + + ldev->sdev = sysfs_open_device(lbus->bus_name, ldev->dev_name); + if (!ldev->sdev) { + metal_log(METAL_LOG_ERROR, "device %s:%s not found\n", + lbus->bus_name, ldev->dev_name); + return -ENODEV; + } + metal_log(METAL_LOG_DEBUG, "opened sysfs device %s:%s\n", + lbus->bus_name, ldev->dev_name); + /* + * Error paths after this point clean up locally. The common open loop + * may call dev_close() again, so close must tolerate partial cleanup. + */ + + /* + * Parent-bus opens still need the requested platform or PCI device + * bound to the selected UIO driver before a /dev/uioX node can exist. + */ + result = metal_uio_dev_bind(ldev, ldrv); + if (result) + goto fail; + + /* + * A bound parent device exposes one UIO child below its sysfs device + * directory. Use that child name to derive both sysfs and /dev paths. + */ + result = snprintf(path, sizeof(path), "%s/uio", ldev->sdev->path); + if (result < 0 || result >= (int)sizeof(path)) { + result = -EOVERFLOW; + goto fail; + } + dlist = sysfs_open_directory_list(path); + if (!dlist) { + metal_log(METAL_LOG_ERROR, "failed to scan class path %s\n", + path); + result = -errno; + goto fail; + } + + dlist_for_each_data(dlist, instance, char) { + /* + * The first UIO child is the device node this parent-bus open + * will use for mmap, IRQ, and DMA operations. 
+ */ + result = snprintf(ldev->cls_path, sizeof(ldev->cls_path), + "%s/%s", path, instance); + if (result < 0 || result >= (int)sizeof(ldev->cls_path)) { + result = -EOVERFLOW; + goto close_list; + } + result = snprintf(ldev->dev_path, sizeof(ldev->dev_path), + "/dev/%s", instance); + if (result < 0 || result >= (int)sizeof(ldev->dev_path)) { + result = -EOVERFLOW; + goto close_list; + } + result = snprintf(path, sizeof(path), "%s/name", ldev->cls_path); + if (result < 0 || result >= (int)sizeof(path)) { + result = -EOVERFLOW; + goto close_list; + } + ldev->uio_name[0] = '\0'; + metal_uio_read_str_attr(path, ldev->uio_name, + sizeof(ldev->uio_name)); + result = snprintf(ldev->uio_dev_name, + sizeof(ldev->uio_dev_name), "%s", instance); + if (result < 0 || result >= (int)sizeof(ldev->uio_dev_name)) { + result = -EOVERFLOW; + goto close_list; + } + break; + } + + sysfs_close_list(dlist); + result = 0; + + /* Refuse to continue if the selected UIO class path disappeared. */ + if (sysfs_path_is_dir(ldev->cls_path) != 0) { + metal_log(METAL_LOG_ERROR, "invalid device class path %s\n", + ldev->cls_path); + result = -ENODEV; + goto fail; + } + + /* + * Once cls_path and dev_path are resolved, the rest of the open flow is + * shared with the synthetic UIO class-name path. + */ + result = metal_uio_populate(lbus, ldev); + if (result) + goto fail; + + return 0; + +close_list: + sysfs_close_list(dlist); +fail: + metal_uio_dev_close(lbus, ldev); + return result; +} + static void metal_uio_dev_close(struct linux_bus *lbus, struct linux_device *ldev) { From e77402da3fb711a492120c74e6609995c07e6888 Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Mon, 8 Jun 2026 09:48:38 -0700 Subject: [PATCH 5/6] lib: linux: add UIO class-name lookup Add the resolver used by the synthetic uio bus. It scans every /sys/class/uio/uioX/name file, compares the first line against the requested libmetal device name, and rejects duplicate matches because they cannot be opened deterministically. 
When a unique match is found, fill the same linux_device fields that the parent-bus UIO path fills: cls_path points at the UIO sysfs class directory, dev_path points at /dev/uioX, and the UIO name and device node name are saved for diagnostics and future callers. The class-name open callback then reuses metal_uio_populate(), so UIO class opens and parent-bus UIO opens share mmap setup, IRQ registration, DMA handling, and close-time cleanup. Signed-off-by: Ben Levinsky --- lib/system/linux/device.c | 177 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) diff --git a/lib/system/linux/device.c b/lib/system/linux/device.c index fa0accb40..5fb6e967f 100644 --- a/lib/system/linux/device.c +++ b/lib/system/linux/device.c @@ -15,10 +15,13 @@ #include #include +#include +#include #include "irq.h" #define MAX_DRIVERS 64 +#define METAL_UIO_CLASS_PATH "/sys/class/uio" struct linux_bus; struct linux_device; @@ -105,6 +108,48 @@ static struct linux_device *to_linux_device(struct metal_device *device) return metal_container_of(device, struct linux_device, device); } +/** + * @internal + * + * @brief Read the first text line from a sysfs file. + * + * The trailing newline is stripped so callers can compare sysfs text values as + * C strings. + * + * @param[in] path Path to the sysfs file. + * @param[out] output Buffer that receives the first line. + * @param[in] output_len Size of output in bytes. + * @return 0 on success, or a negative error code on failure. + */ +static int metal_linux_read_first_line(const char *path, char *output, + size_t output_len) +{ + FILE *fp; + char *newline; + int result = 0; + + if (!path || !output || output_len < 2) + return -EINVAL; + + fp = fopen(path, "r"); + if (!fp) + return -errno; + + if (!fgets(output, output_len, fp)) { + result = ferror(fp) ? 
-errno : -ENODATA; + goto close_file; + } + + newline = strchr(output, '\n'); + if (newline) + *newline = '\0'; + +close_file: + fclose(fp); + + return result; +} + static int metal_uio_read_map_attr(struct linux_device *ldev, unsigned int index, const char *name, @@ -252,6 +297,108 @@ static int metal_linux_uio_map_info(struct metal_uio_map_info *info) return 0; } +/** + * @internal + * + * @brief Find a UIO class device by its exported name. + * + * This scans /sys/class/uio/uioX/name for the requested libmetal device name. + * The UIO class name must uniquely identify the device because there is no + * parent platform or PCI sysfs device to bind through first. + * + * @param[in] uio_name UIO name to search for. + * @param[out] ldev Linux device to populate with the resolved paths. + * @return 0 on success, or a negative error code on failure. + */ +static int metal_uio_find_device_by_name(const char *uio_name, + struct linux_device *ldev) +{ + DIR *dir; + struct dirent *entry; + char path[PATH_MAX]; + char value[PATH_MAX]; + bool found = false; + int result = -ENODEV; + + if (!uio_name || !strlen(uio_name) || !ldev) + return -EINVAL; + + dir = opendir(METAL_UIO_CLASS_PATH); + if (!dir) { + result = errno == ENOENT ? -ENODEV : -errno; + return result; + } + + /* + * Walk every UIO class device and compare its reported name against the + * requested libmetal name. Continue after a match so duplicate names can + * be detected instead of silently choosing a nondeterministic device. 
+ */ + while ((entry = readdir(dir)) != NULL) { + if (strncmp(entry->d_name, "uio", 3) != 0) + continue; + + result = snprintf(path, sizeof(path), "%s/%s/name", + METAL_UIO_CLASS_PATH, entry->d_name); + if (result < 0 || result >= (int)sizeof(path)) { + result = -EOVERFLOW; + goto out; + } + + result = metal_linux_read_first_line(path, value, + sizeof(value)); + if (result) + continue; + + if (strcmp(value, uio_name) != 0) + continue; + + if (found) { + /* Duplicate names cannot be opened deterministically. */ + result = -EEXIST; + goto out; + } + found = true; + + result = snprintf(ldev->cls_path, sizeof(ldev->cls_path), + "%s/%s", METAL_UIO_CLASS_PATH, + entry->d_name); + if (result < 0 || result >= (int)sizeof(ldev->cls_path)) { + result = -EOVERFLOW; + goto out; + } + /* + * Fill the same fields as the parent-bus UIO path so both + * open modes can share metal_uio_populate(). + */ + result = snprintf(ldev->dev_path, sizeof(ldev->dev_path), + "/dev/%s", entry->d_name); + if (result < 0 || result >= (int)sizeof(ldev->dev_path)) { + result = -EOVERFLOW; + goto out; + } + result = snprintf(ldev->uio_name, sizeof(ldev->uio_name), + "%s", value); + if (result < 0 || result >= (int)sizeof(ldev->uio_name)) { + result = -EOVERFLOW; + goto out; + } + result = snprintf(ldev->uio_dev_name, + sizeof(ldev->uio_dev_name), "%s", + entry->d_name); + if (result < 0 || result >= (int)sizeof(ldev->uio_dev_name)) { + result = -EOVERFLOW; + goto out; + } + } + + result = found ? 0 : -ENODEV; + +out: + closedir(dir); + return result; +} + static int metal_uio_dev_bind(struct linux_device *ldev, struct linux_driver *ldrv) { @@ -561,6 +708,36 @@ static int metal_uio_dev_open(struct linux_bus *lbus, struct linux_device *ldev) return result; } +/** + * @internal + * + * @brief Open a UIO class device through the synthetic UIO bus. + * + * The device name is matched against /sys/class/uio/uioX/name. No parent + * sysfs device is available on this path. 
+ * + * @param[in] lbus Synthetic UIO bus. + * @param[in,out] ldev Linux device to open. + * @return 0 on success, or a negative error code on failure. + */ +static int metal_uio_class_dev_open(struct linux_bus *lbus, + struct linux_device *ldev) +{ + int result; + + ldev->fd = -1; + ldev->device.irq_info = (void *)-1; + + result = metal_uio_find_device_by_name(ldev->dev_name, ldev); + if (result) { + metal_log(METAL_LOG_ERROR, "UIO device %s lookup failed: %d\n", + ldev->dev_name, result); + return result; + } + + return metal_uio_populate(lbus, ldev); +} + static void metal_uio_dev_close(struct linux_bus *lbus, struct linux_device *ldev) { From 6e9937c30c29cc550da4f1adcaf906840853ec0a Mon Sep 17 00:00:00 2001 From: Ben Levinsky Date: Mon, 8 Jun 2026 09:49:20 -0700 Subject: [PATCH 6/6] lib: linux: register synthetic UIO bus Register a synthetic Linux uio bus so callers can use the existing metal_device_open("uio", name, ...) API shape to open UIO devices by the value exported in /sys/class/uio/uioX/name. This bus is not backed by a sysfs bus directory or a probed kernel driver handle. During Linux bus initialization, register it only when /sys/class/uio exists, and skip the normal sysfs bus and driver probing that platform and PCI devices require. During device open, allow the synthetic uio driver to run its class-name open callback without an sdrv handle. The callback resolves the UIO class device and then uses the shared populate path added earlier, so the new bus preserves the same mmap, IRQ, DMA, and close semantics as existing UIO-backed platform and PCI opens. Also make bus close tolerate the missing sysfs bus handle and copy the requested device name with snprintf() so oversized names fail cleanly. 
Signed-off-by: Ben Levinsky --- lib/system/linux/device.c | 75 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 70 insertions(+), 5 deletions(-) diff --git a/lib/system/linux/device.c b/lib/system/linux/device.c index 5fb6e967f..79ac82446 100644 --- a/lib/system/linux/device.c +++ b/lib/system/linux/device.c @@ -108,6 +108,22 @@ static struct linux_device *to_linux_device(struct metal_device *device) return metal_container_of(device, struct linux_device, device); } +/** + * @internal + * + * @brief Check whether a Linux bus is the synthetic UIO bus. + * + * The synthetic UIO bus opens devices directly from /sys/class/uio instead of + * through a sysfs bus directory and parent-bus drivers. + * + * @param[in] lbus Linux bus to check. + * @return true if the bus is the synthetic UIO bus, otherwise false. + */ +static bool metal_linux_is_uio_bus(const struct linux_bus *lbus) +{ + return strcmp(lbus->bus_name, "uio") == 0; +} + /** * @internal * @@ -553,8 +569,7 @@ static int metal_uio_populate(struct linux_bus *lbus, struct linux_device *ldev) irq_info = 1; if (write(ldev->fd, &irq_info, sizeof(irq_info)) <= 0) { metal_log(METAL_LOG_INFO, - "%s: No IRQ for device %s.\n", - __func__, ldev->dev_name); + "No IRQ for device %s.\n", ldev->dev_name); ldev->device.irq_num = 0; ldev->device.irq_info = (void *)-1; } else { @@ -849,6 +864,22 @@ static void metal_uio_dev_dma_unmap(struct linux_bus *lbus, } static struct linux_bus linux_bus[] = { + { + .bus_name = "uio", + .drivers = { + { + .drv_name = "uio", + .mod_name = "uio", + .cls_name = "uio", + .dev_open = metal_uio_class_dev_open, + .dev_close = metal_uio_dev_close, + .dev_irq_ack = metal_uio_dev_irq_ack, + .dev_dma_map = metal_uio_dev_dma_map, + .dev_dma_unmap = metal_uio_dev_dma_unmap, + }, + { 0 /* sentinel */ } + } + }, { .bus_name = "platform", .drivers = { @@ -924,12 +955,18 @@ static int metal_linux_dev_open(struct metal_bus *bus, for_each_linux_driver(lbus, ldrv) { /* Check if we have a viable driver. 
*/ - if (!ldrv->sdrv || !ldrv->dev_open) + if (!ldrv->dev_open || + (!metal_linux_is_uio_bus(lbus) && !ldrv->sdrv)) continue; /* Reset device data. */ memset(ldev, 0, sizeof(*ldev)); - strncpy(ldev->dev_name, dev_name, sizeof(ldev->dev_name) - 1); + ret = snprintf(ldev->dev_name, sizeof(ldev->dev_name), + "%s", dev_name); + if (ret < 0 || ret >= (int)sizeof(ldev->dev_name)) { + error = -EOVERFLOW; + goto out; + } ldev->fd = -1; ldev->ldrv = ldrv; ldev->device.bus = bus; @@ -974,6 +1011,16 @@ static void metal_linux_dev_close(struct metal_bus *bus, free(ldev); } +/** + * @internal + * + * @brief Close a Linux bus and any probed sysfs driver handles. + * + * The synthetic UIO bus has no sysfs bus handle, so the bus handle is closed + * only when one was opened during probing. + * + * @param[in] bus Metal bus to close. + */ static void metal_linux_bus_close(struct metal_bus *bus) { struct linux_bus *lbus = to_linux_bus(bus); @@ -985,7 +1032,8 @@ static void metal_linux_bus_close(struct metal_bus *bus) ldrv->sdrv = NULL; } - sysfs_close_bus(lbus->sbus); + if (lbus->sbus) + sysfs_close_bus(lbus->sbus); lbus->sbus = NULL; } @@ -1084,11 +1132,28 @@ static int metal_linux_probe_driver(struct linux_bus *lbus, return ldrv->sdrv ? 0 : -ENODEV; } +/** + * @internal + * + * @brief Probe and register a Linux bus. + * + * The synthetic UIO bus is registered only when the /sys/class/uio class + * exists and does not use normal sysfs bus or driver probing. + * + * @param[in,out] lbus Linux bus to probe. + * @return 0 on success, or a negative error code on failure. + */ static int metal_linux_probe_bus(struct linux_bus *lbus) { struct linux_driver *ldrv; int ret, error = -ENODEV; + if (metal_linux_is_uio_bus(lbus)) { + if (sysfs_path_is_dir(METAL_UIO_CLASS_PATH) != 0) + return -ENODEV; + return metal_linux_register_bus(lbus); + } + lbus->sbus = sysfs_open_bus(lbus->bus_name); if (!lbus->sbus) return -ENODEV;