From 21d1a13a903c50d94eb24a1220c47031edefde60 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 6 Mar 2026 16:47:38 +0000 Subject: [PATCH 01/24] cxl: support Type2 when initializing cxl_dev_state BugLink: https://bugs.launchpad.net/bugs/2153819 In preparation for type2 drivers add function and macro for differentiating CXL memory expanders (type 3) from CXL device accelerators (type 2) helping drivers built from public headers to embed struct cxl_dev_state inside a private struct. Update type3 driver for using this same initialization. Signed-off-by: Alejandro Lucero Reviewed-by: Dave Jiang Reviewed-by: Alison Schofield Reviewed-by: Gregory Price Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260306164741.3796372-2-alejandro.lucero-palau@amd.com Signed-off-by: Dave Jiang (cherry picked from commit 9a775c07bb04384f7c03a35dd04818ed818c1f71) Signed-off-by: Koba Ko Acked-by: Matthew R. Ochs Acked-by: Jamie Nguyen Acked-by: Carol L Soto Signed-off-by: Brad Figg (cherry picked from commit b3b0bd6c29f1b77917b3e07dc09855f227bc8ad2) --- drivers/cxl/cxlmem.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 776c50d1db51..700798d2f23c 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -440,6 +440,37 @@ to_cxl_memdev_state(struct cxl_dev_state *cxlds) return container_of(cxlds, struct cxl_memdev_state, cxlds); } +struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev, + enum cxl_devtype type, + u64 serial, u16 dvsec, + size_t size, bool has_mbox); + +/** + * cxl_dev_state_create - safely create and cast a cxl dev state embedded in a + * driver specific struct. 
+ * + * @parent: device behind the request + * @type: CXL device type + * @serial: device identification + * @dvsec: dvsec capability offset + * @drv_struct: driver struct embedding a cxl_dev_state struct + * @member: name of the struct cxl_dev_state member in drv_struct + * @mbox: true if mailbox supported + * + * Returns a pointer to the drv_struct allocated and embedding a cxl_dev_state + * struct initialized. + * + * Introduced for Type2 driver support. + */ +#define devm_cxl_dev_state_create(parent, type, serial, dvsec, drv_struct, member, mbox) \ + ({ \ + static_assert(__same_type(struct cxl_dev_state, \ + ((drv_struct *)NULL)->member)); \ + static_assert(offsetof(drv_struct, member) == 0); \ + (drv_struct *)_devm_cxl_dev_state_create(parent, type, serial, dvsec, \ + sizeof(drv_struct), mbox); \ + }) + enum cxl_opcode { CXL_MBOX_OP_INVALID = 0x0000, CXL_MBOX_OP_RAW = CXL_MBOX_OP_INVALID, -- Gitee From d08195e26c7e901a822999223fd6aa134ca22fee Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 6 Mar 2026 16:47:39 +0000 Subject: [PATCH 02/24] cxl: export internal structs for external Type2 drivers BugLink: https://bugs.launchpad.net/bugs/2153819 In preparation for type2 support, move structs and functions a type2 driver will need access to into a new shared header file. Differentiate between public and private data to be preserved by type2 drivers. Signed-off-by: Alejandro Lucero Reviewed-by: Dave Jiang Tested-by: Alison Schofield Reviewed-by: Gregory Price Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260306164741.3796372-3-alejandro.lucero-palau@amd.com Signed-off-by: Dave Jiang (cherry picked from commit 005869886d1d370afb6c10cd40709d956960e9c2) Signed-off-by: Koba Ko Acked-by: Matthew R. 
Ochs Acked-by: Jamie Nguyen Acked-by: Carol L Soto Signed-off-by: Brad Figg (cherry picked from commit c6ac38b966f9f6c122600da3360782c42e0e96f9) --- drivers/cxl/cxlmem.h | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 700798d2f23c..776c50d1db51 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -440,37 +440,6 @@ to_cxl_memdev_state(struct cxl_dev_state *cxlds) return container_of(cxlds, struct cxl_memdev_state, cxlds); } -struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev, - enum cxl_devtype type, - u64 serial, u16 dvsec, - size_t size, bool has_mbox); - -/** - * cxl_dev_state_create - safely create and cast a cxl dev state embedded in a - * driver specific struct. - * - * @parent: device behind the request - * @type: CXL device type - * @serial: device identification - * @dvsec: dvsec capability offset - * @drv_struct: driver struct embedding a cxl_dev_state struct - * @member: name of the struct cxl_dev_state member in drv_struct - * @mbox: true if mailbox supported - * - * Returns a pointer to the drv_struct allocated and embedding a cxl_dev_state - * struct initialized. - * - * Introduced for Type2 driver support. 
- */ -#define devm_cxl_dev_state_create(parent, type, serial, dvsec, drv_struct, member, mbox) \ - ({ \ - static_assert(__same_type(struct cxl_dev_state, \ - ((drv_struct *)NULL)->member)); \ - static_assert(offsetof(drv_struct, member) == 0); \ - (drv_struct *)_devm_cxl_dev_state_create(parent, type, serial, dvsec, \ - sizeof(drv_struct), mbox); \ - }) - enum cxl_opcode { CXL_MBOX_OP_INVALID = 0x0000, CXL_MBOX_OP_RAW = CXL_MBOX_OP_INVALID, -- Gitee From f1e5255a054e9c0515c16ddb9c93004d9a5d8f87 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 6 Mar 2026 16:47:40 +0000 Subject: [PATCH 03/24] cxl: Move pci generic code from cxl_pci to core/cxl_pci BugLink: https://bugs.launchpad.net/bugs/2153819 Inside cxl/core/pci.c there are helpers for CXL PCIe initialization meanwhile cxl/pci_drv.c implements the functionality for a Type3 device initialization. In preparation for type2 support, move helper functions from cxl/pci.c to cxl/core/pci.c in order to be exported and used by type2 drivers. [ dj: Clarified subject. ] Signed-off-by: Alejandro Lucero Reviewed-by: Dave Jiang Reviewed-by: Gregory Price Reviewed-by: Jonathan Cameron Signed-off-by: Gregory Price Link: https://patch.msgid.link/20260306164741.3796372-4-alejandro.lucero-palau@amd.com Signed-off-by: Dave Jiang (cherry picked from commit 58f28930c7fb0e24cdf2972a9c3b7c91aeef4539) Signed-off-by: Koba Ko Acked-by: Matthew R. 
Ochs Acked-by: Jamie Nguyen Acked-by: Carol L Soto Signed-off-by: Brad Figg (cherry picked from commit f20bbd18cc0d44efea3e09d21b55d238f749a788) --- drivers/cxl/core/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index d1f487b3d809..c32cc62c501d 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -707,6 +707,11 @@ static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, .resource = CXL_RESOURCE_NONE, }; + struct cxl_port *port __free(put_cxl_port) = + cxl_pci_find_port(pdev, &dport); + if (!port) + return -EPROBE_DEFER; + component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport); if (component_reg_phys == CXL_RESOURCE_NONE) return -ENXIO; -- Gitee From d9d9f3f3fafd621466b218a7f531858a1b0f18f1 Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Fri, 6 Mar 2026 16:47:41 +0000 Subject: [PATCH 04/24] cxl/pci: Remove redundant cxl_pci_find_port() call BugLink: https://bugs.launchpad.net/bugs/2153819 Remove the redundant port lookup from cxl_rcrb_get_comp_regs() and use the dport parameter directly. The caller has already validated the port is non-NULL before invoking this function, and dport is given as a param. This is simpler than getting dport in the callee and returning the pointer to the caller, which would require more changes. Signed-off-by: Gregory Price Reviewed-by: Alejandro Lucero Reviewed-by: Jonathan Cameron Reviewed-by: Davidlohr Bueso Link: https://patch.msgid.link/20260306164741.3796372-5-alejandro.lucero-palau@amd.com Signed-off-by: Dave Jiang (cherry picked from commit d537d953c47866bafc89feb66d8ef34baf17659a) Signed-off-by: Koba Ko Acked-by: Matthew R. 
Ochs Acked-by: Jamie Nguyen Acked-by: Carol L Soto Signed-off-by: Brad Figg (cherry picked from commit 9dad3d47e247618fedf0351b1e754fb441e66a99) --- drivers/cxl/core/pci.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index c32cc62c501d..d1f487b3d809 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -707,11 +707,6 @@ static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, .resource = CXL_RESOURCE_NONE, }; - struct cxl_port *port __free(put_cxl_port) = - cxl_pci_find_port(pdev, &dport); - if (!port) - return -EPROBE_DEFER; - component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport); if (component_reg_phys == CXL_RESOURCE_NONE) return -ENXIO; -- Gitee From c08ff96e72ddba4372d1967270c9fa106fdc5b00 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Thu, 23 Apr 2026 19:05:24 +0100 Subject: [PATCH 05/24] NVIDIA: VR: SAUCE: cxl: Prepare memdev creation for type2 BugLink: https://bugs.launchpad.net/bugs/2153819 Current cxl core is relying on a CXL_DEVTYPE_CLASSMEM type device when creating a memdev leading to problems when obtaining cxl_memdev_state references from a CXL_DEVTYPE_DEVMEM type. Modify check for obtaining cxl_memdev_state adding CXL_DEVTYPE_DEVMEM support. Make devm_cxl_add_memdev accessible from an accel driver. Signed-off-by: Alejandro Lucero Reviewed-by: Ben Cheatham Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Alison Schofield Reviewed-by: Dan Williams (cherry picked from https://lore.kernel.org/r/20260423180528.17166-5-alejandro.lucero-palau@amd.com) Signed-off-by: Koba Ko Acked-by: Matthew R. 
Ochs Acked-by: Jamie Nguyen Acked-by: Carol L Soto Signed-off-by: Brad Figg (cherry picked from commit 1b0bf4179e1ec4d96fce92643649a31a683a256a) --- drivers/cxl/core/memdev.c | 15 +++++++++++-- drivers/cxl/cxlmem.h | 6 ------ drivers/cxl/mem.c | 45 +++++++++++++++++++++++++++++---------- include/cxl/cxl.h | 8 +++++++ 4 files changed, 55 insertions(+), 19 deletions(-) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 80e65690eb77..0587a7509a6f 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "trace.h" #include "core.h" @@ -579,9 +580,16 @@ static const struct device_type cxl_memdev_type = { .groups = cxl_memdev_attribute_groups, }; +static const struct device_type cxl_accel_memdev_type = { + .name = "cxl_accel_memdev", + .release = cxl_memdev_release, + .devnode = cxl_memdev_devnode, +}; + bool is_cxl_memdev(const struct device *dev) { - return dev->type == &cxl_memdev_type; + return (dev->type == &cxl_memdev_type || + dev->type == &cxl_accel_memdev_type); } EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, "CXL"); @@ -710,7 +718,10 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, dev->parent = cxlds->dev; dev->bus = &cxl_bus_type; dev->devt = MKDEV(cxl_mem_major, cxlmd->id); - dev->type = &cxl_memdev_type; + if (cxlds->type == CXL_DEVTYPE_DEVMEM) + dev->type = &cxl_accel_memdev_type; + else + dev->type = &cxl_memdev_type; device_set_pm_not_required(dev); INIT_WORK(&cxlmd->detach_work, detach_memdev); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 776c50d1db51..92cca400d113 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -34,10 +34,6 @@ (FIELD_GET(CXLMDEV_RESET_NEEDED_MASK, status) != \ CXLMDEV_RESET_NEEDED_NOT) -struct cxl_memdev_attach { - int (*probe)(struct cxl_memdev *cxlmd); -}; - /** * struct cxl_memdev - CXL bus object representing a Type-3 Memory Device * @dev: driver core device object @@ -103,8 
+99,6 @@ static inline bool is_cxl_endpoint(struct cxl_port *port) struct cxl_memdev *__devm_cxl_add_memdev(struct cxl_dev_state *cxlds, const struct cxl_memdev_attach *attach); -struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds, - const struct cxl_memdev_attach *attach); int devm_cxl_sanitize_setup_notifier(struct device *host, struct cxl_memdev *cxlmd); struct cxl_memdev_state; diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index fcffe24dcb42..ff858318091f 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -65,6 +65,26 @@ static int cxl_debugfs_poison_clear(void *data, u64 dpa) DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL, cxl_debugfs_poison_clear, "%llx\n"); +static void cxl_memdev_poison_enable(struct cxl_memdev_state *mds, + struct cxl_memdev *cxlmd, + struct dentry *dentry) +{ + /* + * Avoid poison debugfs for DEVMEM aka accelerators as they rely on + * cxl_memdev_state. + */ + if (!mds) + return; + + if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds)) + debugfs_create_file("inject_poison", 0200, dentry, cxlmd, + &cxl_poison_inject_fops); + + if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds)) + debugfs_create_file("clear_poison", 0200, dentry, cxlmd, + &cxl_poison_clear_fops); +} + static int cxl_mem_probe(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); @@ -92,12 +112,7 @@ static int cxl_mem_probe(struct device *dev) dentry = cxl_debugfs_create_dir(dev_name(dev)); debugfs_create_devm_seqfile(dev, "dpamem", dentry, cxl_mem_dpa_show); - if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds)) - debugfs_create_file("inject_poison", 0200, dentry, cxlmd, - &cxl_poison_inject_fops); - if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds)) - debugfs_create_file("clear_poison", 0200, dentry, cxlmd, - &cxl_poison_clear_fops); + cxl_memdev_poison_enable(mds, cxlmd, dentry); rc = devm_add_action_or_reset(dev, remove_debugfs, dentry); if (rc) @@ -206,16 +221,24 @@ 
static ssize_t trigger_poison_list_store(struct device *dev, } static DEVICE_ATTR_WO(trigger_poison_list); -static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) +static bool cxl_poison_attr_visible(struct kobject *kobj, struct attribute *a) { struct device *dev = kobj_to_dev(kobj); struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - if (a == &dev_attr_trigger_poison_list.attr) - if (!test_bit(CXL_POISON_ENABLED_LIST, - mds->poison.enabled_cmds)) - return 0; + if (!mds || + !test_bit(CXL_POISON_ENABLED_LIST, mds->poison.enabled_cmds)) + return false; + + return true; +} + +static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) +{ + if (a == &dev_attr_trigger_poison_list.attr && + !cxl_poison_attr_visible(kobj, a)) + return 0; return a->mode; } diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index fa7269154620..10a9b8fa2f6b 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -149,6 +149,10 @@ struct cxl_dpa_partition { #define CXL_NR_PARTITIONS_MAX 2 +struct cxl_memdev_attach { + int (*probe)(struct cxl_memdev *cxlmd); +}; + /** * struct cxl_dev_state - The driver device state * @@ -223,4 +227,8 @@ struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev, (drv_struct *)_devm_cxl_dev_state_create(parent, type, serial, dvsec, \ sizeof(drv_struct), mbox); \ }) + +int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity); +struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds, + const struct cxl_memdev_attach *attach); #endif /* __CXL_CXL_H__ */ -- Gitee From 0cfd011e2b27f0f856a50c216c6ad499f196aac2 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Thu, 23 Apr 2026 19:05:26 +0100 Subject: [PATCH 06/24] NVIDIA: VR: SAUCE: cxl: attach region to an accelerator/type2 memdev BugLink: https://bugs.launchpad.net/bugs/2153819 Support an accelerator driver to safely work with an autodiscovered region from a committed HDM 
decoder through: 1) an accelerator driver cxl_attach_region struct with attach and detach callbacks. 2) a specific function, cxl_memdev_attach_region() keeping the required locks for finding a region linked to the memdev endpoint, and 3) invoking attach callback while those locks are held, allowing the accelerator driver to safely work (ioremap and other internal stuff) with the related physical range, and 4) linking a detach callback to the endpoint device removal where the accelerator driver can stop using the region range. This covers the cases of a potential removal of cxl_acpi module or an accelerator memdev unbinding from cxl_mem driver through sysfs. Signed-off-by: Alejandro Lucero (backported from https://lore.kernel.org/r/20260423180528.17166-7-alejandro.lucero-palau@amd.com) [kobak: Check cxl_memdev_attach_region() errors and propagate failure so SFC probe does not continue after CXL core tears down the attached region. Set probe_data->cxl before attaching so the attach callback can use it, guard attach attempts before a valid endpoint exists, explicitly unwind attach/autoremove side effects if devres action registration fails, preserve DEVMEM target type for autodiscovered regions, and route delete / construct-failure cleanup through endpoint-owned devres actions.] [kobak: Keep no-detach DEVMEM unregister under the endpoint-device guard so attach cannot install endpoint devres actions for a region being freed.] [kobak: Avoid devres-registration failure cleanup under cxl_rwsem.region read lock: keep endpoint->dev locked, drop the region/DPA read guards before unregister_region(), and use devm_remove_action() so failed detach-action registration does not run cxl_endpoint_region_autoremove() under the read lock.] Signed-off-by: Koba Ko Acked-by: Matthew R. 
Ochs Acked-by: Jamie Nguyen Acked-by: Carol L Soto Signed-off-by: Brad Figg (cherry picked from commit 8e713680899db48ba544181a839d00c8df19d459) --- drivers/cxl/core/region.c | 184 ++++++++++++++++++++++++++++++++++++-- drivers/cxl/cxl.h | 4 + include/cxl/cxl.h | 17 ++++ 3 files changed, 197 insertions(+), 8 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index e50dc716d4e8..8cf6f0c89e4d 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2559,6 +2559,41 @@ static void unregister_region(void *_cxlr) put_device(&cxlr->dev); } +static void cxl_endpoint_region_autoremove(void *_cxlr); + +static void cxl_region_release_action(struct cxl_region *cxlr) +{ + struct cxl_port *port = cxlrd_to_port(cxlr->cxlrd); + + if (cxlr->type != CXL_DECODER_DEVMEM) { + devm_release_action(port->uport_dev, unregister_region, cxlr); + return; + } + + if (cxlr->params.nr_targets) { + struct cxl_endpoint_decoder *cxled = cxlr->params.targets[0]; + struct cxl_port *endpoint = cxled_to_port(cxled); + + guard(device)(&endpoint->dev); + if (cxlr->detach) { + void (*detach)(void *data) = cxlr->detach; + void *detach_data = cxlr->detach_data; + + cxlr->detach = NULL; + cxlr->detach_data = NULL; + devm_release_action(&endpoint->dev, detach, detach_data); + devm_release_action(&endpoint->dev, + cxl_endpoint_region_autoremove, + cxlr); + } else { + unregister_region(cxlr); + } + return; + } + + unregister_region(cxlr); +} + static struct lock_class_key cxl_region_key; static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int id) @@ -2711,9 +2746,16 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, if (rc) goto err; - rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr); - if (rc) - return ERR_PTR(rc); + /* + * For accelerators/type2, region release linked to endpoint device. + * See handling of cxl_endpoint_region_autoremove() below by + * cxl_memdev_attach_region(). 
+ */ + if (type == CXL_DECODER_HOSTONLYMEM) { + rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr); + if (rc) + return ERR_PTR(rc); + } dev_dbg(port->uport_dev, "%s: created %s\n", dev_name(&cxlrd->cxlsd.cxld.dev), dev_name(dev)); @@ -2764,7 +2806,6 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, memregion_free(rc); return ERR_PTR(-EBUSY); } - return devm_cxl_add_region(cxlrd, id, mode, target_type); } @@ -2836,14 +2877,13 @@ static ssize_t delete_region_store(struct device *dev, const char *buf, size_t len) { struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); - struct cxl_port *port = to_cxl_port(dev->parent); struct cxl_region *cxlr; cxlr = cxl_find_region_by_name(cxlrd, buf); if (IS_ERR(cxlr)) return PTR_ERR(cxlr); - devm_release_action(port->uport_dev, unregister_region, cxlr); + cxl_region_release_action(cxlr); put_device(&cxlr->dev); return len; @@ -3709,7 +3749,6 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, { struct cxl_endpoint_decoder *cxled = ctx->cxled; struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - struct cxl_port *port = cxlrd_to_port(cxlrd); struct cxl_dev_state *cxlds = cxlmd->cxlds; int rc, part = READ_ONCE(cxled->part); struct cxl_region *cxlr; @@ -3730,7 +3769,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, rc = __construct_region(cxlr, ctx); if (rc) { - devm_release_action(port->uport_dev, unregister_region, cxlr); + cxl_region_release_action(cxlr); return ERR_PTR(rc); } @@ -4043,6 +4082,135 @@ static int cxl_region_can_probe(struct cxl_region *cxlr) return 0; } +static int first_mapped_decoder(struct device *dev, const void *data) +{ + struct cxl_endpoint_decoder *cxled; + + if (!is_endpoint_decoder(dev)) + return 0; + + cxled = to_cxl_endpoint_decoder(dev); + if (cxled->cxld.region) + return 1; + + return 0; +} + +/* + * As this is running in endpoint port remove context it does not race cxl_root + * destruction since 
port topologies are always removed depth first. + */ +static void cxl_endpoint_region_autoremove(void *_cxlr) +{ + unregister_region(_cxlr); +} + +/** + * cxl_memdev_attach_region - bind region to accelerator memdev + * + * @cxlmd: a pointer to cxl_memdev to use + * @attach: a pointer to region attach struct with callbacks for + * safely working with a region range by the caller + * + * Returns 0 or error. + */ +int cxl_memdev_attach_region(struct cxl_memdev *cxlmd, + struct cxl_attach_region *attach) +{ + struct cxl_port *endpoint = cxlmd->endpoint; + struct cxl_endpoint_decoder *cxled; + struct cxl_region *cxlr; + int rc; + + if (IS_ERR(endpoint)) + return PTR_ERR(endpoint); + if (!endpoint) + return -ENXIO; + + { + /* hold endpoint lock to setup autoremove of the region */ + guard(device)(&endpoint->dev); + if (!endpoint->dev.driver) + return -ENXIO; + + { + guard(rwsem_read)(&cxl_rwsem.region); + guard(rwsem_read)(&cxl_rwsem.dpa); + + /* + * TODO auto-instantiate a region, for now assume this will + * find an auto-region. + */ + struct device *dev __free(put_device) = + device_find_child(&endpoint->dev, NULL, + first_mapped_decoder); + + if (!dev) { + dev_dbg(cxlmd->cxlds->dev, + "no region found for memdev %s\n", + dev_name(&cxlmd->dev)); + return -ENXIO; + } + + cxled = to_cxl_endpoint_decoder(dev); + cxlr = cxled->cxld.region; + + if (cxlr->params.state < CXL_CONFIG_COMMIT) { + dev_dbg(cxlmd->cxlds->dev, + "region %s not committed for memdev %s\n", + dev_name(&cxlr->dev), dev_name(&cxlmd->dev)); + return -ENXIO; + } + + if (cxlr->params.nr_targets > 1) { + dev_dbg(cxlmd->cxlds->dev, + "Only attach to local non-interleaved region\n"); + return -ENXIO; + } + + attach->region = (struct range) { + .start = cxlr->params.res->start, + .end = cxlr->params.res->end, + }; + + /* + * With endpoint locked leave the caller to safely work + * with the region range. 
+ */ + rc = attach->attach(attach->data); + if (rc) + return rc; + + /* Only teardown regions that pass validation, ignore the rest */ + rc = devm_add_action(&endpoint->dev, + cxl_endpoint_region_autoremove, cxlr); + if (rc) { + attach->detach(attach->data); + goto err_unregister; + } + + /* Link type2 driver callback for stopping use of the region range. */ + rc = devm_add_action_or_reset(&endpoint->dev, + attach->detach, attach->data); + if (rc) { + devm_remove_action(&endpoint->dev, + cxl_endpoint_region_autoremove, + cxlr); + goto err_unregister; + } + + cxlr->detach = attach->detach; + cxlr->detach_data = attach->data; + + return 0; + } +err_unregister: + unregister_region(cxlr); + return rc; + } +} +EXPORT_SYMBOL_NS_GPL(cxl_memdev_attach_region, "CXL"); + static int cxl_region_probe(struct device *dev) { struct cxl_region *cxlr = to_cxl_region(dev); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 1297594beaec..32702da9edfd 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -455,6 +455,8 @@ struct cxl_region_params { * @hpa_range: Address range occupied by the region * @mode: Operational mode of the mapped capacity * @type: Endpoint decoder target type + * @detach: accelerator detach callback for device-memory regions + * @detach_data: accelerator detach callback data * @cxl_nvb: nvdimm bridge for coordinating @cxlr_pmem setup / shutdown * @cxlr_pmem: (for pmem regions) cached copy of the nvdimm bridge * @flags: Region state flags @@ -470,6 +472,8 @@ struct cxl_region { struct range hpa_range; enum cxl_partition_mode mode; enum cxl_decoder_type type; + void (*detach)(void *data); + void *detach_data; struct cxl_nvdimm_bridge *cxl_nvb; struct cxl_pmem_region *cxlr_pmem; unsigned long flags; diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index 10a9b8fa2f6b..22d9435b351f 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -7,6 +7,7 @@ #include #include +#include #include /** @@ -153,6 +154,20 @@ struct cxl_memdev_attach { int 
(*probe)(struct cxl_memdev *cxlmd); }; +/** + * struct cxl_attach_region - accelerator region handling + * @attach: invoked at cxl_memdev_attach_region() with endpoint device locked. + * @detach: invoked at endpoint release. + * @data: pointer referencing accelerator data for attach and detach calls. + * @region: initialised with autodiscovered region values linked to memdev. + */ +struct cxl_attach_region { + int (*attach)(void *); + void (*detach)(void *); + void *data; + struct range region; +}; + /** * struct cxl_dev_state - The driver device state * @@ -231,4 +246,6 @@ struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev, int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity); struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds, const struct cxl_memdev_attach *attach); +struct cxl_region; +int cxl_memdev_attach_region(struct cxl_memdev *cxlmd, struct cxl_attach_region *attach); #endif /* __CXL_CXL_H__ */ -- Gitee From 8835092edb38c6f37e4fcab77765b9fd195843ab Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Thu, 23 Apr 2026 19:05:27 +0100 Subject: [PATCH 07/24] NVIDIA: VR: SAUCE: cxl: Avoid dax creation for accelerators BugLink: https://bugs.launchpad.net/bugs/2153819 By definition a type2 cxl device will use the host managed memory for specific functionality, therefore it should not be available to other uses like DAX. Signed-off-by: Alejandro Lucero Reviewed-by: Jonathan Cameron Reviewed-by: Davidlohr Bueso Reviewed-by: Dave Jiang Reviewed-by: Ben Cheatham (cherry picked from https://lore.kernel.org/r/20260423180528.17166-8-alejandro.lucero-palau@amd.com) Signed-off-by: Koba Ko Acked-by: Matthew R. 
Ochs Acked-by: Jamie Nguyen Acked-by: Carol L Soto Signed-off-by: Brad Figg (cherry picked from commit c8df664ce1c8567f4c89c58d7e074ca56d691379) --- drivers/cxl/core/region.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 8cf6f0c89e4d..f9e9e650253f 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -4221,6 +4221,13 @@ static int cxl_region_probe(struct device *dev) if (rc) return rc; + /* + * HDM-D[B] (device-memory) regions have accelerator specific usage. + * Skip device-dax registration. + */ + if (cxlr->type == CXL_DECODER_DEVMEM) + return 0; + /* * From this point on any path that changes the region's state away from * CXL_CONFIG_COMMIT is also responsible for releasing the driver. -- Gitee From 811b706512697db2836e820aa95912a36dc08679 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Tue, 10 Feb 2026 06:44:55 +0000 Subject: [PATCH 08/24] NVIDIA: VR: SAUCE: cxl/region: Skip decoder reset on detach for autodiscovered regions BugLink: https://bugs.launchpad.net/bugs/2153819 __cxl_decoder_detach() currently resets decoder programming whenever a region is detached if cxl_config_state is beyond CXL_CONFIG_ACTIVE. For autodiscovered regions, this can incorrectly tear down decoder state that may be relied upon by other consumers or by subsequent ownership decisions. Skip cxl_region_decode_reset() during detach when CXL_REGION_F_AUTO is set. Signed-off-by: Smita Koralahalli Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Alejandro Lucero Tested-by: Tomasz Wolski Link: https://lore.kernel.org/r/20260210064501.157591-4-Smita.KoralahalliChannabasappa@amd.com (cherry picked from https://lore.kernel.org/r/20260210064501.157591-4-Smita.KoralahalliChannabasappa@amd.com) Signed-off-by: Koba Ko Acked-by: Matthew R. 
Ochs Acked-by: Jamie Nguyen Acked-by: Carol L Soto Signed-off-by: Brad Figg (cherry picked from commit ba18d87a25e07df2fb0909849cbd437cb75dfd8e) --- drivers/cxl/core/region.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index f9e9e650253f..6cd8e5a11e7c 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2277,7 +2277,9 @@ __cxl_decoder_detach(struct cxl_region *cxlr, cxled->part = -1; if (p->state > CXL_CONFIG_ACTIVE) { - cxl_region_decode_reset(cxlr, p->interleave_ways); + if (!test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) + cxl_region_decode_reset(cxlr, p->interleave_ways); + p->state = CXL_CONFIG_ACTIVE; } -- Gitee From af173386a2fab8d4e500fc4d486942e94f820393 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Tue, 10 Feb 2026 06:44:58 +0000 Subject: [PATCH 09/24] NVIDIA: VR: SAUCE: cxl/region: Add helper to check Soft Reserved containment by CXL regions BugLink: https://bugs.launchpad.net/bugs/2153819 Add a helper to determine whether a given Soft Reserved memory range is fully contained within the committed CXL region. This helper provides a primitive for policy decisions in subsequent patches such as co-ordination with dax_hmem to determine whether CXL has fully claimed ownership of Soft Reserved memory ranges. Signed-off-by: Smita Koralahalli Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Dan Williams Tested-by: Tomasz Wolski Link: https://lore.kernel.org/r/20260210064501.157591-7-Smita.KoralahalliChannabasappa@amd.com (backported from https://lore.kernel.org/r/20260210064501.157591-7-Smita.KoralahalliChannabasappa@amd.com) [kobak: Added the Soft Reserved declaration to the existing Type2 include/cxl/cxl.h header instead of recreating that header.] Signed-off-by: Koba Ko Acked-by: Matthew R. 
Ochs Acked-by: Jamie Nguyen Acked-by: Carol L Soto Signed-off-by: Brad Figg (cherry picked from commit 11b8aaac68e515d53f89c2c356f86b3d2cb21d43) --- drivers/cxl/core/region.c | 7 +++++++ include/cxl/cxl.h | 9 +++++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 6cd8e5a11e7c..54ba2b898378 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include "core.h" @@ -4059,6 +4060,12 @@ bool cxl_region_contains_resource(const struct resource *res) } EXPORT_SYMBOL_FOR_MODULES(cxl_region_contains_resource, "dax_hmem"); +bool cxl_region_contains_soft_reserve(struct resource *res) +{ + return cxl_region_contains_resource(res); +} +EXPORT_SYMBOL_GPL(cxl_region_contains_soft_reserve); + static int cxl_region_can_probe(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index 22d9435b351f..3fbd9eac137e 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -248,4 +248,13 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds, const struct cxl_memdev_attach *attach); struct cxl_region; int cxl_memdev_attach_region(struct cxl_memdev *cxlmd, struct cxl_attach_region *attach); + +#ifdef CONFIG_CXL_REGION +bool cxl_region_contains_soft_reserve(struct resource *res); +#else +static inline bool cxl_region_contains_soft_reserve(struct resource *res) +{ + return false; +} +#endif #endif /* __CXL_CXL_H__ */ -- Gitee From e66d7a593a5a64386b05afb2bb5852d0668cc46d Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 28 Oct 2025 10:47:53 +0100 Subject: [PATCH 10/24] NVIDIA: VR: SAUCE: cxl/region: Support multi-level interleaving with smaller granularities for lower levels BugLink: https://bugs.launchpad.net/bugs/2153819 The CXL specification supports multi-level interleaving "as long as all the levels use different, but consecutive, HPA bits to 
select the target and no Interleave Set has more than 8 devices" (from 3.2). Currently the kernel expects that a decoder's "interleave granularity is a multiple of @parent_port granularity". That is, the granularity of a lower level is bigger than that of the parent and uses the outer HPA bits as selector. It works e.g. for the following 8-way config: * cross-link (cross-hostbridge config in CFMWS): * 4-way * 256 granularity * Selector: HPA[8:9] * sub-link (CXL Host bridge config of the HDM): * 2-way * 1024 granularity * Selector: HPA[10] Now, if the outer HPA bits are used for the cross-hostbridge, an 8-way config could look like this: * cross-link (cross-hostbridge config in CFMWS): * 4-way * 512 granularity * Selector: HPA[9:10] * sub-link (CXL Host bridge config of the HDM): * 2-way * 256 granularity * Selector: HPA[8] The enumeration of decoders for this configuration then fails with the following error: cxl region0: pci0000:00:port1 cxl_port_setup_targets expected iw: 2 ig: 1024 [mem 0x10000000000-0x1ffffffffff flags 0x200] cxl region0: pci0000:00:port1 cxl_port_setup_targets got iw: 2 ig: 256 state: enabled 0x10000000000:0x1ffffffffff cxl_port endpoint12: failed to attach decoder12.0 to region0: -6 Note that this happens only if firmware is setting up the decoders (CXL_REGION_F_AUTO). For userspace region assembly the granularities are chosen to increase from root down to the lower levels. That is, outer HPA bits are always used for lower interleaving levels. Rework the implementation to also support multi-level interleaving with smaller granularities for lower levels. Determine the interleave set of autodetected decoders. Check that it is a subset of the root interleave. The HPA selector bits are extracted for all decoders of the set and checked to ensure that there is no overlap and that the bits are consecutive. All decoders can be programmed now to use any bit range within the region's target selector. 
Signed-off-by: Robert Richter (backported from https://lore.kernel.org/all/20251028094754.72816-1-rrichter@amd.com/) [kobak: resolved conflicts with cxlr->cxlrd and spa_maps_hpa()] Signed-off-by: Koba Ko Acked-by: Matthew R. Ochs Acked-by: Jamie Nguyen Acked-by: Carol L Soto Signed-off-by: Brad Figg (cherry picked from commit 8354f8e0fa3a272f4bc0f6f51ba59528fa8c0724) --- drivers/cxl/core/region.c | 201 ++++++++++++++++++++------------------ 1 file changed, 108 insertions(+), 93 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 54ba2b898378..1256661a4208 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1424,57 +1424,119 @@ static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig) return 0; } +static inline u64 get_selector(u64 ways, u64 gran) +{ + if (!is_power_of_2(ways)) + ways /= 3; + + if (!is_power_of_2(ways) || !is_power_of_2(gran)) + return 0; + + return (ways - 1) * gran; +} + static int cxl_port_setup_targets(struct cxl_port *port, struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled) { struct cxl_root_decoder *cxlrd = cxlr->cxlrd; - int parent_iw, parent_ig, ig, iw, rc, pos = cxled->pos; struct cxl_port *parent_port = to_cxl_port(port->dev.parent); struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr); struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_ep *ep = cxl_ep_load(port, cxlmd); struct cxl_region_params *p = &cxlr->params; struct cxl_decoder *cxld = cxl_rr->decoder; - struct cxl_switch_decoder *cxlsd; + struct cxl_switch_decoder *cxlsd = to_cxl_switch_decoder(&cxld->dev); struct cxl_port *iter = port; - u16 eig, peig; - u8 eiw, peiw; + int ig, iw = cxl_rr->nr_targets, rc, pos = cxled->pos; + int distance, parent_distance; + u64 selector, cxlr_sel; + u16 eig; + u8 eiw; /* * While root level decoders support x3, x6, x12, switch level * decoders only support powers of 2 up to x16. 
*/ - if (!is_power_of_2(cxl_rr->nr_targets)) { + if (!is_power_of_2(iw)) { dev_dbg(&cxlr->dev, "%s:%s: invalid target count %d\n", - dev_name(port->uport_dev), dev_name(&port->dev), - cxl_rr->nr_targets); + dev_name(port->uport_dev), dev_name(&port->dev), iw); return -EINVAL; } - cxlsd = to_cxl_switch_decoder(&cxld->dev); - if (cxl_rr->nr_targets_set) { - int i, distance = 1; - struct cxl_region_ref *cxl_rr_iter; + if (iw > 8 || iw > cxlsd->nr_targets) { + dev_dbg(&cxlr->dev, + "%s:%s:%s: ways: %d overflows targets: %d\n", + dev_name(port->uport_dev), dev_name(&port->dev), + dev_name(&cxld->dev), iw, cxlsd->nr_targets); + return -ENXIO; + } - /* - * The "distance" between peer downstream ports represents which - * endpoint positions in the region interleave a given port can - * host. - * - * For example, at the root of a hierarchy the distance is - * always 1 as every index targets a different host-bridge. At - * each subsequent switch level those ports map every Nth region - * position where N is the width of the switch == distance. - */ - do { - cxl_rr_iter = cxl_rr_load(iter, cxlr); - distance *= cxl_rr_iter->nr_targets; - iter = to_cxl_port(iter->dev.parent); - } while (!is_cxl_root(iter)); - distance *= cxlrd->cxlsd.cxld.interleave_ways; + /* + * Calculate the effective granularity and ways to determine + * HPA bits used as target selectors of the interleave set. + * Use this to check if the root decoder and all subsequent + * HDM decoders only use bits from that range as selectors. + * + * The "distance" between peer downstream ports represents which + * endpoint positions in the region interleave a given port can + * host. + * + * For example, at the root of a hierarchy the distance is + * always 1 as every index targets a different host-bridge. At + * each subsequent switch level those ports map every Nth region + * position where N is the width of the switch == distance. + */ + + /* Start with the root decoders selector and distance. 
*/ + selector = get_selector(cxlrd->cxlsd.cxld.interleave_ways, + cxlrd->cxlsd.cxld.interleave_granularity); + distance = cxlrd->cxlsd.cxld.interleave_ways; + if (!is_power_of_2(distance)) + distance /= 3; + + for (iter = parent_port; !is_cxl_root(iter); + iter = to_cxl_port(iter->dev.parent)) { + struct cxl_region_ref *cxl_rr_iter = cxl_rr_load(iter, cxlr); + struct cxl_decoder *cxld_iter = cxl_rr_iter->decoder; + u64 cxld_sel; + + if (cxld_iter->interleave_ways == 1) + continue; + + cxld_sel = get_selector(cxld_iter->interleave_ways, + cxld_iter->interleave_granularity); + + if (cxld_sel & selector) { + dev_dbg(&cxlr->dev, "%s:%s: overlapping selectors: %#llx:%#llx\n", + dev_name(iter->uport_dev), + dev_name(&iter->dev), cxld_sel, selector); + return -ENXIO; + } - for (i = 0; i < cxl_rr->nr_targets_set; i++) + selector |= cxld_sel; + distance *= cxl_rr_iter->nr_targets; + } + + parent_distance = distance; + distance *= iw; + + /* The combined selector bits must fit the region selector. */ + cxlr_sel = get_selector(p->interleave_ways, + p->interleave_granularity); + + if ((cxlr_sel & selector) != selector) { + dev_dbg(&cxlr->dev, "%s:%s: invalid selectors: %#llx:%#llx\n", + dev_name(iter->uport_dev), + dev_name(&iter->dev), cxlr_sel, selector); + return -ENXIO; + } + + /* Calculate remaining selector bits available for use. */ + selector = cxlr_sel & ~selector; + + if (cxl_rr->nr_targets_set) { + for (int i = 0; i < cxl_rr->nr_targets_set; i++) if (ep->dport == cxlsd->target[i]) { rc = check_last_peer(cxled, ep, cxl_rr, distance); @@ -1485,87 +1547,40 @@ static int cxl_port_setup_targets(struct cxl_port *port, goto add_target; } - if (is_cxl_root(parent_port)) { + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) + ig = cxld->interleave_granularity; + else /* + * Set the interleave granularity with each interleave + * level to a multiple of its parent port interleave + * ways. 
Beginning with the granularity of the root + * decoder set to the region granularity (starting + * with the inner selector bits of the HPA), the + * granularity is increased with each level. Calculate + * this using the parent distance and region + * granularity. + * * Root decoder IG is always set to value in CFMWS which * may be different than this region's IG. We can use the * region's IG here since interleave_granularity_store() * does not allow interleaved host-bridges with * root IG != region IG. */ - parent_ig = p->interleave_granularity; - parent_iw = cxlrd->cxlsd.cxld.interleave_ways; - /* - * For purposes of address bit routing, use power-of-2 math for - * switch ports. - */ - if (!is_power_of_2(parent_iw)) - parent_iw /= 3; - } else { - struct cxl_region_ref *parent_rr; - struct cxl_decoder *parent_cxld; - - parent_rr = cxl_rr_load(parent_port, cxlr); - parent_cxld = parent_rr->decoder; - parent_ig = parent_cxld->interleave_granularity; - parent_iw = parent_cxld->interleave_ways; - } - - rc = granularity_to_eig(parent_ig, &peig); - if (rc) { - dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n", - dev_name(parent_port->uport_dev), - dev_name(&parent_port->dev), parent_ig); - return rc; - } - - rc = ways_to_eiw(parent_iw, &peiw); - if (rc) { - dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n", - dev_name(parent_port->uport_dev), - dev_name(&parent_port->dev), parent_iw); - return rc; - } + ig = p->interleave_granularity * parent_distance; - iw = cxl_rr->nr_targets; rc = ways_to_eiw(iw, &eiw); - if (rc) { - dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n", - dev_name(port->uport_dev), dev_name(&port->dev), iw); - return rc; - } - - /* - * Interleave granularity is a multiple of @parent_port granularity. - * Multiplier is the parent port interleave ways. 
- */ - rc = granularity_to_eig(parent_ig * parent_iw, &eig); - if (rc) { - dev_dbg(&cxlr->dev, - "%s: invalid granularity calculation (%d * %d)\n", - dev_name(&parent_port->dev), parent_ig, parent_iw); - return rc; - } - - rc = eig_to_granularity(eig, &ig); - if (rc) { - dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n", - dev_name(port->uport_dev), dev_name(&port->dev), - 256 << eig); - return rc; - } + if (!rc) + rc = granularity_to_eig(ig, &eig); - if (iw > 8 || iw > cxlsd->nr_targets) { - dev_dbg(&cxlr->dev, - "%s:%s:%s: ways: %d overflows targets: %d\n", + if (rc || (iw > 1 && ~selector & get_selector(iw, ig))) { + dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d:%d:%#llx\n", dev_name(port->uport_dev), dev_name(&port->dev), - dev_name(&cxld->dev), iw, cxlsd->nr_targets); + iw, ig, selector); return -ENXIO; } if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { if (cxld->interleave_ways != iw || - (iw > 1 && cxld->interleave_granularity != ig) || !spa_maps_hpa(p, &cxld->hpa_range) || ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) { dev_err(&cxlr->dev, -- Gitee From a965477ba47ea865a11e1a6a3816320150682155 Mon Sep 17 00:00:00 2001 From: Srirangan Madhavan Date: Fri, 6 Mar 2026 08:00:16 +0000 Subject: [PATCH 11/24] NVIDIA: VR: SAUCE: cxl: Move HDM decoder and register map definitions to include/cxl/cxl.h BugLink: https://bugs.launchpad.net/bugs/2153819 BugLink: https://bugs.launchpad.net/bugs/2143032 Move CXL HDM decoder register defines, register map structs (cxl_reg_map, cxl_component_reg_map, cxl_device_reg_map, cxl_pmu_reg_map, cxl_register_map), cxl_hdm_decoder_count(), enum cxl_regloc_type, and cxl_find_regblock()/cxl_setup_regs() declarations from internal CXL headers to include/cxl/pci.h. This makes them accessible to code outside the CXL subsystem, in particular the PCI core CXL state save/restore support added in a subsequent patch. No functional change. 
Signed-off-by: Srirangan Madhavan (backported from https://lore.kernel.org/linux-cxl/20260306080026.116789-1-smadhavan@nvidia.com/) Signed-off-by: Jiandi An Acked-by: Jamie Nguyen Acked-by: Nirmoy Das Acked-by: Carol L Soto Acked-by: Matthew R. Ochs Signed-off-by: Brad Figg (backported from commit b5e166cae47a8356338c607c99d98007b83d3324 nv-kernels/24.04_linux-nvidia-6.17-next) [koba: Also move CXL_CM_CAP_CAP_ID_RAS, CXL_CM_CAP_CAP_ID_HDM, and CXL_CM_CAP_CAP_HDM_VERSION into public include/cxl/cxl.h to keep the public CXL header layout consistent.] Signed-off-by: Koba Ko Acked-by: Matt Ochs --- drivers/cxl/cxl.h | 83 ++--------------------------------------------- include/cxl/cxl.h | 58 +++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 81 deletions(-) diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 32702da9edfd..c7bffa399581 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -24,72 +24,6 @@ extern const struct nvdimm_security_ops *cxl_security_ops; * (port-driver, region-driver, nvdimm object-drivers... etc). 
*/ -/* CXL 2.0 8.2.4 CXL Component Register Layout and Definition */ -#define CXL_COMPONENT_REG_BLOCK_SIZE SZ_64K - -/* CXL 2.0 8.2.5 CXL.cache and CXL.mem Registers*/ -#define CXL_CM_OFFSET 0x1000 -#define CXL_CM_CAP_HDR_OFFSET 0x0 -#define CXL_CM_CAP_HDR_ID_MASK GENMASK(15, 0) -#define CM_CAP_HDR_CAP_ID 1 -#define CXL_CM_CAP_HDR_VERSION_MASK GENMASK(19, 16) -#define CM_CAP_HDR_CAP_VERSION 1 -#define CXL_CM_CAP_HDR_CACHE_MEM_VERSION_MASK GENMASK(23, 20) -#define CM_CAP_HDR_CACHE_MEM_VERSION 1 -#define CXL_CM_CAP_HDR_ARRAY_SIZE_MASK GENMASK(31, 24) -#define CXL_CM_CAP_PTR_MASK GENMASK(31, 20) - -#define CXL_CM_CAP_CAP_ID_RAS 0x2 -#define CXL_CM_CAP_CAP_ID_HDM 0x5 -#define CXL_CM_CAP_CAP_HDM_VERSION 1 - -/* HDM decoders CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure */ -#define CXL_HDM_DECODER_CAP_OFFSET 0x0 -#define CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0) -#define CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4) -#define CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8) -#define CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9) -#define CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY BIT(11) -#define CXL_HDM_DECODER_INTERLEAVE_16_WAY BIT(12) -#define CXL_HDM_DECODER_CTRL_OFFSET 0x4 -#define CXL_HDM_DECODER_ENABLE BIT(1) -#define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10) -#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i) (0x20 * (i) + 0x14) -#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i) (0x20 * (i) + 0x18) -#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i) (0x20 * (i) + 0x1c) -#define CXL_HDM_DECODER0_CTRL_OFFSET(i) (0x20 * (i) + 0x20) -#define CXL_HDM_DECODER0_CTRL_IG_MASK GENMASK(3, 0) -#define CXL_HDM_DECODER0_CTRL_IW_MASK GENMASK(7, 4) -#define CXL_HDM_DECODER0_CTRL_LOCK BIT(8) -#define CXL_HDM_DECODER0_CTRL_COMMIT BIT(9) -#define CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10) -#define CXL_HDM_DECODER0_CTRL_COMMIT_ERROR BIT(11) -#define CXL_HDM_DECODER0_CTRL_HOSTONLY BIT(12) -#define CXL_HDM_DECODER0_TL_LOW(i) (0x20 * (i) + 0x24) -#define CXL_HDM_DECODER0_TL_HIGH(i) (0x20 * (i) + 0x28) 
-#define CXL_HDM_DECODER0_SKIP_LOW(i) CXL_HDM_DECODER0_TL_LOW(i) -#define CXL_HDM_DECODER0_SKIP_HIGH(i) CXL_HDM_DECODER0_TL_HIGH(i) - -/* HDM decoder control register constants CXL 3.0 8.2.5.19.7 */ -#define CXL_DECODER_MIN_GRANULARITY 256 -#define CXL_DECODER_MAX_ENCODED_IG 6 - -static inline int cxl_hdm_decoder_count(u32 cap_hdr) -{ - int val = FIELD_GET(CXL_HDM_DECODER_COUNT_MASK, cap_hdr); - - switch (val) { - case 0: - return 1; - case 1 ... 8: - return val * 2; - case 9 ... 12: - return (val - 4) * 4; - default: - return -ENXIO; - } -} - /* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */ static inline int eig_to_granularity(u16 eig, unsigned int *granularity) { @@ -223,13 +157,9 @@ int cxl_map_device_regs(const struct cxl_register_map *map, int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs); #define CXL_INSTANCES_COUNT -1 -enum cxl_regloc_type; int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type); int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map, unsigned int index); -int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map); -int cxl_setup_regs(struct cxl_register_map *map); struct cxl_dport; int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport); @@ -250,7 +180,6 @@ int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport); #define CXL_DECODER_F_LOCK BIT(4) #define CXL_DECODER_F_ENABLE BIT(5) #define CXL_DECODER_F_NORMALIZED_ADDRESSING BIT(6) -#define CXL_DECODER_F_RESET_MASK (CXL_DECODER_F_ENABLE | CXL_DECODER_F_LOCK) enum cxl_decoder_type { CXL_DECODER_DEVMEM = 2, @@ -296,14 +225,12 @@ struct cxl_decoder { }; /* - * Track whether this decoder is free for userspace provisioning, reserved for - * region autodiscovery, whether it is started connecting (awaiting other - * peers), or has completed auto assembly. 
+ * Track whether this decoder is reserved for region autodiscovery, or + * free for userspace provisioning. */ enum cxl_decoder_state { CXL_DECODER_STATE_MANUAL, CXL_DECODER_STATE_AUTO, - CXL_DECODER_STATE_AUTO_STAGED, }; /** @@ -727,7 +654,6 @@ DEFINE_FREE(put_cxl_root, struct cxl_root *, if (_T) put_device(&_T->port.dev)) DEFINE_FREE(put_cxl_port, struct cxl_port *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) DEFINE_FREE(put_cxl_root_decoder, struct cxl_root_decoder *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->cxlsd.cxld.dev)) DEFINE_FREE(put_cxl_region, struct cxl_region *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) -DEFINE_FREE(put_cxl_dax_region, struct cxl_dax_region *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd); void cxl_bus_rescan(void); @@ -859,7 +785,6 @@ struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev); int cxl_add_to_region(struct cxl_endpoint_decoder *cxled); struct cxl_dax_region *to_cxl_dax_region(struct device *dev); u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa); -bool cxl_region_contains_resource(const struct resource *res); #else static inline bool is_cxl_pmem_region(struct device *dev) { @@ -882,10 +807,6 @@ static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, { return 0; } -static inline bool cxl_region_contains_resource(const struct resource *res) -{ - return false; -} #endif void cxl_endpoint_parse_cdat(struct cxl_port *port); diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index 3fbd9eac137e..1c496c1e846c 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -5,6 +5,7 @@ #ifndef __CXL_CXL_H__ #define __CXL_CXL_H__ +#include #include #include #include @@ -71,6 +72,63 @@ struct cxl_regs { ); }; +#define CXL_CM_CAP_CAP_ID_RAS 0x2 +#define CXL_CM_CAP_CAP_ID_HDM 0x5 +#define CXL_CM_CAP_CAP_HDM_VERSION 1 + +/* CXL 2.0 8.2.4 CXL Component Register Layout and Definition */ +#define CXL_COMPONENT_REG_BLOCK_SIZE SZ_64K + 
+/* CXL 2.0 8.2.5 CXL.cache and CXL.mem Registers */ +#define CXL_CM_OFFSET 0x1000 +#define CXL_CM_CAP_HDR_OFFSET 0x0 +#define CXL_CM_CAP_HDR_ID_MASK GENMASK(15, 0) +#define CM_CAP_HDR_CAP_ID 1 +#define CXL_CM_CAP_HDR_VERSION_MASK GENMASK(19, 16) +#define CM_CAP_HDR_CAP_VERSION 1 +#define CXL_CM_CAP_HDR_CACHE_MEM_VERSION_MASK GENMASK(23, 20) +#define CM_CAP_HDR_CACHE_MEM_VERSION 1 +#define CXL_CM_CAP_HDR_ARRAY_SIZE_MASK GENMASK(31, 24) +#define CXL_CM_CAP_PTR_MASK GENMASK(31, 20) + +/* HDM decoders CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure */ +#define CXL_HDM_DECODER_CAP_OFFSET 0x0 +#define CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0) +#define CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4) +#define CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8) +#define CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9) +#define CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY BIT(11) +#define CXL_HDM_DECODER_INTERLEAVE_16_WAY BIT(12) +#define CXL_HDM_DECODER_CTRL_OFFSET 0x4 +#define CXL_HDM_DECODER_ENABLE BIT(1) +#define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10) +#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i) (0x20 * (i) + 0x14) +#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i) (0x20 * (i) + 0x18) +#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i) (0x20 * (i) + 0x1c) +#define CXL_HDM_DECODER0_CTRL_OFFSET(i) (0x20 * (i) + 0x20) +#define CXL_HDM_DECODER0_CTRL_IG_MASK GENMASK(3, 0) +#define CXL_HDM_DECODER0_CTRL_IW_MASK GENMASK(7, 4) +#define CXL_HDM_DECODER0_CTRL_LOCK BIT(8) +#define CXL_HDM_DECODER0_CTRL_COMMIT BIT(9) +#define CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10) +#define CXL_HDM_DECODER0_CTRL_COMMIT_ERROR BIT(11) +#define CXL_HDM_DECODER0_CTRL_HOSTONLY BIT(12) +#define CXL_HDM_DECODER0_TL_LOW(i) (0x20 * (i) + 0x24) +#define CXL_HDM_DECODER0_TL_HIGH(i) (0x20 * (i) + 0x28) +#define CXL_HDM_DECODER0_SKIP_LOW(i) CXL_HDM_DECODER0_TL_LOW(i) +#define CXL_HDM_DECODER0_SKIP_HIGH(i) CXL_HDM_DECODER0_TL_HIGH(i) + +/* HDM decoder control register constants CXL 3.0 8.2.5.19.7 */ +#define 
CXL_DECODER_MIN_GRANULARITY 256 +#define CXL_DECODER_MAX_ENCODED_IG 6 + +static inline int cxl_hdm_decoder_count(u32 cap_hdr) +{ + int val = FIELD_GET(CXL_HDM_DECODER_COUNT_MASK, cap_hdr); + + return val ? val * 2 : 1; +} + struct cxl_reg_map { bool valid; int id; -- Gitee From 972b867960b941b4b62de31eb7aecf2600eb0b6b Mon Sep 17 00:00:00 2001 From: Manish Honap Date: Wed, 1 Apr 2026 20:09:00 +0530 Subject: [PATCH 12/24] NVIDIA: VR: SAUCE: cxl: Move component/HDM register defines to uapi/cxl/cxl_regs.h BugLink: https://bugs.launchpad.net/bugs/2152222 VFIO and other code outside the CXL core needs the same offset/mask constants the core uses for the component register block and HDM decoders. Pull them into a new include/uapi/cxl/cxl_regs.h (GPL-2.0 WITH Linux-syscall-note) and include it from include/cxl/cxl.h. Use uapi-friendly __GENMASK helpers for masks and _BITUL() for single-bit flags because UAPI headers cannot depend on kernel-internal BIT(). Section comments in the new file reference CXL spec r4.0 numbering. For UAPI change, replaced the SZ_64K with actual size as the macro will not be available for userspace programs. Signed-off-by: Manish Honap Signed-off-by: Jiandi An (backported from https://lore.kernel.org/linux-cxl/20260401143917.108413-1-mhonap@nvidia.com/) (backported from commit 52ead24ed8ade6b664e65fbc5514147a6022263e from https://github.com/JiandiAnNVIDIA/NV-Kernels.git cxl-vfio_2026-04-23) [jan: Remove defines from include/cxl/cxl.h instead of drivers/cxl/cxl.h as they were already moved there by Srirangan's SAUCE commit, Add #include needed by __GENMASK() in uapi header] Signed-off-by: Koba Ko Acked-by: Jamie Nguyen Acked-by: Matthew R. 
Ochs Acked-by: Carol L Soto Signed-off-by: Brad Figg --- include/cxl/cxl.h | 43 +-------------------------- include/uapi/cxl/cxl_regs.h | 58 +++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 42 deletions(-) create mode 100644 include/uapi/cxl/cxl_regs.h diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index 1c496c1e846c..cf7f37e67644 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -10,6 +10,7 @@ #include #include #include +#include /** * enum cxl_devtype - delineate type-2 from a generic type-3 device @@ -76,48 +77,6 @@ struct cxl_regs { #define CXL_CM_CAP_CAP_ID_HDM 0x5 #define CXL_CM_CAP_CAP_HDM_VERSION 1 -/* CXL 2.0 8.2.4 CXL Component Register Layout and Definition */ -#define CXL_COMPONENT_REG_BLOCK_SIZE SZ_64K - -/* CXL 2.0 8.2.5 CXL.cache and CXL.mem Registers */ -#define CXL_CM_OFFSET 0x1000 -#define CXL_CM_CAP_HDR_OFFSET 0x0 -#define CXL_CM_CAP_HDR_ID_MASK GENMASK(15, 0) -#define CM_CAP_HDR_CAP_ID 1 -#define CXL_CM_CAP_HDR_VERSION_MASK GENMASK(19, 16) -#define CM_CAP_HDR_CAP_VERSION 1 -#define CXL_CM_CAP_HDR_CACHE_MEM_VERSION_MASK GENMASK(23, 20) -#define CM_CAP_HDR_CACHE_MEM_VERSION 1 -#define CXL_CM_CAP_HDR_ARRAY_SIZE_MASK GENMASK(31, 24) -#define CXL_CM_CAP_PTR_MASK GENMASK(31, 20) - -/* HDM decoders CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure */ -#define CXL_HDM_DECODER_CAP_OFFSET 0x0 -#define CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0) -#define CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4) -#define CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8) -#define CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9) -#define CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY BIT(11) -#define CXL_HDM_DECODER_INTERLEAVE_16_WAY BIT(12) -#define CXL_HDM_DECODER_CTRL_OFFSET 0x4 -#define CXL_HDM_DECODER_ENABLE BIT(1) -#define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10) -#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i) (0x20 * (i) + 0x14) -#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i) (0x20 * (i) + 0x18) -#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i) 
(0x20 * (i) + 0x1c) -#define CXL_HDM_DECODER0_CTRL_OFFSET(i) (0x20 * (i) + 0x20) -#define CXL_HDM_DECODER0_CTRL_IG_MASK GENMASK(3, 0) -#define CXL_HDM_DECODER0_CTRL_IW_MASK GENMASK(7, 4) -#define CXL_HDM_DECODER0_CTRL_LOCK BIT(8) -#define CXL_HDM_DECODER0_CTRL_COMMIT BIT(9) -#define CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10) -#define CXL_HDM_DECODER0_CTRL_COMMIT_ERROR BIT(11) -#define CXL_HDM_DECODER0_CTRL_HOSTONLY BIT(12) -#define CXL_HDM_DECODER0_TL_LOW(i) (0x20 * (i) + 0x24) -#define CXL_HDM_DECODER0_TL_HIGH(i) (0x20 * (i) + 0x28) -#define CXL_HDM_DECODER0_SKIP_LOW(i) CXL_HDM_DECODER0_TL_LOW(i) -#define CXL_HDM_DECODER0_SKIP_HIGH(i) CXL_HDM_DECODER0_TL_HIGH(i) - /* HDM decoder control register constants CXL 3.0 8.2.5.19.7 */ #define CXL_DECODER_MIN_GRANULARITY 256 #define CXL_DECODER_MAX_ENCODED_IG 6 diff --git a/include/uapi/cxl/cxl_regs.h b/include/uapi/cxl/cxl_regs.h new file mode 100644 index 000000000000..c821ef7ec2bb --- /dev/null +++ b/include/uapi/cxl/cxl_regs.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * CXL Standard defines + * + * Hardware register offsets and bit-field masks for the CXL Component + * Register block, as defined by the CXL Specification r4.0. 
+ */ + +#ifndef _UAPI_CXL_REGS_H_ +#define _UAPI_CXL_REGS_H_ + +#include <asm/bitsperlong.h> /* __BITS_PER_LONG; needed by __GENMASK() */ +#include <linux/const.h> /* _BITUL(), _BITULL() */ +#include <linux/bits.h> /* __GENMASK() */ + +/* CXL 4.0 8.2.3 CXL Component Register Layout and Definition */ +#define CXL_COMPONENT_REG_BLOCK_SIZE 0x00010000 + +/* CXL 4.0 8.2.4 CXL.cache and CXL.mem Registers*/ +#define CXL_CM_OFFSET 0x1000 +#define CXL_CM_CAP_HDR_OFFSET 0x0 +#define CXL_CM_CAP_HDR_ID_MASK __GENMASK(15, 0) +#define CM_CAP_HDR_CAP_ID 1 +#define CXL_CM_CAP_HDR_VERSION_MASK __GENMASK(19, 16) +#define CM_CAP_HDR_CAP_VERSION 1 +#define CXL_CM_CAP_HDR_CACHE_MEM_VERSION_MASK __GENMASK(23, 20) +#define CM_CAP_HDR_CACHE_MEM_VERSION 1 +#define CXL_CM_CAP_HDR_ARRAY_SIZE_MASK __GENMASK(31, 24) +#define CXL_CM_CAP_PTR_MASK __GENMASK(31, 20) + +/* CXL HDM Decoder Capability Structure */ +#define CXL_HDM_DECODER_CAP_OFFSET 0x0 +#define CXL_HDM_DECODER_COUNT_MASK __GENMASK(3, 0) +#define CXL_HDM_DECODER_TARGET_COUNT_MASK __GENMASK(7, 4) +#define CXL_HDM_DECODER_INTERLEAVE_11_8 _BITUL(8) +#define CXL_HDM_DECODER_INTERLEAVE_14_12 _BITUL(9) +#define CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY _BITUL(11) +#define CXL_HDM_DECODER_INTERLEAVE_16_WAY _BITUL(12) +#define CXL_HDM_DECODER_CTRL_OFFSET 0x4 +#define CXL_HDM_DECODER_ENABLE _BITUL(1) +#define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10) +#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i) (0x20 * (i) + 0x14) +#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i) (0x20 * (i) + 0x18) +#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i) (0x20 * (i) + 0x1c) +#define CXL_HDM_DECODER0_CTRL_OFFSET(i) (0x20 * (i) + 0x20) +#define CXL_HDM_DECODER0_CTRL_IG_MASK __GENMASK(3, 0) +#define CXL_HDM_DECODER0_CTRL_IW_MASK __GENMASK(7, 4) +#define CXL_HDM_DECODER0_CTRL_LOCK _BITUL(8) +#define CXL_HDM_DECODER0_CTRL_COMMIT _BITUL(9) +#define CXL_HDM_DECODER0_CTRL_COMMITTED _BITUL(10) +#define CXL_HDM_DECODER0_CTRL_COMMIT_ERROR _BITUL(11) +#define CXL_HDM_DECODER0_CTRL_HOSTONLY _BITUL(12) +#define 
CXL_HDM_DECODER0_TL_LOW(i) (0x20 * (i) + 0x24) +#define CXL_HDM_DECODER0_TL_HIGH(i) (0x20 * (i) + 0x28) +#define CXL_HDM_DECODER0_SKIP_LOW(i) CXL_HDM_DECODER0_TL_LOW(i) +#define CXL_HDM_DECODER0_SKIP_HIGH(i) CXL_HDM_DECODER0_TL_HIGH(i) + +#endif /* _UAPI_CXL_REGS_H_ */ -- Gitee From ff26896ef5019bdf6992d2c4c18d6ba4e0d09fe7 Mon Sep 17 00:00:00 2001 From: Manish Honap Date: Wed, 1 Apr 2026 20:08:59 +0530 Subject: [PATCH 13/24] NVIDIA: VR: SAUCE: cxl: Declare cxl_probe_component_regs in public header BugLink: https://bugs.launchpad.net/bugs/2152222 vfio-cxl lives outside drivers/cxl/ but still needs to locate the component register block and fill cxl_component_reg_map. BOS already has cxl_find_regblock() in include/cxl/pci.h, but cxl_probe_component_regs() was still private to drivers/cxl/cxl.h. Declare cxl_probe_component_regs() in include/cxl/pci.h next to the existing register-block helpers so VFIO CXL can use the parsed component register map. Signed-off-by: Manish Honap Signed-off-by: Jiandi An (backported from https://lore.kernel.org/linux-cxl/20260401143917.108413-1-mhonap@nvidia.com/) (backported from commit e02c1b7ac02a7 from https://github.com/JiandiAnNVIDIA/NV-Kernels.git cxl-vfio_2026-04-23) [jan: Move cxl_probe_component_regs() to include/cxl/pci.h instead of include/cxl/cxl.h to align with existing Srirangan/Alejandro convention; skip cxl_find_regblock() move as it is already in include/cxl/pci.h; add struct cxl_component_reg_map forward declaration] [kobak: Kept the target's private drivers/cxl/cxl.h declarations while adding the public include/cxl/pci.h header expected by VFIO CXL.] Signed-off-by: Koba Ko Acked-by: Jamie Nguyen Acked-by: Matthew R. 
Ochs Acked-by: Carol L Soto Signed-off-by: Brad Figg --- include/cxl/cxl.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index cf7f37e67644..ce0cec6c76a1 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -274,4 +274,8 @@ static inline bool cxl_region_contains_soft_reserve(struct resource *res) return false; } #endif +struct cxl_component_reg_map; +void cxl_probe_component_regs(struct device *dev, void __iomem *base, + struct cxl_component_reg_map *map); + #endif /* __CXL_CXL_H__ */ -- Gitee From 8642569045a402d8610694cde2c1ebeb39728ae0 Mon Sep 17 00:00:00 2001 From: Manish Honap Date: Wed, 1 Apr 2026 20:09:02 +0530 Subject: [PATCH 14/24] NVIDIA: VR: SAUCE: cxl: Record BIR and BAR offset in cxl_register_map BugLink: https://bugs.launchpad.net/bugs/2152222 The Register Locator DVSEC (CXL 4.0 8.1.9) describes register blocks by BAR index (BIR) and offset within the BAR. CXL core currently only stores the resolved HPA (resource + offset) in struct cxl_register_map, so callers that need to use pci_iomap() or report the BAR to userspace must reverse-engineer the BAR from the HPA. Add bar_index and bar_offset to struct cxl_register_map and fill them in cxl_decode_regblock() when the regblock is BAR-backed (BIR 0-5). Add cxl_regblock_get_bar_info() so callers (e.g. vfio-cxl) can get BAR index and offset directly and use pci_iomap() instead of ioremap(HPA). Return -EINVAL if the map is not BAR-backed. 
Signed-off-by: Manish Honap Signed-off-by: Jiandi An (backported from https://lore.kernel.org/linux-cxl/20260401143917.108413-1-mhonap@nvidia.com/) (backported from commit 947749bd1b8d0308311553dbb7ed3db38be55907 from https://github.com/JiandiAnNVIDIA/NV-Kernels.git cxl-vfio_2026-04-23) [jan: Add cxl_regblock_get_bar_info() declaration to include/cxl/pci.h unconditionally instead of include/cxl/cxl.h with CONFIG_CXL_BUS guards, consistent with existing convention, Add BIR range validation (reject BIR >= PCI_STD_NUM_BARS) plus a bar_index bounds check in cxl_regblock_get_bar_info()] [kobak: Added the target-local private drivers/cxl/cxl.h cxl_regblock_get_bar_info() prototype; struct cxl_register_map carries bar_index/bar_offset in include/cxl/cxl.h.] Signed-off-by: Koba Ko Acked-by: Jamie Nguyen Acked-by: Matthew R. Ochs Acked-by: Carol L Soto Signed-off-by: Brad Figg --- drivers/cxl/core/regs.c | 49 +++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 2 ++ include/cxl/cxl.h | 12 ++++++++++ 3 files changed, 63 insertions(+) diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 93710cf4f0a6..e59dea079216 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -276,6 +276,19 @@ static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi, u64 offset = ((u64)reg_hi << 32) | (reg_lo & PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_OFF_LOW); + /* + * The BIR field is 3 bits wide (CXL spec); values 6 and 7 are + * reserved. PCI only defines BAR 0-5, and pci_resource_*() on a + * higher index reads past the resource array. Reject those here + * so callers do not get garbage. 
+ */ + if (bar >= PCI_STD_NUM_BARS) { + dev_warn(&pdev->dev, + "Reserved BIR %d in Register Locator entry (type %d)\n", + bar, reg_type); + return false; + } + if (offset > pci_resource_len(pdev, bar)) { dev_warn(&pdev->dev, "BAR%d: %pr: too small (offset: %pa, type: %d)\n", bar, @@ -286,9 +299,44 @@ static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi, map->reg_type = reg_type; map->resource = pci_resource_start(pdev, bar) + offset; map->max_size = pci_resource_len(pdev, bar) - offset; + map->bar_index = bar; + map->bar_offset = offset; return true; } +/** + * cxl_regblock_get_bar_info() - Get BAR index and offset for a BAR-backed + * regblock + * @map: Register map from cxl_find_regblock() or cxl_find_regblock_instance() + * @bar_index: Output BAR index (0-5). Optional, may be NULL. + * @bar_offset: Output offset within the BAR. Optional, may be NULL. + * + * When the register block was found via the Register Locator DVSEC and + * lives in a PCI BAR (BIR 0-5), this returns the BAR index and the offset + * within that BAR. + * + * Return: 0 if the regblock is BAR-backed (bar_index <= 5), -EINVAL otherwise. + */ +int cxl_regblock_get_bar_info(const struct cxl_register_map *map, u8 *bar_index, + resource_size_t *bar_offset) +{ + if (!map || map->bar_index == 0xff) + return -EINVAL; + /* + * Guard callers against stale or out-of-range bar_index. Only BAR + * indices 0..5 are valid PCI BARs; anything else means the map was + * not BAR-backed or was filled from a reserved BIR. + */ + if (map->bar_index >= PCI_STD_NUM_BARS) + return -EINVAL; + if (bar_index) + *bar_index = map->bar_index; + if (bar_offset) + *bar_offset = map->bar_offset; + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_regblock_get_bar_info, "CXL"); + /* * __cxl_find_regblock_instance() - Locate a register block or count instances by type / index * Use CXL_INSTANCES_COUNT for @index if counting instances. 
@@ -307,6 +355,7 @@ static int __cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_ty *map = (struct cxl_register_map) { .host = &pdev->dev, + .bar_index = 0xFF, .resource = CXL_RESOURCE_NONE, }; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index c7bffa399581..d95bfdd8aee1 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -160,6 +160,8 @@ int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs); int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type); int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map, unsigned int index); +int cxl_regblock_get_bar_info(const struct cxl_register_map *map, + u8 *bar_index, resource_size_t *bar_offset); struct cxl_dport; int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport); diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index ce0cec6c76a1..fbeccabe94b3 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -117,9 +117,16 @@ struct cxl_pmu_reg_map { * @resource: physical resource base of the register block * @max_size: maximum mapping size to perform register search * @reg_type: see enum cxl_regloc_type + * @bar_index: PCI BAR index (0-5) when regblock is BAR-backed; 0xFF otherwise + * @bar_offset: offset within the BAR; only valid when bar_index <= 5 * @component_map: cxl_reg_map for component registers * @device_map: cxl_reg_maps for device registers * @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units + * + * When the register block is described by the Register Locator DVSEC with + * a BAR Indicator (BIR 0-5), bar_index and bar_offset are set so callers can + * use pci_iomap(pdev, bar_index, size) and base + bar_offset instead of + * ioremap(resource). 
*/ struct cxl_register_map { struct device *host; @@ -127,6 +134,8 @@ struct cxl_register_map { resource_size_t resource; resource_size_t max_size; u8 reg_type; + u8 bar_index; + resource_size_t bar_offset; union { struct cxl_component_reg_map component_map; struct cxl_device_reg_map device_map; @@ -278,4 +287,7 @@ struct cxl_component_reg_map; void cxl_probe_component_regs(struct device *dev, void __iomem *base, struct cxl_component_reg_map *map); +int cxl_regblock_get_bar_info(const struct cxl_register_map *map, u8 *bar_index, + resource_size_t *bar_offset); + #endif /* __CXL_CXL_H__ */ -- Gitee From ce7d4a1d4c0875c8f602706fb16d0a913f569600 Mon Sep 17 00:00:00 2001 From: Manish Honap Date: Wed, 1 Apr 2026 20:08:58 +0530 Subject: [PATCH 15/24] NVIDIA: VR: SAUCE: cxl: Add cxl_get_hdm_info() for HDM decoder metadata BugLink: https://bugs.launchpad.net/bugs/2152222 cxl_probe_component_regs() finds the HDM decoder block during device probe and caches its location, but does not record the decoder count and does not expose the result outside drivers/cxl/. vfio-cxl needs the decoder count and the byte offset and size of the HDM block without re-running the probe sequence. Record decoder_cnt in rmap->count when parsing the HDM capability in cxl_probe_component_regs(), extend struct cxl_reg_map with a count member, and add cxl_get_hdm_info() to return offset, size, and count from the cached map. Export under the CXL namespace; stub to -EOPNOTSUPP when CONFIG_CXL_BUS is off. 
Co-developed-by: Zhi Wang Signed-off-by: Zhi Wang Signed-off-by: Manish Honap Signed-off-by: Jiandi An (backported from https://lore.kernel.org/linux-cxl/20260401143917.108413-1-mhonap@nvidia.com/) (backported from commit fd317b86093e9 from https://github.com/JiandiAnNVIDIA/NV-Kernels.git cxl-vfio_2026-04-23) [kobak: Added the target-local private drivers/cxl/cxl.h cxl_get_hdm_info() prototype because drivers/cxl/core/pci.c includes the private CXL header in addition to the public include/cxl/cxl.h declaration.] Signed-off-by: Koba Ko Acked-by: Jamie Nguyen Acked-by: Matthew R. Ochs Acked-by: Carol L Soto Signed-off-by: Brad Figg --- drivers/cxl/core/pci.c | 29 +++++++++++++++++++++++++++++ drivers/cxl/core/regs.c | 1 + drivers/cxl/cxl.h | 2 ++ include/cxl/cxl.h | 15 +++++++++++++++ 4 files changed, 47 insertions(+) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index d1f487b3d809..2490ea74bfa3 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -448,6 +448,35 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, } EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, "CXL"); +/** + * cxl_get_hdm_info - Get HDM decoder register block location and count + * @cxlds: CXL device state (must have component regs enumerated via + * cxl_probe_component_regs()) + * @count: number of HDM decoders in the block (from HDM Capability bits [3:0]) + * @offset: byte offset of HDM decoder block within the component register BAR + * @size: size in bytes of the HDM decoder block + * + * Return: 0 on success, -EINVAL on a NULL output pointer, -ENODEV if no HDM block.
+ */ +int cxl_get_hdm_info(struct cxl_dev_state *cxlds, u8 *count, + resource_size_t *offset, resource_size_t *size) +{ + struct cxl_reg_map *hdm = &cxlds->reg_map.component_map.hdm_decoder; + + if (WARN_ON(!count || !offset || !size)) + return -EINVAL; + + if (!hdm->valid) + return -ENODEV; + + *count = hdm->count; + *offset = hdm->offset; + *size = hdm->size; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_get_hdm_info, "CXL"); + #define CXL_DOE_TABLE_ACCESS_REQ_CODE 0x000000ff #define CXL_DOE_TABLE_ACCESS_REQ_CODE_READ 0 #define CXL_DOE_TABLE_ACCESS_TABLE_TYPE 0x0000ff00 diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index e59dea079216..c73a05742be0 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -84,6 +84,7 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, decoder_cnt = cxl_hdm_decoder_count(hdr); length = 0x20 * decoder_cnt + 0x10; rmap = &map->hdm_decoder; + rmap->count = decoder_cnt; break; } case CXL_CM_CAP_CAP_ID_RAS: diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index d95bfdd8aee1..605af66d50dd 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -724,6 +724,8 @@ int cxl_port_setup_regs(struct cxl_port *port, resource_size_t component_reg_phys); struct cxl_dev_state; +int cxl_get_hdm_info(struct cxl_dev_state *cxlds, u8 *count, + resource_size_t *offset, resource_size_t *size); int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, struct cxl_endpoint_dvsec_info *info); diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index fbeccabe94b3..ddefc43561fd 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -93,6 +93,7 @@ struct cxl_reg_map { int id; unsigned long offset; unsigned long size; + u8 count; }; struct cxl_component_reg_map { @@ -290,4 +291,18 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, int cxl_regblock_get_bar_info(const struct cxl_register_map *map, u8 *bar_index, resource_size_t *bar_offset); +#ifdef CONFIG_CXL_BUS + +int cxl_get_hdm_info(struct 
cxl_dev_state *cxlds, u8 *count, + resource_size_t *offset, resource_size_t *size); + +#else + +static inline +int cxl_get_hdm_info(struct cxl_dev_state *cxlds, u8 *count, + resource_size_t *offset, resource_size_t *size) +{ return -EOPNOTSUPP; } + +#endif /* CONFIG_CXL_BUS */ + #endif /* __CXL_CXL_H__ */ -- Gitee From 17d04599dbd347b1da944a37d3114200ec5d353e Mon Sep 17 00:00:00 2001 From: Manish Honap Date: Wed, 1 Apr 2026 20:09:01 +0530 Subject: [PATCH 16/24] NVIDIA: VR: SAUCE: cxl: Split cxl_await_range_active() from media-ready wait BugLink: https://bugs.launchpad.net/bugs/2152222 Before accessing CXL device memory after reset/power-on, the driver must ensure media is ready. Not every CXL device implements the CXL Memory Device register group (many Type-2 devices do not). cxl_await_media_ready() reads cxlds->regs.memdev. Access to the memory device registers on a Type-2 device may result in kernel panic. Split the HDM DVSEC range-active poll out of cxl_await_media_ready() into a new function, cxl_await_range_active(). Type-2 devices often lack the CXLMDEV status register, so they need the range check without the memdev read. cxl_await_media_ready() now calls cxl_await_range_active() for the DVSEC poll, then reads the memory device status as before. Co-developed-by: Zhi Wang Reviewed-by: Dave Jiang Signed-off-by: Zhi Wang Signed-off-by: Manish Honap Signed-off-by: Jiandi An (backported from https://lore.kernel.org/linux-cxl/20260401143917.108413-1-mhonap@nvidia.com/) (backported from commit 023bae337329a533c86481d829ea1bbc9ea7aa21 from https://github.com/JiandiAnNVIDIA/NV-Kernels.git cxl-vfio_2026-04-23) [jan: Add cxl_await_range_active() declaration to include/cxl/pci.h unconditionally instead of include/cxl/cxl.h with CONFIG_CXL_BUS guards, consistent with existing convention] [kobak: Folded the private drivers/cxl/cxl.h cxl_await_range_active() prototype into this helper commit because drivers/cxl/core/pci.c includes the private CXL header.] 
Signed-off-by: Koba Ko Acked-by: Jamie Nguyen Acked-by: Matthew R. Ochs Acked-by: Carol L Soto Signed-off-by: Brad Figg --- drivers/cxl/core/pci.c | 35 ++++++++++++++++++++++++++++++----- drivers/cxl/cxl.h | 1 + include/cxl/cxl.h | 9 +++++++++ 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 2490ea74bfa3..9493bcdbf34a 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -141,16 +141,24 @@ static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id) return 0; } -/* - * Wait up to @media_ready_timeout for the device to report memory - * active. +/** + * cxl_await_range_active - Wait for all HDM DVSEC memory ranges to be active + * @cxlds: CXL device state (DVSEC and HDM count must be valid) + * + * For each HDM decoder range reported in the CXL DVSEC capability, waits for + * the range to report MEM INFO VALID (up to 1s per range), then MEM ACTIVE + * (up to media_ready_timeout seconds per range, default 60s). Used by + * cxl_await_media_ready() and by callers that only need range readiness + * without checking the memory device status register. + * + * Return: 0 if all ranges become valid and active, -ETIMEDOUT if a timeout + * occurs, or a negative errno from config read on failure. */ -int cxl_await_media_ready(struct cxl_dev_state *cxlds) +int cxl_await_range_active(struct cxl_dev_state *cxlds) { struct pci_dev *pdev = to_pci_dev(cxlds->dev); int d = cxlds->cxl_dvsec; int rc, i, hdm_count; - u64 md_status; u16 cap; rc = pci_read_config_word(pdev, @@ -171,6 +179,23 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds) return rc; } + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_await_range_active, "CXL"); + +/* + * Wait up to @media_ready_timeout for the device to report memory + * active. 
+ */ +int cxl_await_media_ready(struct cxl_dev_state *cxlds) +{ + u64 md_status; + int rc; + + rc = cxl_await_range_active(cxlds); + if (rc) + return rc; + md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); if (!CXLMDEV_READY(md_status)) return -EIO; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 605af66d50dd..535786860049 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -724,6 +724,7 @@ int cxl_port_setup_regs(struct cxl_port *port, resource_size_t component_reg_phys); struct cxl_dev_state; +int cxl_await_range_active(struct cxl_dev_state *cxlds); int cxl_get_hdm_info(struct cxl_dev_state *cxlds, u8 *count, resource_size_t *offset, resource_size_t *size); int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index ddefc43561fd..87fb52e9c47f 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -305,4 +305,13 @@ int cxl_get_hdm_info(struct cxl_dev_state *cxlds, u8 *count, #endif /* CONFIG_CXL_BUS */ +/* f951acc: split from media-ready wait */ +struct cxl_dev_state; +int cxl_await_range_active(struct cxl_dev_state *cxlds); + +/* a6a063d: exported reset helpers for VFIO */ +int cxl_dev_reset(struct pci_dev *pdev, int dvsec, bool mem_clr_en); +int cxl_dev_reset_locked(struct pci_dev *pdev, int dvsec, bool mem_clr_en); +bool pci_cxl_reset_capable(struct pci_dev *pdev); + #endif /* __CXL_CXL_H__ */ -- Gitee From 68e74e52d5918cc3d6ac8f0290b46cd11ae7b7d6 Mon Sep 17 00:00:00 2001 From: Vishal Aslot Date: Tue, 14 Oct 2025 19:40:06 -0700 Subject: [PATCH 17/24] NVIDIA: VR: SAUCE: cxl: Allow zero sized HDM decoders BugLink: https://bugs.launchpad.net/bugs/2138266 CXL spec permits committing zero sized decoders. Linux currently considers them as an error. Zero-sized decoders are helpful when the BIOS is committing them. Often BIOS will also lock them to prevent them being changed due to the TSP requirement. For example, if the type 3 device is part of a TCB. 
The host bridge, switch, and end-point decoders can all be committed with zero-size. If they are locked along the VH, it is often to prevent hotplugging of a new device that could not be attested post boot and cannot be included in TCB. The caller leaves the decoder allocated but does not add it. It simply continues to the next decoder. Signed-off-by: Vishal Aslot (backported from https://lore.kernel.org/all/20251015024019.1189713-1-vaslot@nvidia.com/) Signed-off-by: Nirmoy Das Acked-by: Jamie Nguyen Acked-by: Matthew R. Ochs Acked-by: Carol L Soto Acked-by: Abdur Rahman Acked-by: Noah Wager Signed-off-by: Brad Figg (cherry picked from commit aa8224a0ffff58f91499084437badac56740dcc7 noble:linux-nvidia-6.17) Signed-off-by: Jacob Martin --- drivers/cxl/core/hdm.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 0c80b76a5f9b..5a2c83705a8d 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -1031,13 +1031,14 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, return -ENXIO; } + port->commit_end = cxld->id; + if (size == 0) { - dev_warn(&port->dev, + dev_dbg(&port->dev, "decoder%d.%d: Committed with zero size\n", port->id, cxld->id); - return -ENXIO; + return -ENOSPC; } - port->commit_end = cxld->id; } else { if (cxled) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); @@ -1193,6 +1194,8 @@ static int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, rc = init_hdm_decoder(port, cxld, hdm, i, &dpa_base, info); if (rc) { + if (rc == -ENOSPC) + continue; dev_warn(&port->dev, "Failed to initialize decoder%d.%d\n", port->id, i); -- Gitee From a05062dad34cc0912c8bbc9c15b97132fbf7768e Mon Sep 17 00:00:00 2001 From: Koba Ko Date: Tue, 25 Nov 2025 13:07:35 +0000 Subject: [PATCH 18/24] NVIDIA: VR: SAUCE: cxl/region: Validate partition index before array access BugLink: https://bugs.launchpad.net/bugs/2138266 Check partition index bounds before accessing 
cxlds->part[] to prevent an out-of-bounds access when part is -1 or otherwise invalid. Fixes: 5ec67596e368 ("cxl/region: Drop goto pattern of construct_region()") Signed-off-by: Koba Ko Signed-off-by: Nirmoy Das Acked-by: Jamie Nguyen Acked-by: Matthew R. Ochs Acked-by: Carol L Soto Acked-by: Abdur Rahman Acked-by: Noah Wager Signed-off-by: Brad Figg (cherry picked from commit d769d573d8adfcaa6c588b7f079b05962716316a noble:linux-nvidia-6.17) Signed-off-by: Jacob Martin --- drivers/cxl/core/region.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 1256661a4208..48c7cb61944c 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3771,6 +3771,14 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, int rc, part = READ_ONCE(cxled->part); struct cxl_region *cxlr; + if (part < 0 || part >= cxlds->nr_partitions) { + dev_err(cxlmd->dev.parent, + "%s:%s: invalid partition index %d (max %u)\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), + part, cxlds->nr_partitions); + return ERR_PTR(-ENXIO); + } + do { cxlr = __create_region(cxlrd, cxlds->part[part].mode, atomic_read(&cxlrd->region_id), -- Gitee From 71343c3fb23d87e9073ae0485551fc181444246f Mon Sep 17 00:00:00 2001 From: Srirangan Madhavan Date: Fri, 6 Mar 2026 09:23:18 +0000 Subject: [PATCH 19/24] NVIDIA: VR: SAUCE: cxl: Add memory offlining and cache flush helpers BugLink: https://bugs.launchpad.net/bugs/2153819 BugLink: https://bugs.launchpad.net/bugs/2143032 Add infrastructure for quiescing the CXL data path before reset: - Memory offlining: check if CXL-backed memory is online and offline it via offline_and_remove_memory() before reset, per CXL spec requirement to quiesce all CXL.mem transactions before issuing CXL Reset. - CPU cache flush: invalidate cache lines before reset as a safety measure after memory offline.
Signed-off-by: Srirangan Madhavan (cherry picked from https://lore.kernel.org/linux-cxl/20260306092322.148765-1-smadhavan@nvidia.com/) Signed-off-by: Jiandi An Acked-by: Jamie Nguyen Acked-by: Nirmoy Das Acked-by: Carol L Soto Acked-by: Matthew R. Ochs Signed-off-by: Brad Figg (backported from commit 98bfbf9c3f88013ffbff4b08a1da0043606d0269 nv-kernels/24.04_linux-nvidia-6.17-next) [koba: Use a real System RAM walker callback so resource walks never invoke a NULL function pointer.] Signed-off-by: Koba Ko Acked-by: Matt Ochs --- drivers/cxl/core/pci.c | 120 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 9493bcdbf34a..49b1baee8c06 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include #include @@ -980,3 +982,121 @@ int cxl_port_get_possible_dports(struct cxl_port *port) return ctx.count; } + +/* + * CXL Reset support - core-provided reset logic for CXL devices. + * + * These functions implement the CXL reset sequence. + */ + +/* + * If CXL memory backed by this decoder is online as System RAM, offline + * and remove it per CXL spec requirements before issuing CXL Reset. + * Returns 0 if memory was not online or was successfully offlined. 
+ */ +static int cxl_is_system_ram(struct resource *res, void *arg) +{ + return 1; +} + +static int __maybe_unused cxl_offline_memory(struct device *dev, void *data) +{ + struct cxl_endpoint_decoder *cxled; + struct cxl_region *cxlr; + struct cxl_region_params *p; + int rc; + + if (!is_endpoint_decoder(dev)) + return 0; + + cxled = to_cxl_endpoint_decoder(dev); + guard(rwsem_read)(&cxl_rwsem.region); + + cxlr = cxled->cxld.region; + if (!cxlr) + return 0; + + p = &cxlr->params; + if (!p->res) + return 0; + + if (walk_iomem_res_desc(IORES_DESC_NONE, + IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY, + p->res->start, p->res->end, NULL, + cxl_is_system_ram) <= 0) + return 0; + + dev_info(dev, "Offlining CXL memory [%pr] for reset\n", p->res); + +#ifdef CONFIG_MEMORY_HOTREMOVE + rc = offline_and_remove_memory(p->res->start, resource_size(p->res)); + if (rc) { + dev_err(dev, + "Failed to offline CXL memory [%pr]: %d\n", + p->res, rc); + return rc; + } +#else + dev_err(dev, "Memory hotremove not supported, cannot offline CXL memory\n"); + rc = -EOPNOTSUPP; + return rc; +#endif + + return 0; +} + +static int __maybe_unused cxl_reset_prepare_memdev(struct cxl_memdev *cxlmd) +{ + struct cxl_port *endpoint; + struct device *dev; + + if (!cxlmd || !cxlmd->cxlds) + return -ENODEV; + + dev = cxlmd->cxlds->dev; + endpoint = cxlmd->endpoint; + if (!endpoint) + return 0; + + return device_for_each_child(&endpoint->dev, NULL, + cxl_offline_memory); +} + +static int __maybe_unused cxl_decoder_flush_cache(struct device *dev, void *data) +{ + struct cxl_endpoint_decoder *cxled; + struct cxl_region *cxlr; + struct resource *res; + + if (!is_endpoint_decoder(dev)) + return 0; + + cxled = to_cxl_endpoint_decoder(dev); + guard(rwsem_read)(&cxl_rwsem.region); + + cxlr = cxled->cxld.region; + if (!cxlr || !cxlr->params.res) + return 0; + + res = cxlr->params.res; + cpu_cache_invalidate_memregion(res->start, resource_size(res)); + return 0; +} + +static int __maybe_unused 
cxl_reset_flush_cpu_caches(struct cxl_memdev *cxlmd) +{ + struct cxl_port *endpoint; + + if (!cxlmd) + return 0; + + endpoint = cxlmd->endpoint; + if (!endpoint || IS_ERR(endpoint)) + return 0; + + if (!cpu_cache_has_invalidate_memregion()) + return 0; + + device_for_each_child(&endpoint->dev, NULL, cxl_decoder_flush_cache); + return 0; +} -- Gitee From 969f62e21feab528869b090d25f968a4ed63047e Mon Sep 17 00:00:00 2001 From: Srirangan Madhavan Date: Fri, 6 Mar 2026 09:23:19 +0000 Subject: [PATCH 20/24] NVIDIA: VR: SAUCE: cxl: Add multi-function sibling coordination for CXL reset BugLink: https://bugs.launchpad.net/bugs/2153819 BugLink: https://bugs.launchpad.net/bugs/2143032 Add sibling PCI function save/disable/restore coordination for CXL reset. Before reset, all CXL.cachemem sibling functions are locked, saved, and disabled; after reset they are restored. The Non-CXL Function Map DVSEC and per-function DVSEC capability register are consulted to skip non-CXL and CXL.io-only functions. A global mutex serializes concurrent resets to prevent deadlocks between sibling functions. Signed-off-by: Srirangan Madhavan (cherry picked from https://lore.kernel.org/linux-cxl/20260306092322.148765-1-smadhavan@nvidia.com/) Signed-off-by: Jiandi An Acked-by: Jamie Nguyen Acked-by: Nirmoy Das Acked-by: Carol L Soto Acked-by: Matthew R. Ochs Signed-off-by: Brad Figg (backported from commit 9a08c0246be53f047ed4128455f708b7a4350261 nv-kernels/24.04_linux-nvidia-6.17-next) [koba: Propagate sibling collection allocation failures after pci_walk_bus() so reset aborts instead of proceeding with a partial sibling list.] 
Signed-off-by: Koba Ko Acked-by: Matt Ochs --- drivers/cxl/core/pci.c | 156 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 49b1baee8c06..9f8b335dfeb3 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -15,6 +15,9 @@ #include "core.h" #include "trace.h" +/* Initial sibling array capacity: covers max non-ARI functions per slot */ +#define CXL_RESET_SIBLINGS_INIT 8 + /** * DOC: cxl core pci * @@ -1100,3 +1103,156 @@ static int __maybe_unused cxl_reset_flush_cpu_caches(struct cxl_memdev *cxlmd) device_for_each_child(&endpoint->dev, NULL, cxl_decoder_flush_cache); return 0; } + +/* + * Serialize all CXL reset operations globally. + */ +static DEFINE_MUTEX(cxl_reset_mutex); + +struct cxl_reset_context { + struct pci_dev *target; + struct pci_dev **pci_functions; + int pci_func_count; + int pci_func_cap; +}; + +/* + * Check if a sibling function is non-CXL using the Non-CXL Function Map + * DVSEC. Returns true if fn is listed as non-CXL, false otherwise (including + * on any read failure). + */ +static bool cxl_is_non_cxl_function(struct pci_dev *pdev, + u16 func_map_dvsec, int fn) +{ + int reg, bit; + u32 map; + + if (pci_ari_enabled(pdev->bus)) { + reg = fn / 32; + bit = fn % 32; + } else { + reg = 0; + bit = fn; + } + + if (pci_read_config_dword(pdev, + func_map_dvsec + PCI_DVSEC_CXL_FUNCTION_MAP_REG + (reg * 4), + &map)) + return false; + + return map & BIT(bit); +} + +struct cxl_reset_walk_ctx { + struct cxl_reset_context *ctx; + u16 func_map_dvsec; + int error; + bool ari; +}; + +static int cxl_reset_collect_sibling(struct pci_dev *func, void *data) +{ + struct cxl_reset_walk_ctx *wctx = data; + struct cxl_reset_context *ctx = wctx->ctx; + struct pci_dev *pdev = ctx->target; + u16 dvsec, cap; + int fn; + + if (func == pdev) + return 0; + + if (!wctx->ari && + PCI_SLOT(func->devfn) != PCI_SLOT(pdev->devfn)) + return 0; + + fn = wctx->ari ? 
func->devfn : PCI_FUNC(func->devfn); + if (wctx->func_map_dvsec && + cxl_is_non_cxl_function(pdev, wctx->func_map_dvsec, fn)) + return 0; + + /* Only coordinate with siblings that have CXL.cachemem */ + dvsec = pci_find_dvsec_capability(func, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_DEVICE); + if (!dvsec) + return 0; + if (pci_read_config_word(func, dvsec + PCI_DVSEC_CXL_CAP, &cap)) + return 0; + if (!(cap & (PCI_DVSEC_CXL_CACHE_CAPABLE | + PCI_DVSEC_CXL_MEM_CAPABLE))) + return 0; + + /* Grow sibling array; double capacity for ARI devices when running out of space */ + if (ctx->pci_func_count >= ctx->pci_func_cap) { + struct pci_dev **new; + int new_cap = ctx->pci_func_cap ? ctx->pci_func_cap * 2 + : CXL_RESET_SIBLINGS_INIT; + + new = krealloc(ctx->pci_functions, + new_cap * sizeof(*new), GFP_KERNEL); + if (!new) { + wctx->error = -ENOMEM; + return 1; + } + ctx->pci_functions = new; + ctx->pci_func_cap = new_cap; + } + + pci_dev_get(func); + ctx->pci_functions[ctx->pci_func_count++] = func; + return 0; +} + +static void __maybe_unused cxl_pci_functions_reset_release(struct cxl_reset_context *ctx) +{ + int i; + + for (i = 0; i < ctx->pci_func_count; i++) + pci_dev_put(ctx->pci_functions[i]); + kfree(ctx->pci_functions); + ctx->pci_functions = NULL; + ctx->pci_func_count = 0; + ctx->pci_func_cap = 0; +} + +static int __maybe_unused cxl_pci_functions_reset_prepare(struct cxl_reset_context *ctx) +{ + struct pci_dev *pdev = ctx->target; + struct cxl_reset_walk_ctx wctx; + int i; + + ctx->pci_func_count = 0; + ctx->pci_functions = NULL; + ctx->pci_func_cap = 0; + + wctx.ctx = ctx; + wctx.ari = pci_ari_enabled(pdev->bus); + wctx.error = 0; + wctx.func_map_dvsec = pci_find_dvsec_capability(pdev, + PCI_VENDOR_ID_CXL, PCI_DVSEC_CXL_FUNCTION_MAP); + + /* Collect CXL.cachemem siblings under pci_bus_sem */ + pci_walk_bus(pdev->bus, cxl_reset_collect_sibling, &wctx); + if (wctx.error) { + cxl_pci_functions_reset_release(ctx); + return wctx.error; + } + + /* Lock and save/disable 
siblings outside pci_bus_sem */ + for (i = 0; i < ctx->pci_func_count; i++) { + pci_dev_lock(ctx->pci_functions[i]); + pci_dev_save_and_disable(ctx->pci_functions[i]); + } + + return 0; +} + +static void __maybe_unused cxl_pci_functions_reset_done(struct cxl_reset_context *ctx) +{ + int i; + + for (i = 0; i < ctx->pci_func_count; i++) { + pci_dev_restore(ctx->pci_functions[i]); + pci_dev_unlock(ctx->pci_functions[i]); + } + cxl_pci_functions_reset_release(ctx); +} -- Gitee From 33ae6191799006e7c89a017f475d49e8c5b92e8b Mon Sep 17 00:00:00 2001 From: Srirangan Madhavan Date: Fri, 6 Mar 2026 09:23:20 +0000 Subject: [PATCH 21/24] NVIDIA: VR: SAUCE: cxl: Add CXL DVSEC reset sequence and flow orchestration BugLink: https://bugs.launchpad.net/bugs/2153819 BugLink: https://bugs.launchpad.net/bugs/2143032 cxl_dev_reset() implements the hardware reset sequence: optionally enable memory clear, initiate reset via CTRL2, wait for completion, and re-enable caching. cxl_do_reset() orchestrates the full reset flow: 1. CXL pre-reset: mem offlining and cache flush (when memdev present) 2. PCI save/disable: pci_dev_save_and_disable() automatically saves CXL DVSEC and HDM decoder state via PCI core hooks 3. Sibling coordination: save/disable CXL.cachemem sibling functions 4. Execute CXL DVSEC reset 5. Sibling restore: always runs to re-enable sibling functions 6. PCI restore: pci_dev_restore() automatically restores CXL state The CXL-specific DVSEC and HDM save/restore is handled by the PCI core's CXL save/restore infrastructure (drivers/pci/cxl.c). Signed-off-by: Srirangan Madhavan (cherry picked from https://lore.kernel.org/linux-cxl/20260306092322.148765-1-smadhavan@nvidia.com/) Signed-off-by: Jiandi An Acked-by: Jamie Nguyen Acked-by: Nirmoy Das Acked-by: Carol L Soto Acked-by: Matthew R. 
Ochs Signed-off-by: Brad Figg (backported from commit 92fb80732a4ea34b76cbe51b15e95ff04f49cb89 nv-kernels/24.04_linux-nvidia-6.17-next) [koba: Treat error-valued cxlmd->endpoint as no endpoint to avoid dereferencing ERR_PTR before endpoint attach.] [koba: Check sibling collection failure before starting the CXL reset so allocation failure restores the target and aborts.] [koba: Limit the memdev device lock to endpoint-dependent memory preparation and cache flush, restore memory quiesce before PCI disable, and track sibling reset preparation so reset_done cleanup only runs after successful sibling prepare.] [koba: Guard reset_done() against NULL/ERR_PTR memdev endpoints before decoder reset detection.] Signed-off-by: Koba Ko Acked-by: Matt Ochs --- drivers/cxl/core/pci.c | 196 ++++++++++++++++++++++++++++++++++++++++- drivers/cxl/cxl.h | 1 + drivers/cxl/pci.c | 10 +-- 3 files changed, 197 insertions(+), 10 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 9f8b335dfeb3..50cfe768af0b 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1058,7 +1058,7 @@ static int __maybe_unused cxl_reset_prepare_memdev(struct cxl_memdev *cxlmd) dev = cxlmd->cxlds->dev; endpoint = cxlmd->endpoint; - if (!endpoint) + if (!endpoint || IS_ERR(endpoint)) return 0; return device_for_each_child(&endpoint->dev, NULL, @@ -1202,7 +1202,7 @@ static int cxl_reset_collect_sibling(struct pci_dev *func, void *data) return 0; } -static void __maybe_unused cxl_pci_functions_reset_release(struct cxl_reset_context *ctx) +static void cxl_pci_functions_reset_release(struct cxl_reset_context *ctx) { int i; @@ -1214,7 +1214,7 @@ static void __maybe_unused cxl_pci_functions_reset_release(struct cxl_reset_cont ctx->pci_func_cap = 0; } -static int __maybe_unused cxl_pci_functions_reset_prepare(struct cxl_reset_context *ctx) +static int cxl_pci_functions_reset_prepare(struct cxl_reset_context *ctx) { struct pci_dev *pdev = ctx->target; struct cxl_reset_walk_ctx 
wctx; @@ -1246,7 +1246,7 @@ static int __maybe_unused cxl_pci_functions_reset_prepare(struct cxl_reset_conte return 0; } -static void __maybe_unused cxl_pci_functions_reset_done(struct cxl_reset_context *ctx) +static void cxl_pci_functions_reset_done(struct cxl_reset_context *ctx) { int i; @@ -1256,3 +1256,191 @@ static void __maybe_unused cxl_pci_functions_reset_done(struct cxl_reset_context } cxl_pci_functions_reset_release(ctx); } + +/* + * CXL device reset execution + */ +static int cxl_dev_reset(struct pci_dev *pdev, int dvsec) +{ + static const u32 reset_timeout_ms[] = { 10, 100, 1000, 10000, 100000 }; + u16 cap, ctrl2, status2; + u32 timeout_ms; + int rc, idx; + + if (!pci_wait_for_pending_transaction(pdev)) + pci_err(pdev, "timed out waiting for pending transactions\n"); + + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CAP, &cap); + if (rc) + return rc; + + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, &ctrl2); + if (rc) + return rc; + + /* + * Disable caching and initiate cache writeback+invalidation if the + * device supports it. Poll for completion. + * Per CXL r3.2 section 9.6, software may use the cache size from + * DVSEC CXL Capability2 to compute a suitable timeout; we use a + * default of 10ms. 
+ */ + if (cap & PCI_DVSEC_CXL_CACHE_WBI_CAPABLE) { + u32 wbi_poll_us = 100; + s32 wbi_remaining_us = 10000; + + ctrl2 |= PCI_DVSEC_CXL_DISABLE_CACHING; + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, + ctrl2); + if (rc) + return rc; + + ctrl2 |= PCI_DVSEC_CXL_INIT_CACHE_WBI; + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, + ctrl2); + if (rc) + return rc; + + do { + usleep_range(wbi_poll_us, wbi_poll_us + 1); + wbi_remaining_us -= wbi_poll_us; + rc = pci_read_config_word(pdev, + dvsec + PCI_DVSEC_CXL_STATUS2, + &status2); + if (rc) + return rc; + } while (!(status2 & PCI_DVSEC_CXL_CACHE_INV) && + wbi_remaining_us > 0); + + if (!(status2 & PCI_DVSEC_CXL_CACHE_INV)) { + pci_err(pdev, "CXL cache WB+I timed out\n"); + return -ETIMEDOUT; + } + } else if (cap & PCI_DVSEC_CXL_CACHE_CAPABLE) { + ctrl2 |= PCI_DVSEC_CXL_DISABLE_CACHING; + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, + ctrl2); + if (rc) + return rc; + } + + if (cap & PCI_DVSEC_CXL_RST_MEM_CLR_CAPABLE) { + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, + &ctrl2); + if (rc) + return rc; + + ctrl2 |= PCI_DVSEC_CXL_RST_MEM_CLR_EN; + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, + ctrl2); + if (rc) + return rc; + } + + idx = FIELD_GET(PCI_DVSEC_CXL_RST_TIMEOUT, cap); + if (idx >= ARRAY_SIZE(reset_timeout_ms)) + idx = ARRAY_SIZE(reset_timeout_ms) - 1; + timeout_ms = reset_timeout_ms[idx]; + + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, &ctrl2); + if (rc) + return rc; + + ctrl2 |= PCI_DVSEC_CXL_INIT_CXL_RST; + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, ctrl2); + if (rc) + return rc; + + msleep(timeout_ms); + + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_STATUS2, + &status2); + if (rc) + return rc; + + if (status2 & PCI_DVSEC_CXL_RST_ERR) { + pci_err(pdev, "CXL reset error\n"); + return -EIO; + } + + if (!(status2 & PCI_DVSEC_CXL_RST_DONE)) { + pci_err(pdev, "CXL reset timeout\n"); + return 
-ETIMEDOUT; + } + + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, &ctrl2); + if (rc) + return rc; + + ctrl2 &= ~PCI_DVSEC_CXL_DISABLE_CACHING; + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, ctrl2); + if (rc) + return rc; + + return 0; +} + +static int match_memdev_by_parent(struct device *dev, const void *parent) +{ + return is_cxl_memdev(dev) && dev->parent == parent; +} + +static int __cxl_do_reset(struct pci_dev *pdev, struct cxl_memdev *cxlmd, + int dvsec) +{ + struct cxl_reset_context ctx = { .target = pdev }; + bool siblings_prepared = false; + int rc; + + mutex_lock(&cxl_reset_mutex); + pci_dev_lock(pdev); + + if (cxlmd) { + guard(device)(&cxlmd->dev); + + rc = cxl_reset_prepare_memdev(cxlmd); + if (rc) + goto out_unlock; + + cxl_reset_flush_cpu_caches(cxlmd); + } + + pci_dev_save_and_disable(pdev); + + rc = cxl_pci_functions_reset_prepare(&ctx); + if (!rc) { + siblings_prepared = true; + rc = cxl_dev_reset(pdev, dvsec); + } + + if (siblings_prepared) + cxl_pci_functions_reset_done(&ctx); + + pci_dev_restore(pdev); + +out_unlock: + pci_dev_unlock(pdev); + mutex_unlock(&cxl_reset_mutex); + + return rc; +} + +static int cxl_do_reset(struct pci_dev *pdev) +{ + int dvsec; + + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_DEVICE); + if (!dvsec) + return -ENODEV; + + struct device *memdev __free(put_device) = + bus_find_device(&cxl_bus_type, NULL, &pdev->dev, + match_memdev_by_parent); + if (!memdev) + return __cxl_do_reset(pdev, NULL, dvsec); + + struct cxl_memdev *cxlmd = to_cxl_memdev(memdev); + + return __cxl_do_reset(pdev, cxlmd, dvsec); +} diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 535786860049..83e9ed0db04b 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -182,6 +182,7 @@ int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport); #define CXL_DECODER_F_LOCK BIT(4) #define CXL_DECODER_F_ENABLE BIT(5) #define CXL_DECODER_F_NORMALIZED_ADDRESSING BIT(6) 
+#define CXL_DECODER_F_RESET_MASK (CXL_DECODER_F_ENABLE | CXL_DECODER_F_LOCK) enum cxl_decoder_type { CXL_DECODER_DEVMEM = 2, diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index bace662dc988..deafa5bae2c7 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -977,6 +977,7 @@ static void cxl_reset_done(struct pci_dev *pdev) { struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct cxl_port *endpoint; struct device *dev = &pdev->dev; /* @@ -986,14 +987,11 @@ static void cxl_reset_done(struct pci_dev *pdev) * that no longer exists. */ guard(device)(&cxlmd->dev); - if (!cxlmd->dev.driver) + endpoint = cxlmd->endpoint; + if (!endpoint || IS_ERR(endpoint)) return; - if (cxlmd->endpoint && - cxl_endpoint_decoder_reset_detected(cxlmd->endpoint)) { - device_for_each_child(&cxlmd->endpoint->dev, NULL, - cxl_endpoint_decoder_clear_reset_flags); - + if (cxl_endpoint_decoder_reset_detected(endpoint)) { dev_crit(dev, "SBR happened without memory regions removal.\n"); dev_crit(dev, "System may be unstable if regions hosted system memory.\n"); add_taint(TAINT_USER, LOCKDEP_STILL_OK); -- Gitee From 85b53e92b839a599ed135bd27d7ca58799c55ee8 Mon Sep 17 00:00:00 2001 From: Srirangan Madhavan Date: Fri, 6 Mar 2026 09:23:21 +0000 Subject: [PATCH 22/24] NVIDIA: VR: SAUCE: cxl: Add cxl_reset sysfs interface for PCI devices BugLink: https://bugs.launchpad.net/bugs/2153819 BugLink: https://bugs.launchpad.net/bugs/2143032 Add a "cxl_reset" sysfs attribute to PCI devices that support CXL Reset (CXL r3.2 section 8.1.3.1). The attribute is visible only on devices with both CXL.cache and CXL.mem capabilities and the CXL Reset Capable bit set in the DVSEC. Writing "1" to the attribute triggers the full CXL reset flow via cxl_do_reset(). The interface is decoupled from memdev creation: when a CXL memdev exists, memory offlining and cache flush are performed; otherwise reset proceeds without the memory management. 
The sysfs attribute is managed entirely by the CXL module using sysfs_create_group() / sysfs_remove_group() rather than the PCI core's static attribute groups. This avoids cross-module symbol dependencies between the PCI core (always built-in) and CXL_BUS (potentially modular). At module init, a subsys_interface is registered on pci_bus_type; it adds the attribute to existing PCI devices and handles hot-plug/unplug. kernfs_drain() makes sure that any in-flight store() completes before sysfs_remove_group() returns, preventing use-after-free during module unload. Signed-off-by: Srirangan Madhavan (cherry picked from https://lore.kernel.org/linux-cxl/20260306092322.148765-1-smadhavan@nvidia.com/) Signed-off-by: Jiandi An Acked-by: Jamie Nguyen Acked-by: Nirmoy Das Acked-by: Carol L Soto Acked-by: Matthew R. Ochs Signed-off-by: Brad Figg (cherry picked from commit 6e96f7e341a4eb1b9631e40b43d120b2b9e2c6e2 nv-kernels/24.04_linux-nvidia-6.17-next) Signed-off-by: Koba Ko Acked-by: Matt Ochs --- drivers/cxl/core/core.h | 2 + drivers/cxl/core/pci.c | 113 ++++++++++++++++++++++++++++++++++++++++ drivers/cxl/core/port.c | 3 ++ 3 files changed, 118 insertions(+) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 82ca3a476708..7b3bdcee6416 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -138,6 +138,8 @@ extern struct cxl_rwsem cxl_rwsem; int cxl_memdev_init(void); void cxl_memdev_exit(void); void cxl_mbox_init(void); +void cxl_reset_sysfs_init(void); +void cxl_reset_sysfs_exit(void); enum cxl_poison_trace_type { CXL_POISON_TRACE_LIST, diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 50cfe768af0b..6b860cadfe2e 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1444,3 +1444,116 @@ static int cxl_do_reset(struct pci_dev *pdev) return __cxl_do_reset(pdev, cxlmd, dvsec); } + +/* + * CXL reset sysfs attribute management. + * + * The cxl_reset attribute is added to PCI devices that advertise CXL Reset + * capability. 
Managed entirely by the CXL module via subsys_interface on + * pci_bus_type, avoiding cross-module symbol dependencies between the PCI + * core (built-in) and CXL (potentially modular). + * + * subsys_interface handles existing devices at register time and hot-plug + * add/remove automatically. On unregister, remove_dev runs for all tracked + * devices under bus core serialization. + */ + +static bool pci_cxl_reset_capable(struct pci_dev *pdev) +{ + int dvsec; + u16 cap; + + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_DEVICE); + if (!dvsec) + return false; + + if (pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CAP, &cap)) + return false; + + if (!(cap & PCI_DVSEC_CXL_CACHE_CAPABLE) || + !(cap & PCI_DVSEC_CXL_MEM_CAPABLE)) + return false; + + return !!(cap & PCI_DVSEC_CXL_RST_CAPABLE); +} + +static ssize_t cxl_reset_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int rc; + + if (!sysfs_streq(buf, "1")) + return -EINVAL; + + rc = cxl_do_reset(pdev); + return rc ? 
rc : count; +} +static DEVICE_ATTR_WO(cxl_reset); + +static umode_t cxl_reset_attr_is_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj)); + + if (!pci_cxl_reset_capable(pdev)) + return 0; + + return a->mode; +} + +static struct attribute *cxl_reset_attrs[] = { + &dev_attr_cxl_reset.attr, + NULL, +}; + +static const struct attribute_group cxl_reset_attr_group = { + .attrs = cxl_reset_attrs, + .is_visible = cxl_reset_attr_is_visible, +}; + +static int cxl_reset_add_dev(struct device *dev, + struct subsys_interface *sif) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + if (!pci_cxl_reset_capable(pdev)) + return 0; + + return sysfs_create_group(&dev->kobj, &cxl_reset_attr_group); +} + +static void cxl_reset_remove_dev(struct device *dev, + struct subsys_interface *sif) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + if (!pci_cxl_reset_capable(pdev)) + return; + + sysfs_remove_group(&dev->kobj, &cxl_reset_attr_group); +} + +static struct subsys_interface cxl_reset_interface = { + .name = "cxl_reset", + .subsys = &pci_bus_type, + .add_dev = cxl_reset_add_dev, + .remove_dev = cxl_reset_remove_dev, +}; + +void cxl_reset_sysfs_init(void) +{ + int rc; + + rc = subsys_interface_register(&cxl_reset_interface); + if (rc) + pr_warn("CXL: failed to register cxl_reset interface (%d)\n", + rc); +} + +void cxl_reset_sysfs_exit(void) +{ + subsys_interface_unregister(&cxl_reset_interface); +} diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index c5aacd7054f1..f95f0bdd7b90 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -2530,6 +2530,8 @@ static __init int cxl_core_init(void) if (rc) goto err_ras; + cxl_reset_sysfs_init(); + return 0; err_ras: @@ -2545,6 +2547,7 @@ static __init int cxl_core_init(void) static void cxl_core_exit(void) { + cxl_reset_sysfs_exit(); cxl_ras_exit(); cxl_region_exit(); bus_unregister(&cxl_bus_type); -- Gitee From 62395f32078864374a0c324726b1dcb71387f242 
Mon Sep 17 00:00:00 2001 From: Manish Honap Date: Thu, 30 Apr 2026 08:44:02 +0530 Subject: [PATCH 23/24] NVIDIA: VR: SAUCE: cxl: Export the CXL reset helpers for VFIO users BugLink: https://bugs.launchpad.net/bugs/2152222 Export CXL reset helper entry points for VFIO CXL users so vfio-pci can coordinate CXL reset and memory/cache state safely. Signed-off-by: Manish Honap Signed-off-by: Jiandi An (backported from commit 2d40efbb4f42 from https://github.com/JiandiAnNVIDIA/NV-Kernels.git cxl-vfio_2026-04-23) [kobak: Kept the BOS CXL core tail and placed the exported reset helpers after cxl_port_get_possible_dports().] [kobak: Adapted to the BOS cxl_pci_functions_reset_prepare() error-return flow and added the target-local CXL reset helper prototypes required by public and private CXL headers.] Signed-off-by: Koba Ko Acked-by: Jamie Nguyen Acked-by: Matthew R. Ochs Acked-by: Carol L Soto Signed-off-by: Brad Figg --- drivers/cxl/core/pci.c | 57 ++++++++++++++++++++++++++++++++--- drivers/cxl/cxl.h | 5 +++ include/linux/pci.h | 3 ++ include/uapi/linux/pci_regs.h | 13 ++++++++ 4 files changed, 74 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 6b860cadfe2e..c4f4d5e161e0 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1260,7 +1260,7 @@ static void cxl_pci_functions_reset_done(struct cxl_reset_context *ctx) /* * CXL device reset execution */ -static int cxl_dev_reset(struct pci_dev *pdev, int dvsec) +int cxl_dev_reset(struct pci_dev *pdev, int dvsec, bool mem_clr_en) { static const u32 reset_timeout_ms[] = { 10, 100, 1000, 10000, 100000 }; u16 cap, ctrl2, status2; @@ -1330,7 +1330,17 @@ static int cxl_dev_reset(struct pci_dev *pdev, int dvsec) if (rc) return rc; - ctrl2 |= PCI_DVSEC_CXL_RST_MEM_CLR_EN; + /* + * Explicitly set or clear RST_MEM_CLR_EN rather than only + * setting it. 
A previous reset may have left the bit set in + * hardware; if mem_clr_en is false we must clear it so that a + * guest-triggered reset does not unexpectedly scrub DPA. + */ + if (mem_clr_en) + ctrl2 |= PCI_DVSEC_CXL_RST_MEM_CLR_EN; + else + ctrl2 &= ~PCI_DVSEC_CXL_RST_MEM_CLR_EN; + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, ctrl2); if (rc) @@ -1379,6 +1389,44 @@ static int cxl_dev_reset(struct pci_dev *pdev, int dvsec) return 0; } +EXPORT_SYMBOL_NS_GPL(cxl_dev_reset, "CXL"); + +/** + * cxl_dev_reset_locked() - cxl_dev_reset() under cxl_reset_mutex with sibling + * CXL.cachemem function save/restore. + * @pdev: Target CXL function + * @dvsec: CXL DVSEC capability offset (pci_find_dvsec_capability()) + * @mem_clr_en: Pass-through to cxl_dev_reset() (Mem_Clr_Enable in CTRL2) + * + * Return: 0 on success, negative errno from cxl_dev_reset() or sibling + * coordination failure. + */ +int cxl_dev_reset_locked(struct pci_dev *pdev, int dvsec, bool mem_clr_en) +{ + struct cxl_reset_context ctx = { .target = pdev }; + bool siblings_prepared = false; + int rc; + + mutex_lock(&cxl_reset_mutex); + pci_dev_lock(pdev); + + pci_dev_save_and_disable(pdev); + rc = cxl_pci_functions_reset_prepare(&ctx); + if (!rc) { + siblings_prepared = true; + rc = cxl_dev_reset(pdev, dvsec, mem_clr_en); + } + + if (siblings_prepared) + cxl_pci_functions_reset_done(&ctx); + + pci_dev_restore(pdev); + pci_dev_unlock(pdev); + mutex_unlock(&cxl_reset_mutex); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(cxl_dev_reset_locked, "CXL"); static int match_memdev_by_parent(struct device *dev, const void *parent) { @@ -1410,7 +1458,7 @@ static int __cxl_do_reset(struct pci_dev *pdev, struct cxl_memdev *cxlmd, rc = cxl_pci_functions_reset_prepare(&ctx); if (!rc) { siblings_prepared = true; - rc = cxl_dev_reset(pdev, dvsec); + rc = cxl_dev_reset(pdev, dvsec, true); } if (siblings_prepared) @@ -1458,7 +1506,7 @@ static int cxl_do_reset(struct pci_dev *pdev) * devices under bus core serialization. 
*/ -static bool pci_cxl_reset_capable(struct pci_dev *pdev) +bool pci_cxl_reset_capable(struct pci_dev *pdev) { int dvsec; u16 cap; @@ -1477,6 +1525,7 @@ static bool pci_cxl_reset_capable(struct pci_dev *pdev) return !!(cap & PCI_DVSEC_CXL_RST_CAPABLE); } +EXPORT_SYMBOL_NS_GPL(pci_cxl_reset_capable, "CXL"); static ssize_t cxl_reset_store(struct device *dev, struct device_attribute *attr, diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 83e9ed0db04b..7973d1519cc2 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -162,6 +162,11 @@ int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map, unsigned int index); int cxl_regblock_get_bar_info(const struct cxl_register_map *map, u8 *bar_index, resource_size_t *bar_offset); +int cxl_dev_reset(struct pci_dev *pdev, int dvsec, bool mem_clr_en); +int cxl_dev_reset_locked(struct pci_dev *pdev, int dvsec, bool mem_clr_en); +bool pci_cxl_reset_capable(struct pci_dev *pdev); +void cxl_reset_sysfs_init(void); +void cxl_reset_sysfs_exit(void); struct cxl_dport; int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport); diff --git a/include/linux/pci.h b/include/linux/pci.h index 4f1308244c82..82b86c7bdf6e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2049,6 +2049,9 @@ int pci_dev_trylock(struct pci_dev *dev); void pci_dev_unlock(struct pci_dev *dev); DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T)) +void pci_dev_save_and_disable(struct pci_dev *dev); +void pci_dev_restore(struct pci_dev *dev); + /* * PCI domain support. 
Sometimes called PCI segment (eg by ACPI), * a PCI domain is defined to be a set of PCI buses which share diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 14f634ab9350..a7ac017baa1c 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1353,6 +1353,19 @@ #define PCI_DVSEC_CXL_HDM_COUNT __GENMASK(5, 4) #define PCI_DVSEC_CXL_CTRL 0xC #define PCI_DVSEC_CXL_MEM_ENABLE _BITUL(2) + +#define PCI_DVSEC_CXL_CTRL_RWL 0x5FED +#define PCI_DVSEC_CXL_CTRL2 0x10 +#define PCI_DVSEC_CXL_DISABLE_CACHING _BITUL(0) +#define PCI_DVSEC_CXL_INIT_CACHE_WBI _BITUL(1) +#define PCI_DVSEC_CXL_INIT_CXL_RST _BITUL(2) +#define PCI_DVSEC_CXL_RST_MEM_CLR_EN _BITUL(3) +#define PCI_DVSEC_CXL_STATUS2 0x12 +#define PCI_DVSEC_CXL_CACHE_INV _BITUL(0) +#define PCI_DVSEC_CXL_RST_DONE _BITUL(1) +#define PCI_DVSEC_CXL_RST_ERR _BITUL(2) +#define PCI_DVSEC_CXL_LOCK 0x14 +#define PCI_DVSEC_CXL_LOCK_CONFIG _BITUL(0) #define PCI_DVSEC_CXL_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10)) #define PCI_DVSEC_CXL_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10)) #define PCI_DVSEC_CXL_MEM_INFO_VALID _BITUL(0) -- Gitee From 48e64c2bff7c5c1cb8aa6f7931f9d4131cafd9ca Mon Sep 17 00:00:00 2001 From: Vishal Aslot Date: Tue, 14 Oct 2025 19:40:05 -0700 Subject: [PATCH 24/24] NVIDIA: VR: SAUCE: cxl_test: enable zero sized decoders under hb0 BugLink: https://bugs.launchpad.net/bugs/2138266 The CXL core in Linux was updated to support committed decoders of zero size, because this is allowed by the CXL spec. This patch updates cxl_test to enable decoders 1 and 2 in the host-bridge 0 port, in a switch uport under hb0, and in the endpoint ports, with size zero, simulating committed zero-sized decoders. Signed-off-by: Vishal Aslot (backported from https://lore.kernel.org/all/20251015024019.1189713-1-vaslot@nvidia.com/) Signed-off-by: Nirmoy Das Acked-by: Jamie Nguyen Acked-by: Matthew R. 
Ochs
Acked-by: Carol L Soto
Acked-by: Abdur Rahman
Acked-by: Noah Wager
Signed-off-by: Brad Figg
(cherry picked from commit a40b0390737baececc5d0b6ee4fb174516373ac9 noble:linux-nvidia-6.17)
Signed-off-by: Jacob Martin
---
 tools/testing/cxl/test/cxl.c | 96 +++++++++++++++++++++++++++++-
 1 file changed, 94 insertions(+), 2 deletions(-)

diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 418669927fb0..6acdf48d2bd3 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -1041,6 +1041,45 @@ static void default_mock_decoder(struct cxl_decoder *cxld)
 	WARN_ON_ONCE(!cxld_registry_new(cxld));
 }
 
+static void size_zero_mock_decoder_ep(struct cxl_decoder *cxld, u64 base)
+{
+	struct cxl_endpoint_decoder *cxled;
+
+	cxled = to_cxl_endpoint_decoder(&cxld->dev);
+	cxld->hpa_range = (struct range){
+		.start = base,
+		.end = base - 1, /* Size 0 */
+	};
+
+	cxld->interleave_ways = 2;
+	cxld->interleave_granularity = 4096;
+	cxld->target_type = CXL_DECODER_HOSTONLYMEM;
+	cxld->flags = CXL_DECODER_F_ENABLE;
+	cxled->state = CXL_DECODER_STATE_AUTO;
+	cxld->commit = mock_decoder_commit;
+	cxld->reset = mock_decoder_reset;
+}
+
+static void size_zero_mock_decoder_sw(struct device *dev, u64 base, int i)
+{
+	struct cxl_switch_decoder *cxlsd;
+	struct cxl_decoder *cxld;
+
+	cxlsd = to_cxl_switch_decoder(dev);
+	cxld = &cxlsd->cxld;
+	cxld->flags = CXL_DECODER_F_ENABLE;
+	cxld->target_type = CXL_DECODER_HOSTONLYMEM;
+	if (i == 0)
+		cxld->interleave_ways = 2;
+	else
+		cxld->interleave_ways = 1;
+	cxld->interleave_granularity = 4096;
+	cxld->hpa_range = (struct range) {
+		.start = base,
+		.end = base - 1, /* Size 0 */
+	};
+}
+
 static int first_decoder(struct device *dev, const void *data)
 {
 	struct cxl_decoder *cxld;
@@ -1053,6 +1092,30 @@ static int first_decoder(struct device *dev, const void *data)
 	return 0;
 }
 
+static int second_decoder(struct device *dev, const void *data)
+{
+	struct cxl_decoder *cxld;
+
+	if (!is_switch_decoder(dev))
+		return 0;
+	cxld = to_cxl_decoder(dev);
+	if (cxld->id == 1)
+		return 1;
+	return 0;
+}
+
+static int third_decoder(struct device *dev, const void *data)
+{
+	struct cxl_decoder *cxld;
+
+	if (!is_switch_decoder(dev))
+		return 0;
+	cxld = to_cxl_decoder(dev);
+	if (cxld->id == 2)
+		return 1;
+	return 0;
+}
+
 /*
  * Initialize a decoder during HDM enumeration.
  *
@@ -1080,7 +1143,7 @@ static bool mock_init_hdm_decoder(struct cxl_decoder *cxld)
 	struct cxl_dport *dport;
 	struct device *dev;
 	bool hb0 = false;
-	u64 base;
+	u64 base = window->base_hpa;
 	int i;
 
 	if (is_endpoint_decoder(&cxld->dev)) {
@@ -1122,6 +1185,20 @@ static bool mock_init_hdm_decoder(struct cxl_decoder *cxld)
 		return false;
 	}
 
+	/*
+	 * Decoders 1 and 2 of the endpoint under host bridge 0 should be enabled as zero-sized.
+	 * It would be even better to make sure that the parent switch uport decoder was
+	 * also enabled before enabling the size zero decoders but there is no harm in doing it
+	 * anyway.
+	 */
+	if (hb0 && (cxld->id == 1 || cxld->id == 2)) {
+		port = to_cxl_port(cxld->dev.parent);
+		size_zero_mock_decoder_ep(cxld, base);
+		/* Commit the zero-sized decoder */
+		port->commit_end = cxld->id;
+		return false;
+	}
+
 	/*
 	 * The first decoder on the first 2 devices on the first switch
 	 * attached to host-bridge0 mock a fake / static RAM region. All
@@ -1142,7 +1219,6 @@ static bool mock_init_hdm_decoder(struct cxl_decoder *cxld)
 		return false;
 	}
 
-	base = window->base_hpa;
 	if (extended_linear_cache)
 		base += mock_auto_region_size;
 	cxld->hpa_range = (struct range) {
@@ -1214,6 +1290,22 @@ static bool mock_init_hdm_decoder(struct cxl_decoder *cxld)
 
 		cxld_registry_update(cxld);
 		put_device(dev);
+
+		/* Enable the next two decoders also and make them zero sized */
+		dev = device_find_child(&iter->dev, NULL, second_decoder);
+		WARN_ON(!dev);
+		if (dev) {
+			size_zero_mock_decoder_sw(dev, base, i);
+			iter->commit_end = 1;
+			put_device(dev);
+		}
+		dev = device_find_child(&iter->dev, NULL, third_decoder);
+		WARN_ON(!dev);
+		if (dev) {
+			size_zero_mock_decoder_sw(dev, base, i);
+			iter->commit_end = 2;
+			put_device(dev);
+		}
 	}
 
 	return false;
-- 
Gitee