From 3ce6add1220b76890ff9769b4d165d9e8fe2117a Mon Sep 17 00:00:00 2001 From: Raul Akhmetshin Date: Tue, 12 Aug 2025 16:08:11 +0300 Subject: [PATCH 1/2] UCP/CORE: Cache reachable system devices (Direct NIC datapath). --- src/ucp/core/ucp_context.c | 29 +++++++++++++++++++++++++++++ src/ucp/core/ucp_context.h | 5 +++++ src/ucp/core/ucp_mm.c | 28 ++-------------------------- 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/src/ucp/core/ucp_context.c b/src/ucp/core/ucp_context.c index aa66d5ccfe9..f7d4e6d1261 100644 --- a/src/ucp/core/ucp_context.c +++ b/src/ucp/core/ucp_context.c @@ -1799,6 +1799,34 @@ static void ucp_fill_resources_reg_md_map_update(ucp_context_h context) } } +static void ucp_fill_resources_reachable_sys_devs(ucp_context_h context) +{ + unsigned num_sys_devs; + ucp_md_index_t md_index; + ucp_tl_md_t *tl_md; + ucs_sys_device_t sys_dev, sys_dev_md; + int is_reachable; + + num_sys_devs = ucs_topo_num_devices(); + for (md_index = 0; md_index < context->num_mds; ++md_index) { + tl_md = &context->tl_mds[md_index]; + tl_md->reachable_sys_devs = 0; + for (sys_dev = 0; sys_dev < num_sys_devs; ++sys_dev) { + is_reachable = 1; + ucs_for_each_bit(sys_dev_md, tl_md->sys_dev_map) { + if (!ucs_topo_is_reachable(sys_dev_md, sys_dev)) { + is_reachable = 0; + break; + } + } + + if (is_reachable) { + tl_md->reachable_sys_devs |= UCS_BIT(sys_dev); + } + } + } +} + static ucs_status_t ucp_fill_resources(ucp_context_h context, const ucp_config_t *config) { @@ -1913,6 +1941,7 @@ static ucs_status_t ucp_fill_resources(ucp_context_h context, } ucp_fill_resources_reg_md_map_update(context); + ucp_fill_resources_reachable_sys_devs(context); /* If unified mode is enabled, initialize tl_bitmap to 0. 
* Then the worker will open all available transport resources and will diff --git a/src/ucp/core/ucp_context.h b/src/ucp/core/ucp_context.h index 7df775955db..e0bfeea7588 100644 --- a/src/ucp/core/ucp_context.h +++ b/src/ucp/core/ucp_context.h @@ -331,6 +331,11 @@ typedef struct ucp_tl_md { * Set of known system devices associated to the MD */ ucp_sys_dev_map_t sys_dev_map; + + /** + * Set of reachable system devices + */ + ucp_sys_dev_map_t reachable_sys_devs; } ucp_tl_md_t; diff --git a/src/ucp/core/ucp_mm.c b/src/ucp/core/ucp_mm.c index 783cdce1f8b..0435ccc242f 100644 --- a/src/ucp/core/ucp_mm.c +++ b/src/ucp/core/ucp_mm.c @@ -524,28 +524,6 @@ static ucs_status_t ucp_memh_register_gva(ucp_context_h context, ucp_mem_h memh, return UCS_OK; } -static int ucp_memh_sys_dev_reachable(ucs_sys_device_t mem_sys_dev, - ucp_sys_dev_map_t sys_dev_map) -{ - ucs_sys_device_t sys_dev; - - if (mem_sys_dev == UCS_SYS_DEVICE_ID_UNKNOWN) { - return 1; - } - - /* - * If at least one sys_dev is not reachable, do not register on it - * as we cannot know in advance which device is going to be used. 
- */ - ucs_for_each_bit(sys_dev, sys_dev_map) { - if (!ucs_topo_is_reachable(sys_dev, mem_sys_dev)) { - return 0; - } - } - - return 1; -} - static ucs_status_t ucp_memh_register_internal(ucp_context_h context, ucp_mem_h memh, ucp_md_map_t md_map, unsigned uct_flags, @@ -566,7 +544,6 @@ ucp_memh_register_internal(ucp_context_h context, ucp_mem_h memh, void *reg_address; size_t reg_length; size_t reg_align; - ucp_sys_dev_map_t sys_dev_map; if (gva_enable) { status = ucp_memh_register_gva(context, memh, md_map); @@ -614,9 +591,8 @@ ucp_memh_register_internal(ucp_context_h context, ucp_mem_h memh, /* Exclude any unreachable MD from registration */ ucs_for_each_bit(md_index, dmabuf_md_map) { - sys_dev_map = context->tl_mds[md_index].sys_dev_map; - if (!ucp_memh_sys_dev_reachable(mem_attr.sys_dev, - sys_dev_map)) { + if (!(context->tl_mds[md_index].reachable_sys_devs & + UCS_BIT(mem_attr.sys_dev))) { ucs_trace("md[%d] skipped: cannot reach mem_sys_dev=%u", md_index, mem_attr.sys_dev); reg_md_map &= ~UCS_BIT(md_index); From 6a2c06a6cefc8e1ff7304ba5816df310147e3b8c Mon Sep 17 00:00:00 2001 From: Raul Akhmetshin Date: Wed, 13 Aug 2025 12:27:59 +0300 Subject: [PATCH 2/2] UCP/CORE: Simplify reachable_sys_devs computation and reword its description. 
--- src/ucp/core/ucp_context.c | 10 ++-------- src/ucp/core/ucp_context.h | 3 ++- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/ucp/core/ucp_context.c b/src/ucp/core/ucp_context.c index f7d4e6d1261..e7215f58788 100644 --- a/src/ucp/core/ucp_context.c +++ b/src/ucp/core/ucp_context.c @@ -1805,24 +1805,18 @@ static void ucp_fill_resources_reachable_sys_devs(ucp_context_h context) ucp_md_index_t md_index; ucp_tl_md_t *tl_md; ucs_sys_device_t sys_dev, sys_dev_md; - int is_reachable; num_sys_devs = ucs_topo_num_devices(); for (md_index = 0; md_index < context->num_mds; ++md_index) { tl_md = &context->tl_mds[md_index]; - tl_md->reachable_sys_devs = 0; + tl_md->reachable_sys_devs = UCS_MASK(num_sys_devs); for (sys_dev = 0; sys_dev < num_sys_devs; ++sys_dev) { - is_reachable = 1; ucs_for_each_bit(sys_dev_md, tl_md->sys_dev_map) { if (!ucs_topo_is_reachable(sys_dev_md, sys_dev)) { - is_reachable = 0; + tl_md->reachable_sys_devs &= ~UCS_BIT(sys_dev); break; } } - - if (is_reachable) { - tl_md->reachable_sys_devs |= UCS_BIT(sys_dev); - } } } } diff --git a/src/ucp/core/ucp_context.h b/src/ucp/core/ucp_context.h index e0bfeea7588..2e6b467e82e 100644 --- a/src/ucp/core/ucp_context.h +++ b/src/ucp/core/ucp_context.h @@ -333,7 +333,8 @@ typedef struct ucp_tl_md { ucp_sys_dev_map_t sys_dev_map; /** - * Set of reachable system devices + * Set of system devices that can reach all system devices associated with + * the MD */ ucp_sys_dev_map_t reachable_sys_devs; } ucp_tl_md_t;