diff --git a/src/ucp/core/ucp_context.c b/src/ucp/core/ucp_context.c
index aa66d5ccfe9..e7215f58788 100644
--- a/src/ucp/core/ucp_context.c
+++ b/src/ucp/core/ucp_context.c
@@ -1799,6 +1799,33 @@ static void ucp_fill_resources_reg_md_map_update(ucp_context_h context)
     }
 }
 
+/*
+ * For each MD, precompute the set of system devices that are reachable from
+ * every system device associated with that MD. A system device is dropped
+ * from the set as soon as one of the MD's devices cannot reach it.
+ */
+static void ucp_fill_resources_reachable_sys_devs(ucp_context_h context)
+{
+    unsigned num_sys_devs;
+    ucp_md_index_t md_index;
+    ucp_tl_md_t *tl_md;
+    ucs_sys_device_t sys_dev, sys_dev_md;
+
+    num_sys_devs = ucs_topo_num_devices();
+    for (md_index = 0; md_index < context->num_mds; ++md_index) {
+        tl_md = &context->tl_mds[md_index];
+        tl_md->reachable_sys_devs = UCS_MASK(num_sys_devs);
+        for (sys_dev = 0; sys_dev < num_sys_devs; ++sys_dev) {
+            ucs_for_each_bit(sys_dev_md, tl_md->sys_dev_map) {
+                if (!ucs_topo_is_reachable(sys_dev_md, sys_dev)) {
+                    tl_md->reachable_sys_devs &= ~UCS_BIT(sys_dev);
+                    break;
+                }
+            }
+        }
+    }
+}
+
 static ucs_status_t ucp_fill_resources(ucp_context_h context,
                                        const ucp_config_t *config)
 {
@@ -1913,6 +1940,7 @@ static ucs_status_t ucp_fill_resources(ucp_context_h context,
     }
 
     ucp_fill_resources_reg_md_map_update(context);
+    ucp_fill_resources_reachable_sys_devs(context);
 
     /* If unified mode is enabled, initialize tl_bitmap to 0.
      * Then the worker will open all available transport resources and will
diff --git a/src/ucp/core/ucp_context.h b/src/ucp/core/ucp_context.h
index 7df775955db..2e6b467e82e 100644
--- a/src/ucp/core/ucp_context.h
+++ b/src/ucp/core/ucp_context.h
@@ -331,6 +331,12 @@ typedef struct ucp_tl_md {
      * Set of known system devices associated to the MD
      */
     ucp_sys_dev_map_t sys_dev_map;
+
+    /**
+     * Set of system devices that are reachable from all system devices
+     * associated with the MD
+     */
+    ucp_sys_dev_map_t reachable_sys_devs;
 } ucp_tl_md_t;
 
 
diff --git a/src/ucp/core/ucp_mm.c b/src/ucp/core/ucp_mm.c
index 783cdce1f8b..0435ccc242f 100644
--- a/src/ucp/core/ucp_mm.c
+++ b/src/ucp/core/ucp_mm.c
@@ -524,28 +524,6 @@ static ucs_status_t ucp_memh_register_gva(ucp_context_h context, ucp_mem_h memh,
     return UCS_OK;
 }
 
-static int ucp_memh_sys_dev_reachable(ucs_sys_device_t mem_sys_dev,
-                                      ucp_sys_dev_map_t sys_dev_map)
-{
-    ucs_sys_device_t sys_dev;
-
-    if (mem_sys_dev == UCS_SYS_DEVICE_ID_UNKNOWN) {
-        return 1;
-    }
-
-    /*
-     * If at least one sys_dev is not reachable, do not register on it
-     * as we cannot know in advance which device is going to be used.
-     */
-    ucs_for_each_bit(sys_dev, sys_dev_map) {
-        if (!ucs_topo_is_reachable(sys_dev, mem_sys_dev)) {
-            return 0;
-        }
-    }
-
-    return 1;
-}
-
 static ucs_status_t
 ucp_memh_register_internal(ucp_context_h context, ucp_mem_h memh,
                            ucp_md_map_t md_map, unsigned uct_flags,
@@ -566,7 +544,6 @@ ucp_memh_register_internal(ucp_context_h context, ucp_mem_h memh,
     void *reg_address;
     size_t reg_length;
     size_t reg_align;
-    ucp_sys_dev_map_t sys_dev_map;
 
     if (gva_enable) {
         status = ucp_memh_register_gva(context, memh, md_map);
@@ -614,9 +591,10 @@ ucp_memh_register_internal(ucp_context_h context, ucp_mem_h memh,
 
             /* Exclude any unreachable MD from registration */
             ucs_for_each_bit(md_index, dmabuf_md_map) {
-                sys_dev_map = context->tl_mds[md_index].sys_dev_map;
-                if (!ucp_memh_sys_dev_reachable(mem_attr.sys_dev,
-                                                sys_dev_map)) {
+                /* Memory on an unknown system device is never excluded */
+                if ((mem_attr.sys_dev != UCS_SYS_DEVICE_ID_UNKNOWN) &&
+                    !(context->tl_mds[md_index].reachable_sys_devs &
+                      UCS_BIT(mem_attr.sys_dev))) {
                     ucs_trace("md[%d] skipped: cannot reach mem_sys_dev=%u",
                               md_index, mem_attr.sys_dev);
                     reg_md_map &= ~UCS_BIT(md_index);