Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions src/ucp/core/ucp_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -1799,6 +1799,28 @@ static void ucp_fill_resources_reg_md_map_update(ucp_context_h context)
}
}

/**
 * Precompute the reachable_sys_devs mask of every memory domain in the context.
 *
 * For each MD, the mask starts with one bit set per system device reported by
 * ucs_topo_num_devices(). A device bit is then cleared as soon as
 * ucs_topo_is_reachable() fails for any system device listed in the MD's
 * sys_dev_map, so a bit that stays set means the check passed for all of them.
 *
 * @param [in] context  UCP context whose tl_mds[] entries are updated.
 */
static void ucp_fill_resources_reachable_sys_devs(ucp_context_h context)
{
    unsigned dev_count = ucs_topo_num_devices();
    ucs_sys_device_t target_dev, md_dev;
    ucp_md_index_t idx;
    ucp_tl_md_t *md;

    for (idx = 0; idx < context->num_mds; ++idx) {
        md = &context->tl_mds[idx];

        /* Optimistically mark every known device, then prune below */
        md->reachable_sys_devs = UCS_MASK(dev_count);

        for (target_dev = 0; target_dev < dev_count; ++target_dev) {
            ucs_for_each_bit(md_dev, md->sys_dev_map) {
                if (ucs_topo_is_reachable(md_dev, target_dev)) {
                    continue;
                }

                /* One failing MD device is enough to drop the target; no
                 * need to examine the remaining devices of this MD */
                md->reachable_sys_devs &= ~UCS_BIT(target_dev);
                break;
            }
        }
    }
}

static ucs_status_t ucp_fill_resources(ucp_context_h context,
const ucp_config_t *config)
{
Expand Down Expand Up @@ -1913,6 +1935,7 @@ static ucs_status_t ucp_fill_resources(ucp_context_h context,
}

ucp_fill_resources_reg_md_map_update(context);
ucp_fill_resources_reachable_sys_devs(context);

/* If unified mode is enabled, initialize tl_bitmap to 0.
* Then the worker will open all available transport resources and will
Expand Down
6 changes: 6 additions & 0 deletions src/ucp/core/ucp_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,12 @@ typedef struct ucp_tl_md {
* Set of known system devices associated to the MD
*/
ucp_sys_dev_map_t sys_dev_map;

/**
* Set of system devices that are reachable from every system device
* associated with the MD
*/
ucp_sys_dev_map_t reachable_sys_devs;
} ucp_tl_md_t;


Expand Down
28 changes: 2 additions & 26 deletions src/ucp/core/ucp_mm.c
Original file line number Diff line number Diff line change
Expand Up @@ -524,28 +524,6 @@ static ucs_status_t ucp_memh_register_gva(ucp_context_h context, ucp_mem_h memh,
return UCS_OK;
}

/**
 * Check whether a memory system device passes ucs_topo_is_reachable() for
 * every system device in the given map.
 *
 * @param [in] mem_sys_dev  System device the memory belongs to.
 * @param [in] sys_dev_map  Bitmap of system devices to check against.
 *
 * @return 1 if @a mem_sys_dev is UCS_SYS_DEVICE_ID_UNKNOWN or the check passes
 *         for all devices in @a sys_dev_map, 0 otherwise.
 */
static int ucp_memh_sys_dev_reachable(ucs_sys_device_t mem_sys_dev,
                                      ucp_sys_dev_map_t sys_dev_map)
{
    ucs_sys_device_t md_dev;
    int reachable = 1;

    /* An unknown memory device is treated as reachable without any check */
    if (mem_sys_dev != UCS_SYS_DEVICE_ID_UNKNOWN) {
        /*
         * A single unreachable device disqualifies the whole map: we cannot
         * know in advance which of the devices is going to be used, so do not
         * register in that case.
         */
        ucs_for_each_bit(md_dev, sys_dev_map) {
            if (!ucs_topo_is_reachable(md_dev, mem_sys_dev)) {
                reachable = 0;
                break;
            }
        }
    }

    return reachable;
}

static ucs_status_t
ucp_memh_register_internal(ucp_context_h context, ucp_mem_h memh,
ucp_md_map_t md_map, unsigned uct_flags,
Expand All @@ -566,7 +544,6 @@ ucp_memh_register_internal(ucp_context_h context, ucp_mem_h memh,
void *reg_address;
size_t reg_length;
size_t reg_align;
ucp_sys_dev_map_t sys_dev_map;

if (gva_enable) {
status = ucp_memh_register_gva(context, memh, md_map);
Expand Down Expand Up @@ -614,9 +591,8 @@ ucp_memh_register_internal(ucp_context_h context, ucp_mem_h memh,

/* Exclude any unreachable MD from registration */
ucs_for_each_bit(md_index, dmabuf_md_map) {
sys_dev_map = context->tl_mds[md_index].sys_dev_map;
if (!ucp_memh_sys_dev_reachable(mem_attr.sys_dev,
sys_dev_map)) {
if (!(context->tl_mds[md_index].reachable_sys_devs &
UCS_BIT(mem_attr.sys_dev))) {
ucs_trace("md[%d] skipped: cannot reach mem_sys_dev=%u",
md_index, mem_attr.sys_dev);
reg_md_map &= ~UCS_BIT(md_index);
Expand Down
Loading