|
10 | 10 |
|
11 | 11 | #include "proto_single.h"
|
12 | 12 | #include "proto_common.h"
|
| 13 | +#include "proto_common.inl" |
13 | 14 | #include "proto_init.h"
|
14 | 15 | #include "proto_debug.h"
|
15 | 16 |
|
16 | 17 | #include <ucs/debug/assert.h>
|
17 | 18 | #include <ucs/debug/log.h>
|
18 | 19 | #include <ucs/sys/math.h>
|
19 | 20 |
|
| 21 | +static double |
| 22 | +ucp_proto_single_get_bandwidth(const ucp_proto_common_init_params_t *params, |
| 23 | + ucp_lane_index_t lane) |
| 24 | +{ |
| 25 | + ucp_proto_common_tl_perf_t tl_perf; |
| 26 | + ucp_proto_perf_node_t *perf_node; |
| 27 | + ucs_status_t status; |
| 28 | + |
| 29 | + status = ucp_proto_common_get_lane_perf(params, lane, &tl_perf, &perf_node); |
| 30 | + if (status != UCS_OK) { |
| 31 | + return 0; |
| 32 | + } |
| 33 | + |
| 34 | + ucp_proto_perf_node_deref(&perf_node); |
| 35 | + return tl_perf.bandwidth; |
| 36 | +} |
| 37 | + |
| 38 | +static void |
| 39 | +ucp_proto_single_update_lane(const ucp_proto_single_init_params_t *params, |
| 40 | + ucp_lane_index_t *lane_p) |
| 41 | +{ |
| 42 | + const ucp_proto_common_init_params_t *common_params = ¶ms->super; |
| 43 | + const ucp_proto_init_params_t *init_params = &common_params->super; |
| 44 | + const ucp_context_h context = init_params->worker->context; |
| 45 | + double bandwidth; |
| 46 | + ucp_lane_index_t lanes[UCP_PROTO_MAX_LANES]; |
| 47 | + ucs_sys_device_t sys_devs[UCP_PROTO_MAX_LANES]; |
| 48 | + ucp_lane_index_t num_lanes, num_same_bw_devs, i, lane; |
| 49 | + ucs_sys_device_t sys_dev; |
| 50 | + |
| 51 | + if (!context->config.ext.proto_use_single_net_device || |
| 52 | + /* skip lane update for node_local_id 0 since the original lane would be |
| 53 | + * selected anyway */ |
| 54 | + (context->config.node_local_id == 0)) { |
| 55 | + return; |
| 56 | + } |
| 57 | + |
| 58 | + if (!ucp_proto_common_is_net_dev(init_params, *lane_p)) { |
| 59 | + return; |
| 60 | + } |
| 61 | + |
| 62 | + bandwidth = ucp_proto_single_get_bandwidth(common_params, *lane_p); |
| 63 | + lanes[0] = *lane_p; |
| 64 | + sys_devs[0] = ucp_proto_common_get_sys_dev(init_params, lanes[0]); |
| 65 | + |
| 66 | + num_lanes = ucp_proto_common_find_lanes( |
| 67 | + init_params, common_params->flags, params->lane_type, |
| 68 | + params->tl_cap_flags, UCP_PROTO_MAX_LANES - 1, |
| 69 | + (common_params->exclude_map | UCS_BIT(lanes[0])), |
| 70 | + ucp_proto_common_filter_min_frag, lanes + 1); |
| 71 | + |
| 72 | + for (num_same_bw_devs = 1, i = 1; i < num_lanes; ++i) { |
| 73 | + lane = lanes[i]; |
| 74 | + if (!ucp_proto_common_is_net_dev(init_params, lane)) { |
| 75 | + continue; |
| 76 | + } |
| 77 | + |
| 78 | + if (!ucp_proto_common_bandwidth_equal( |
| 79 | + ucp_proto_single_get_bandwidth(common_params, lane), bandwidth)) { |
| 80 | + continue; |
| 81 | + } |
| 82 | + |
| 83 | + sys_dev = ucp_proto_common_get_sys_dev(init_params, lane); |
| 84 | + if (ucp_proto_common_add_unique_sys_dev(sys_dev, sys_devs, |
| 85 | + &num_same_bw_devs, |
| 86 | + UCP_PROTO_MAX_LANES)) { |
| 87 | + lanes[num_same_bw_devs - 1] = lane; |
| 88 | + } |
| 89 | + } |
| 90 | + |
| 91 | + *lane_p = lanes[ucp_proto_common_select_sys_dev_by_node_id(init_params, |
| 92 | + num_same_bw_devs)]; |
| 93 | +} |
20 | 94 |
|
21 | 95 | ucs_status_t ucp_proto_single_init(const ucp_proto_single_init_params_t *params,
|
22 | 96 | ucp_proto_perf_t **perf_p,
|
@@ -47,6 +121,8 @@ ucs_status_t ucp_proto_single_init(const ucp_proto_single_init_params_t *params,
|
47 | 121 |
|
48 | 122 | ucs_assert(num_lanes == 1);
|
49 | 123 |
|
| 124 | + ucp_proto_single_update_lane(params, &lane); |
| 125 | + |
50 | 126 | reg_md_map = ucp_proto_common_reg_md_map(¶ms->super, UCS_BIT(lane));
|
51 | 127 | if (reg_md_map == 0) {
|
52 | 128 | spriv->reg_md = UCP_NULL_RESOURCE;
|
|
0 commit comments