Skip to content

Commit 3842a65

Browse files
authored
Merge pull request #10686 from amastbaum/mpi_comm_split_performance_issue
UCP/PROTO: Increase latency factor in send-zcopy protocols only for fast completions
2 parents 9878cfa + ef3c110 commit 3842a65

File tree

2 files changed

+28 -26 lines changed

src/ucp/proto/proto_init.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,8 @@ ucp_proto_init_add_tl_perf(const ucp_proto_common_init_params_t *params,
170170

171171
/* Send time is representing request completion, which in case of zcopy
172172
waits for ACK from remote side. */
173-
if (params->flags & UCP_PROTO_COMMON_INIT_FLAG_SEND_ZCOPY) {
173+
if ((op_attr_mask & UCP_OP_ATTR_FLAG_FAST_CMPL) &&
174+
(params->flags & UCP_PROTO_COMMON_INIT_FLAG_SEND_ZCOPY)) {
174175
perf_factors[UCP_PROTO_PERF_FACTOR_LATENCY].c += tl_perf->latency;
175176
}
176177

test/gtest/ucp/test_ucp_proto_mock.cc

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -560,10 +560,10 @@ UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_1_lane, "IB_NUM_PATHS?=1",
560560
/* Prefer mock_0:1 iface for RNDV because it has larger BW */
561561
check_ep_config(sender(), {
562562
{0, 200, "short", "rc_mlx5/mock_1:1"},
563-
{201, 6650, "copy-in", "rc_mlx5/mock_1:1"},
564-
{6651, 8246, "zero-copy", "rc_mlx5/mock_1:1"},
565-
{8247, 21991, "multi-frag zero-copy", "rc_mlx5/mock_1:1"},
566-
{21992, INF, "rendezvous zero-copy read from remote",
563+
{201, 404, "copy-in", "rc_mlx5/mock_1:1"},
564+
{405, 8246, "zero-copy", "rc_mlx5/mock_1:1"},
565+
{8247, 21145, "multi-frag zero-copy", "rc_mlx5/mock_1:1"},
566+
{21146, INF, "rendezvous zero-copy read from remote",
567567
"rc_mlx5/mock_0:1"},
568568
}, key);
569569
}
@@ -577,10 +577,10 @@ UCS_TEST_P(test_ucp_proto_mock_rcx, zero_rndv_perf_diff, "IB_NUM_PATHS?=1",
577577

578578
check_ep_config(sender(), {
579579
{0, 200, "short", "rc_mlx5/mock_1:1"},
580-
{201, 6650, "copy-in", "rc_mlx5/mock_1:1"},
581-
{6651, 8246, "zero-copy", "rc_mlx5/mock_1:1"},
582-
{8247, 22502, "multi-frag zero-copy", "rc_mlx5/mock_1:1"},
583-
{22503, INF, "rendezvous zero-copy read from remote",
580+
{201, 404, "copy-in", "rc_mlx5/mock_1:1"},
581+
{405, 8246, "zero-copy", "rc_mlx5/mock_1:1"},
582+
{8247, 21563, "multi-frag zero-copy", "rc_mlx5/mock_1:1"},
583+
{21564, INF, "rendezvous zero-copy read from remote",
584584
"rc_mlx5/mock_0:1"},
585585
}, key);
586586
}
@@ -595,10 +595,10 @@ UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_2_lanes, "IB_NUM_PATHS?=2",
595595
/* The optimal RNDV config must use mock_0:1 and mock_1:1 proportionally. */
596596
check_ep_config(sender(), {
597597
{0, 200, "short", "rc_mlx5/mock_1:1/path0"},
598-
{201, 6650, "copy-in", "rc_mlx5/mock_1:1/path0"},
599-
{6651, 8246, "zero-copy", "rc_mlx5/mock_1:1/path0"},
600-
{8247, 19883, "multi-frag zero-copy", "rc_mlx5/mock_1:1/path0"},
601-
{19884, INF, "rendezvous zero-copy read from remote",
598+
{201, 404, "copy-in", "rc_mlx5/mock_1:1/path0"},
599+
{405, 8246, "zero-copy", "rc_mlx5/mock_1:1/path0"},
600+
{8247, 19149, "multi-frag zero-copy", "rc_mlx5/mock_1:1/path0"},
601+
{19150, INF, "rendezvous zero-copy read from remote",
602602
"47% on rc_mlx5/mock_1:1/path0 and 53% on rc_mlx5/mock_0:1/path0"},
603603
}, key);
604604
}
@@ -618,12 +618,13 @@ UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_4_paths,
618618

619619
/* All existing IB paths should be selected. */
620620
check_ep_config(sender(), {
621-
{1, 5418, "rendezvous fragmented copy-in copy-out",
621+
{1, 477, "rendezvous fragmented copy-in copy-out",
622622
"rc_mlx5/mock_1:1/path0"},
623-
{5419, 283699, "rendezvous zero-copy read from remote",
623+
{478, 3813, "rendezvous zero-copy", "rc_mlx5/mock_1:1/path0"},
624+
{3814, 283699, "rendezvous zero-copy read from remote",
624625
"12% on rc_mlx5/mock_1:1/path0, 14% on rc_mlx5/mock_0:1/path0, "
625626
"14% on rc_mlx5/mock_0:1/path1, 12% on rc_mlx5/mock_1:1/path1, 14%"},
626-
{283700, INF, "rendezvous zero-copy fenced write to remote",
627+
{283700, INF, "rendezvous zero-copy fenced write to remote",
627628
"12% on rc_mlx5/mock_1:1/path0, 14% on rc_mlx5/mock_0:1/path0, "
628629
"14% on rc_mlx5/mock_0:1/path1, 12% on rc_mlx5/mock_1:1/path1, 14%"},
629630
}, key);
@@ -667,9 +668,9 @@ UCS_TEST_P(test_ucp_proto_mock_rcx2, rndv_send_recv_small_frag,
667668
key.param.op_attr = 0;
668669

669670
check_ep_config(sender(), {
670-
{1, 3724, "rendezvous fragmented copy-in copy-out",
671+
{1, 433, "rendezvous fragmented copy-in copy-out",
671672
"rc_mlx5/mock_0:1/path0"},
672-
{3725, INF, "rendezvous zero-copy read from remote",
673+
{434, INF, "rendezvous zero-copy read from remote",
673674
"54% on rc_mlx5/mock_0:1/path0 and 46% on rc_mlx5/mock_1:1/path0"},
674675
}, key);
675676

@@ -716,8 +717,8 @@ UCS_TEST_P(test_ucp_proto_mock_rcx3, single_lane_no_zcopy,
716717

717718
/* Check that get_zcopy is selected on slower device */
718719
check_ep_config(sender(), {
719-
{1, 3662, "rendezvous fragmented copy-in copy-out", "rc_mlx5/mock_0:1"},
720-
{3663, 53753, "rendezvous zero-copy read from remote", "rc_mlx5/mock_1:1"},
720+
{1, 94, "rendezvous fragmented copy-in copy-out", "rc_mlx5/mock_0:1"},
721+
{95, 53753, "rendezvous zero-copy read from remote", "rc_mlx5/mock_1:1"},
721722
{53754, INF, "rendezvous zero-copy fenced write to remote",
722723
"54% on rc_mlx5/mock_0:1 and 46% on rc_mlx5/mock_1:1"},
723724
}, key);
@@ -747,8 +748,8 @@ UCS_TEST_P(test_ucp_proto_mock_cma, am_send_1_lane)
747748

748749
check_ep_config(sender(), {
749750
{0, 92, "short", "posix/memory"},
750-
{93, 5345, "copy-in", "posix/memory"},
751-
{5346, INF, "rendezvous zero-copy read from remote", "cma/mock"},
751+
{93, 5028, "copy-in", "posix/memory"},
752+
{5029, INF, "rendezvous zero-copy read from remote", "cma/mock"},
752753
}, key);
753754
}
754755

@@ -781,10 +782,10 @@ UCS_TEST_P(test_ucp_proto_mock_tcp, am_send_1_lane)
781782
key.param.op_attr = 0;
782783

783784
check_ep_config(sender(), {
784-
{0, 8184, "short", "tcp/mock"},
785-
{8185, 65528, "zero-copy", "tcp/mock"},
786-
{65529, 366864, "multi-frag zero-copy", "tcp/mock"},
787-
{366865, INF, "rendezvous zero-copy fenced write to remote", "tcp/mock"},
785+
{0, 0, "short", "tcp/mock"},
786+
{1, 65528, "zero-copy", "tcp/mock"},
787+
{65529, 367108, "multi-frag zero-copy", "tcp/mock"},
788+
{367109, INF, "rendezvous zero-copy fenced write to remote", "tcp/mock"},
788789
}, key);
789790
}
790791

0 commit comments

Comments
 (0)