Skip to content

Commit 6b2e361

Browse files
authored
Merge pull request #10823 from arun-chandran-edarath/fix_10761
UCT/MM: Fix the FIFO room calculation for tail > head
2 parents aaea0a8 + c4b647f commit 6b2e361

File tree

4 files changed

+110
-16
lines changed

4 files changed

+110
-16
lines changed

src/uct/sm/mm/base/mm_ep.c

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,21 +24,6 @@ typedef enum {
2424
UCT_MM_SEND_AM_SHORT_IOV
2525
} uct_mm_send_op_t;
2626

27-
28-
/* Check if the resources on the remote peer are available for sending to it.
29-
* i.e. check if the remote receive FIFO has room in it.
30-
* return 1 if can send.
31-
* return 0 if can't send.
32-
* Ignore the event arm bit after the subtraction to accommodate
33-
* a) A head ARMED with UCT_MM_IFACE_FIFO_HEAD_EVENT_ARMED bit
34-
* b) head wrapping around after 0x7fff ffff ffff ffff and
35-
* tail going beyond 0x7fff ffff ffff ffff, in this case the subtraction
36-
* will wrap around, this scenario is highly unlikely.
37-
*/
38-
#define UCT_MM_EP_IS_ABLE_TO_SEND(_head, _tail, _fifo_size) \
39-
ucs_likely((((_head) - (_tail)) & ~UCT_MM_IFACE_FIFO_HEAD_EVENT_ARMED) \
40-
< (uint64_t)(_fifo_size))
41-
4227
static UCS_F_NOINLINE ucs_status_t
4328
uct_mm_ep_attach_remote_seg(uct_mm_ep_t *ep, uct_mm_seg_id_t seg_id,
4429
size_t length, void **address_p)

src/uct/sm/mm/base/mm_ep.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
* Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
33
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2019. ALL RIGHTS RESERVED.
44
* Copyright (C) ARM Ltd. 2016. ALL RIGHTS RESERVED.
5+
* Copyright (C) Advanced Micro Devices, Inc. 2025. ALL RIGHTS RESERVED.
56
* See file LICENSE for terms.
67
*/
78

@@ -17,6 +18,25 @@
1718
KHASH_INIT(uct_mm_remote_seg, uintptr_t, uct_mm_remote_seg_t, 1,
1819
kh_int64_hash_func, kh_int64_hash_equal)
1920

21+
/*
22+
* Check if the remote receive FIFO has room.
23+
* Returns 1 if can send, 0 otherwise.
24+
*
25+
* Logic (ignore UCT_MM_IFACE_FIFO_HEAD_EVENT_ARMED on head, compare signed delta):
26+
* - Compute s = (int64_t)(((uint64_t)_head & ~UCT_MM_IFACE_FIFO_HEAD_EVENT_ARMED) -
27+
* (uint64_t)_tail)
28+
* - Room available iff s < (int64_t)_fifo_size
29+
*
30+
* Practical note (head counter runtime): We assume the head counter
31+
* increments once every 1 ns. On this timescale, the signed 63-bit midpoint (2^62)
32+
* is ~4.61e18 ticks (~146 years). Over 5 years, head would advance by
33+
* ~1.5768e17 ticks (~3.4% of that midpoint), which is far from any wraparound
34+
* edge case.
35+
*/
36+
#define UCT_MM_EP_IS_ABLE_TO_SEND(_head, _tail, _fifo_size) \
37+
(((int64_t)(((_head) & ~UCT_MM_IFACE_FIFO_HEAD_EVENT_ARMED) - \
38+
(_tail))) < (int64_t)(_fifo_size))
39+
2040

2141
/**
2242
* MM transport endpoint

test/gtest/Makefile.am

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Copyright (C) UT-Battelle, LLC. 2015. ALL RIGHTS RESERVED.
44
# Copyright (C) The University of Tennessee and the University of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED.
55
# Copyright (C) Los Alamos National Security, LLC. 2018 ALL RIGHTS RESERVED.
6-
# Copyright (C) Advanced Micro Devices, Inc. 2019. ALL RIGHTS RESERVED.
6+
# Copyright (C) Advanced Micro Devices, Inc. 2025. ALL RIGHTS RESERVED.
77
# Copyright (C) ARM Ltd. 2020. ALL RIGHTS RESERVED.
88
# Copyright (C) NextSilicon Ltd. 2021. ALL RIGHTS RESERVED.
99
#
@@ -118,6 +118,7 @@ gtest_SOURCES = \
118118
uct/test_many2one_am.cc \
119119
uct/test_md.cc \
120120
uct/test_mm.cc \
121+
uct/sm/mm/test_mm_fifo_room.cc \
121122
uct/test_mem.cc \
122123
uct/test_p2p_am.cc \
123124
uct/test_p2p_err.cc \
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/**
2+
* Copyright (C) Advanced Micro Devices, Inc. 2025. ALL RIGHTS RESERVED.
3+
* See file LICENSE for terms.
4+
*/
5+
#include <common/test.h>
6+
#include <cstdint>
7+
#include "uct/sm/mm/base/mm_iface.h"
8+
#include "uct/sm/mm/base/mm_ep.h"
9+
10+
namespace {
11+
12+
static constexpr uint64_t EA = UCT_MM_IFACE_FIFO_HEAD_EVENT_ARMED;
13+
14+
/* Practical upper bound for head/tail in real deployments: 2^62.
15+
* Rationale: head advances roughly once per nanosecond in the worst case.
16+
* The signed 63-bit midpoint is 2^62 (~4.61e18). At 1 tick/ns, reaching 2^62
17+
* takes ~146 years. Therefore most tests keep counters at <= 2^62 to reflect
18+
* realistic long-running processes while still exercising wrap-related logic.
19+
*/
20+
static constexpr uint64_t LIM = 1ull << 62;
21+
/* 2^31 constant to guard against regressions to 32-bit comparisons */
22+
static constexpr uint64_t POW2_31 = 1ull << 31;
23+
24+
struct case_item {
25+
const char *name;
26+
uint64_t head;
27+
uint64_t tail;
28+
unsigned fifo;
29+
bool expect;
30+
};
31+
32+
static const case_item k_cases[] = {
33+
/* head < tail */
34+
{"lt:<fifo", 512, 600, 256, true},
35+
{"lt:==fifo", 512, 768, 256, true},
36+
{"lt:>fifo", 512, 900, 256, true},
37+
{"EA lt:<fifo", EA | 512, 600, 256, true},
38+
{"EA lt:==fifo", EA | 512, 768, 256, true},
39+
{"EA lt:>fifo", EA | 512, 900, 256, true},
40+
41+
/* head > tail */
42+
{"gt:<fifo", 100, 0, 256, true},
43+
{"gt:==fifo", 256, 0, 256, false},
44+
{"gt:>fifo", 300, 0, 256, false},
45+
{"EA gt:<fifo", EA | 100, 0, 256, true},
46+
{"EA gt:==fifo", EA | 256, 0, 256, false},
47+
{"EA gt:>fifo", EA | 300, 0, 256, false},
48+
49+
/* Large deltas around 2^31 to catch regressions to 32-bit compare */
50+
{"gt:d=2^31-1@t0", POW2_31 - 1ull, 0, 256, false},
51+
{"gt:d=2^31@t0", POW2_31, 0, 256, false},
52+
{"gt:d=2^31+1@t0", POW2_31 + 1ull, 0, 256, false},
53+
{"EA gt:d=2^31@t0", EA | POW2_31, 0, 256, false},
54+
55+
/* head == tail */
56+
{"eq:zero", 0, 0, 256, true},
57+
{"eq:EA", EA, 0, 256, true},
58+
59+
/* Around 2^62 boundaries (head < tail deltas) */
60+
{"lt:2^62-1", 512, 512 + LIM - 1, 256, true},
61+
{"lt:2^62", 512, 512 + LIM, 256, true},
62+
{"lt:2^62+1", 512, 512 + LIM + 1, 256, true},
63+
64+
/* Special tail at MSB (robustness) */
65+
{"tailEA:+255", 0xff, EA, 256, true},
66+
{"tailEA:+256", 0x100, EA, 256, true},
67+
68+
/* Practical cap at 2^62 */
69+
{"cap:eq", LIM, LIM, 256, true},
70+
{"cap:eq EA", EA | LIM, LIM, 256, true}
71+
};
72+
73+
}
74+
75+
class test_mm_fifo_room : public ucs::test {
76+
protected:
77+
void check_case(const case_item &c) {
78+
bool got = UCT_MM_EP_IS_ABLE_TO_SEND(c.head, c.tail, c.fifo);
79+
EXPECT_EQ(c.expect, got) << c.name;
80+
}
81+
};
82+
83+
UCS_TEST_F(test_mm_fifo_room, predicate_matrix) {
84+
for (const auto &c : k_cases) {
85+
check_case(c);
86+
}
87+
88+
}

0 commit comments

Comments
 (0)