Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 116 additions & 12 deletions src/mctpd.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ enum discovery_state {
};

struct link {
enum discovery_state discovered;
bool published;
int ifindex;
enum endpoint_role role;
Expand All @@ -135,6 +134,14 @@ struct link {
sd_bus_slot *slot_iface;
sd_bus_slot *slot_busowner;

struct {
enum discovery_state flag;
sd_event_source *notify_source;
dest_phys notify_dest;
uint64_t notify_retry_delay;
uint8_t notify_retries_left;
} discovery;

struct ctx *ctx;
};

Expand Down Expand Up @@ -793,8 +800,8 @@ static int handle_control_set_endpoint_id(struct ctx *ctx, int sd,
warnx("ERR: cannot add bus owner to object lists");
}

if (link_data->discovered != DISCOVERY_UNSUPPORTED) {
link_data->discovered = DISCOVERY_DISCOVERED;
if (link_data->discovery.flag != DISCOVERY_UNSUPPORTED) {
link_data->discovery.flag = DISCOVERY_DISCOVERED;
}
resp->status =
SET_MCTP_EID_ASSIGNMENT_STATUS(MCTP_SET_EID_ACCEPTED) |
Expand All @@ -805,13 +812,13 @@ static int handle_control_set_endpoint_id(struct ctx *ctx, int sd,
return reply_message(ctx, sd, resp, resp_len, addr);

case MCTP_SET_EID_DISCOVERED:
if (link_data->discovered == DISCOVERY_UNSUPPORTED) {
if (link_data->discovery.flag == DISCOVERY_UNSUPPORTED) {
resp->completion_code = MCTP_CTRL_CC_ERROR_INVALID_DATA;
resp_len = sizeof(struct mctp_ctrl_resp);
return reply_message(ctx, sd, resp, resp_len, addr);
}

link_data->discovered = DISCOVERY_DISCOVERED;
link_data->discovery.flag = DISCOVERY_DISCOVERED;
resp->status =
SET_MCTP_EID_ASSIGNMENT_STATUS(MCTP_SET_EID_REJECTED) |
SET_MCTP_EID_ALLOCATION_STATUS(MCTP_SET_EID_POOL_NONE);
Expand Down Expand Up @@ -1009,16 +1016,16 @@ static int handle_control_prepare_endpoint_discovery(
resp = (void *)resp;
mctp_ctrl_msg_hdr_init_resp(&resp->ctrl_hdr, *req);

if (link_data->discovered == DISCOVERY_UNSUPPORTED) {
if (link_data->discovery.flag == DISCOVERY_UNSUPPORTED) {
warnx("received prepare for discovery request to unsupported interface %d",
addr->smctp_ifindex);
resp->completion_code = MCTP_CTRL_CC_ERROR_UNSUPPORTED_CMD;
return reply_message_phys(ctx, sd, resp,
sizeof(struct mctp_ctrl_resp), addr);
}

if (link_data->discovered == DISCOVERY_DISCOVERED) {
link_data->discovered = DISCOVERY_UNDISCOVERED;
if (link_data->discovery.flag == DISCOVERY_DISCOVERED) {
link_data->discovery.flag = DISCOVERY_UNDISCOVERED;
warnx("clear discovered flag of interface %d",
addr->smctp_ifindex);
}
Expand Down Expand Up @@ -1053,13 +1060,13 @@ handle_control_endpoint_discovery(struct ctx *ctx, int sd,
return 0;
}

if (link_data->discovered == DISCOVERY_UNSUPPORTED) {
if (link_data->discovery.flag == DISCOVERY_UNSUPPORTED) {
resp->completion_code = MCTP_CTRL_CC_ERROR_INVALID_DATA;
return reply_message(ctx, sd, resp,
sizeof(struct mctp_ctrl_resp), addr);
}

if (link_data->discovered == DISCOVERY_DISCOVERED) {
if (link_data->discovery.flag == DISCOVERY_DISCOVERED) {
// if we are already discovered (i.e, assigned an EID), then no reply
return 0;
}
Expand Down Expand Up @@ -3460,6 +3467,81 @@ static int bus_link_get_prop(sd_bus *bus, const char *path,
return rc;
}

static int query_discovery_notify(struct link *link)
{
struct mctp_ctrl_cmd_discovery_notify req = { 0 };
struct mctp_ctrl_resp_discovery_notify *resp;
struct sockaddr_mctp_ext resp_addr;
size_t buf_size;
uint8_t *buf;
int rc;

mctp_ctrl_msg_hdr_init_req(&req.ctrl_hdr, mctp_next_iid(link->ctx),
MCTP_CTRL_CMD_DISCOVERY_NOTIFY);

rc = endpoint_query_phys(link->ctx, &link->discovery.notify_dest,
MCTP_CTRL_HDR_MSG_TYPE, &req, sizeof(req),
&buf, &buf_size, &resp_addr);
if (rc < 0)
goto free_buf;

if (buf_size != sizeof(*resp)) {
warnx("%s: wrong reply length %zu bytes. dest %s", __func__,
buf_size, dest_phys_tostr(&link->discovery.notify_dest));
rc = -ENOMSG;
goto free_buf;
}

resp = (void *)buf;
if (resp->completion_code != 0) {
warnx("Failure completion code 0x%02x from %s",
resp->completion_code,
dest_phys_tostr(&link->discovery.notify_dest));
rc = -ECONNREFUSED;
goto free_buf;
}

free_buf:
free(buf);
return rc;
}

static int link_discovery_notify_callback(sd_event_source *source,
uint64_t time, void *userdata)
{
struct link *link = userdata;
struct ctx *ctx = link->ctx;
int rc;

// sanity check
assert(link->discovery.notify_source == source);

// Discovery notify succeeded
if (link->discovery.flag == DISCOVERY_DISCOVERED) {
sd_event_source_disable_unref(source);
link->discovery.notify_source = NULL;
return 0;
}

rc = query_discovery_notify(link);
if (rc < 0) {
if (ctx->verbose) {
warnx("failed to send discovery notify at retry %d: %s",
link->discovery.notify_retries_left,
strerror(-rc));
}
}
link->discovery.notify_retries_left -= 1;
if (link->discovery.notify_retries_left == 0) {
warnx("failed to send discovery notify after all retries");
sd_event_source_disable_unref(source);
} else {
sd_event_source_set_time_relative(
source, link->discovery.notify_retry_delay);
}
return 0;
}

static int bus_link_set_prop(sd_bus *bus, const char *path,
const char *interface, const char *property,
sd_bus_message *value, void *userdata,
Expand Down Expand Up @@ -4279,7 +4361,7 @@ static int add_interface(struct ctx *ctx, int ifindex)
if (!link)
return -ENOMEM;

link->discovered = DISCOVERY_UNSUPPORTED;
link->discovery.flag = DISCOVERY_UNSUPPORTED;
link->published = false;
link->ifindex = ifindex;
link->ctx = ctx;
Expand Down Expand Up @@ -4309,7 +4391,29 @@ static int add_interface(struct ctx *ctx, int ifindex)
}

if (phys_binding == MCTP_PHYS_BINDING_PCIE_VDM) {
link->discovered = DISCOVERY_UNDISCOVERED;
link->discovery.flag = DISCOVERY_UNDISCOVERED;
// TODO: These numbers are respectively MN1 and MT4, specified in DSP0239
// control message timing.
//
// Might need to extract these to macros like MCTP_I2C_TSYM_* in this file,
// or a commit to actually centralize those timing at one place, now that
// we have support for detecting link binding type.
link->discovery.notify_retries_left = 2;
link->discovery.notify_retry_delay = 5000000;
Comment on lines +4395 to +4402
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I generally am not happy with this for two reasons:

  • I probably should extract all the control message timing from binding specifications somewhere and have a way to query them.
  • Testing this is painful. Do you have any plans on also mocking time in Python? Like adding mctp_ops.timeout()..., and then use some existing pytest or trio time support.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, I'd like to implement time control through the ops interface; it'd just be a get_time() (or similar) callback; mctpd would then use that to set the poll timeouts (as it does currently, just with no callback).

Copy link
Member

@jk-ozlabs jk-ozlabs Aug 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no wait, that might be a bit more complex with the sd_event bits, let me check it out in more detail.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure if I understand you correctly, but do you mean keeping track of timers inside mctpd itself?

Something like libcurl multi interface, where you decide when to call curl_multi_socket_action (could be repeatedly in a loop, or efficiently by polling I/O outside), and then the library double-checks if time elapsed is enough to trigger some timeouts, or sockets are ready to be read/recv/... Or something like quiche, where there are no I/O inside the core library, the OS-specific wrapper application does all I/O related to time and socket.

I see we are using two timers:

  1. Recovery/Discovery timer: Since we are using callbacks anyway, I assume converting to the above style is not that much of a hassle. Have a list of timeout and callback, and check elapsed time then callback() on our own.

  2. Socket receive timeout: I assume we could sidestep this by adding SO_RCVTIMEO and keep the blocking I/O, at the cost of introducing more paths related to time. Alternately, we could convert everything into callbacks, which... should require major reworks since all paths are now deconstructed into callbacks and state machines. Otherwise, I don't have much experience on how async C should be written (or perhaps... threads/threadpool instead of single thread event loop)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking something more along the (high-level) lines of:

  • when using the test environment, time does not advance automatically
    • so, no timeouts occur at all (!)
  • for tests that need to interact with the time (ie., those that check for timeouts), they explicitly update mctpd's value of the current time - typically expecting the timeout action to occur after doing so

this would be fairly straightforward if we were using poll() (and friends) directly, and could hook at that layer, but i can't (after only some cursory research) seem to see a facility to do this with sd_event, without some awful hacks.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that said, our usage of sd_event timers is pretty minimal, maybe I can mock that...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i am currently committing crimes against sd_event


// For PCIe-VDM, we want an all zeroes address for Route-to-Root-Complex.
mctp_nl_hwaddr_len_byindex(
ctx->nl, ifindex,
&link->discovery.notify_dest.hwaddr_len);
memset(link->discovery.notify_dest.hwaddr, 0,
link->discovery.notify_dest.hwaddr_len);
link->discovery.notify_dest.ifindex = ifindex;

sd_event_add_time_relative(ctx->event,
&link->discovery.notify_source,
CLOCK_MONOTONIC, 0, 0,
link_discovery_notify_callback,
link);
}

link->published = true;
Expand Down
34 changes: 32 additions & 2 deletions tests/test_mctpd_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,16 @@ async def iface():


@pytest.fixture
async def sysnet(iface):
async def bo(iface):
return Endpoint(iface, bytes([0x10]), eid=8)


@pytest.fixture
async def sysnet(iface, bo):
system = System()
await system.add_interface(iface)
network = Network()
network.add_endpoint(Endpoint(iface, bytes([0x10]), eid=8))
network.add_endpoint(bo)
return Sysnet(system, network)


Expand Down Expand Up @@ -113,12 +118,37 @@ class TestDiscovery:
async def iface(self):
return System.Interface("mctp0", 1, 1, bytes([0x1D]), 68, 254, True, PhysicalBinding.PCIE_VDM)

@pytest.fixture
async def bo(self, iface):
return TestDiscovery.BusOwnerEndpoint(iface, bytes([0x00]), eid=8)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I might need to add support for other routing type here like broadcast, or specific route-to-root-complex type inside Network. Any ideas here? (Currently I'm just using zeroes for the bus owner physical address 😄 )

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we could just add a method to Endpoint, something like:

class PCIeEndpoint:
    ...
    def lladdr_matches(self, lladdr):
        return lladdr == self.ROUTE_TO_RC_LLADDR

which, if defined, would allow Network.lookup_endpoint() to match on broadcast/etc targets. Then you'd just use whatever physical-addressing scheme is typical for the transport type.

Would that work?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, extending that in Endpoint does look really nice!



class BusOwnerEndpoint(Endpoint):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.sem = trio.Semaphore(initial_value=0)

async def handle_mctp_control(self, sock, addr, data):
print(addr, data)
flags, opcode = data[0:2]
if opcode != 0x0D:
return await super().handle_mctp_control(sock, addr, data)
dst_addr = MCTPSockAddr.for_ep_resp(self, addr, sock.addr_ext)
await sock.send(dst_addr, bytes([flags & 0x1F, opcode, 0x00]))
self.sem.release()


""" Test simple Discovery sequence """
async def test_simple_discovery_sequence(self, dbus, mctpd):
bo = mctpd.network.endpoints[0]

assert len(mctpd.system.addresses) == 0

# BMC should send a Discovery Notify message
with trio.move_on_after(5) as expected:
await bo.sem.acquire()
assert not expected.cancelled_caught

# no EID yet
rsp = await bo.send_control(mctpd.network.mctp_socket, MCTPControlCommand(True, 0, 0x02))
assert rsp.hex(' ') == '00 02 00 00 02 00'
Expand Down