Skip to content

Commit 2ab02ac

Browse files
authored
Fix hangs while querying HW context report (#741) (#747)
Fix hangs while querying HW context report on platforms that do not support app health. Signed-off-by: Nishad Saraf <[email protected]>
1 parent 88d10a8 commit 2ab02ac

File tree

3 files changed

+16
-7
lines changed

3 files changed

+16
-7
lines changed

src/driver/amdxdna/aie2_message.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,19 @@ int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, struct aie2_mgmt_
685685
return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg, TX_TIMEOUT);
686686
}
687687

688+
void aie2_reset_app_health_report(struct app_health_report *r)
689+
{
690+
if (!r)
691+
return;
692+
693+
r->fatal_info.exception_type = AIE2_APP_HEALTH_RESET_FATAL_INFO;
694+
r->fatal_info.exception_pc = AIE2_APP_HEALTH_RESET_FATAL_INFO;
695+
r->fatal_info.app_module = AIE2_APP_HEALTH_RESET_FATAL_INFO;
696+
r->fatal_info.fatal_type = AIE2_APP_HEALTH_RESET_FATAL_INFO;
697+
r->txn_op_id = AIE2_APP_HEALTH_RESET_TXN_OP_ID;
698+
r->ctx_pc = AIE2_APP_HEALTH_RESET_CTX_PC;
699+
}
700+
688701
int aie2_get_app_health(struct amdxdna_dev_hdl *ndev, struct aie2_mgmt_dma_hdl *mgmt_hdl,
689702
u32 context_id, u32 size)
690703
{

src/driver/amdxdna/aie2_pci.c

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1285,14 +1285,9 @@ static int aie2_query_ctx_status_array(struct amdxdna_client *client,
12851285
ctx->priv->id, sizeof(*r));
12861286
mutex_unlock(&xdna->dev_handle->aie2_lock);
12871287
if (ret)
1288-
return ret;
1288+
aie2_reset_app_health_report(r);
12891289
} else {
1290-
r->fatal_info.exception_type = AIE2_APP_HEALTH_RESET_FATAL_INFO;
1291-
r->fatal_info.exception_pc = AIE2_APP_HEALTH_RESET_FATAL_INFO;
1292-
r->fatal_info.app_module = AIE2_APP_HEALTH_RESET_FATAL_INFO;
1293-
r->fatal_info.fatal_type = AIE2_APP_HEALTH_RESET_FATAL_INFO;
1294-
r->txn_op_id = AIE2_APP_HEALTH_RESET_TXN_OP_ID;
1295-
r->ctx_pc = AIE2_APP_HEALTH_RESET_CTX_PC;
1290+
aie2_reset_app_health_report(r);
12961291
}
12971292

12981293
tmp[hw_i].fatal_error_exception_type = r->fatal_info.exception_type;

src/driver/amdxdna/aie2_pci.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,7 @@ int aie2_query_aie_telemetry(struct amdxdna_dev_hdl *ndev, struct aie2_mgmt_dma_
506506
u32 type, u32 size, struct aie_version *version);
507507
int aie2_get_app_health(struct amdxdna_dev_hdl *ndev, struct aie2_mgmt_dma_hdl *mgmt_hdl,
508508
u32 context_id, u32 size);
509+
/*
 * Overwrite the fatal_info fields, txn_op_id and ctx_pc of @r with their
 * AIE2_APP_HEALTH_RESET_* values. Does nothing when @r is NULL.
 */
void aie2_reset_app_health_report(struct app_health_report *r);
509510
int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version);
510511
int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
511512
int aie2_query_aie_firmware_version(struct amdxdna_dev_hdl *ndev,

0 commit comments

Comments
 (0)