21
21
22
22
BEGIN_C_DECLS
23
23
24
+ /* Tracks ongoing xcast operations to ensure that every message is delivered
25
+ * exactly once to all daemons, even in the presence of daemon failures. */
26
+ typedef struct {
27
+ pmix_object_t super ; // PMIx base object (class is declared via PMIX_CLASS_DECLARATION below)
28
+ // list of ongoing operations (defined in grpcomm_direct_xcast.c)
29
+ pmix_list_t ops ;
30
+ // list of operations sent to the HNP to be started, but not seen since
31
+ pmix_list_t pending_ops ;
32
+ // global op id of the last operation known to have completed in our subtree
33
+ size_t op_id_completed ;
34
+ // global op id of what had completed (in our subtree) when we were last
35
+ // promoted (promotion means our subtree grew, so we can't assume completion
36
+ // in the new subtree)
37
+ size_t op_id_completed_at_promotion ;
38
+ // local op id of the most recent op generated here
39
+ size_t op_id_local ;
40
+ // used by the HNP to assign global op ids
41
+ size_t op_id_global ;
42
+ } prte_grpcomm_xcast_t ;
43
+ PRTE_MODULE_EXPORT PMIX_CLASS_DECLARATION (prte_grpcomm_xcast_t );
44
+
24
45
/*
25
46
* Grpcomm interfaces
26
47
*/
27
-
28
48
typedef struct {
29
- prte_grpcomm_base_component_t super ;
30
- // track ongoing fence operations - list of prte_grpcomm_fence_t
31
- pmix_list_t fence_ops ;
32
- // track ongoiong group operations - list of prte_grpcomm_group_t
33
- pmix_list_t group_ops ;
49
+ prte_grpcomm_base_component_t super ;
50
+ prte_grpcomm_xcast_t xcast_ops ; // track ongoing xcast operations - state held in prte_grpcomm_xcast_t
51
+ // track ongoing fence operations - list of prte_grpcomm_fence_t
52
+ pmix_list_t fence_ops ;
53
+ // track ongoing group operations - list of prte_grpcomm_group_t
54
+ pmix_list_t group_ops ;
34
55
} prte_grpcomm_direct_component_t ;
35
56
36
57
PRTE_MODULE_EXPORT extern prte_grpcomm_direct_component_t prte_mca_grpcomm_direct_component ;
@@ -65,7 +86,6 @@ typedef struct {
65
86
} prte_grpcomm_direct_group_signature_t ;
66
87
PRTE_MODULE_EXPORT PMIX_CLASS_DECLARATION (prte_grpcomm_direct_group_signature_t );
67
88
68
-
69
89
/* Internal component object for tracking ongoing
70
90
* allgather operations */
71
91
typedef struct {
@@ -169,6 +189,14 @@ void prte_grpcomm_direct_xcast_recv(int status, pmix_proc_t *sender,
169
189
pmix_data_buffer_t * buffer ,
170
190
prte_rml_tag_t tg , void * cbdata );
171
191
192
+ PRTE_MODULE_EXPORT extern // parameter list matches prte_grpcomm_direct_xcast_recv declared just above
193
+ void prte_grpcomm_direct_xcast_ack (int status , pmix_proc_t * sender ,
194
+ pmix_data_buffer_t * buffer ,
195
+ prte_rml_tag_t tg , void * cbdata ); // NOTE(review): presumably the RML receive callback for xcast acknowledgements - confirm in grpcomm_direct_xcast.c
196
+
197
+ PRTE_MODULE_EXPORT extern // takes a read-only prte_rml_recovery_status_t and returns nothing
197
+ void prte_grpcomm_direct_xcast_fault_handler (const prte_rml_recovery_status_t * status ); // NOTE(review): presumably called on daemon failure so xcast tracking can recover - confirm against the caller
199
+
172
200
/* fence functions */
173
201
PRTE_MODULE_EXPORT extern
174
202
int prte_grpcomm_direct_fence (const pmix_proc_t procs [], size_t nprocs ,
@@ -185,6 +213,8 @@ void prte_grpcomm_direct_fence_release(int status, pmix_proc_t *sender,
185
213
pmix_data_buffer_t * buffer ,
186
214
prte_rml_tag_t tag , void * cbdata );
187
215
216
+ PRTE_MODULE_EXPORT extern // takes a read-only prte_rml_recovery_status_t and returns nothing
217
+ void prte_grpcomm_direct_fence_fault_handler (const prte_rml_recovery_status_t * status ); // NOTE(review): presumably called on daemon failure so ongoing fence operations can recover - confirm against the caller
188
218
189
219
/* group functions */
190
220
PRTE_MODULE_EXPORT extern
@@ -193,6 +223,9 @@ int prte_grpcomm_direct_group(pmix_group_operation_t op, char *grpid,
193
223
const pmix_info_t directives [], size_t ndirs ,
194
224
pmix_info_cbfunc_t cbfunc , void * cbdata );
195
225
226
+ PRTE_MODULE_EXPORT extern // takes a read-only prte_rml_recovery_status_t and returns nothing
227
+ void prte_grpcomm_direct_group_fault_handler (const prte_rml_recovery_status_t * status ); // NOTE(review): presumably called on daemon failure so ongoing group operations can recover - confirm against the caller
228
+
196
229
#if PMIX_NUMERIC_VERSION >= 0x00060000
197
230
198
231
PRTE_MODULE_EXPORT extern
0 commit comments