Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 29 additions & 24 deletions src/t8_cmesh.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#include <t8_schemes/t8_scheme.h>

/** Forward pointer reference to hidden cmesh implementation.
* This reference needs to be known by t8_geometry, hence we
* This reference needs to be known by t8_geometry, hence we
* put it before the include. */
typedef struct t8_cmesh *t8_cmesh_t;

Expand Down Expand Up @@ -320,7 +320,7 @@ t8_cmesh_set_attribute_gloidx_array (t8_cmesh_t cmesh, t8_gloidx_t gtree_id, int
* \param [in] face1 The face number of the first tree.
* \param [in] face2 The face number of the second tree.
* \param [in] orientation Specify how face1 and face2 are oriented to each other
*
*
* \note The orientation is defined as:
* Let my_face and other_face be the two face numbers of the connecting trees.
* We chose a main_face from them as follows: Either both trees have the same
Expand Down Expand Up @@ -405,7 +405,7 @@ t8_cmesh_reorder (t8_cmesh_t cmesh, sc_MPI_Comm comm);
/** Register a geometry in the cmesh. The cmesh takes ownership of the geometry.
* \param [in,out] cmesh The cmesh.
* \param [in] geometry The geometry to register.
*
*
* If no geometry is registered and cmesh is modified from another cmesh then
* the other cmesh's geometries are used.
* \note If you need to use \ref t8_cmesh_bcast, then all geometries must be
Expand Down Expand Up @@ -435,18 +435,18 @@ t8_cmesh_commit (t8_cmesh_t cmesh, sc_MPI_Comm comm);

/**
* Save the cmesh to a file with the given fileprefix.
*
*
* \param[in] cmesh The cmesh to save.
* \param[in] fileprefix The prefix of the file to save the cmesh to.
*
*
* \note Currently, it is only legal to save cmeshes that use the linear geometry.
*/
int
t8_cmesh_save (t8_cmesh_t cmesh, const char *fileprefix);

/**
* Load a cmesh from a file.
*
*
* \param[in] filename The name of the file to load the cmesh from.
* \param[in] comm The MPI communicator to use.
*/
Expand All @@ -455,31 +455,36 @@ t8_cmesh_load (const char *filename, sc_MPI_Comm comm);

/**
* Load a cmesh from multiple files and distribute it across the processes.
*
*
* \param[in] fileprefix The prefix of the files to load the cmesh from.
* \param[in] num_files The number of files to load.
* \param[in] comm The MPI communicator to use.
* \param[in] mode The load mode to use, see \ref t8_load_mode_t.
* \param[in] procs_per_node The number of processes per node, only relevant in JUQUEEN mode.
*
*
* \note \a procs_per_node is only relevant in mode==JUQUEEN. If \a num_files = 1 a replicated cmesh is constructed.
*/
t8_cmesh_t
t8_cmesh_load_and_distribute (const char *fileprefix, int num_files, sc_MPI_Comm comm, t8_load_mode_t mode,
int procs_per_node);

/** Check whether a given MPI communicator assigns the same rank and mpisize
* as stored in a cmesh.
/** Check whether a given MPI communicator matches the cmesh communicator.
* \param [in] cmesh The cmesh to be considered.
* \param [in] comm A MPI communicator.
* \return True if mpirank and mpisize from \a comm are the same as
* the values stored in \a cmesh.
* \return True if \a comm is the communicator \a cmesh was commited with.
* False otherwise.
* \a cmesh must be committed before calling this function.
* */
int
t8_cmesh_comm_is_valid (t8_cmesh_t cmesh, sc_MPI_Comm comm);

/** Get the MPI communicator associated to the cmesh
* \param [in] cmesh The cmesh, which must be committed
* \return The MPI communicator provided at the commit of the cmesh
*/
sc_MPI_Comm
t8_cmesh_get_mpicomm (t8_cmesh_t cmesh);

/** Query whether a committed cmesh is partitioned or replicated.
* \param [in] cmesh A committed cmesh.
* \return True if \a cmesh is partitioned.
Expand Down Expand Up @@ -721,7 +726,7 @@ t8_cmesh_get_attribute (const t8_cmesh_t cmesh, const int package_id, const int
* \param [in] key A key used to identify the attribute under all
* attributes of this tree with the same \a package_id.
* \param [in] ltree_id The local number of the tree.
* \param [in] data_count The number of entries in the array that are requested.
* \param [in] data_count The number of entries in the array that are requested.
* This must be smaller or equal to the \a data_count parameter
* of the corresponding call to \ref t8_cmesh_set_attribute_gloidx_array
* \return The attribute pointer of the tree \a ltree_id or NULL if the attribute is not found.
Expand Down Expand Up @@ -761,7 +766,7 @@ t8_cmesh_get_partition_table (t8_cmesh_t cmesh);
* the calling processor anymore. Not computed if NULL.
* \param [out] first_tree_shared If not NULL, 1 or 0 is stored here depending on whether \a first_local_tree is the
* same as \a last_local_tree on the previous process.
* \a cmesh must be committed before calling this function.
* \a cmesh must be committed before calling this function.
*/
void
t8_cmesh_uniform_bounds_equal_element_count (t8_cmesh_t cmesh, const int level, const t8_scheme_c *tree_scheme,
Expand All @@ -770,11 +775,11 @@ t8_cmesh_uniform_bounds_equal_element_count (t8_cmesh_t cmesh, const int level,
int8_t *first_tree_shared);

/**
* Calculate the section of a uniform hybrid forest for the current rank. Needed for hybrid meshes, especially
* Calculate the section of a uniform hybrid forest for the current rank. Needed for hybrid meshes, especially
* meshes where not all elements refine into 1:2^dim manner. The section is calculated without assuming such refinement
* and each process computes its number of elements on the given \a level, communicates the number to other processes,
* and the correct section is computed based on this information.
*
* and the correct section is computed based on this information.
*
* \param [in] cmesh The cmesh to be considered.
* \param [in] level The uniform refinement level to be created.
* \param [in] scheme The element scheme for which to compute the bounds.
Expand All @@ -785,7 +790,7 @@ t8_cmesh_uniform_bounds_equal_element_count (t8_cmesh_t cmesh, const int level,
* the calling process anymore. Not computed if NULL.
* \param [out] first_tree_shared If not NULL, 1 or 0 is stored here depending on whether \a first_local_tree is the
* same as \a last_local_tree on the previous process.
* \param [in] comm The communicator
* \param [in] comm The communicator
*/
void
t8_cmesh_uniform_bounds_for_irregular_refinement (const t8_cmesh_t cmesh, const int level, const t8_scheme_c *scheme,
Expand Down Expand Up @@ -826,7 +831,7 @@ void
t8_cmesh_destroy (t8_cmesh_t *pcmesh);

/** Compute y = ax + b on an array of doubles, interpreting
* each 3 as one vector x
* each 3 as one vector x
* \param[in] coords_in The incoming coordinates of the vectors
* \param[out] coords_out The computed coordinates of the vectors
* \param[in] num_vertices The number of vertices/vectors
Expand All @@ -836,7 +841,7 @@ t8_cmesh_destroy (t8_cmesh_t *pcmesh);
void
t8_cmesh_coords_axb (const double *coords_in, double *coords_out, int num_vertices, double alpha, const double b[3]);

/** Compute y = x + translate on an array of doubles, interpreting
/** Compute y = x + translate on an array of doubles, interpreting
* each 3 as one vector x
* \param[in] coords_in The incoming coordinates of the vectors
* \param[out] coords_out The computed coordinates of the vectors
Expand All @@ -853,11 +858,11 @@ t8_cmesh_new_translate_vertices_to_attributes (const t8_locidx_t *tvertices, con
double *attr_vertices, const int num_vertices);

/**
* \warning This function is only available in debug-modus and should only
* \warning This function is only available in debug-modus and should only
* be used in debug-modus.
*
*
* Prints the vertices of each tree of each process
*
*
* \param[in] cmesh Source-cmesh, which trees get printed.
* \param[in] comm The MPI communicator to use for printing.
*/
Expand All @@ -873,7 +878,7 @@ t8_cmesh_debug_print_trees (const t8_cmesh_t cmesh, sc_MPI_Comm comm);
* bounds[3] = y_max
* bounds[4] = z_min
* bounds[5] = z_max
*
*
* \param [in] cmesh The cmesh to be considered.
* \param [out] bounds The bounding box of the cmesh. If the box is flat (for quads for example, z_min == z_max)
*
Expand Down
65 changes: 31 additions & 34 deletions src/t8_cmesh/t8_cmesh.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -175,16 +175,13 @@ t8_cmesh_validate_geometry (const t8_cmesh_t cmesh)
int
t8_cmesh_comm_is_valid (const t8_cmesh_t cmesh, sc_MPI_Comm comm)
{
int mpiret, mpisize, mpirank;
return cmesh->mpicomm == comm;
}

mpiret = sc_MPI_Comm_rank (comm, &mpirank);
SC_CHECK_MPI (mpiret);
mpiret = sc_MPI_Comm_size (comm, &mpisize);
SC_CHECK_MPI (mpiret);
if (mpisize != cmesh->mpisize || mpirank != cmesh->mpirank) {
return 0;
}
return 1;
sc_MPI_Comm
t8_cmesh_get_mpicomm (const t8_cmesh_t cmesh)
{
return cmesh->mpicomm;
}

void
Expand Down Expand Up @@ -871,7 +868,7 @@ t8_cmesh_bcast (const t8_cmesh_t cmesh_in, const int root, sc_MPI_Comm comm)
SC_CHECK_MPI (mpiret);
if (!meta_info.pre_commit) {
T8_ASSERT (t8_cmesh_is_committed (cmesh_out));
T8_ASSERT (t8_cmesh_comm_is_valid (cmesh_out, comm));
T8_ASSERT (t8_cmesh_get_mpicomm (cmesh_out) == comm);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would still use t8_cmesh_comm_is_valid here, because of the naming

Suggested change
T8_ASSERT (t8_cmesh_get_mpicomm (cmesh_out) == comm);
T8_ASSERT (t8_cmesh_comm_is_valid (cmesh, comm);

}
#endif
return cmesh_out;
Expand Down Expand Up @@ -1245,7 +1242,7 @@ t8_cmesh_reset (t8_cmesh_t *pcmesh)
* This is useful for debugging. */
if (t8_cmesh_is_committed (cmesh)) {
comm = t8_shmem_array_get_comm (cmesh->tree_offsets);
T8_ASSERT (t8_cmesh_comm_is_valid (cmesh, comm));
T8_ASSERT (t8_cmesh_get_mpicomm (cmesh) == comm);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would still use t8_cmesh_comm_is_valid here because of the straighforward naming

Suggested change
T8_ASSERT (t8_cmesh_get_mpicomm (cmesh) == comm);
T8_ASSERT (t8_cmesh_comm_is_valid (cmesh, comm);

Copy link
Contributor Author

@dutkalex dutkalex Oct 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The change was proposed because it felt clearer that way, since the term "valid" is kind of vague in this context. I would personally argue for endorsing the == spelling as the canonical way of checking this and deprecating the is_valid spelling, but if you guys feel otherwise I'm not against reverting this either

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just had another look concerning the is_valid function and there is an easily addressable problem:
The cmesh now saves the comm along with the mpirank and mpisize. Then afterwards, is_valid only checks if the already saved comm and the provided comm are the same. The mpisize and rank are not checked. This can lead to possible errors in other routines which create a new cmesh. For example in t8_cmesh_copy ().
This function takes an empty cmesh, a committed cmesh and a communicator. The new cmesh copies everything of the committed cmesh, including the mpisize and rank. Then the provided comm is checked with the new cmesh. In your case this would lead to a segfault, because the copy routine does not fill the comm of the new cmesh. But even if it filled the new one with the provided comm, the is_valid function would return true, even though it was never checked if the comm has the same mpisize and rank as the already saved rank and size. So is_valid is still necessary and should not only check if the comms are the same, but also if the comm is in alignment with the stored mpisize and rank.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that there is indeed room for inconsistency here, which could lead to subtle bugs, so this can't make it into production as-is.
A possible solution is effectively to harden the checks to have this bug class be catched by the debug mode. Although probably effective for debugging purposes, this approach does not prevent the bugs from appearing in the first place, thus implying a (hopefully minor) maintenance burden in the long run.
Another solution could be prevent the inconsistency from appearing by only storing the communicator which already holds all the relevant information. The rank and size could then be queried when necessary using MPI_Comm_rank / MPI_Comm_size. This solution requires a little bit more effort right now, but is arguably superior because it would completely eliminate this class of bugs.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sandro-elsweijer While reading the forest implementation, it seems like this mpicomm+mpirank+mpisize pattern could lead to the same kind of issues on the forest side. Maybe this should be addressed in a separate PR then, and your solution is the best strategy for this PR. What do you think?

}
#endif
/* Destroy the shared memory array */
Expand Down Expand Up @@ -1623,7 +1620,7 @@ t8_cmesh_uniform_bounds_equal_element_count (t8_cmesh_t cmesh, const int level,
*child_in_tree_end = (last_global_child - *last_local_tree * children_per_tree);
}
if (is_empty) {
/* This process is empty
/* This process is empty
* We now set the first local tree to the first local tree that is owned by the
* next nonempty rank, and the last local tree to first - 1 */
*first_local_tree = last_global_child / children_per_tree;
Expand All @@ -1645,7 +1642,7 @@ t8_cmesh_uniform_bounds_equal_element_count (t8_cmesh_t cmesh, const int level,
* \param[in] num_procs The total number of processes.
* \param[in] process_offset The offset of the current process in the global numbering.
* \return The rank of the process that will have the element in a uniform partition.
*
*
* \note This function is used standalone and as a callback for vector splitting
*/
static size_t
Expand Down Expand Up @@ -1753,7 +1750,7 @@ t8_cmesh_uniform_bounds_from_unpartioned (const t8_cmesh_t cmesh, const t8_gloid
// Loop over all trees to find the ones containing the first and last element of this process
for (t8_gloidx_t igtree = 0; igtree < num_trees; ++igtree) {
const t8_eclass_t tree_class = t8_cmesh_get_tree_class (cmesh, (t8_locidx_t) igtree);
/* TODO: We can optimize by buffering the num_leaf_elems_in_tree value. Thus, if
/* TODO: We can optimize by buffering the num_leaf_elems_in_tree value. Thus, if
the computation is expensive (may be for non-morton-type schemes),
we do it only once. */
const t8_gloidx_t num_leaf_elems_in_tree = scheme->count_leaves_from_root (tree_class, level);
Expand Down Expand Up @@ -1795,9 +1792,9 @@ t8_cmesh_uniform_bounds_from_unpartioned (const t8_cmesh_t cmesh, const t8_gloid
}

/**
* Send the start or end message to the process given by iproc. The message contains the global id of the first/last tree and
* Send the start or end message to the process given by iproc. The message contains the global id of the first/last tree and
* the global id of the first/last element in the tree.
*
*
* \param[in] cmesh The cmesh.
* \param[in] start_message If true, send the start message, otherwise send the end message.
* \param[in] proc_is_empty If true, the process the message will be send to is empty.
Expand All @@ -1809,13 +1806,13 @@ t8_cmesh_uniform_bounds_from_unpartioned (const t8_cmesh_t cmesh, const t8_gloid
* \param[in] current_pos_in_send_buffer The current position in the send buffer.
* \param[in] first_or_last_element_in_tree_index_of_iproc The tree-local id of the first/last element in the tree. If we send an end message, this is the tree-local index of the last tree on this process.
* \param[in, out] first_or_last_local_tree The global id of the first/last tree of the current process.
* \param[in, out] first_tree_shared The first tree shared flag. Only used if we send the start message. Set to NULL if not used.
* \param[in, out] first_tree_shared The first tree shared flag. Only used if we send the start message. Set to NULL if not used.
* \param[in, out] child_in_tree_end_or_begin The tree-local id of the first/last element in the tree. Set to NULL if not used.
* \param[in, out] expect_start_or_end_message If true, we expect a start or end message from the process.
* \param[in] comm The MPI communicator.
* \param[in, out] num_received_start_or_end_messages The number of received start or end messages. Only used if T8_ENABLE_DEBUG is defined.
* \param[in, out] num_message_sent The number of sent messages. Only used if T8_ENABLE_DEBUG is defined.
*
*
*/
static void
t8_cmesh_bounds_send_start_or_end (const t8_cmesh_t cmesh, const bool start_message, const bool proc_is_empty,
Expand Down Expand Up @@ -1865,7 +1862,7 @@ t8_cmesh_bounds_send_start_or_end (const t8_cmesh_t cmesh, const bool start_mess
}
}
if (child_in_tree_end_or_begin != NULL) {
/* If we send the last element add 1 to the id. During later processing of the cmesh we iterate
/* If we send the last element add 1 to the id. During later processing of the cmesh we iterate
* as long as ielement < child_in_tree_end. Therefore we have to shift by one. */
*child_in_tree_end_or_begin = first_or_last_element_in_tree_index_of_iproc;
}
Expand All @@ -1875,8 +1872,8 @@ t8_cmesh_bounds_send_start_or_end (const t8_cmesh_t cmesh, const bool start_mess
}

/**
* Receive a start or end message. Which is defined by the \a start flag.
*
* Receive a start or end message. Which is defined by the \a start flag.
*
* \param[in] start If true, receive the start message, otherwise receive the end message.
* \param[in, out] first_last_local_tree On Input empty but allocated, on output the global id of the first or last tree of the current process.
* \param[in, out] child_in_tree_begin_end On Input empty but allocated, on output the global id of the first or last element in the tree of the current process.
Expand Down Expand Up @@ -1925,13 +1922,13 @@ recv_message (const bool start, t8_gloidx_t *first_last_local_tree, t8_gloidx_t
}

/**
* Find the bounds of a value in a given offset array. Used for a binary search to find the
* Find the bounds of a value in a given offset array. Used for a binary search to find the
* rank that contains the value.
*
*
* \param[in] array The offset array.
* \param[in] guess The index to start searching from.
* \param[in] value The value to find the bounds for.
*
*
* \return 0 if the value is within the bounds, -1 if it is below the bounds, and 1 otherwise.
*/
inline int
Expand Down Expand Up @@ -2001,7 +1998,7 @@ t8_cmesh_uniform_bounds_from_partition (const t8_cmesh_t cmesh, const t8_gloidx_
first_element_tree[0] = t8_shmem_array_get_gloidx (offset_array, cmesh->mpirank);

/* Compute the first element in every pure local tree.
* This array stores for each tree the global element index offset.
* This array stores for each tree the global element index offset.
* Example: 2 local trees, each has 8 Elements. First element index 12: | 12 | 20 | 28 | */
for (t8_locidx_t itree = 0; itree < num_pure_local_trees; ++itree, ++igtree) {
const t8_eclass_t tree_class = t8_cmesh_get_tree_class (cmesh, igtree);
Expand Down Expand Up @@ -2071,7 +2068,7 @@ t8_cmesh_uniform_bounds_from_partition (const t8_cmesh_t cmesh, const t8_gloidx_
belongs to send_first+i.
If no such tree exists, then the index of the previous process is stored.

Examples:
Examples:
We describe the situation via
Proc i needs tree first local_tree_id, with i as the index in send_first + i.

Expand Down Expand Up @@ -2125,7 +2122,7 @@ t8_cmesh_uniform_bounds_from_partition (const t8_cmesh_t cmesh, const t8_gloidx_
const t8_locidx_t possibly_first_puretree_of_next_proc = tree_offsets_partition[iproc + 1 - send_first];
const t8_gloidx_t first_el_index_of_first_tree = first_element_tree[possibly_first_puretree_of_iproc];
if (first_element_index_of_iproc >= first_element_tree[num_pure_local_trees]) {
/* We do not send to this process iproc at all. Its first element is in a tree that belongs
/* We do not send to this process iproc at all. Its first element is in a tree that belongs
* to the next process. */
send_start_message = send_end_message = false;
first_puretree_of_iproc = -1;
Expand All @@ -2151,11 +2148,11 @@ t8_cmesh_uniform_bounds_from_partition (const t8_cmesh_t cmesh, const t8_gloidx_
}
}
/* Compute the last tree of this proc and whether we need to send an end message to it. */
/* We know
*
/* We know
*
* possibly_first_puretree_of_next_proc - The tree whose first element lies on the next process.
*
*
*
*
* If the next process is empty, then possibly_first_puretree_of_next_proc = possibly_first_puretree_of_iproc
* and this is our last tree.
*/
Expand Down Expand Up @@ -2237,7 +2234,7 @@ t8_cmesh_uniform_bounds_from_partition (const t8_cmesh_t cmesh, const t8_gloidx_
send_start_message = first_child_next_non_empty < first_element_tree[num_pure_local_trees];
if (send_start_message) {
/* We might have detected a larger range of empty processes. We directly send to this range to avoid a recomputation
* of this information. We have to take into account, that in the last iteration of the above do-while-loop
* of this information. We have to take into account, that in the last iteration of the above do-while-loop
* next_non_empty_proc has been added up by one again, so this loop goes [iproc, next_non_empty_proc - 1). */
for (t8_gloidx_t iempty_proc = iproc; iempty_proc < next_non_empty_proc; ++iempty_proc) {
t8_cmesh_bounds_send_start_or_end (cmesh, send_start_message, proc_is_empty, first_puretree_of_iproc,
Expand All @@ -2259,7 +2256,7 @@ t8_cmesh_uniform_bounds_from_partition (const t8_cmesh_t cmesh, const t8_gloidx_
/*
*
* MPI Communication for non-empty processes starts here
*
*
*/

/* Post the start message: We send the first tree id of the process
Expand Down Expand Up @@ -2377,7 +2374,7 @@ t8_cmesh_uniform_bounds_for_irregular_refinement (const t8_cmesh_t cmesh, const
int8_t *first_tree_shared, sc_MPI_Comm comm)
{
T8_ASSERT (cmesh != NULL);
/* TODO: Clean up size_t and gloidx_t data types, ensure that each variables has the
/* TODO: Clean up size_t and gloidx_t data types, ensure that each variables has the
* matching type. */

t8_debugf ("Into t8_cmesh_uniform_bounds_for_irregular_refinement.\n");
Expand Down
Loading
Loading