CHERIoT-Platform · hlef · Dec 2, 2024 · Dec 3, 2024 · Dec 3, 2024 · Dec 4, 2024
diff --git a/examples/05.HTTP_SERVER/http_server.cc b/examples/05.HTTP_SERVER/http_server.cc
@@ -196,6 +196,11 @@ void __cheri_compartment("http_server_example") example()
 		if (retries == 0)
 		{
 			Debug::log("Failed to close the listening socket.");
+
+			// We have to stop now - if we cannot close the
+			// listening socket, we will not be able to bind onto
+			// the server port anymore. This is bad!
+			break;
 		}
 	}
 

diff --git a/lib/tcpip/FreeRTOS_IP_wrapper.c b/lib/tcpip/FreeRTOS_IP_wrapper.c
@@ -21,20 +21,8 @@ void ip_thread_start(void);
 /**
  * Flag used to synchronize the network stack thread and user threads at
  * startup.
- *
- * This should not be reset by the error handler and is reset-critical.
- *
- * Note (which also applies to other reset-critical data): ultimately we should
- * move this to a separate "network stack TCB" compartment to be able to reset
- * all the state of this compartment unconditionally by re-zeroing the BSS and
- * resetting .data from snapshots.
  */
-static uint32_t threadEntryGuard;
-
-/**
- * Store the thread ID of the TCP/IP thread for use in the error handler.
- */
-uint16_t networkThreadID;
+uint32_t threadEntryGuard;
 
 /**
  * Global lock acquired by the IP thread at startup time. In normal running
@@ -112,8 +100,6 @@ void __cheri_compartment("TCPIP") ip_thread_entry(void)
 {
 	FreeRTOS_printf(("ip_thread_entry\n"));
 
-	networkThreadID = thread_id_get();
-
 	while (1)
 	{
 		CHERIOT_DURING
@@ -125,22 +111,7 @@ void __cheri_compartment("TCPIP") ip_thread_entry(void)
 				futex_wait(&threadEntryGuard, 0);
 			}
 
-			// Reset the guard now: we will only ever re-iterate
-			// the loop in the case of a network stack reset, in
-			// which case we want to wait again for a call to
-			// `ip_thread_start`.
-			//
-			// Note that we cannot reset this in `ip_cleanup`,
-			// because that would overwrite the 1 written by
-			// `ip_thread_start` if the latter was called before we
-			// call `ip_cleanup`. This will happen if the IP thread
-			// crashes, in which case we would first call
-			// `ip_thread_start` in the error handler, and then
-			// reset the context to `ip_thread_entry` (itself then
-			// calling `ip_cleanup`).
-			threadEntryGuard = 0;
-
-			xIPTaskHandle = networkThreadID;
+			xIPTaskHandle = thread_id_get();
 			FreeRTOS_printf(
 			  ("ip_thread_entry starting, thread ID is %p\n", xIPTaskHandle));
 
@@ -149,6 +120,25 @@ void __cheri_compartment("TCPIP") ip_thread_entry(void)
 			// FreeRTOS event loop. This will only return if a user
 			// thread crashed.
 			prvIPTask(NULL);
+
+			// Reset the guard. We only want to do this in the case
+			// of a user thread crash, and we will only ever reach
+			// this line if a user thread crashes (otherwise we'd
+			// end up in the error handler).
+			//
+			// This is guaranteed not to race with
+			// `ip_thread_start` since that function will only get
+			// called after we release the IP thread lock at the
+			// bottom of this function.
+			//
+			// The reason why we only reset this in the case of a
+			// user thread crash is that the synchronization is
+			// trivial in the case of a network thread crash: we
+			// know that we will only enter the next loop iteration
+			// after the guard has been set to 1 by
+			// `ip_thread_start`, since the error handler only
+			// returns once `network_restart` has returned.
+			threadEntryGuard = 0;
 		}
 		CHERIOT_HANDLER
 		{

diff --git a/lib/tcpip/network_wrapper.cc b/lib/tcpip/network_wrapper.cc
@@ -58,6 +58,10 @@ extern "C" int ipFOREVER(void)
  * previous instance of the network stack.
  *
  * This should not be reset by the error handler and is reset-critical.
+ *
+ * Note, however, that only one line in the code ever writes to this variable:
+ * the error handler. Assuming that the control-flow is not compromised (threat
+ * model of the reset), this should be impossible to corrupt.
  */
 std::atomic<uint32_t> currentSocketEpoch = 0;
 
@@ -131,6 +135,17 @@ namespace
 		  -EAGAIN /* return -EAGAIN if we are restarting */);
 	}
 
+	/**
+	 * Length of a locking step for socket locks. When we attempt to grab a
+	 * socket lock, do so in steps of `SocketLockTimeoutStep`. That way, if
+	 * the network stack crashes and we somehow do not have a pointer to
+	 * the socket lock anymore (and thus cannot reset it), we have a
+	 * guarantee that the blocking thread will become live again within a
+	 * bounded amount of time, and bail out as it finds out, through the
+	 * condition function, that we are currently restarting.
+	 */
+	static constexpr const Ticks SocketLockTimeoutStep = MS_TO_TICKS(500);
+
 	/**
 	 * Wrapper around `with_sealed_socket` that locks the socket before calling
 	 * the operation.  This is used by everything except the close operation
@@ -143,7 +158,24 @@ namespace
 	{
 		return with_sealed_socket(
 		  [&](SealedSocket *socket) {
-			  if (LockGuard g{socket->socketLock, timeout})
+			  if (LockGuard g{
+			        socket->socketLock, timeout, SocketLockTimeoutStep, [&]() {
+					// Return true when we need to bail
+					// out, in two cases:
+					// - we are currently restarting
+					// - the socket is from a previous
+					//   instance of the network stack
+				        //
+					// We must check the latter: if the
+					// socket list was corrupted, we may
+					// not have been able to set this lock
+					// for destruction, and it may still be
+					// left in a "locked" state by a thread
+					// that previously crashed while
+					// holding the lock.
+				        return restartState != 0 || (socket->socketEpoch !=
+				                                     currentSocketEpoch.load());
+			        }})
 			  {
 				  return operation(socket);
 			  }
@@ -773,14 +805,22 @@ int network_socket_close(Timeout *t, SObj mallocCapability, SObj sealedSocket)
 	}
 	return with_sealed_socket<true /* this is a close operation */>(
 	  [=](SealedSocket *socket) {
+		  bool socketIsFromPreviousInstance =
+		    (socket->socketEpoch != currentSocketEpoch.load());
 		  // We will fail to lock if the socket is coming from
 		  // a previous instance of the network stack as it set
 		  // for destruction. Ignore the failure: we will not
 		  // call the FreeRTOS API on it anyways.
-		  LockGuard g{socket->socketLock, t};
-		  if (g || (socket->socketEpoch != currentSocketEpoch.load()))
+		  LockGuard g{socket->socketLock, t, SocketLockTimeoutStep, [&]() {
+			              // Return true when we need to bail out,
+			              // in two cases (see comment in
+			              // `with_sealed_socket`).
+			              return restartState != 0 ||
+			                     socketIsFromPreviousInstance;
+		              }};
+		  if (g || socketIsFromPreviousInstance)
 		  {
-			  if (socket->socketEpoch != currentSocketEpoch.load())
+			  if (socketIsFromPreviousInstance)
 			  {
 				  Debug::log(
 				    "Destroying a socket from a previous instance of the "
@@ -809,7 +849,7 @@ int network_socket_close(Timeout *t, SObj mallocCapability, SObj sealedSocket)
 			  // stack (the socket is invalid anyways). Don't close
 			  // the firewall either as this was already done
 			  // during the reset.
-			  if (socket->socketEpoch == currentSocketEpoch.load())
+			  if (!socketIsFromPreviousInstance)
 			  {
 				  auto rawSocket = socket->socket;
 
@@ -885,7 +925,7 @@ int network_socket_close(Timeout *t, SObj mallocCapability, SObj sealedSocket)
 				  }
 			  }
 			  int ret = 0;
-			  if (socket->socketEpoch == currentSocketEpoch.load())
+			  if (!socketIsFromPreviousInstance)
 			  {
 				  Debug::Assert(!ds::linked_list::is_singleton(&(socket->ring)),
 				                "The socket should be present in the list.");

diff --git a/lib/tcpip/tcpip-internal.h b/lib/tcpip/tcpip-internal.h
@@ -70,8 +70,6 @@ struct SealedSocket
  *
  * This is used as part of the network stack reset to clean up sockets and
  * unblock threads waiting on message queues.
- *
- * This will be reset by the error handler, however it *is* reset-critical.
  */
 extern ds::linked_list::Sentinel<SocketRingLink> sealedSockets;
 
@@ -95,6 +93,13 @@ extern std::atomic<uint8_t> restartState;
  * performing a network stack reset.
  *
  * This should not be reset by the error handler and is reset-critical.
+ *
+ * Note however that the only code that ever writes to this variable is 1)
+ * `with_restarting_checks` and `with_restarting_checks_driver` below, and 2)
+ * the error handler. `with_restarting_checks` and
+ * `with_restarting_checks_driver` can only be executed when entering the
+ * compartment. Assuming the control-flow is not compromised (threat model of
+ * the reset), this should be impossible to corrupt.
  */
 extern std::atomic<uint8_t> userThreadCount;
 

diff --git a/lib/tcpip/tcpip_error_handler.h b/lib/tcpip/tcpip_error_handler.h
@@ -39,6 +39,7 @@ extern struct RecursiveMutexState __CriticalSectionFlagLock;
 extern struct RecursiveMutexState __SuspendFlagLock;
 extern QueueHandle_t              xNetworkEventQueue;
 extern FlagLockPriorityInherited  sealedSocketsListLock;
+extern uint32_t                   threadEntryGuard;
 
 /**
  * Restart the network stack. See documentation in `startup.cc`
@@ -141,6 +142,22 @@ extern "C" void reset_network_stack_state(bool isIpThread)
 			DebugErrorHandler::log("The network thread crashed while "
 			                       "restarting. This may be unrecoverable.");
 		}
+		else if (isIpThread)
+		{
+			// Reset the IP thread entry guard. We only want to do
+			// this in the case of a user thread crash, and we will
+			// only ever reach this line if a user thread crashed
+			// first (causing the network thread to crash).
+			//
+			// This is guaranteed not to race with
+			// `ip_thread_start` since that function will only get
+			// called after we return.
+			//
+			// For more information, look at the other place where
+			// we reset `threadEntryGuard` in
+			// `FreeRTOS_IP_wrapper.c`.
+			threadEntryGuard = 0;
+		}
 
 		// Another instance of the error handler is running, do not do
 		// anything.
@@ -173,45 +190,63 @@ extern "C" void reset_network_stack_state(bool isIpThread)
 	// ensure that threads waiting on them exit the compartment. We will
 	// empty the list later.
 	//
-	// FIXME This should be made more resilient against corruption of the
-	// linked list by checking all pointers.
+	// This is designed to be resilient to a corrupted socket list. If the
+	// socket list somehow ended up corrupted, we would still manage to
+	// reset, albeit slower. In the case of socket locks, we would need to
+	// wait up to 500ms for waiters to wake up, check the network stack
+	// state machine, and bail out.  For event groups, we would need to
+	// wait 500ms too, until waiters try to re-acquire the lock and crash
+	// because it was freed through the heap-free-all below.
 	DebugErrorHandler::log(
 	  "Setting socket locks for destruction and destroying event groups.");
 	if (!sealedSockets.is_empty())
 	{
 		auto *cell = sealedSockets.first();
 		while (cell != &sealedSockets.sentinel)
 		{
-			struct SealedSocket *socket = SealedSocket::from_ring(cell);
-
-			FlagLockPriorityInherited *lock = &(socket->socketLock);
-			if (Capability{lock}.is_valid())
+			if (!Capability{cell}.is_valid())
 			{
-				DebugErrorHandler::log("Destroying socket lock {}.", lock);
-				lock->upgrade_for_destruction();
-			}
-			else
-			{
-				DebugErrorHandler::log("Ignoring corrupted socket lock {}.",
-				                       lock);
+				DebugErrorHandler::log(
+				  "Ignoring corrupted cell {} in the socket list.", cell);
+				// We have to stop here because we cannot
+				// retrieve the next cell from a corrupted
+				// cell.
+				break;
 			}
 
-			FreeRTOS_Socket_t *s = socket->socket;
-			if (Capability{s}.is_valid() &&
-			    Capability{s->xEventGroup}.is_valid())
+			struct SealedSocket *socket = SealedSocket::from_ring(cell);
+
+			if (Capability{socket}.is_valid())
 			{
-				DebugErrorHandler::log("Destroying event group {}.",
-				                       s->xEventGroup);
-				eventgroup_destroy_force(MALLOC_CAPABILITY, s->xEventGroup);
+				FlagLockPriorityInherited *lock = &(socket->socketLock);
+				if (Capability{lock}.is_valid())
+				{
+					DebugErrorHandler::log("Destroying socket lock {}.", lock);
+					lock->upgrade_for_destruction();
+				}
+				else
+				{
+					DebugErrorHandler::log("Ignoring corrupted socket lock {}.",
+					                       lock);
+				}
+
+				FreeRTOS_Socket_t *s = socket->socket;
+				if (Capability{s}.is_valid() &&
+				    Capability{s->xEventGroup}.is_valid())
+				{
+					DebugErrorHandler::log("Destroying event group {}.",
+					                       s->xEventGroup);
+					eventgroup_destroy_force(MALLOC_CAPABILITY, s->xEventGroup);
+				}
+				else
+				{
+					DebugErrorHandler::log("Ignoring corrupted socket {}.", s);
+				}
 			}
 			else
 			{
-				// The memory of the event group will still be
-				// freed later with the `heap_free_all`,
-				// however we run the risk to have the IP
-				// thread stuck on the event queue which we
-				// didn't manage to destroy.
-				DebugErrorHandler::log("Ignoring corrupted socket {}.", s);
+				DebugErrorHandler::log(
+				  "Ignoring corrupted socket {} in the socket list.", socket);
 			}
 
 			cell = cell->cell_next();