From f2940f10fd4d599dbaf56ab0aeb0a458e292b020 Mon Sep 17 00:00:00 2001
From: Hugo Lefeuvre
Date: Mon, 2 Dec 2024 14:48:43 -0800
Subject: [PATCH 1/6] Make waiting behavior on socket locks use steps.

If a thread somehow corrupts the socket list, we will lose the ability
to retrieve references to socket locks, ultimately preventing us from
unblocking threads blocked on them.

To handle that situation, ensure that threads block on the socket lock
in steps, checking the network stack reset state at every step through
`LockGuard` "conditions". If a network stack reset is detected in this
way, threads will bail out.

Note that the "condition" lambda is necessary here, since socket locks
are allocated on a caller capability which we cannot heap-free-all (see
the recent additions to the `LockGuard` class).

Signed-off-by: Hugo Lefeuvre
---
 lib/tcpip/network_wrapper.cc | 48 +++++++++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 6 deletions(-)

diff --git a/lib/tcpip/network_wrapper.cc b/lib/tcpip/network_wrapper.cc
index a42f189..8e509b4 100644
--- a/lib/tcpip/network_wrapper.cc
+++ b/lib/tcpip/network_wrapper.cc
@@ -131,6 +131,17 @@ namespace
         -EAGAIN /* return -EAGAIN if we are restarting */);
     }
 
+    /**
+     * Length of a locking step for socket locks. When we attempt to grab a
+     * socket lock, do so in steps of `SocketLockTimeoutStep`. That way, if
+     * the network stack crashes and we somehow do not have a pointer to
+     * the socket lock anymore (and thus cannot reset it), we have a
+     * guarantee that the blocking thread will become live again within a
+     * bounded amount of time, and bail out as it finds out, through the
+     * condition function, that we are currently restarting.
+     */
+    static constexpr const Ticks SocketLockTimeoutStep = MS_TO_TICKS(500);
+
     /**
      * Wrapper around `with_sealed_socket` that locks the socket before calling
      * the operation. This is used by everything except the close operation
@@ -143,7 +154,24 @@ namespace
     {
         return with_sealed_socket(
           [&](SealedSocket *socket) {
-              if (LockGuard g{socket->socketLock, timeout})
+              if (LockGuard g{
+                    socket->socketLock, timeout, SocketLockTimeoutStep, [&]() {
+                        // Return true when we need to bail
+                        // out, in two cases:
+                        // - we are currently restarting
+                        // - the socket is from a previous
+                        //   instance of the network stack
+                        //
+                        // We must check the latter: if the
+                        // socket list was corrupted, we may
+                        // not have been able to set this lock
+                        // for destruction, and it may still be
+                        // left in a "locked" state by a thread
+                        // that previously crashed while
+                        // holding the lock.
+                        return restartState != 0 ||
+                               (socket->socketEpoch != currentSocketEpoch.load());
+                    }})
               {
                   return operation(socket);
               }
@@ -773,14 +801,22 @@ int network_socket_close(Timeout *t, SObj mallocCapability, SObj sealedSocket)
     }
     return with_sealed_socket(
       [=](SealedSocket *socket) {
+          bool socketIsFromPreviousInstance =
+            (socket->socketEpoch != currentSocketEpoch.load());
           // We will fail to lock if the socket is coming from
           // a previous instance of the network stack as it set
           // for destruction. Ignore the failure: we will not
           // call the FreeRTOS API on it anyways.
-          LockGuard g{socket->socketLock, t};
-          if (g || (socket->socketEpoch != currentSocketEpoch.load()))
+          LockGuard g{socket->socketLock, t, SocketLockTimeoutStep, [&]() {
+                      // Return true when we need to bail out,
+                      // in two cases (see comment in
+                      // `with_sealed_socket`).
+                      return restartState != 0 ||
+                             socketIsFromPreviousInstance;
+                  }};
+          if (g || socketIsFromPreviousInstance)
           {
-              if (socket->socketEpoch != currentSocketEpoch.load())
+              if (socketIsFromPreviousInstance)
               {
                   Debug::log(
                     "Destroying a socket from a previous instance of the "
@@ -809,7 +845,7 @@ int network_socket_close(Timeout *t, SObj mallocCapability, SObj sealedSocket)
               // stack (the socket is invalid anyways). Don't close
               // the firewall either as this was already done
               // during the reset.
-              if (socket->socketEpoch == currentSocketEpoch.load())
+              if (!socketIsFromPreviousInstance)
               {
                   auto rawSocket = socket->socket;
 
@@ -885,7 +921,7 @@ int network_socket_close(Timeout *t, SObj mallocCapability, SObj sealedSocket)
           }
       }
       int ret = 0;
-      if (socket->socketEpoch == currentSocketEpoch.load())
+      if (!socketIsFromPreviousInstance)
       {
           Debug::Assert(!ds::linked_list::is_singleton(&(socket->ring)),
                         "The socket should be present in the list.");
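The stepped acquisition that this patch relies on can be illustrated with a
small standalone sketch. This is not the CHERIoT `LockGuard` implementation:
the name `acquire_in_steps` and the use of `std::timed_mutex`/`std::chrono`
are assumptions made for the example; the real code uses flag locks with
`Ticks`-based timeouts and a different API.

// Sketch only: acquire a lock within an overall timeout, but wake up every
// `step` to evaluate a bail-out condition (e.g. "a reset is in progress").
#include <algorithm>
#include <atomic>
#include <chrono>
#include <mutex>
#include <optional>

using namespace std::chrono_literals;

std::atomic<int> restartState{0}; // non-zero while (hypothetically) restarting

template<typename Condition>
std::optional<std::unique_lock<std::timed_mutex>>
acquire_in_steps(std::timed_mutex         &lock,
                 std::chrono::milliseconds timeout,
                 std::chrono::milliseconds step,
                 Condition                 shouldBailOut)
{
    while (timeout > 0ms)
    {
        // Check the condition at every step so that a blocked caller becomes
        // live again within one step, even if nobody ever releases the lock.
        if (shouldBailOut())
        {
            return std::nullopt;
        }
        auto             slice = std::min(step, timeout);
        std::unique_lock guard{lock, std::defer_lock};
        if (guard.try_lock_for(slice))
        {
            return std::move(guard); // acquired within this step
        }
        timeout -= slice;
    }
    return std::nullopt; // overall timeout expired
}

int main()
{
    std::timed_mutex socketLock;
    auto             guard = acquire_in_steps(socketLock, 3000ms, 500ms, [] {
        return restartState.load() != 0;
    });
    return guard ? 0 : 1;
}

With a 500 ms step, a thread blocked on a lock that will never be released
re-evaluates the condition at least once per step and bails out shortly after
a restart begins, which is the bounded-liveness guarantee described in the
comment above `SocketLockTimeoutStep`.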
From 11a261ac99d8ec1f15f9d30b05ec6224874c2769 Mon Sep 17 00:00:00 2001
From: Hugo Lefeuvre
Date: Mon, 2 Dec 2024 17:20:58 -0800
Subject: [PATCH 2/6] Harden TCP/IP stack error handler against a corrupted socket list.

This commit addresses a number of issues in the reset path when the
socket list is corrupted. Together with the recently added support for
steps/conditions when waiting on socket locks and event queues, this
removes the socket list from the set of reset-critical variables.

Signed-off-by: Hugo Lefeuvre
---
 lib/tcpip/tcpip-internal.h      |  2 -
 lib/tcpip/tcpip_error_handler.h | 68 +++++++++++++++++++++------------
 2 files changed, 43 insertions(+), 27 deletions(-)

diff --git a/lib/tcpip/tcpip-internal.h b/lib/tcpip/tcpip-internal.h
index f4f11dd..bc5fd9b 100644
--- a/lib/tcpip/tcpip-internal.h
+++ b/lib/tcpip/tcpip-internal.h
@@ -70,8 +70,6 @@ struct SealedSocket
  *
  * This is used as part of the network stack reset to clean up sockets and
 * unblock threads waiting on message queues.
- *
- * This will be reset by the error handler, however it *is* reset-critical.
  */
 extern ds::linked_list::Sentinel sealedSockets;
 
diff --git a/lib/tcpip/tcpip_error_handler.h b/lib/tcpip/tcpip_error_handler.h
index c3a3fe1..5ccaff0 100644
--- a/lib/tcpip/tcpip_error_handler.h
+++ b/lib/tcpip/tcpip_error_handler.h
@@ -173,8 +173,13 @@ extern "C" void reset_network_stack_state(bool isIpThread)
     // ensure that threads waiting on them exit the compartment. We will
     // empty the list later.
     //
-    // FIXME This should be made more resilient against corruption of the
-    // linked list by checking all pointers.
+    // This is designed to be resilient to a corrupted socket list. If the
+    // socket list somehow ended up corrupted, we would still manage to
+    // reset, albeit more slowly. In the case of socket locks, we would
+    // need to wait up to 500ms for waiters to wake up, check the network
+    // stack state machine, and bail out. For event groups, we would need
+    // to wait 500ms too, until waiters try to re-acquire the lock and
+    // crash because it was freed through the heap-free-all below.
     DebugErrorHandler::log(
       "Setting socket locks for destruction and destroying event groups.");
     if (!sealedSockets.is_empty())
@@ -182,36 +187,49 @@ extern "C" void reset_network_stack_state(bool isIpThread)
     {
         auto *cell = sealedSockets.first();
         while (cell != &sealedSockets.sentinel)
         {
-            struct SealedSocket *socket = SealedSocket::from_ring(cell);
-
-            FlagLockPriorityInherited *lock = &(socket->socketLock);
-            if (Capability{lock}.is_valid())
+            if (!Capability{cell}.is_valid())
             {
-                DebugErrorHandler::log("Destroying socket lock {}.", lock);
-                lock->upgrade_for_destruction();
-            }
-            else
-            {
-                DebugErrorHandler::log("Ignoring corrupted socket lock {}.",
-                                       lock);
+                DebugErrorHandler::log(
+                  "Ignoring corrupted cell {} in the socket list.", cell);
+                // We have to stop here because we cannot
+                // retrieve the next cell from a corrupted
+                // cell.
+                break;
             }
 
-            FreeRTOS_Socket_t *s = socket->socket;
-            if (Capability{s}.is_valid() &&
-                Capability{s->xEventGroup}.is_valid())
+            struct SealedSocket *socket = SealedSocket::from_ring(cell);
+
+            if (Capability{socket}.is_valid())
             {
-                DebugErrorHandler::log("Destroying event group {}.",
-                                       s->xEventGroup);
-                eventgroup_destroy_force(MALLOC_CAPABILITY, s->xEventGroup);
+                FlagLockPriorityInherited *lock = &(socket->socketLock);
+                if (Capability{lock}.is_valid())
+                {
+                    DebugErrorHandler::log("Destroying socket lock {}.", lock);
+                    lock->upgrade_for_destruction();
+                }
+                else
+                {
+                    DebugErrorHandler::log("Ignoring corrupted socket lock {}.",
+                                           lock);
+                }
+
+                FreeRTOS_Socket_t *s = socket->socket;
+                if (Capability{s}.is_valid() &&
+                    Capability{s->xEventGroup}.is_valid())
+                {
+                    DebugErrorHandler::log("Destroying event group {}.",
+                                           s->xEventGroup);
+                    eventgroup_destroy_force(MALLOC_CAPABILITY, s->xEventGroup);
+                }
+                else
+                {
+                    DebugErrorHandler::log("Ignoring corrupted socket {}.", s);
+                }
             }
             else
             {
-                // The memory of the event group will still be
-                // freed later with the `heap_free_all`,
-                // however we run the risk to have the IP
-                // thread stuck on the event queue which we
-                // didn't manage to destroy.
-                DebugErrorHandler::log("Ignoring corrupted socket {}.", s);
+                DebugErrorHandler::log(
+                  "Ignoring corrupted socket {} in the socket list.", socket);
             }
 
             cell = cell->cell_next();

From 0d07c7d70ad13f4239e89848776cfc5afc54e Mon Sep 17 00:00:00 2001
From: Hugo Lefeuvre
Date: Mon, 2 Dec 2024 17:55:37 -0800
Subject: [PATCH 3/6] Document exploitability of reset-critical variables.

Although `currentSocketEpoch` and `userThreadCount` are both
reset-critical, they should be impossible to corrupt by construction,
unless control-flow or spatial memory safety is compromised. Document
that.

Signed-off-by: Hugo Lefeuvre
---
 lib/tcpip/network_wrapper.cc | 4 ++++
 lib/tcpip/tcpip-internal.h   | 7 +++++++
 2 files changed, 11 insertions(+)

diff --git a/lib/tcpip/network_wrapper.cc b/lib/tcpip/network_wrapper.cc
index 8e509b4..d3bdf04 100644
--- a/lib/tcpip/network_wrapper.cc
+++ b/lib/tcpip/network_wrapper.cc
@@ -58,6 +58,10 @@ extern "C" int ipFOREVER(void)
  * previous instance of the network stack.
  *
  * This should not be reset by the error handler and is reset-critical.
+ *
+ * Note, however, that only one line in the code ever writes to this variable:
+ * the error handler. Assuming that control flow is not compromised (threat
+ * model of the reset), this should be impossible to corrupt.
 */
 std::atomic currentSocketEpoch = 0;
 
diff --git a/lib/tcpip/tcpip-internal.h b/lib/tcpip/tcpip-internal.h
index bc5fd9b..6f7e310 100644
--- a/lib/tcpip/tcpip-internal.h
+++ b/lib/tcpip/tcpip-internal.h
@@ -93,6 +93,13 @@ extern std::atomic restartState;
  * performing a network stack reset.
  *
  * This should not be reset by the error handler and is reset-critical.
+ *
+ * Note, however, that the only code that ever writes to this variable is 1)
+ * `with_restarting_checks` and `with_restarting_checks_driver` below, and 2)
+ * the error handler. `with_restarting_checks` and
+ * `with_restarting_checks_driver` can only be executed when entering the
+ * compartment. Assuming control flow is not compromised (threat model of
+ * the reset), this should be impossible to corrupt.
  */
 extern std::atomic userThreadCount;
 

From 8d4f64f4a52029b83926e7593f4231e04b24d63e Mon Sep 17 00:00:00 2001
From: Hugo Lefeuvre
Date: Wed, 4 Dec 2024 11:32:52 -0800
Subject: [PATCH 4/6] Make `threadEntryGuard` not reset-critical.

By moving the reset of `threadEntryGuard` to a different place in the
execution flow, we can remove it from the set of reset-critical
variables.

The idea here is to only reset `threadEntryGuard` in the case of a
crash, and only if the crash was triggered by a user thread, since
resetting is not needed in the case of a network thread crash (due to
deterministic execution flow, see the comment in the code).

Signed-off-by: Hugo Lefeuvre
---
 lib/tcpip/FreeRTOS_IP_wrapper.c | 43 +++++++++++++++------------------
 lib/tcpip/tcpip_error_handler.h | 17 +++++++++++++
 2 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/lib/tcpip/FreeRTOS_IP_wrapper.c b/lib/tcpip/FreeRTOS_IP_wrapper.c
index 5242dc2..ab4a1cd 100644
--- a/lib/tcpip/FreeRTOS_IP_wrapper.c
+++ b/lib/tcpip/FreeRTOS_IP_wrapper.c
@@ -21,15 +21,8 @@ void ip_thread_start(void);
 /**
  * Flag used to synchronize the network stack thread and user threads at
  * startup.
- *
- * This should not be reset by the error handler and is reset-critical.
- *
- * Note (which also applies to other reset-critical data): ultimately we should
- * move this to a separate "network stack TCB" compartment to be able to reset
- * all the state of this compartment unconditionally by re-zeroing the BSS and
- * resetting .data from snapshots.
  */
-static uint32_t threadEntryGuard;
+uint32_t threadEntryGuard;
 
 /**
  * Store the thread ID of the TCP/IP thread for use in the error handler.
@@ -125,21 +118,6 @@ void __cheri_compartment("TCPIP") ip_thread_entry(void)
             futex_wait(&threadEntryGuard, 0);
         }
 
-        // Reset the guard now: we will only ever re-iterate
-        // the loop in the case of a network stack reset, in
-        // which case we want to wait again for a call to
-        // `ip_thread_start`.
-        //
-        // Note that we cannot reset this in `ip_cleanup`,
-        // because that would overwrite the 1 written by
-        // `ip_thread_start` if the latter was called before we
-        // call `ip_cleanup`. This will happen if the IP thread
-        // crashes, in which case we would first call
-        // `ip_thread_start` in the error handler, and then
-        // reset the context to `ip_thread_entry` (itself then
-        // calling `ip_cleanup`).
-        threadEntryGuard = 0;
-
         xIPTaskHandle = networkThreadID;
         FreeRTOS_printf(
           ("ip_thread_entry starting, thread ID is %p\n", xIPTaskHandle));
@@ -149,6 +127,25 @@ void __cheri_compartment("TCPIP") ip_thread_entry(void)
         // FreeRTOS event loop. This will only return if a user
         // thread crashed.
         prvIPTask(NULL);
+
+        // Reset the guard. We only want to do this in the case
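The `threadEntryGuard` handshake that the two hunks above reason about can be
modelled roughly as follows. This is an illustrative sketch only: it uses
C++20 `std::atomic` wait/notify in place of the firmware's futex calls, and
the polling in `main` merely stands in for the ordering guarantee ("restart
only happens after the guard has been reset") that the real code gets from
the IP thread lock and the error handler.

// Sketch of the startup handshake: the IP thread blocks until the guard is
// set, runs its event loop, and resets the guard only on the path that
// models a user-thread crash.
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <thread>

std::atomic<uint32_t> threadEntryGuard{0};

// Stand-in for ip_thread_start(): allow the IP thread to proceed.
void ip_thread_start()
{
    threadEntryGuard.store(1);
    threadEntryGuard.notify_one(); // futex_wake equivalent
}

// Stand-in for the IP thread's outer loop in ip_thread_entry().
void ip_thread_entry()
{
    for (int run = 0; run < 2; run++)
    {
        threadEntryGuard.wait(0); // futex_wait: block while the guard is 0
        std::puts("IP thread: event loop running");
        // Returning from the event loop models a user-thread crash; this is
        // the only path on which the guard is reset here.
        threadEntryGuard.store(0);
    }
}

int main()
{
    std::thread ip(ip_thread_entry);
    ip_thread_start(); // initial start
    // Model the "no race" guarantee: only restart once the guard has been
    // reset by the IP thread.
    while (threadEntryGuard.load() != 0)
    {
        std::this_thread::yield();
    }
    ip_thread_start(); // restart after the simulated crash
    ip.join();
    return 0;
}

Because the restart path can only run after the guard has returned to 0,
resetting the guard after the event loop cannot overwrite a 1 written by a
later call to `ip_thread_start`.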
+        // of a user thread crash, and we will only ever reach
+        // this line if a user thread crashes (otherwise we'd
+        // end up in the error handler).
+        //
+        // This is guaranteed not to race with
+        // `ip_thread_start` since that function will only get
+        // called after we release the IP thread lock at the
+        // bottom of this function.
+        //
+        // The reason why we only reset this in the case of a
+        // user thread crash is that the synchronization is
+        // trivial in the case of a network thread crash: we
+        // know that we will only enter the next loop iteration
+        // after the guard has been set to 1 by
+        // `ip_thread_start`, since the error handler only
+        // returns once `network_restart` has returned.
+        threadEntryGuard = 0;
     }
     CHERIOT_HANDLER
     {
diff --git a/lib/tcpip/tcpip_error_handler.h b/lib/tcpip/tcpip_error_handler.h
index 5ccaff0..3f14275 100644
--- a/lib/tcpip/tcpip_error_handler.h
+++ b/lib/tcpip/tcpip_error_handler.h
@@ -39,6 +39,7 @@ extern struct RecursiveMutexState __CriticalSectionFlagLock;
 extern struct RecursiveMutexState __SuspendFlagLock;
 extern QueueHandle_t xNetworkEventQueue;
 extern FlagLockPriorityInherited sealedSocketsListLock;
+extern uint32_t threadEntryGuard;
 
 /**
  * Restart the network stack. See documentation in `startup.cc`
@@ -141,6 +142,22 @@ extern "C" void reset_network_stack_state(bool isIpThread)
         DebugErrorHandler::log("The network thread crashed while "
                                "restarting. This may be unrecoverable.");
     }
+    else if (isIpThread)
+    {
+        // Reset the IP thread entry guard. We only want to do
+        // this in the case of a user thread crash, and we will
+        // only ever reach this line if a user thread crashed
+        // first (causing the network thread to crash).
+        //
+        // This is guaranteed not to race with
+        // `ip_thread_start` since that function will only get
+        // called after we return.
+        //
+        // For more information, look at the other place where
+        // we reset `threadEntryGuard` in
+        // `FreeRTOS_IP_wrapper.c`.
+        threadEntryGuard = 0;
+    }
 
     // Another instance of the error handler is running, do not do
     // anything.
From 0941483aea56572fe18aab177eb2b0ec2e05a6a9 Mon Sep 17 00:00:00 2001
From: Hugo Lefeuvre
Date: Wed, 4 Dec 2024 11:39:59 -0800
Subject: [PATCH 5/6] Remove `networkThreadID`.

This is no longer used; we stopped using it when we moved to the
stack-overflow-resilient handler.

Signed-off-by: Hugo Lefeuvre
---
 lib/tcpip/FreeRTOS_IP_wrapper.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/lib/tcpip/FreeRTOS_IP_wrapper.c b/lib/tcpip/FreeRTOS_IP_wrapper.c
index ab4a1cd..0e29492 100644
--- a/lib/tcpip/FreeRTOS_IP_wrapper.c
+++ b/lib/tcpip/FreeRTOS_IP_wrapper.c
@@ -24,11 +24,6 @@ void ip_thread_start(void);
  */
 uint32_t threadEntryGuard;
 
-/**
- * Store the thread ID of the TCP/IP thread for use in the error handler.
- */
-uint16_t networkThreadID;
-
 /**
  * Global lock acquired by the IP thread at startup time. In normal running
  * conditions, this lock is never released and acquiring it after startup will
@@ -105,8 +100,6 @@ void __cheri_compartment("TCPIP") ip_thread_entry(void)
 {
     FreeRTOS_printf(("ip_thread_entry\n"));
 
-    networkThreadID = thread_id_get();
-
     while (1)
     {
         CHERIOT_DURING
@@ -118,7 +111,7 @@ void __cheri_compartment("TCPIP") ip_thread_entry(void)
             futex_wait(&threadEntryGuard, 0);
         }
 
-        xIPTaskHandle = networkThreadID;
+        xIPTaskHandle = thread_id_get();
         FreeRTOS_printf(
           ("ip_thread_entry starting, thread ID is %p\n", xIPTaskHandle));

From b02a101eabcb9d497e1b406c9fa3572c379ef4c5 Mon Sep 17 00:00:00 2001
From: Hugo Lefeuvre
Date: Thu, 5 Dec 2024 16:26:30 -0800
Subject: [PATCH 6/6] Fix HTTP server example when closing listening socket fails.

Currently, the example ignores the failure and tries to re-open the
listening socket, running into an infinite loop. This is not
meaningful: if we cannot close the listening socket, we had better
stop, since we will never be able to rebind to the server port.

Signed-off-by: Hugo Lefeuvre
---
 examples/05.HTTP_SERVER/http_server.cc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/examples/05.HTTP_SERVER/http_server.cc b/examples/05.HTTP_SERVER/http_server.cc
index 4a084e2..35af552 100644
--- a/examples/05.HTTP_SERVER/http_server.cc
+++ b/examples/05.HTTP_SERVER/http_server.cc
@@ -196,6 +196,11 @@ void __cheri_compartment("http_server_example") example()
         if (retries == 0)
         {
             Debug::log("Failed to close the listening socket.");
+
+            // We have to stop now: if we cannot close the
+            // listening socket, we will not be able to bind to
+            // the server port anymore. This is bad!
+            break;
         }
     }