From bd6e40deeff34d39c1a05809a23ea9399a15e717 Mon Sep 17 00:00:00 2001 From: Hugo Lefeuvre Date: Mon, 24 Jun 2024 14:32:02 +0200 Subject: [PATCH 1/2] Expose FreeRTOS+TCP checksum APIs to the firewall. Signed-off-by: Hugo Lefeuvre (cherry picked from commit af215ef07c09cf23b84e1a9695739ae5b15682a7) --- lib/tcpip/checksum-internal.h | 50 +++++++++++++++++++++++++++++++++++ lib/tcpip/network_wrapper.cc | 35 ++++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 lib/tcpip/checksum-internal.h diff --git a/lib/tcpip/checksum-internal.h b/lib/tcpip/checksum-internal.h new file mode 100644 index 0000000..c09c0bc --- /dev/null +++ b/lib/tcpip/checksum-internal.h @@ -0,0 +1,50 @@ +// Copyright SCI Semiconductor and CHERIoT Contributors. +// SPDX-License-Identifier: MIT + +#pragma once +#include +#include +#include +/** + * Internal APIs that the TCP/IP compartment exposes for the firewall to use. + * + * These should be called only from the firewall compartment (validated by + * checking the compartment linkage report). These APIs trust the caller and + * do *not* check arguments. + */ + +/** + * Returns the IPv4 checksum for passed packet. + * + * This function is stateless and can be called at any point of the lifetime of + * the TCP/IP stack. + * + * The IPv4 header (only) should be passed in `ipv4Header`, along with its + * length in `headerLength`. + * + * The returned checksum is in network byte order and can be used as-is for + * transmission in the IPv4 header. + */ +uint16_t __cheri_compartment("TCPIP") + network_calculate_ipv4_checksum(const uint8_t *ipv4Header, + size_t headerLength); + +/** + * Returns the TCP checksum for passed packet. + * + * This function is stateless and can be called at any point of the lifetime of + * the TCP/IP stack. + * + * Unlike `network_calculate_ipv4_checksum`, this takes the entire Ethernet + * frame into `frame`, along with its length in `frameLength`. 
+ * + * The offset of the TCP checksum in the `frame` buffer must be passed in + * `tcpChecksumOffset`. + * + * The returned checksum is in network byte order and can be used as-is for + * transmission in the TCP header. + */ +uint16_t __cheri_compartment("TCPIP") + network_calculate_tcp_checksum(const uint8_t *frame, + size_t frameLength, + size_t tcpChecksumOffset); diff --git a/lib/tcpip/network_wrapper.cc b/lib/tcpip/network_wrapper.cc index 2fb134c..122d956 100644 --- a/lib/tcpip/network_wrapper.cc +++ b/lib/tcpip/network_wrapper.cc @@ -7,6 +7,7 @@ #include #include "../firewall/firewall.hh" +#include "checksum-internal.h" #include "network-internal.h" #include "tcpip-internal.h" @@ -472,6 +473,40 @@ namespace } } // namespace +uint16_t network_calculate_ipv4_checksum(const uint8_t *ipv4Header, + size_t headerLength) +{ + // Note: no restarting checks because stateless. + + // `usGenerateChecksum` returns a checksum which is 1) not negated (we + // must bitwise NOT it) and 2) in host byte order. + return htons(~usGenerateChecksum( + 0 /* generate checksum for the full packet */, ipv4Header, headerLength)); +} + +uint16_t network_calculate_tcp_checksum(const uint8_t *frame, + size_t frameLength, + size_t tcpChecksumOffset) +{ + // Note: no restarting checks because stateless. + + // `usGenerateProtocolChecksum` *writes* the checksum into the packet + // buffer. It is not possible to have it return the checksum instead of + // modifying the buffer. This is really annoying because we only have a + // read-only capability to the frame, as we want to *return* the + // checksum. + // + // To workaround this, make a local copy of the Ethernet frame and + // extract the computed checksum from it using `tcpChecksumOffset`. + // Putting this copy on the stack should be fine since this is only + // used in RST packets which have no payload. 
+ uint8_t copy[frameLength]; + memcpy(copy, frame, frameLength); + uint16_t *checksum = reinterpret_cast<uint16_t *>(&copy[tcpChecksumOffset]); + usGenerateProtocolChecksum(copy, frameLength, true /* write checksum */); + return *checksum; +} + int network_host_resolve(const char *hostname, bool useIPv6, NetworkAddress *address) From 1e43540c8a4ce483653179bb558fb8ee7de2cb3f Mon Sep 17 00:00:00 2001 From: Hugo Lefeuvre Date: Mon, 24 Jun 2024 17:28:18 +0200 Subject: [PATCH 2/2] Reply to spurious TCP packets with a RST. Spurious TCP packets appear when the system is force-restarting due to an error. In such a case, open TCP connections are not properly torn down, and the remote TCP endpoint sends retransmissions to the newly restarted system. These spurious TCP packets are problematic because they fill packet buffers and quickly cause packet drops. This commit enables the firewall to reply to spurious TCP packets with TCP RST to stop the flow of spurious packets. We do this with a TCP RST "packet template" which we pre-set at startup and complete with MAC address, IP address, port, sequence number, and checksum, when sending the RST. 
To do this, we need to: - add needed 32-bit `ntohs` and `htons` - add a representation of a TCP header in the firewall compartment Signed-off-by: Hugo Lefeuvre (cherry picked from commit ec5400607d5d681c560c66e3ca2bdb725c7fb742) --- lib/firewall/firewall.cc | 244 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 230 insertions(+), 14 deletions(-) diff --git a/lib/firewall/firewall.cc b/lib/firewall/firewall.cc index 5a833f3..f14480c 100644 --- a/lib/firewall/firewall.cc +++ b/lib/firewall/firewall.cc @@ -2,6 +2,7 @@ // SPDX-License-Identifier: MIT #include "firewall.hh" +#include "../tcpip/checksum-internal.h" #include #include #include @@ -32,6 +33,26 @@ namespace __builtin_bswap16(value) #else value +#endif + ; + } + uint32_t constexpr ntohs(uint32_t value) + { + return +#ifdef __LITTLE_ENDIAN__ + __builtin_bswap32(value) +#else + value +#endif + ; + } + uint32_t constexpr htons(uint32_t value) + { + return +#ifdef __LITTLE_ENDIAN__ + __builtin_bswap32(value) +#else + value #endif ; } @@ -172,13 +193,57 @@ namespace } } __packed; + static_assert(sizeof(IPv4Header) == 20); + struct TCPUDPCommonPrefix { uint16_t sourcePort; uint16_t destinationPort; } __packed; - static_assert(sizeof(IPv4Header) == 20); + struct TCPHeader + { + /** + * Source port. + */ + uint16_t sourcePort; + /** + * Destination port. + */ + uint16_t destinationPort; + /** + * Sequence number. + */ + uint32_t sequenceNumber; + /** + * Acknowledgement number. + */ + uint32_t acknowledgementNumber; + /** + * Reserved bits, data offset, and flags. + */ + uint16_t reserved : 4, dataOffset : 4, fin : 1, syn : 1, rst : 1, + psh : 1, ack : 1, urg : 1, ece : 1, cwr : 1; + /** + * Window size. + */ + uint16_t windowSize; + /** + * Checksum. + */ + uint16_t checksum; + /** + * Urgent pointer. + */ + uint16_t urgentPointer; + } __packed; + + struct FullPacket + { + EthernetHeader ethernet; + IPv4Header ipv4; + TCPHeader tcp; + } __packed; /** * Simple firewall table for IPv4 endpoints. 
@@ -320,6 +385,10 @@ namespace }); if (!found) { + // Note that a failure to remove the endpoint + // is not always a bug. This is meant to happen + // if the DNS resolution failed when binding a + // socket. Debug::log("Failed to remove endpoint (local: {})", localPort); } } @@ -361,6 +430,34 @@ namespace uint32_t dnsServerAddress; _Atomic(uint32_t) dnsIsPermitted; + /** + * This buffer will be pre-set into a TCP RST packet template during + * the initialization of the firewall. When the firewall needs to send + * a TCP RST, this template is updated with matching MACs, addresses, + * ports, sequence number, and checksums, and sent. + */ + static struct FullPacket rstPacketTemplate = {0}; + + /** + * Pre-set the RST packet template. + */ + void init_rst_template() + { + rstPacketTemplate.ethernet.etherType = EtherType::IPv4; + // 5 x 32 bit = 20 bytes + rstPacketTemplate.ipv4.versionAndHeaderLength = (4 << 4) | 5; + // The RST packet does not have a payload. + rstPacketTemplate.ipv4.packetLength = + ntohs(static_cast(sizeof(IPv4Header) + sizeof(TCPHeader))); + // Default TTL as recommended by RFC 1700. + rstPacketTemplate.ipv4.timeToLive = 64; + rstPacketTemplate.ipv4.protocol = IPProtocolNumber::TCP; + // 5 * 32 bit = 20 bytes (again) + rstPacketTemplate.tcp.dataOffset = 5; + // Enable TCP RST flag. + rstPacketTemplate.tcp.rst = 1; + } + bool packet_filter_ipv4(const uint8_t *data, size_t length, uint32_t(IPv4Header::*remoteAddress), @@ -502,16 +599,118 @@ namespace return true; } + /** + * If passed packet is an IPv4 TCP packet, reply with a RST to the + * sender. This takes the whole ethernet frame into `data` (and the + * size of the buffer in `length`). 
+ */ + void try_reset_ipv4_tcp(const uint8_t *data, size_t length) + { + if (__predict_false(length < + sizeof(EthernetHeader) + sizeof(IPv4Header))) + { + Debug::log("Ignoring inbound packet with length {}", length); + return; + } + + EthernetHeader *ethernetHeader = + reinterpret_cast(const_cast(data)); + auto *ipv4Header = + reinterpret_cast(data + sizeof(EthernetHeader)); + if (ipv4Header->protocol == IPProtocolNumber::TCP) + { + if (ipv4Header->body_offset() < sizeof(ipv4Header)) + { + Debug::log("Body offset is {} but IPv4 header is {} bytes", + ipv4Header->body_offset(), + sizeof(ipv4Header)); + return; + } + if (ipv4Header->body_offset() + sizeof(TCPHeader) > length) + { + Debug::log("Ignoring inbound packet with length {}", length); + return; + } + const TCPHeader *tcpHeader = reinterpret_cast( + data + sizeof(EthernetHeader) + ipv4Header->body_offset()); + + // Do not send a RST if the received TCP packet is + // itself a RST. + if (tcpHeader->rst == 1) + { + Debug::log("Ignoring inbound TCP RST packet."); + return; + } + + // Create a read-only capability to pass to the TCP/IP + // stack when we calculate checksums. + CHERI::Capability rstPacketTemplateROCap{ + reinterpret_cast(&rstPacketTemplate)}; + // Remove all permissions except load. This also + // removes global, so that this cannot be captured. + rstPacketTemplateROCap.permissions() &= + CHERI::PermissionSet{CHERI::Permission::Load}; + + /// Build the RST packet. + // Source and destination MACs. + std::copy(std::begin(ethernetHeader->source), + std::end(ethernetHeader->source), + std::begin(rstPacketTemplate.ethernet.destination)); + std::copy(std::begin(ethernetHeader->destination), + std::end(ethernetHeader->destination), + std::begin(rstPacketTemplate.ethernet.source)); + // Source and destination IPs. + rstPacketTemplate.ipv4.sourceAddress = + ipv4Header->destinationAddress; + rstPacketTemplate.ipv4.destinationAddress = + ipv4Header->sourceAddress; + // IPv4 checksum. 
The value returned is in network byte + // order. Make sure to reset the checksum field's value + // before calculation. + rstPacketTemplate.ipv4.headerChecksum = 0; + rstPacketTemplate.ipv4.headerChecksum = + network_calculate_ipv4_checksum(rstPacketTemplateROCap + + sizeof(EthernetHeader), + sizeof(IPv4Header)); + // Source and destination ports. + rstPacketTemplate.tcp.sourcePort = tcpHeader->destinationPort; + rstPacketTemplate.tcp.destinationPort = tcpHeader->sourcePort; + // Set the sequence number to the ack. + rstPacketTemplate.tcp.sequenceNumber = + tcpHeader->acknowledgementNumber; + // TCP checksum. The value returned is in network byte + // order. No need to reset the field here as it isn't + // included in the calculation. + rstPacketTemplate.tcp.checksum = network_calculate_tcp_checksum( + rstPacketTemplateROCap, + sizeof(FullPacket), + sizeof(EthernetHeader) + sizeof(IPv4Header) + 16); + + /// Send the RST packet. + Debug::log("Sending a RST packet."); + LockGuard g{sendLock}; + auto &ethernet = lazy_network_interface(); + // Do not go through the firewall: the packet would be + // rejected since the destination is not present in the + // table. + ethernet.send_frame( + rstPacketTemplateROCap, + sizeof(FullPacket), + [](const uint8_t *data, size_t length) { return true; }); + } + } + bool packet_filter_ingress(const uint8_t *data, size_t length) { - uint32_t stateSnapshot = tcpipRestartState->load(); + uint32_t stateSnapshot = tcpipRestartState->load(); + bool isOngoingReset = false; if (stateSnapshot != 0 && ((stateSnapshot & RestartStateDriverKickedBit) == 0)) { // We are in a reset and the driver has not yet been // restarted. 
Debug::log("Dropping packet due to network stack restart."); - return false; + isOngoingReset = true; } // Not a valid Ethernet frame (64 bytes including four-byte FCS, which @@ -523,26 +722,42 @@ namespace } EthernetHeader *ethernetHeader = reinterpret_cast<EthernetHeader *>(const_cast<uint8_t *>(data)); + bool accept = false; switch (ethernetHeader->etherType) { // For now, testing with v6 disabled. case EtherType::IPv6: - return true; + accept = true; + break; case EtherType::ARP: Debug::log("Saw ARP frame"); - return true; + accept = true; + break; case EtherType::IPv4: - return packet_filter_ipv4(data + sizeof(EthernetHeader), - length - sizeof(EthernetHeader), - &IPv4Header::sourceAddress, - &TCPUDPCommonPrefix::destinationPort, - &TCPUDPCommonPrefix::sourcePort, - false); - default: - return false; + if (!isOngoingReset) + { + accept = + packet_filter_ipv4(data + sizeof(EthernetHeader), + length - sizeof(EthernetHeader), + &IPv4Header::sourceAddress, + &TCPUDPCommonPrefix::destinationPort, + &TCPUDPCommonPrefix::sourcePort, + false); + } + if (!accept) + { + // If this is a TCP packet, send a RST + // to the source. + // + // Only reset for IPv4 for now. Pass + // the whole Ethernet packet (unlike + // `packet_filter_ipv4`). + try_reset_ipv4_tcp(data, length); + } + break; } - return false; + return accept && !isOngoingReset; } std::atomic receivedCounter; @@ -787,6 +1002,7 @@ bool ethernet_driver_start(std::atomic *state) Debug::log("Initialising network interface"); auto &ethernet = lazy_network_interface(); ethernet.mac_address_set(); + init_rst_template(); // Poke the barrier and make the driver thread start. barrier = 2; barrier.notify_one();