Skip to content

Commit cce7dd9

Browse files
committed
Reply to spurious TCP packets with a RST.
Spurious TCP packets appear when the system is force-restarting due to an error. In such a case, open TCP connections are not properly torn down, and the remote TCP endpoint sends retransmissions to the newly restarted system. These spurious TCP packets are problematic because they fill packet buffers and quickly cause packet drops. This commit enables the firewall to reply to spurious TCP packets with a TCP RST to stop the flow of spurious packets. We do this with a TCP RST "packet template" which we pre-set at startup and complete with MAC addresses, IP addresses, ports, sequence number, and checksums when sending the RST. To do this, we need to: - add the needed 32-bit `ntohs` and `htons` - add a representation of a TCP header in the firewall compartment Signed-off-by: Hugo Lefeuvre <[email protected]> (cherry picked from commit ec54006)
1 parent 4755619 commit cce7dd9

File tree

1 file changed

+230
-14
lines changed

1 file changed

+230
-14
lines changed

lib/firewall/firewall.cc

Lines changed: 230 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// SPDX-License-Identifier: MIT
33

44
#include "firewall.hh"
5+
#include "../tcpip/checksum-internal.h"
56
#include <atomic>
67
#include <compartment-macros.h>
78
#include <debug.hh>
@@ -32,6 +33,26 @@ namespace
3233
__builtin_bswap16(value)
3334
#else
3435
value
36+
#endif
37+
;
38+
}
39+
/**
 * 32-bit flavour of `ntohs`: converts `value` from network byte order to
 * host byte order.
 *
 * The four bytes are reversed when `__LITTLE_ENDIAN__` is defined;
 * otherwise `value` is returned unchanged.
 */
uint32_t constexpr ntohs(uint32_t value)
{
#ifdef __LITTLE_ENDIAN__
	return __builtin_bswap32(value);
#else
	return value;
#endif
}
49+
/**
 * 32-bit flavour of `htons`: converts `value` from host byte order to
 * network byte order.
 *
 * The four bytes are reversed when `__LITTLE_ENDIAN__` is defined;
 * otherwise `value` is returned unchanged.
 */
uint32_t constexpr htons(uint32_t value)
{
#ifdef __LITTLE_ENDIAN__
	return __builtin_bswap32(value);
#else
	return value;
#endif
}
@@ -172,13 +193,57 @@ namespace
172193
}
173194
} __packed;
174195

196+
static_assert(sizeof(IPv4Header) == 20);
197+
175198
/**
 * The two port fields found at the very start of a transport header.
 * `TCPHeader` begins with the same two fields, and the packet filter
 * selects between them with pointers to members of this structure.
 */
struct TCPUDPCommonPrefix
176199
{
177200
/// Port the packet was sent from.
uint16_t sourcePort;
178201
/// Port the packet is addressed to.
uint16_t destinationPort;
179202
} __packed;
180203

181-
static_assert(sizeof(IPv4Header) == 20);
204+
/**
 * In-memory representation of a TCP header without options.
 *
 * The structure is overlaid directly on packet buffers, so its size and
 * field offsets must match the 20-byte on-the-wire layout exactly; the
 * `static_assert` below enforces the size.
 */
struct TCPHeader
{
	/**
	 * Source port.
	 */
	uint16_t sourcePort;
	/**
	 * Destination port.
	 */
	uint16_t destinationPort;
	/**
	 * Sequence number.
	 */
	uint32_t sequenceNumber;
	/**
	 * Acknowledgement number.
	 */
	uint32_t acknowledgementNumber;
	/**
	 * Reserved bits, data offset (header length in 32-bit words), and
	 * flags.
	 *
	 * NOTE(review): the declaration order assumes that the compiler
	 * allocates bit-fields starting from the least-significant bit of the
	 * underlying 16-bit unit, as little-endian ABIs do - confirm for any
	 * new target.
	 */
	uint16_t reserved : 4, dataOffset : 4, fin : 1, syn : 1, rst : 1,
	  psh : 1, ack : 1, urg : 1, ece : 1, cwr : 1;
	/**
	 * Window size.
	 */
	uint16_t windowSize;
	/**
	 * Checksum.
	 */
	uint16_t checksum;
	/**
	 * Urgent pointer.
	 */
	uint16_t urgentPointer;
} __packed;

// Catch accidental layout changes at compile time, as is done for
// `IPv4Header`.
static_assert(sizeof(TCPHeader) == 20);
240+
241+
struct FullPacket
242+
{
243+
EthernetHeader ethernet;
244+
IPv4Header ipv4;
245+
TCPHeader tcp;
246+
} __packed;
182247

183248
/**
184249
* Simple firewall table for IPv4 endpoints.
@@ -320,6 +385,10 @@ namespace
320385
});
321386
if (!found)
322387
{
388+
// Note that a failure to remove the endpoint
389+
// is not always a bug. This is meant to happen
390+
// if the DNS resolution failed when binding a
391+
// socket.
323392
Debug::log("Failed to remove endpoint (local: {})", localPort);
324393
}
325394
}
@@ -361,6 +430,34 @@ namespace
361430
uint32_t dnsServerAddress;
362431
_Atomic(uint32_t) dnsIsPermitted;
363432

433+
/**
434+
* This buffer will be pre-set into a TCP RST packet template during
435+
* the initialization of the firewall. When the firewall needs to send
436+
* a TCP RST, this template is updated with matching MACs, addresses,
437+
* ports, sequence number, and checksums, and sent.
438+
*/
439+
static struct FullPacket rstPacketTemplate = {0};
440+
441+
/**
442+
* Pre-set the RST packet template.
443+
*/
444+
void init_rst_template()
445+
{
446+
rstPacketTemplate.ethernet.etherType = EtherType::IPv4;
447+
// 5 x 32 bit = 20 bytes
448+
rstPacketTemplate.ipv4.versionAndHeaderLength = (4 << 4) | 5;
449+
// The RST packet does not have a payload.
450+
rstPacketTemplate.ipv4.packetLength =
451+
ntohs(static_cast<uint16_t>(sizeof(IPv4Header) + sizeof(TCPHeader)));
452+
// Default TTL as recommended by RFC 1700.
453+
rstPacketTemplate.ipv4.timeToLive = 64;
454+
rstPacketTemplate.ipv4.protocol = IPProtocolNumber::TCP;
455+
// 5 * 32 bit = 20 bytes (again)
456+
rstPacketTemplate.tcp.dataOffset = 5;
457+
// Enable TCP RST flag.
458+
rstPacketTemplate.tcp.rst = 1;
459+
}
460+
364461
bool packet_filter_ipv4(const uint8_t *data,
365462
size_t length,
366463
uint32_t(IPv4Header::*remoteAddress),
@@ -502,16 +599,118 @@ namespace
502599
return true;
503600
}
504601

602+
/**
603+
* If passed packet is an IPv4 TCP packet, reply with a RST to the
604+
* sender. This takes the whole ethernet frame into `data` (and the
605+
* size of the buffer in `length`).
606+
*/
607+
void try_reset_ipv4_tcp(const uint8_t *data, size_t length)
608+
{
609+
if (__predict_false(length <
610+
sizeof(EthernetHeader) + sizeof(IPv4Header)))
611+
{
612+
Debug::log("Ignoring inbound packet with length {}", length);
613+
return;
614+
}
615+
616+
EthernetHeader *ethernetHeader =
617+
reinterpret_cast<EthernetHeader *>(const_cast<uint8_t *>(data));
618+
auto *ipv4Header =
619+
reinterpret_cast<const IPv4Header *>(data + sizeof(EthernetHeader));
620+
if (ipv4Header->protocol == IPProtocolNumber::TCP)
621+
{
622+
if (ipv4Header->body_offset() < sizeof(ipv4Header))
623+
{
624+
Debug::log("Body offset is {} but IPv4 header is {} bytes",
625+
ipv4Header->body_offset(),
626+
sizeof(ipv4Header));
627+
return;
628+
}
629+
if (ipv4Header->body_offset() + sizeof(TCPHeader) > length)
630+
{
631+
Debug::log("Ignoring inbound packet with length {}", length);
632+
return;
633+
}
634+
const TCPHeader *tcpHeader = reinterpret_cast<const TCPHeader *>(
635+
data + sizeof(EthernetHeader) + ipv4Header->body_offset());
636+
637+
// Do not send a RST if the received TCP packet is
638+
// itself a RST.
639+
if (tcpHeader->rst == 1)
640+
{
641+
Debug::log("Ignoring inbound TCP RST packet.");
642+
return;
643+
}
644+
645+
// Create a read-only capability to pass to the TCP/IP
646+
// stack when we calculate checksums.
647+
CHERI::Capability<uint8_t> rstPacketTemplateROCap{
648+
reinterpret_cast<uint8_t *>(&rstPacketTemplate)};
649+
// Remove all permissions except load. This also
650+
// removes global, so that this cannot be captured.
651+
rstPacketTemplateROCap.permissions() &=
652+
CHERI::PermissionSet{CHERI::Permission::Load};
653+
654+
/// Build the RST packet.
655+
// Source and destination MACs.
656+
std::copy(std::begin(ethernetHeader->source),
657+
std::end(ethernetHeader->source),
658+
std::begin(rstPacketTemplate.ethernet.destination));
659+
std::copy(std::begin(ethernetHeader->destination),
660+
std::end(ethernetHeader->destination),
661+
std::begin(rstPacketTemplate.ethernet.source));
662+
// Source and destination IPs.
663+
rstPacketTemplate.ipv4.sourceAddress =
664+
ipv4Header->destinationAddress;
665+
rstPacketTemplate.ipv4.destinationAddress =
666+
ipv4Header->sourceAddress;
667+
// IPv4 checksum. The value returned is in network byte
668+
// order. Make sure to reset the checksum field's value
669+
// before calculation.
670+
rstPacketTemplate.ipv4.headerChecksum = 0;
671+
rstPacketTemplate.ipv4.headerChecksum =
672+
network_calculate_ipv4_checksum(rstPacketTemplateROCap +
673+
sizeof(EthernetHeader),
674+
sizeof(IPv4Header));
675+
// Source and destination ports.
676+
rstPacketTemplate.tcp.sourcePort = tcpHeader->destinationPort;
677+
rstPacketTemplate.tcp.destinationPort = tcpHeader->sourcePort;
678+
// Set the sequence number to the ack.
679+
rstPacketTemplate.tcp.sequenceNumber =
680+
tcpHeader->acknowledgementNumber;
681+
// TCP checksum. The value returned is in network byte
682+
// order. No need to reset the field here as it isn't
683+
// included in the calculation.
684+
rstPacketTemplate.tcp.checksum = network_calculate_tcp_checksum(
685+
rstPacketTemplateROCap,
686+
sizeof(FullPacket),
687+
sizeof(EthernetHeader) + sizeof(IPv4Header) + 16);
688+
689+
/// Send the RST packet.
690+
Debug::log("Sending a RST packet.");
691+
LockGuard g{sendLock};
692+
auto &ethernet = lazy_network_interface();
693+
// Do not go through the firewall: the packet would be
694+
// rejected since the destination is not present in the
695+
// table.
696+
ethernet.send_frame(
697+
rstPacketTemplateROCap,
698+
sizeof(FullPacket),
699+
[](const uint8_t *data, size_t length) { return true; });
700+
}
701+
}
702+
505703
bool packet_filter_ingress(const uint8_t *data, size_t length)
506704
{
507-
uint32_t stateSnapshot = tcpipRestartState->load();
705+
uint32_t stateSnapshot = tcpipRestartState->load();
706+
bool isOngoingReset = false;
508707
if (stateSnapshot != 0 &&
509708
((stateSnapshot & RestartStateDriverKickedBit) == 0))
510709
{
511710
// We are in a reset and the driver has not yet been
512711
// restarted.
513712
Debug::log("Dropping packet due to network stack restart.");
514-
return false;
713+
isOngoingReset = true;
515714
}
516715

517716
// Not a valid Ethernet frame (64 bytes including four-byte FCS, which
@@ -523,26 +722,42 @@ namespace
523722
}
524723
EthernetHeader *ethernetHeader =
525724
reinterpret_cast<EthernetHeader *>(const_cast<uint8_t *>(data));
725+
bool accept = false;
526726
switch (ethernetHeader->etherType)
527727
{
528728
// For now, testing with v6 disabled.
529729
case EtherType::IPv6:
530-
return true;
730+
accept = true;
731+
break;
531732
case EtherType::ARP:
532733
Debug::log("Saw ARP frame");
533-
return true;
734+
accept = true;
735+
break;
534736
case EtherType::IPv4:
535-
return packet_filter_ipv4(data + sizeof(EthernetHeader),
536-
length - sizeof(EthernetHeader),
537-
&IPv4Header::sourceAddress,
538-
&TCPUDPCommonPrefix::destinationPort,
539-
&TCPUDPCommonPrefix::sourcePort,
540-
false);
541-
default:
542-
return false;
737+
if (!isOngoingReset)
738+
{
739+
accept =
740+
packet_filter_ipv4(data + sizeof(EthernetHeader),
741+
length - sizeof(EthernetHeader),
742+
&IPv4Header::sourceAddress,
743+
&TCPUDPCommonPrefix::destinationPort,
744+
&TCPUDPCommonPrefix::sourcePort,
745+
false);
746+
}
747+
if (!accept)
748+
{
749+
// If this is a TCP packet, send a RST
750+
// to the source.
751+
//
752+
// Only reset for IPv4 for now. Pass
753+
// the whole Ethernet packet (unlike
754+
// `packet_filter_ipv4`).
755+
try_reset_ipv4_tcp(data, length);
756+
}
757+
break;
543758
}
544759

545-
return false;
760+
return accept && !isOngoingReset;
546761
}
547762

548763
std::atomic<uint32_t> receivedCounter;
@@ -787,6 +1002,7 @@ bool ethernet_driver_start(std::atomic<uint8_t> *state)
7871002
Debug::log("Initialising network interface");
7881003
auto &ethernet = lazy_network_interface();
7891004
ethernet.mac_address_set();
1005+
init_rst_template();
7901006
// Poke the barrier and make the driver thread start.
7911007
barrier = 2;
7921008
barrier.notify_one();

0 commit comments

Comments
 (0)