Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion felix/bpf-gpl/conntrack_types.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Project Calico BPF dataplane programs.
// Copyright (c) 2020-2021 Tigera, Inc. All rights reserved.
// Copyright (c) 2020-2025 Tigera, Inc. All rights reserved.
// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later

#ifndef __CALI_CONNTRACK_TYPES_H__
Expand Down Expand Up @@ -39,6 +39,7 @@ enum cali_ct_type {
#define CALI_CT_FLAG_NP_REMOTE 0x1000 /* marks connections from local host to remote backend of a nodeport */
#define CALI_CT_FLAG_NP_NO_DSR 0x2000 /* marks connections from a client which is excluded from DSR */
#define CALI_CT_FLAG_SKIP_REDIR_PEER 0x4000 /* marks connections from a client which is excluded from redir */
#define CALI_CT_FLAG_CLUSTER_EXTERNAL 0x8000 /* marks connections with source or destination outside cluster */

struct calico_ct_leg {
__u64 bytes;
Expand Down
8 changes: 6 additions & 2 deletions felix/bpf-gpl/qos.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,13 @@ static CALI_BPF_INLINE int qos_enforce_packet_rate(struct cali_tc_ctx *ctx)
return TC_ACT_SHOT;
}

static CALI_BPF_INLINE bool qos_set_dscp(struct cali_tc_ctx *ctx)
/* qos_dscp_needs_update reports whether the DSCP value of the current packet
 * should be rewritten.
 *
 * Returns true only when both of the following hold:
 *  - EGRESS_DSCP is non-negative; and
 *  - CALI_ST_CLUSTER_EXTERNAL is set in ctx->state->flags.
 */
static CALI_BPF_INLINE bool qos_dscp_needs_update(struct cali_tc_ctx *ctx)
{
	/* A negative EGRESS_DSCP means there is nothing to apply. */
	if (EGRESS_DSCP < 0) {
		return false;
	}

	return (ctx->state->flags & CALI_ST_CLUSTER_EXTERNAL) != 0;
}

static CALI_BPF_INLINE bool qos_dscp_set(struct cali_tc_ctx *ctx)
{
// TODO (mazdak): set DSCP only if traffic is leaving cluster
__s8 dscp = EGRESS_DSCP;
CALI_DEBUG("setting dscp to %d", dscp);

Expand Down
12 changes: 12 additions & 0 deletions felix/bpf-gpl/routes.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ static CALI_BPF_INLINE enum cali_rt_flags cali_rt_lookup_flags(ipv46_addr_t *add
/* True only when all of CALI_RT_LOCAL, CALI_RT_HOST and CALI_RT_TUNNELED are set in t. */
#define cali_rt_flags_local_tunneled_host(t) (((t) & (CALI_RT_LOCAL | CALI_RT_HOST | CALI_RT_TUNNELED)) == (CALI_RT_LOCAL | CALI_RT_HOST | CALI_RT_TUNNELED))
/* True when CALI_RT_IN_POOL is set in t. */
#define cali_rt_flags_is_in_pool(t) (((t) & CALI_RT_IN_POOL) == CALI_RT_IN_POOL)
/* Non-zero when CALI_RT_SKIP_INGRESS_REDIRECT is set in t (the raw masked value, not normalized to 0/1). */
#define cali_rt_flags_skip_ingress_redirect(t) (((t) & CALI_RT_SKIP_INGRESS_REDIRECT))
/* True when neither CALI_RT_WORKLOAD nor CALI_RT_HOST is set in t; this also holds for t == 0. */
#define cali_rt_flags_external(t) (!((t) & (CALI_RT_WORKLOAD | CALI_RT_HOST)))

static CALI_BPF_INLINE bool rt_addr_is_local_host(ipv46_addr_t *addr)
{
Expand All @@ -117,6 +118,17 @@ static CALI_BPF_INLINE bool rt_addr_is_local_tunneled_host(ipv46_addr_t *addr)
return cali_rt_flags_local_tunneled_host(cali_rt_lookup_flags(addr));
}

/* rt_addr_is_external looks up the route flags for addr and returns true when
 * neither CALI_RT_WORKLOAD nor CALI_RT_HOST is set in them.
 *
 * NOTE(review): presumably cali_rt_lookup_flags() yields no flags when there is
 * no matching route, so such an address would also count as external - confirm.
 */
static CALI_BPF_INLINE bool rt_addr_is_external(ipv46_addr_t *addr)
{
	enum cali_rt_flags rt_flags = cali_rt_lookup_flags(addr);

	return cali_rt_flags_external(rt_flags);
}

/* rt_addr_is_host_or_in_pool performs a single route-flags lookup for addr and
 * returns true when the flags satisfy cali_rt_flags_host() or
 * cali_rt_flags_is_in_pool().
 */
static CALI_BPF_INLINE bool rt_addr_is_host_or_in_pool(ipv46_addr_t *addr)
{
	__u32 rt_flags = cali_rt_lookup_flags(addr);

	if (cali_rt_flags_host(rt_flags)) {
		return true;
	}

	return cali_rt_flags_is_in_pool(rt_flags);
}

// Don't perform SNAT if either:
// - packet is destined to an address in an IP pool;
// - packet is destined to local host; or
Expand Down
30 changes: 26 additions & 4 deletions felix/bpf-gpl/tc.c
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,9 @@ static CALI_BPF_INLINE void calico_tc_process_ct_lookup(struct cali_tc_ctx *ctx)
if (ctx->state->ct_result.flags & CALI_CT_FLAG_NAT_OUT) {
ctx->state->flags |= CALI_ST_NAT_OUTGOING;
}
if (ctx->state->ct_result.flags & CALI_CT_FLAG_CLUSTER_EXTERNAL) {
ctx->state->flags |= CALI_ST_CLUSTER_EXTERNAL;
}

if (CALI_F_TO_HOST && !CALI_F_NAT_IF &&
(ct_result_rc(ctx->state->ct_result.rc) == CALI_CT_ESTABLISHED ||
Expand Down Expand Up @@ -545,17 +548,33 @@ static CALI_BPF_INLINE void calico_tc_process_ct_lookup(struct cali_tc_ctx *ctx)
ctx->state->flags |= CALI_ST_NAT_OUTGOING;
}
}
// Check if traffic is leaving cluster. We might need to set DSCP later.
if (cali_rt_flags_is_in_pool(r->flags) && rt_addr_is_external(&ctx->state->post_nat_ip_dst)) {
CALI_DEBUG("Outside cluster dest " IP_FMT "", debug_ip(ctx->state->post_nat_ip_dst));
ctx->state->flags |= CALI_ST_CLUSTER_EXTERNAL;
}
/* If 3rd party CNI is used and dest is outside cluster. See commit fc711b192f for details. */
if (!(r->flags & CALI_RT_IN_POOL)) {
if (!(cali_rt_flags_is_in_pool(r->flags))) {
CALI_DEBUG("Source " IP_FMT " not in IP pool", debug_ip(ctx->state->ip_src));
r = cali_rt_lookup(&ctx->state->post_nat_ip_dst);
if (!r || !(r->flags & (CALI_RT_WORKLOAD | CALI_RT_HOST))) {
if (rt_addr_is_external(&ctx->state->post_nat_ip_dst)) {
CALI_DEBUG("Outside cluster dest " IP_FMT "", debug_ip(ctx->state->post_nat_ip_dst));
ctx->state->flags |= CALI_ST_SKIP_FIB;
}
}
}

// If either the source or the destination is outside the cluster, set the flag, as we might need to update DSCP later.
if ((CALI_F_TO_HEP) && (rt_addr_is_local_host(&ctx->state->ip_src)) &&
(rt_addr_is_external(&ctx->state->post_nat_ip_dst))) {
CALI_DEBUG("Outside cluster dest " IP_FMT "", debug_ip(ctx->state->post_nat_ip_dst));
ctx->state->flags |= CALI_ST_CLUSTER_EXTERNAL;
}
if ((CALI_F_FROM_HEP) && (rt_addr_is_host_or_in_pool(&ctx->state->post_nat_ip_dst)) &&
(rt_addr_is_external(&ctx->state->ip_src))) {
CALI_DEBUG("Outside cluster source " IP_FMT "", debug_ip(ctx->state->ip_src));
ctx->state->flags |= CALI_ST_CLUSTER_EXTERNAL;
}

/* [SMC] I had to add this revalidation when refactoring the conntrack code to use the context and
* adding possible packet pulls in the VXLAN logic. I believe it is spurious but the verifier is
* not clever enough to spot that we'd have already bailed out if one of the pulls failed. */
Expand Down Expand Up @@ -1327,7 +1346,7 @@ int calico_tc_skb_accepted_entrypoint(struct __sk_buff *skb)
deny_reason(ctx, CALI_REASON_DROPPED_BY_QOS);
goto deny;
}
if ((CALI_F_FROM_WEP || CALI_F_TO_HEP) && EGRESS_DSCP >= 0 && !qos_set_dscp(ctx)) {
if ((CALI_F_FROM_WEP || CALI_F_TO_HEP) && qos_dscp_needs_update(ctx) && !qos_dscp_set(ctx)) {
goto deny;
}
ctx->fwd = calico_tc_skb_accepted(ctx);
Expand Down Expand Up @@ -1408,6 +1427,9 @@ int calico_tc_skb_new_flow_entrypoint(struct __sk_buff *skb)
if (state->flags & CALI_ST_NAT_OUTGOING) {
ct_ctx_nat->flags |= CALI_CT_FLAG_NAT_OUT;
}
if (state->flags & CALI_ST_CLUSTER_EXTERNAL) {
ct_ctx_nat->flags |= CALI_CT_FLAG_CLUSTER_EXTERNAL;
}
if (CALI_F_TO_HOST && state->flags & CALI_ST_SKIP_FIB) {
ct_ctx_nat->flags |= CALI_CT_FLAG_SKIP_FIB;
}
Expand Down
3 changes: 3 additions & 0 deletions felix/bpf-gpl/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@ enum cali_state_flags {
CALI_ST_SKIP_REDIR_PEER = 0x800,
/* CALI_ST_SKIP_REDIR_ONCE skips redirection once for this particular packet */
CALI_ST_SKIP_REDIR_ONCE = 0x1000,
/* CALI_ST_CLUSTER_EXTERNAL is set if the packet is heading toward or originating from
* an endpoint outside the cluster */
CALI_ST_CLUSTER_EXTERNAL = 0x2000,
};

struct fwd {
Expand Down
31 changes: 16 additions & 15 deletions felix/bpf/conntrack/v3/map.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,21 +210,22 @@ const (
TypeNATForward
TypeNATReverse

FlagNATOut uint16 = (1 << 0)
FlagNATFwdDsr uint16 = (1 << 1)
FlagNATNPFwd uint16 = (1 << 2)
FlagSkipFIB uint16 = (1 << 3)
FlagReserved4 uint16 = (1 << 4)
FlagReserved5 uint16 = (1 << 5)
FlagExtLocal uint16 = (1 << 6)
FlagViaNATIf uint16 = (1 << 7)
FlagSrcDstBA uint16 = (1 << 8)
FlagHostPSNAT uint16 = (1 << 9)
FlagSvcSelf uint16 = (1 << 10)
FlagNPLoop uint16 = (1 << 11)
FlagNPRemote uint16 = (1 << 12)
FlagNoDSR uint16 = (1 << 13)
FlagNoRedirPeer uint16 = (1 << 14)
FlagNATOut uint16 = (1 << 0)
FlagNATFwdDsr uint16 = (1 << 1)
FlagNATNPFwd uint16 = (1 << 2)
FlagSkipFIB uint16 = (1 << 3)
FlagReserved4 uint16 = (1 << 4)
FlagReserved5 uint16 = (1 << 5)
FlagExtLocal uint16 = (1 << 6)
FlagViaNATIf uint16 = (1 << 7)
FlagSrcDstBA uint16 = (1 << 8)
FlagHostPSNAT uint16 = (1 << 9)
FlagSvcSelf uint16 = (1 << 10)
FlagNPLoop uint16 = (1 << 11)
FlagNPRemote uint16 = (1 << 12)
FlagNoDSR uint16 = (1 << 13)
FlagNoRedirPeer uint16 = (1 << 14)
FlagClusterExternal uint16 = (1 << 15)
)

func (e Value) ReverseNATKey() KeyInterface {
Expand Down
2 changes: 1 addition & 1 deletion felix/bpf/ut/icmp_too_big_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ func TestICMPTooBigNATNodePort(t *testing.T) {
v, ok := ct[conntrack.NewKey(uint8(ipv4.Protocol), ipv4.SrcIP, uint16(udp.SrcPort), natIP.To4(), natPort)]
Expect(ok).To(BeTrue())
Expect(v.Type()).To(Equal(conntrack.TypeNATReverse))
Expect(v.Flags()).To(Equal(conntrack3.FlagNATNPFwd))
Expect(v.Flags()).To(Equal(conntrack3.FlagNATNPFwd | conntrack3.FlagClusterExternal))

_, _, _, _, pkt2Bytes, err := testPacket(4, nil, &origIPHeader, udpDefault, make([]byte, 1600))
Expect(err).NotTo(HaveOccurred())
Expand Down
20 changes: 10 additions & 10 deletions felix/bpf/ut/nat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ func TestNATPodPodXNode(t *testing.T) {
Expect(ok).To(BeTrue())
// No NATing, service already resolved
Expect(v.Type()).To(Equal(conntrack.TypeNormal))
Expect(v.Flags()).To(Equal(uint16(0)))
Expect(v.Flags()).To(Equal(conntrack3.FlagClusterExternal))

// Arriving at workload at node 2
expectMark(tcdefs.MarkSeen)
Expand Down Expand Up @@ -432,7 +432,7 @@ func TestNATNodePort(t *testing.T) {
v, ok := ct[conntrack.NewKey(uint8(ipv4.Protocol), ipv4.SrcIP, uint16(udp.SrcPort), natIP.To4(), natPort)]
Expect(ok).To(BeTrue())
Expect(v.Type()).To(Equal(conntrack.TypeNATReverse))
Expect(v.Flags()).To(Equal(conntrack3.FlagNATNPFwd))
Expect(v.Flags()).To(Equal(conntrack3.FlagNATNPFwd | conntrack3.FlagClusterExternal))

expectMark(tcdefs.MarkSeenBypassForward)
// Leaving node 1
Expand Down Expand Up @@ -550,7 +550,7 @@ func TestNATNodePort(t *testing.T) {
v, ok = ct[conntrack.NewKey(uint8(ipv4.Protocol), ipv4.SrcIP, uint16(udp.SrcPort), natIP.To4(), natPort)]
Expect(ok).To(BeTrue())
Expect(v.Type()).To(Equal(conntrack.TypeNATReverse))
Expect(v.Flags()).To(Equal(conntrack3.FlagExtLocal))
Expect(v.Flags()).To(Equal(conntrack3.FlagExtLocal | conntrack3.FlagClusterExternal))

dumpARPMap(arpMap)

Expand Down Expand Up @@ -1222,7 +1222,7 @@ func TestNATNodePortNoFWD(t *testing.T) {
v, ok := ct[conntrack.NewKey(uint8(ipv4.Protocol), ipv4.SrcIP, uint16(udp.SrcPort), natIP.To4(), natPort)]
Expect(ok).To(BeTrue())
Expect(v.Type()).To(Equal(conntrack.TypeNATReverse))
Expect(v.Flags()).To(Equal(conntrack3.FlagExtLocal))
Expect(v.Flags()).To(Equal(conntrack3.FlagExtLocal | conntrack3.FlagClusterExternal))

// Arriving at workload
runBpfTest(t, "calico_to_workload_ep", rulesDefaultAllow, func(bpfrun bpfProgRunFn) {
Expand Down Expand Up @@ -2126,7 +2126,7 @@ func TestNATNodePortIngressDSR(t *testing.T) {
v, ok := ct[conntrack.NewKey(uint8(ipv4.Protocol), ipv4.SrcIP, uint16(udp.SrcPort), natIP.To4(), natPort)]
Expect(ok).To(BeTrue())
Expect(v.Type()).To(Equal(conntrack.TypeNATReverse))
Expect(v.Flags()).To(Equal(conntrack3.FlagNATFwdDsr | conntrack3.FlagNATNPFwd))
Expect(v.Flags()).To(Equal(conntrack3.FlagNATFwdDsr | conntrack3.FlagNATNPFwd | conntrack3.FlagClusterExternal))
}

func TestNATNodePortDSROptout(t *testing.T) {
Expand Down Expand Up @@ -2227,7 +2227,7 @@ func TestNATNodePortDSROptout(t *testing.T) {
v, ok := ct[conntrack.NewKey(uint8(ipv4.Protocol), ipv4.SrcIP, uint16(udp.SrcPort), natIP.To4(), natPort)]
Expect(ok).To(BeTrue())
Expect(v.Type()).To(Equal(conntrack.TypeNATReverse))
Expect(v.Flags()).To(Equal(conntrack3.FlagNATFwdDsr | conntrack3.FlagNATNPFwd))
Expect(v.Flags()).To(Equal(conntrack3.FlagNATFwdDsr | conntrack3.FlagNATNPFwd | conntrack3.FlagClusterExternal))

// N.B. we skip the forward part from node, we just needed to have the right packet.

Expand Down Expand Up @@ -2329,7 +2329,7 @@ func TestNATNodePortDSROptout(t *testing.T) {
v, ok = ct[conntrack.NewKey(uint8(ipv4.Protocol), ipv4.SrcIP, uint16(udp.SrcPort), natIP.To4(), natPort)]
Expect(ok).To(BeTrue())
Expect(v.Type()).To(Equal(conntrack.TypeNATReverse))
Expect(v.Flags()).To(Equal(conntrack3.FlagExtLocal | conntrack3.FlagNoDSR))
Expect(v.Flags()).To(Equal(conntrack3.FlagExtLocal | conntrack3.FlagNoDSR | conntrack3.FlagClusterExternal))

skbMark = tcdefs.MarkSeen

Expand Down Expand Up @@ -2995,7 +2995,7 @@ func TestNATPodPodXNodeV6(t *testing.T) {
Expect(ok).To(BeTrue())
// No NATing, service already resolved
Expect(v.Type()).To(Equal(conntrack.TypeNormal))
Expect(v.Flags()).To(Equal(uint16(0)))
Expect(v.Flags()).To(Equal(conntrack3.FlagClusterExternal))

// Arriving at workload at node 2
expectMark(tcdefs.MarkSeen)
Expand Down Expand Up @@ -3220,7 +3220,7 @@ func TestNATNodePortV6(t *testing.T) {
v, ok := ct[conntrack.NewKeyV6(uint8(17 /* UDP */), ipv6.SrcIP, uint16(udp.SrcPort), natIP, natPort)]
Expect(ok).To(BeTrue())
Expect(v.Type()).To(Equal(conntrack.TypeNATReverse))
Expect(v.Flags()).To(Equal(conntrack3.FlagNATNPFwd))
Expect(v.Flags()).To(Equal(conntrack3.FlagNATNPFwd | conntrack3.FlagClusterExternal))

expectMark(tcdefs.MarkSeenBypassForward)
// Leaving node 1
Expand Down Expand Up @@ -3338,7 +3338,7 @@ func TestNATNodePortV6(t *testing.T) {
v, ok = ct[conntrack.NewKeyV6(uint8(17 /* UDP */), ipv6.SrcIP, uint16(udp.SrcPort), natIP, natPort)]
Expect(ok).To(BeTrue())
Expect(v.Type()).To(Equal(conntrack.TypeNATReverse))
Expect(v.Flags()).To(Equal(conntrack3.FlagExtLocal))
Expect(v.Flags()).To(Equal(conntrack3.FlagExtLocal | conntrack3.FlagClusterExternal))

dumpARPMapV6(arpMapV6)

Expand Down
Loading