Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
235 changes: 235 additions & 0 deletions pkg/neg/syncers/endpoints_calculator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,241 @@ func TestClusterWantedNEGsCount(t *testing.T) {
}
}

// TestClusterNEGsStability verifies that we limit the number of endpoints removal.
// Removing endpoint from a node causes 30 second connection draining.
// For that reason we should avoid removing endpoints for as long as possible.
func TestClusterNEGsStability(t *testing.T) {
t.Parallel()

type stage struct {
desc string
nodes map[string][]*nodeWithSubnet
want map[negtypes.NEGLocation]negtypes.NetworkEndpointSet
}

// For clarity of test cases we limit the size to 7 - which will still split unevenly in 3 zones
const subsetSize = 7

scenarios := []struct {
desc string
stages []stage
}{{
desc: "equal nodes",
stages: []stage{{
desc: "start",
nodes: map[string][]*nodeWithSubnet{
"zone1": makeNodes(1000, 10),
"zone2": makeNodes(2000, 10),
"zone3": makeNodes(3000, 10),
},
want: map[negtypes.NEGLocation]negtypes.NetworkEndpointSet{
{Zone: "zone1", Subnet: "default"}: {{Node: "node1003"}: {}, {Node: "node1009"}: {}},
{Zone: "zone2", Subnet: "default"}: {{Node: "node2003"}: {}, {Node: "node2004"}: {}},
{Zone: "zone3", Subnet: "default"}: {{Node: "node3000"}: {}, {Node: "node3003"}: {}, {Node: "node3009"}: {}},
},
}, {
desc: "no change",
nodes: map[string][]*nodeWithSubnet{
"zone1": makeNodes(1000, 10),
"zone2": makeNodes(2000, 10),
"zone3": makeNodes(3000, 10),
},
want: map[negtypes.NEGLocation]negtypes.NetworkEndpointSet{
{Zone: "zone1", Subnet: "default"}: {{Node: "node1003"}: {}, {Node: "node1009"}: {}},
{Zone: "zone2", Subnet: "default"}: {{Node: "node2003"}: {}, {Node: "node2004"}: {}},
{Zone: "zone3", Subnet: "default"}: {{Node: "node3000"}: {}, {Node: "node3003"}: {}, {Node: "node3009"}: {}},
},
}, {
desc: "increase nodes in all zones, no change expected",
nodes: map[string][]*nodeWithSubnet{
"zone1": makeNodes(1000, 100),
"zone2": makeNodes(2000, 100),
"zone3": makeNodes(3000, 100),
},
want: map[negtypes.NEGLocation]negtypes.NetworkEndpointSet{
{Zone: "zone1", Subnet: "default"}: {{Node: "node1003"}: {}, {Node: "node1009"}: {}},
{Zone: "zone2", Subnet: "default"}: {{Node: "node2003"}: {}, {Node: "node2004"}: {}},
{Zone: "zone3", Subnet: "default"}: {{Node: "node3000"}: {}, {Node: "node3003"}: {}, {Node: "node3009"}: {}},
},
}, {
desc: "decrease all in all zones, no change expected",
nodes: map[string][]*nodeWithSubnet{
"zone1": makeNodes(1000, 10),
"zone2": makeNodes(2000, 10),
"zone3": makeNodes(3000, 100),
},
want: map[negtypes.NEGLocation]negtypes.NetworkEndpointSet{
{Zone: "zone1", Subnet: "default"}: {{Node: "node1003"}: {}, {Node: "node1009"}: {}},
{Zone: "zone2", Subnet: "default"}: {{Node: "node2003"}: {}, {Node: "node2004"}: {}},
{Zone: "zone3", Subnet: "default"}: {{Node: "node3000"}: {}, {Node: "node3003"}: {}, {Node: "node3009"}: {}},
},
}, {
desc: "decrease, node disapear, change expected",
nodes: map[string][]*nodeWithSubnet{
"zone1": makeNodes(1000, 5),
"zone2": makeNodes(2000, 5),
"zone3": makeNodes(3000, 5),
},
want: map[negtypes.NEGLocation]negtypes.NetworkEndpointSet{
{Zone: "zone1", Subnet: "default"}: {{Node: "node1002"}: {}, {Node: "node1003"}: {}},
{Zone: "zone2", Subnet: "default"}: {{Node: "node2003"}: {}, {Node: "node2004"}: {}},
{Zone: "zone3", Subnet: "default"}: {{Node: "node3000"}: {}, {Node: "node3002"}: {}, {Node: "node3003"}: {}},
},
}},
}, {
desc: "unequal nodes",
stages: []stage{{
desc: "start",
nodes: map[string][]*nodeWithSubnet{
"zone1": makeNodes(1000, 10),
"zone2": makeNodes(2000, 20),
"zone3": makeNodes(3000, 10),
},
want: map[negtypes.NEGLocation]negtypes.NetworkEndpointSet{
{Zone: "zone1", Subnet: "default"}: {{Node: "node1003"}: {}, {Node: "node1009"}: {}},
{Zone: "zone2", Subnet: "default"}: {{Node: "node2011"}: {}, {Node: "node2017"}: {}, {Node: "node2019"}: {}},
{Zone: "zone3", Subnet: "default"}: {{Node: "node3000"}: {}, {Node: "node3003"}: {}},
},
}, {
desc: "no change",
nodes: map[string][]*nodeWithSubnet{
"zone1": makeNodes(1000, 10),
"zone2": makeNodes(2000, 20),
"zone3": makeNodes(3000, 10),
},
want: map[negtypes.NEGLocation]negtypes.NetworkEndpointSet{
{Zone: "zone1", Subnet: "default"}: {{Node: "node1003"}: {}, {Node: "node1009"}: {}},
{Zone: "zone2", Subnet: "default"}: {{Node: "node2011"}: {}, {Node: "node2017"}: {}, {Node: "node2019"}: {}},
{Zone: "zone3", Subnet: "default"}: {{Node: "node3000"}: {}, {Node: "node3003"}: {}},
},
}, {
desc: "make zone1 largest, no change expected",
nodes: map[string][]*nodeWithSubnet{
"zone1": makeNodes(1000, 30),
"zone2": makeNodes(2000, 20),
"zone3": makeNodes(3000, 10),
},
want: map[negtypes.NEGLocation]negtypes.NetworkEndpointSet{
{Zone: "zone1", Subnet: "default"}: {{Node: "node1003"}: {}, {Node: "node1009"}: {}},
{Zone: "zone2", Subnet: "default"}: {{Node: "node2011"}: {}, {Node: "node2017"}: {}, {Node: "node2019"}: {}},
{Zone: "zone3", Subnet: "default"}: {{Node: "node3000"}: {}, {Node: "node3003"}: {}},
},
}, {
desc: "shrink zone1, move endpoints from zone1",
nodes: map[string][]*nodeWithSubnet{
"zone1": makeNodes(1000, 1),
"zone2": makeNodes(2000, 20),
"zone3": makeNodes(3000, 10),
},
want: map[negtypes.NEGLocation]negtypes.NetworkEndpointSet{
{Zone: "zone1", Subnet: "default"}: {},
{Zone: "zone2", Subnet: "default"}: {
{Node: "node2004"}: {}, // New
{Node: "node2011"}: {},
{Node: "node2017"}: {},
{Node: "node2019"}: {},
},
{Zone: "zone3", Subnet: "default"}: {
{Node: "node3000"}: {},
{Node: "node3003"}: {},
{Node: "node3009"}: {}, // New
},
},
}},
}, {
desc: "zones flickering under subsetlimit",
stages: []stage{{
desc: "start",
nodes: map[string][]*nodeWithSubnet{
"zone1": makeNodes(1000, 2),
"zone2": makeNodes(2000, 2),
"zone3": makeNodes(3000, 2),
},
want: map[negtypes.NEGLocation]negtypes.NetworkEndpointSet{
{Zone: "zone1", Subnet: "default"}: {{Node: "node1000"}: {}, {Node: "node1001"}: {}},
{Zone: "zone2", Subnet: "default"}: {{Node: "node2000"}: {}, {Node: "node2001"}: {}},
{Zone: "zone3", Subnet: "default"}: {{Node: "node3000"}: {}, {Node: "node3001"}: {}},
},
}, {
desc: "zone1 bump",
nodes: map[string][]*nodeWithSubnet{
"zone1": makeNodes(1000, 3),
"zone2": makeNodes(2000, 2),
"zone3": makeNodes(3000, 2),
},
want: map[negtypes.NEGLocation]negtypes.NetworkEndpointSet{
{Zone: "zone1", Subnet: "default"}: {{Node: "node1000"}: {}, {Node: "node1001"}: {}, {Node: "node1002"}: {}},
{Zone: "zone2", Subnet: "default"}: {{Node: "node2000"}: {}, {Node: "node2001"}: {}},
{Zone: "zone3", Subnet: "default"}: {{Node: "node3000"}: {}, {Node: "node3001"}: {}},
},
}, {
desc: "zone2 bump",
nodes: map[string][]*nodeWithSubnet{
"zone1": makeNodes(1000, 2),
"zone2": makeNodes(2000, 3),
"zone3": makeNodes(3000, 2),
},
want: map[negtypes.NEGLocation]negtypes.NetworkEndpointSet{
{Zone: "zone1", Subnet: "default"}: {{Node: "node1000"}: {}, {Node: "node1001"}: {}},
{Zone: "zone2", Subnet: "default"}: {{Node: "node2000"}: {}, {Node: "node2001"}: {}, {Node: "node2002"}: {}},
{Zone: "zone3", Subnet: "default"}: {{Node: "node3000"}: {}, {Node: "node3001"}: {}},
},
}},
}}

for _, tc := range scenarios {
t.Run(tc.desc, func(t *testing.T) {
t.Parallel()

// Arrange
nodeInformer := zonegetter.FakeNodeInformer()
zoneGetter, err := zonegetter.NewFakeZoneGetter(nodeInformer, nodeInformer, defaultTestSubnetURL, false)
if err != nil {
t.Fatalf("failed to initialize zone getter: %v", err)
}
defaultNetwork := network.NetworkInfo{IsDefault: true, K8sNetwork: "default", SubnetworkURL: defaultTestSubnetURL}

// We use ILB so that it doesn't trigger code that linearly calculates number of NEGs needed
// based on actual number of Pods.
c := NewClusterL4EndpointsCalculator(
listers.NewNodeLister(nodeInformer.GetIndexer()),
zoneGetter, "svc", klog.TODO(), &defaultNetwork, negtypes.L4InternalLB,
)
c.subsetSizeLimit = subsetSize

var endpointsMap map[negtypes.NEGLocation]negtypes.NetworkEndpointSet
for _, stage := range tc.stages {
t.Run(stage.desc, func(t *testing.T) {
// Arrange
// Set up nodes
for zone, nodes := range stage.nodes {
zonegetter.DeleteFakeNodesInZone(t, zone, zoneGetter)
names := make([]string, 0, len(nodes))
for _, node := range nodes {
names = append(names, node.node.Name)
}
if err := zonegetter.AddFakeNodes(zoneGetter, zone, names...); err != nil {
t.Fatalf("failed to add fake node: %v", err)
}
}

// Act
res, _, _, err := c.CalculateEndpoints(nil, endpointsMap)
if err != nil {
t.Fatalf("expected no err, got %v", err)
}

// Assert
if diff := cmp.Diff(stage.want, res); diff != "" {
t.Errorf("want != got, -want +got:\n%s", diff)
}
endpointsMap = stage.want
})
}
})
}
}

func TestValidateEndpoints(t *testing.T) {
testPortName := ""
emptyNamedPort := ""
Expand Down
89 changes: 79 additions & 10 deletions pkg/neg/syncers/subsets.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,32 +177,44 @@ func newNodeWithSubnet(node *v1.Node, subnet string) *nodeWithSubnet {
// getSubsetPerZone creates a subset of nodes from the given list of nodes, for each zone provided.
// The output is a map of zone string to NEG subset.
// In order to pick as many nodes as possible given the total limit, the following algorithm is used:
// 1) The zones are sorted in increasing order of the total number of nodes.
// 2) The number of nodes to be selected is divided equally among the zones. If there are 4 zones and the limit is 250,
// A) Leave existing endpoints where they are. Detaching endpoints is time consuming and causes connection draining. We want to avoid that.
// B) For the rest of endpoints following algorithm is used:
//
// the algorithm attempts to pick 250/4 from the first zone. If 'n' nodes were selected from zone1, the limit for
// zone2 is (250 - n)/3. For the third zone, it is (250 - n - m)/2, if m nodes were picked from zone2.
// Since the number of nodes will keep increasing in successive zones due to the sorting, even if fewer nodes were
// present in some zones, more nodes will be picked from other nodes, taking the total subset size to the given limit
// whenever possible.
// 1. The zones are sorted in increasing order of the total number of nodes.
//
// 2. The number of nodes to be selected is divided equally among the zones. If there are 4 zones and the limit is 250,
//
// the algorithm attempts to pick 250/4 from the first zone. If 'n' nodes were selected from zone1, the limit for
// zone2 is (250 - n)/3. For the third zone, it is (250 - n - m)/2, if m nodes were picked from zone2.
// Since the number of nodes will keep increasing in successive zones due to the sorting, even if fewer nodes were
// present in some zones, more nodes will be picked from other nodes, taking the total subset size to the given limit
// whenever possible.
func getSubsetPerZone(nodesPerZone map[string][]*nodeWithSubnet, totalLimit int, svcID string, currentMap map[negtypes.NEGLocation]negtypes.NetworkEndpointSet, logger klog.Logger, networkInfo *network.NetworkInfo) (map[negtypes.NEGLocation]negtypes.NetworkEndpointSet, error) {
result := make(map[negtypes.NEGLocation]negtypes.NetworkEndpointSet)

subsetSize := 0
// initialize zonesRemaining to the total number of zones.
zonesRemaining := len(nodesPerZone)
// Sort zones in increasing order of node count.
zoneList := sortZones(nodesPerZone)

defaultSubnet, err := utils.KeyName(networkInfo.SubnetworkURL)
if err != nil {
logger.Error(err, "Errored getting default subnet from NetworkInfo when calculating L4 endpoints")
return nil, err
}

// Remove nodes that are already in use in currentMap
totalLimit, nodesPerZone = pickOutUsedEndpoints(currentMap, nodesPerZone, totalLimit, result)

// Sort zones in increasing order of the remaining node count.
zoneList := sortZones(nodesPerZone)

for _, zone := range zoneList {
// make sure there is an entry for the defaultSubnet in each zone, even if there will be no endpoints in there (maintains the old behavior).
result[negtypes.NEGLocation{Zone: zone.Name, Subnet: defaultSubnet}] = negtypes.NewNetworkEndpointSet()
defaultSubnetLocation := negtypes.NEGLocation{Zone: zone.Name, Subnet: defaultSubnet}
if _, ok := result[defaultSubnetLocation]; !ok {
result[defaultSubnetLocation] = negtypes.NewNetworkEndpointSet()
}

// split the limit across the leftover zones.
subsetSize = totalLimit / zonesRemaining
logger.Info("Picking subset for a zone", "subsetSize", subsetSize, "zone", zone, "svcID", svcID)
Expand Down Expand Up @@ -230,6 +242,63 @@ func getSubsetPerZone(nodesPerZone map[string][]*nodeWithSubnet, totalLimit int,
return result, nil
}

// Removes nodes that are already used in the currentMap.
//
// Adds endpoints to the result in place.
// Returns totalLimit left and nodesPerZone after removal.
func pickOutUsedEndpoints(currentMap map[negtypes.NEGLocation]negtypes.NetworkEndpointSet, nodesPerZone map[string][]*nodeWithSubnet, totalLimit int, result map[negtypes.NEGLocation]negtypes.NetworkEndpointSet) (int, map[string][]*nodeWithSubnet) {
// We can use map to have O(1) find and delete
m := nodesToMap(nodesPerZone)

for location, endpoints := range currentMap {
for endpoint := range endpoints {
key := nameSubnetKey{endpoint.Node, location.Subnet}
if _, ok := m[location.Zone][key]; !ok {
continue
}

delete(m[location.Zone], key)

if _, ok := result[location]; !ok {
result[location] = negtypes.NewNetworkEndpointSet()
}
result[location].Insert(endpoint)
totalLimit--
}
}

return totalLimit, mapToNodes(m)
}

type nameSubnetKey struct {
name string
subnet string
}

func nodesToMap(nodesPerZone map[string][]*nodeWithSubnet) map[string]map[nameSubnetKey]*nodeWithSubnet {
m := make(map[string]map[nameSubnetKey]*nodeWithSubnet)
for zone, nodes := range nodesPerZone {
m[zone] = make(map[nameSubnetKey]*nodeWithSubnet)
for _, node := range nodes {
m[zone][nameSubnetKey{node.node.Name, node.subnet}] = node
}
}
return m
}

func mapToNodes(m map[string]map[nameSubnetKey]*nodeWithSubnet) map[string][]*nodeWithSubnet {
nodesPerZone := make(map[string][]*nodeWithSubnet)
for zone, nodes := range m {
// We NEED to have at least an empty slice for each zone
// as some code depends on this behavior.
nodesPerZone[zone] = make([]*nodeWithSubnet, 0)
for _, node := range nodes {
nodesPerZone[zone] = append(nodesPerZone[zone], node)
}
}
return nodesPerZone
}

// getNetworkEndpointsForZone gets all endpoints for a matching zone.
// it will get all nodes in the zone no matter which subnet the nodes are in.
func getNetworkEndpointsForZone(zone string, currentMap map[negtypes.NEGLocation]negtypes.NetworkEndpointSet) []negtypes.NetworkEndpoint {
Expand Down