Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/config/services.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ func (sc ServiceConfig) ToModelDimensionsRegexp() []model.DimensionsRegexp {
dr = append(dr, model.DimensionsRegexp{
Regexp: regexp,
DimensionsNames: dimensionNames,
Namespace: sc.Namespace,
})
}

Expand Down
232 changes: 121 additions & 111 deletions pkg/job/maxdimassociator/associator.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,80 +15,104 @@
import (
"cmp"
"context"
"fmt"
"log/slog"
"slices"
"sort"
"strings"

"github.com/cespare/xxhash/v2"
"github.com/grafana/regexp"
prom_model "github.com/prometheus/common/model"

"github.com/prometheus-community/yet-another-cloudwatch-exporter/pkg/model"
)

var amazonMQBrokerSuffix = regexp.MustCompile("-[0-9]+$")


Check failure on line 31 in pkg/job/maxdimassociator/associator.go

View workflow job for this annotation

GitHub Actions / lint

File is not properly formatted (gofmt)
// Associator implements a "best effort" algorithm to automatically map the output
// of the ListMetrics API to the list of resources retrieved from the Tagging API.
// The core logic is based on a manually maintained list of regexes that extract
// dimensions names from ARNs (see services.go). YACE supports auto-discovery for
// those AWS namespaces where the ARN regexes are correctly defined.
type Associator struct {
// mappings is a slice of dimensions-based mappings, one for each regex of a given namespace
mappings []*dimensionsRegexpMapping
// mapping from join key to tagged resource. The join key is build using the namespace and the required dimensions of the metric.
mappings map[uint64]*model.TaggedResource

logger *slog.Logger
debugEnabled bool

// mapping from namespace to diffrent join key hash functions for a metric. The functions are sorted by the number of dimensions.

Check failure on line 44 in pkg/job/maxdimassociator/associator.go

View workflow job for this annotation

GitHub Actions / lint

`diffrent` is a misspelling of `different` (misspell)
namespaceToJoinKeyHashFunction map[string][]hashKeyFunction
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The associator is not intended to be used across different AWS Namespaces. It is always run within the context of a single DiscoveryJob which has a single namespace,

var assoc resourceAssociator
if len(svc.DimensionRegexps) > 0 && len(resources) > 0 {
assoc = maxdimassociator.NewAssociator(logger, discoveryJob.DimensionsRegexps, resources)
} else {
// If we don't have dimension regex's and resources there's nothing to associate but metrics shouldn't be skipped
assoc = nopAssociator{}
}

}

type dimensionsRegexpMapping struct {
// dimensions is a slice of dimensions names in a regex (normally 1 name is enough
// to identify the resource type by its ARN, sometimes 2 or 3 dimensions names are
// needed to identify sub-resources)
type hashKeyFunction struct {
dimensions []string
}

// dimensionsMapping maps the set of dimensions (names and values) to a resource.
// Dimensions names and values are encoded as a uint64 fingerprint.
dimensionsMapping map[uint64]*model.TaggedResource
func newHashKeyFunction(dimensions []string) hashKeyFunction {
sort.Strings(dimensions)
return hashKeyFunction{
dimensions: dimensions,
}
}

func (rm dimensionsRegexpMapping) toString() string {
sb := strings.Builder{}
sb.WriteString("{dimensions=[")
for _, dim := range rm.dimensions {
sb.WriteString(dim)
func (hf hashKeyFunction) hash(namespace string, cwMetric *model.Metric, shouldTryFixDimension bool) (uint64, bool, bool) {
h := xxhash.New()

h.WriteString(namespace)

Check failure on line 62 in pkg/job/maxdimassociator/associator.go

View workflow job for this annotation

GitHub Actions / lint

Error return value of `h.WriteString` is not checked (errcheck)

dimensions := make([]string, 0, len(cwMetric.Dimensions))
for _, dimension := range cwMetric.Dimensions {
dimensions = append(dimensions, dimension.Name)
}
sort.Strings(dimensions)

if !containsAll(dimensions, hf.dimensions) {
return 0, false, false
}
sb.WriteString("], dimensions_mappings={")
for sign, res := range rm.dimensionsMapping {
sb.WriteString(fmt.Sprintf("%d", sign))
sb.WriteString("=")
sb.WriteString(res.ARN)
sb.WriteString(",")

dimFixApplied := false
for _, dimension := range hf.dimensions {
h.WriteString(dimension)

Check failure on line 76 in pkg/job/maxdimassociator/associator.go

View workflow job for this annotation

GitHub Actions / lint

Error return value of `h.WriteString` is not checked (errcheck)

for _, mDimension := range cwMetric.Dimensions {

if shouldTryFixDimension {
mDimension, dimFixApplied = fixDimension(namespace, mDimension)
}

if mDimension.Name == dimension {
h.WriteString(mDimension.Value)

Check failure on line 85 in pkg/job/maxdimassociator/associator.go

View workflow job for this annotation

GitHub Actions / lint

Error return value of `h.WriteString` is not checked (errcheck)
}
}
}
sb.WriteString("}}")
return sb.String()

return h.Sum64(), true, dimFixApplied
}

// NewAssociator builds all mappings for the given dimensions regexps and list of resources.
func NewAssociator(logger *slog.Logger, dimensionsRegexps []model.DimensionsRegexp, resources []*model.TaggedResource) Associator {
assoc := Associator{
mappings: []*dimensionsRegexpMapping{},
mappings: map[uint64]*model.TaggedResource{},
logger: logger,
debugEnabled: logger.Handler().Enabled(context.Background(), slog.LevelDebug), // caching if debug is enabled

namespaceToJoinKeyHashFunction: map[string][]hashKeyFunction{},
}

// Keep track of resources that have already been mapped.
// Keep track of resources that have already been indexed.
// Each resource will be matched against at most one regex.
// TODO(cristian): use a more memory-efficient data structure
mappedResources := make([]bool, len(resources))

for _, dr := range dimensionsRegexps {
m := &dimensionsRegexpMapping{
dimensions: dr.DimensionsNames,
dimensionsMapping: map[uint64]*model.TaggedResource{},
}

for idx, r := range resources {
if r.Namespace != dr.Namespace {
continue
}

// Skip resource that are already indexed.
if mappedResources[idx] {
continue
}
Expand All @@ -98,17 +122,17 @@
continue
}

labels := make(map[string]string, len(match))
labels := make(map[string]string, len(match)*2)
for i := 1; i < len(match); i++ {
labels[dr.DimensionsNames[i-1]] = match[i]
}
signature := prom_model.LabelsToSignature(labels)
m.dimensionsMapping[signature] = r
mappedResources[idx] = true
}
joinKey := assoc.getJoinKeyFromTaggedResource(r.Namespace, labels)

if len(m.dimensionsMapping) > 0 {
assoc.mappings = append(assoc.mappings, m)
assoc.mappings[joinKey] = r
sort.Strings(dr.DimensionsNames)
hashKeyFunction := newHashKeyFunction(dr.DimensionsNames)
assoc.namespaceToJoinKeyHashFunction[r.Namespace] = append(assoc.namespaceToJoinKeyHashFunction[r.Namespace], hashKeyFunction)
mappedResources[idx] = true
}

// The mapping might end up as empty in cases e.g. where
Expand All @@ -122,17 +146,20 @@
}
}

// sort all mappings by decreasing number of dimensions names
// sort all key sets by decreasing number of dimensions names
// (this is essential so that during matching we try to find the metric
// with the most specific set of dimensions)
slices.SortStableFunc(assoc.mappings, func(a, b *dimensionsRegexpMapping) int {
return -1 * cmp.Compare(len(a.dimensions), len(b.dimensions))
})
for namespace := range assoc.namespaceToJoinKeyHashFunction {
slices.SortStableFunc(assoc.namespaceToJoinKeyHashFunction[namespace], func(a, b hashKeyFunction) int {
return -1 * cmp.Compare(len(a.dimensions), len(b.dimensions))
})
}

if assoc.debugEnabled {
for idx, regexpMapping := range assoc.mappings {
logger.Debug("associator mapping", "mapping_idx", idx, "mapping", regexpMapping.toString())
for idx, resource := range assoc.mappings {
logger.Debug("associator mapping", "mapping_idx", idx, "mapping", resource.ARN)
}
// TODO log namespaceToDimensions
}

return assoc
Expand Down Expand Up @@ -161,85 +188,68 @@
logger.Debug("associate loop start", "dimensions", strings.Join(dimensions, ","))
}

// Attempt to find the regex mapping which contains the most
keySets, ok := assoc.namespaceToJoinKeyHashFunction[cwMetric.Namespace]
if !ok {
logger.Debug("no dimensions sets found for namespace", "namespace", cwMetric.Namespace)
return nil, false
}

// Attempt to find the key set which contains the most
// (but not necessarily all) the metric's dimensions names.
// Regex mappings are sorted by decreasing number of dimensions names,
// Key sets are sorted by decreasing number of dimensions names,
// which favours find the mapping with most dimensions.
dimFixApplied := false
mappingFound := false
for idx, regexpMapping := range assoc.mappings {
if containsAll(dimensions, regexpMapping.dimensions) {
if assoc.debugEnabled {
logger.Debug("found mapping", "mapping_idx", idx, "mapping", regexpMapping.toString())
}
match := false
joinKey := uint64(0)
var taggedResource *model.TaggedResource
for _, hashKeyFunc := range keySets {
shouldTryFixDimension := true
joinKey, mappingFound, dimFixApplied = hashKeyFunc.hash(cwMetric.Namespace, cwMetric, shouldTryFixDimension)
// Try again without dimension fix.
if !mappingFound {
joinKey, mappingFound, _ = hashKeyFunc.hash(cwMetric.Namespace, cwMetric, false)
}

// A regex mapping has been found. The metric has all (and possibly more)
// the dimensions computed for the mapping. Now compute a signature
// of the labels (names and values) of the dimensions of this mapping, and try to
// find a resource match.
// This loop can run up to two times:
// On the first iteration, special-case dimension value
// fixes to match the value up with the resource ARN are applied to particular namespaces.
// The second iteration will only run if a fix was applied for one of the special-case
// namespaces and no match was found. It will try to find a match without applying the fixes.
// This covers cases where the dimension value does line up with the resource ARN.
mappingFound = true
dimFixApplied := false
shouldTryFixDimension := true
// If no dimension fixes were applied, no need to try running again without the fixer.
for dimFixApplied || shouldTryFixDimension {

var labels map[string]string
labels, dimFixApplied = buildLabelsMap(cwMetric, regexpMapping, shouldTryFixDimension)
signature := prom_model.LabelsToSignature(labels)

// Check if there's an entry for the labels (names and values) of the metric,
// and return the resource in case.
if resource, ok := regexpMapping.dimensionsMapping[signature]; ok {
logger.Debug("resource matched", "signature", signature)
return resource, false
}

// No resource was matched for the current signature.
logger.Debug("resource signature attempt not matched", "signature", signature)
shouldTryFixDimension = false
}
// Try next dimensions set if still no mapping found.
if !mappingFound {
logger.Debug("no mapping found for metric", "metric", cwMetric.MetricName)
continue
}

// No resource was matched for any signature, continue iterating across the
// rest of regex mappings to attempt to find another one with fewer dimensions.
logger.Debug("resource not matched")
taggedResource, match = assoc.mappings[joinKey]
// Try again without dimension fix.
if !match && dimFixApplied {
joinKey, _, _ = hashKeyFunc.hash(cwMetric.Namespace, cwMetric, false)
taggedResource, match = assoc.mappings[joinKey]
}
}

// At this point, we haven't been able to match the metric against
// any resource based on the dimensions the associator knows.
// If a regex mapping was ever found in the loop above but no entry
// (i.e. matching labels names and values) matched the metric dimensions,
// skip the metric altogether.
// Otherwise, if we didn't find any regex mapping it means we can't
// correctly map the dimensions names to a resource arn regex,
// but we still want to keep the metric and create a "global" metric.
logger.Debug("associate loop end", "skip", mappingFound)
return nil, mappingFound
// If we found a match we don't need to try next dimensions set.
if match {
break
}
}
return taggedResource, mappingFound && !match
}

// buildLabelsMap returns a map of labels names and values, as well as whether the dimension fixer was applied.
// For some namespaces, values might need to be modified in order
// to match the dimension value extracted from ARN.
func buildLabelsMap(cwMetric *model.Metric, regexpMapping *dimensionsRegexpMapping, shouldTryFixDimension bool) (map[string]string, bool) {
labels := make(map[string]string, len(cwMetric.Dimensions))
dimFixApplied := false
for _, rDimension := range regexpMapping.dimensions {
for _, mDimension := range cwMetric.Dimensions {
if shouldTryFixDimension {
mDimension, dimFixApplied = fixDimension(cwMetric.Namespace, mDimension)
}
// TODO: use same logic for both keys.
func (assoc Associator) getJoinKeyFromTaggedResource(namespace string, labels map[string]string) uint64 {
h := xxhash.New()

if rDimension == mDimension.Name {
labels[mDimension.Name] = mDimension.Value
}
}
h.WriteString(namespace)

keys := make([]string, 0, len(labels))
for key := range labels {
keys = append(keys, key)
}
return labels, dimFixApplied
sort.Strings(keys)

for _, key := range keys {
h.WriteString(key)
h.WriteString(labels[key])
}

return h.Sum64()
}

// fixDimension modifies the dimension value to accommodate special cases where
Expand Down
1 change: 1 addition & 0 deletions pkg/model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ type MetricConfig struct {
type DimensionsRegexp struct {
Regexp *regexp.Regexp
DimensionsNames []string
Namespace string
}

type LabelSet map[string]struct{}
Expand Down
Loading