diff --git a/chart/templates/service.yaml b/chart/templates/service.yaml new file mode 100644 index 0000000..57a348c --- /dev/null +++ b/chart/templates/service.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "keess.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "keess.labels" . | nindent 4 }} + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "{{ .Values.service.port }}" + prometheus.io/path: "/metrics" + {{- with .Values.service.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.type | default "ClusterIP" }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "keess.selectorLabels" . | nindent 4 }} diff --git a/chart/values.yaml b/chart/values.yaml index a0eac79..33f6ac4 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -75,7 +75,13 @@ tolerations: [] affinity: {} service: + type: ClusterIP port: 8080 + annotations: {} + # Add custom annotations for the service + # prometheus.io/scrape: "true" + # prometheus.io/port: "8080" + # prometheus.io/path: "/metrics" # If you need to synchronize across different clusters you need to specify this configuration. # This will assume a static kubeconfig file. diff --git a/cmd/run.go b/cmd/run.go index 2087f4d..5a53266 100644 --- a/cmd/run.go +++ b/cmd/run.go @@ -25,11 +25,13 @@ import ( "context" "fmt" "keess/pkg/keess" + "keess/pkg/keess/metrics" "keess/pkg/keess/service" "net/http" "os" "time" + "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/spf13/cobra" "go.uber.org/zap" "go.uber.org/zap/zapcore" @@ -77,13 +79,16 @@ var runCmd = &cobra.Command{ configReloaderMaxRetries, _ := cmd.Flags().GetInt("configReloaderMaxRetries") configReloaderDebounceTimer, _ := cmd.Flags().GetInt("configReloaderDebounceTimer") - logger.Sugar().Infof("Starting Keess. 
Running on local cluster: %s", localCluster) + logger.Sugar().Infof("Starting Keess v%s. Running on local cluster: %s", Version, localCluster) logger.Sugar().Debugf("Namespace polling interval: %d seconds", namespacePollingInterval) logger.Sugar().Debugf("Polling interval: %d seconds", pollingInterval) logger.Sugar().Debugf("Housekeeping interval: %d seconds", housekeepingInterval) logger.Sugar().Debugf("Log level: %s", logLevel) logger.Sugar().Debugf("Kubeconfig path: %s", kubeConfigPath) + // Register Prometheus metrics + metrics.RegisterMetrics() + config, err := rest.InClusterConfig() if err != nil { config, err = keess.BuildConfigWithContextFromFlags(localCluster, kubeConfigPath) @@ -161,7 +166,9 @@ var runCmd = &cobra.Command{ // Create an HTTP server and add the health check handler as a handler http.HandleFunc("/health", healthHandler) + http.Handle("/metrics", promhttp.Handler()) + logger.Sugar().Info("Starting HTTP server on :8080 ...") if err := http.ListenAndServe(":8080", nil); err != nil { logger.Sugar().Fatalf("Failed to start HTTP server: %v", err) } diff --git a/cmd/version.go b/cmd/version.go index 0d16daa..78e407d 100644 --- a/cmd/version.go +++ b/cmd/version.go @@ -7,7 +7,7 @@ import ( ) // Version of the application, set this variable during build -var version = "1.3.0" +var Version = "1.3.1" // versionCmd represents the version command var versionCmd = &cobra.Command{ @@ -16,7 +16,7 @@ var versionCmd = &cobra.Command{ Short: "Print the version number of the application", Long: `Print the version number of the application`, Run: func(cmd *cobra.Command, args []string) { - fmt.Println("Keess v" + version) + fmt.Println("Keess v" + Version) }, } diff --git a/go.mod b/go.mod index 6444e34..e6fe347 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ toolchain go1.24.5 require ( github.com/onsi/ginkgo/v2 v2.23.4 github.com/onsi/gomega v1.38.0 + github.com/prometheus/client_golang v1.23.2 github.com/spf13/cobra v1.9.1 github.com/spf13/viper v1.20.1 
go.uber.org/zap v1.27.0 @@ -16,6 +17,8 @@ require ( ) require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/fxamacker/cbor/v2 v2.8.0 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/go-viper/mapstructure/v2 v2.2.1 // indirect @@ -23,9 +26,13 @@ require ( github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/common v0.66.1 // indirect + github.com/prometheus/procfs v0.16.1 // indirect github.com/x448/float16 v0.8.4 // indirect go.uber.org/automaxprocs v1.6.0 // indirect - golang.org/x/tools v0.33.0 // indirect + go.yaml.in/yaml/v2 v2.4.2 // indirect + golang.org/x/tools v0.35.0 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect sigs.k8s.io/randfill v1.0.0 // indirect ) @@ -54,16 +61,16 @@ require ( github.com/spf13/afero v1.14.0 // indirect github.com/spf13/cast v1.7.1 // indirect github.com/spf13/pflag v1.0.6 // indirect - github.com/stretchr/testify v1.10.0 + github.com/stretchr/testify v1.11.1 github.com/subosito/gotenv v1.6.0 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/net v0.41.0 // indirect - golang.org/x/oauth2 v0.29.0 // indirect - golang.org/x/sys v0.33.0 // indirect - golang.org/x/term v0.32.0 // indirect - golang.org/x/text v0.26.0 // indirect + golang.org/x/net v0.43.0 // indirect + golang.org/x/oauth2 v0.30.0 // indirect + golang.org/x/sys v0.35.0 // indirect + golang.org/x/term v0.34.0 // indirect + golang.org/x/text v0.28.0 // indirect golang.org/x/time v0.11.0 // indirect - google.golang.org/protobuf v1.36.6 // indirect + google.golang.org/protobuf v1.36.8 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/klog/v2 v2.130.1 // indirect diff --git a/go.sum b/go.sum index 
a5ed5ec..0c504d6 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,7 @@ +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -43,10 +47,14 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod 
h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -69,6 +77,14 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= +github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= +github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= +github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= @@ -90,8 +106,8 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= -github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 
+github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= @@ -106,6 +122,8 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= +go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= @@ -115,38 +133,38 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= -golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= -golang.org/x/oauth2 v0.29.0 h1:WdYw2tdTK1S8olAzWHdgeqfy+Mtm9XNhv/xJsY65d98= -golang.org/x/oauth2 v0.29.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= 
+golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= +golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= +golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= +golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= -golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= -golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= +golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= +golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4= +golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= -golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= +golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= +golang.org/x/text 
v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc= -golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI= +golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0= +golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= -google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 
v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/pkg/keess/metrics/metrics.go b/pkg/keess/metrics/metrics.go new file mode 100644 index 0000000..02a9dc4 --- /dev/null +++ b/pkg/keess/metrics/metrics.go @@ -0,0 +1,85 @@ +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" +) + +var ( + ErrorCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "keess_errors_total", + Help: "Total number of errors encountered by the operator.", + }, + ) + + // Resources counts the number of resources managed by the operator, labeled by resource type + // + // This is an informational metric (not meant to aid debugging problems, usually), to + // understand the scale at which the operator is being used and quickly check which + // types of resources are being managed. + Resources = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "keess_resources_managed_total", + Help: "Total number of resources managed by the operator.", + }, + []string{"resource_type"}, // e.g., "service", "configmap", "secret", "namespace" + ) + + // OrphansDetected counts the number of orphaned resources detected by the operator, labeled by resource type + // + // This metric must be incremented as soon as an orphan is detected. + // + // Note that if an orphan can NOT be deleted for some reason, it will be counted again + // the next time it is detected, causing this counter to grow indefinitely while the orphan exists. + // Such an increase, or the divergence between this and OrphansRemoved, can be used + // to detect or alert that we have orphans that may need manual cleaning. 
+ OrphansDetected = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "keess_resources_orphan_detections_total", + Help: "Total number of orphaned resources detected by the operator.", + }, + []string{"resource_type"}, + ) + + // OrphansRemoved counts the number of orphaned resources removed by the operator, labeled by resource type + // + // This metric must be incremented only when an orphan is actually deleted. See + // OrphansDetected for the relation between both. + OrphansRemoved = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "keess_resources_orphan_removals_total", + Help: "Total number of orphaned resources removed by the operator.", + }, + []string{"resource_type"}, + ) + + // RemoteUp indicates if Keess can reach and access the remote cluster (1 for up, 0 for down). + // + // This metric is labeled by remote cluster name, so we can track the status of + // multiple remote clusters independently. + RemoteUp = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "keess_remote_up", + Help: "Indicates if the remote cluster is reachable (1 for up, 0 for down).", + }, + []string{"remote_name"}, // e.g., "cluster1", "cluster2" + ) +) + +// RegisterMetrics registers all prometheus metrics +func RegisterMetrics() { + prometheus.MustRegister(ErrorCount) + prometheus.MustRegister(Resources) + prometheus.MustRegister(OrphansDetected) + prometheus.MustRegister(OrphansRemoved) + prometheus.MustRegister(RemoteUp) + + // For Vector metrics, prometheus requires at least one value to be set to show the metric as available + // So we preset them to 0 with the known labels + Resources.WithLabelValues("namespace").Add(0) // namespace label makes sense only to Resources metric + for _, label := range []string{"service", "configmap", "secret"} { + Resources.WithLabelValues(label).Add(0) + OrphansDetected.WithLabelValues(label).Add(0) + OrphansRemoved.WithLabelValues(label).Add(0) + } +} diff --git a/pkg/keess/service/service_deletion.go 
b/pkg/keess/service/service_deletion.go index 9cc033e..d0a621c 100644 --- a/pkg/keess/service/service_deletion.go +++ b/pkg/keess/service/service_deletion.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "keess/pkg/keess" + "keess/pkg/keess/metrics" "time" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -32,6 +33,7 @@ func (s *ServiceSynchronizer) deleteOrphans(ctx context.Context, pollInterval ti err := s.processServiceDeleteOrphan(ctx, service) if err != nil { + metrics.ErrorCount.Inc() s.logger.Error(err) // err message already contains context } @@ -45,10 +47,11 @@ func (s *ServiceSynchronizer) deleteOrphans(ctx context.Context, pollInterval ti return nil } -// proccessServiceDeleteOrphan processes the service for deletion if it is an orphan. // processServiceDeleteOrphan processes the service for deletion if it is an orphan. func (s *ServiceSynchronizer) processServiceDeleteOrphan(ctx context.Context, svc PacService) error { + metrics.OrphansDetected.WithLabelValues("service").Inc() + sourceKubeClient, err := s.getSourceKubeClient(svc) if err != nil { return fmt.Errorf("[Service][processServiceDeleteOrphan] failed to get source kube client: %w", err) @@ -75,6 +78,7 @@ func (s *ServiceSynchronizer) processServiceDeleteOrphan(ctx context.Context, sv if err != nil { return fmt.Errorf("[Service][processServiceDeleteOrphan] failed to delete orphan service: %w", err) } + metrics.OrphansRemoved.WithLabelValues("service").Inc() s.logger.Infof("[Service][processServiceDeleteOrphan] Deleted orphan service %s/%s", svc.Service.Namespace, svc.Service.Name) // NOTE: we decided not to implement managed namespace deletion for now