diff --git a/pkg/internalregistry/controllers/image_pull_secret_controller.go b/pkg/internalregistry/controllers/image_pull_secret_controller.go index 83c584e3c..6b00204ef 100644 --- a/pkg/internalregistry/controllers/image_pull_secret_controller.go +++ b/pkg/internalregistry/controllers/image_pull_secret_controller.go @@ -7,9 +7,9 @@ import ( "fmt" "reflect" "sync" + "sync/atomic" "time" - "go.uber.org/atomic" "golang.org/x/exp/slices" "gopkg.in/go-jose/go-jose.v2/jwt" authenticationv1 "k8s.io/api/authentication/v1" @@ -52,8 +52,8 @@ func NewImagePullSecretController(kubeclient kubernetes.Interface, secrets infor serviceAccounts: serviceAccounts.Lister(), cacheSyncs: []cache.InformerSynced{secrets.Informer().HasSynced, serviceAccounts.Informer().HasSynced}, queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "bound-token-managed-image-pull-secrets"), - kids: atomic.NewPointer[[]string](nil), - urls: atomic.NewPointer[[]string](nil), + kids: &atomic.Pointer[[]string]{}, + urls: &atomic.Pointer[[]string]{}, kidsC: make(chan []string), urlsC: make(chan []string), } @@ -147,10 +147,10 @@ func (c *imagePullSecretController) sync(ctx context.Context, key string) (error return c.cleanupOrphanedManagedImagePullSecret(ctx, secret), 0 } - refreshNow, refreshAt := c.isSecretRefreshNeeded(secret, *urls, *kids) - if !refreshNow { + now := time.Now() - // the annotation is missing or incorrect, fix it + if refreshNow, refreshAt := isSecretRefreshNeeded(secret, *urls, *kids, now); !refreshNow { + // if the annotation is missing or incorrect, fix it if secret.Annotations[InternalRegistryAuthTokenTypeAnnotation] != AuthTokenTypeBound { patch, err := applycorev1.ExtractSecret(secret, imagePullSecretControllerFieldManager) if err != nil { @@ -167,9 +167,9 @@ func (c *imagePullSecretController) sync(ctx context.Context, key string) (error } // token is not expired and not expiring soon, requeue when expected to need a refresh - refreshAfter := refreshAt.Sub(time.Now()) - klog.V(4).InfoS(key, "refreshAfter", refreshAfter) - return nil, refreshAfter + requeueAfter := refreshAt.Sub(now) + klog.V(4).InfoS(key, "requeueAfter", requeueAfter, "refreshed", false) + return nil, requeueAfter } var serviceAccountName = serviceAccountNameForManagedSecret(secret) @@ -202,13 +202,14 @@ func (c *imagePullSecretController) sync(ctx context.Context, key string) (error // the service account controller is responsible for re-creating the initial secret. WithUID(secret.UID) _, err = c.client.CoreV1().Secrets(secret.Namespace).Apply(ctx, patch, metav1.ApplyOptions{Force: true, FieldManager: imagePullSecretControllerFieldManager}) - if err != nil { return err, 0 } - refreshAfter := refreshAt.Sub(time.Now()) - return nil, refreshAfter + // assume `now` as the value of nbf as to not have to parse the token, it should be close enough + requeueAfter := refreshThresholdTime(now, tokenRequest.Status.ExpirationTimestamp.Time).Sub(now) + klog.V(4).InfoS(key, "requeueAfter", requeueAfter, "refreshed", true) + return nil, requeueAfter } func (c *imagePullSecretController) cleanupOrphanedManagedImagePullSecret(ctx context.Context, secret *corev1.Secret) error { @@ -267,31 +268,31 @@ func dockerConfig(token string, urls []string) any { return auth } -func (c *imagePullSecretController) isSecretRefreshNeeded(secret *corev1.Secret, urls, kids []string) (bool, time.Time) { - valid, refreshAt := c.registryAuthenticationFileValid(secret, urls, kids) +func isSecretRefreshNeeded(secret *corev1.Secret, urls, kids []string, now time.Time) (bool, *time.Time) { + valid, refreshAt := registryAuthenticationFileValid(secret, urls, kids, now) return !valid, refreshAt } -func (c *imagePullSecretController) registryAuthenticationFileValid(imagePullSecret *corev1.Secret, urls, kids []string) (bool, time.Time) { +func registryAuthenticationFileValid(imagePullSecret *corev1.Secret, urls, kids []string, now time.Time) (bool, *time.Time) { if imagePullSecret.Type != corev1.SecretTypeDockercfg { klog.V(2).InfoS("Internal registry pull secret type is incorrect.", "ns", imagePullSecret.Namespace, "name", imagePullSecret.Name, "type", imagePullSecret.Type) - return false, time.Now() + return false, nil } // registry authentication file must exist if _, ok := imagePullSecret.Data[corev1.DockerConfigKey]; !ok { klog.V(2).InfoS("Internal registry pull secret does not contain the expected key", "ns", imagePullSecret.Namespace, "name", imagePullSecret.Name, "keys", reflect.ValueOf(imagePullSecret.Data).MapKeys()) - return false, time.Now() + return false, nil } // parse the registry authentication file auth := credentialprovider.DockerConfig{} if err := json.Unmarshal(imagePullSecret.Data[corev1.DockerConfigKey], &auth); err != nil { klog.V(2).InfoS("Internal registry pull secret auth data cannot be parsed", "ns", imagePullSecret.Namespace, "name", imagePullSecret.Name) - return false, time.Now() + return false, nil } // there should be an entries for each internal registry url if len(auth) != len(urls) { klog.V(2).InfoS("Internal registry pull secret auth data does not contain the correct number of entries", "ns", imagePullSecret.Namespace, "name", imagePullSecret.Name, "expected", len(urls), "actual", len(auth)) - return false, time.Now() + return false, nil } matches := 0 CheckUrl: @@ -305,7 +306,7 @@ CheckUrl: } if matches != len(urls) { klog.V(2).InfoS("Internal registry pull secret needs to be refreshed", "reason", "auth data does not contain the correct entries", "ns", imagePullSecret.Namespace, "name", imagePullSecret.Name, "expected", urls, "actual", reflect.ValueOf(auth).MapKeys()) - return false, time.Now() + return false, nil } // track the earliest refresh time of the token (they should all be the same, but check anyway) @@ -316,10 +317,10 @@ CheckUrl: token, err := jwt.ParseSigned(entry.Password) if err != nil { klog.V(2).InfoS("Internal registry pull secret needs to be refreshed", "reason", "auth token cannot be parsed", "ns", imagePullSecret.Namespace, "name", imagePullSecret.Name, "url", url, "error", err) - return false, time.Now() + return false, nil } - // was token created with previoud token signing cert? + // was token created with previous token signing cert? var validKeyID bool for _, kid := range kids { if token.Headers[0].KeyID == kid { @@ -329,7 +330,7 @@ CheckUrl: } if !validKeyID { klog.V(2).InfoS("Internal registry pull secret needs to be refreshed", "reason", "auth token was not signed by a current signer", "ns", imagePullSecret.Namespace, "name", imagePullSecret.Name, "url", url, "error", err) - return false, time.Now() + return false, nil } var claims jwt.Claims @@ -337,21 +338,31 @@ CheckUrl: err = token.UnsafeClaimsWithoutVerification(&claims) if err != nil { klog.V(2).InfoS("Internal registry pull secret needs to be refreshed", "reason", "auth token claim cannot be parsed", "ns", imagePullSecret.Namespace, "name", imagePullSecret.Name, "url", url, "error", err) - return false, time.Now() + return false, nil } - // if token is expired or will only be valid less than 40% of its remaining duration we want to trigger a new token request - refreshTime := claims.Expiry.Time().Add(time.Duration(-int64(float64(time.Now().Sub(claims.Expiry.Time())) * 0.4))) - klog.V(4).InfoS("Token expiration check.", "ns", imagePullSecret.Namespace, "name", imagePullSecret.Name, "url", url, "expirtyTime", claims.Expiry.Time(), "refreshTime", refreshTime) - if time.Now().After(refreshTime) { - klog.V(2).InfoS("Internal registry pull secret needs to be refreshed", "reason", "auth token needs to be refreshed", "ns", imagePullSecret.Namespace, "name", imagePullSecret.Name, "url", url, "expirtyTime", claims.Expiry.Time(), "refreshTime", refreshTime) - return false, time.Now() + // if token is expired or less than 40% of its valid duration is left, we want to trigger a new token request + refreshTime := refreshThresholdTime(claims.NotBefore.Time(), claims.Expiry.Time()) + klog.V(4).InfoS("Token expiration check.", "ns", imagePullSecret.Namespace, "name", imagePullSecret.Name, "url", url, "exp", claims.Expiry.Time(), "refreshTime", refreshTime) + if now.After(refreshTime) { + klog.V(2).InfoS("Internal registry pull secret needs to be refreshed", "reason", "auth token needs to be refreshed", "ns", imagePullSecret.Namespace, "name", imagePullSecret.Name, "url", url, "exp", claims.Expiry.Time(), "refreshTime", refreshTime) + return false, nil } if requeueAt.IsZero() || requeueAt.After(refreshTime) { requeueAt = refreshTime } } klog.V(4).InfoS("Internal registry pull secret does not need to be refreshed.", "ns", imagePullSecret.Namespace, "name", imagePullSecret.Name) - return true, requeueAt + return true, &requeueAt +} + +func refreshThresholdTime(nbf, exp time.Time) time.Time { + // calculate the time at which only 40% of the valid duration would be left + validDuration := exp.Sub(nbf) + if validDuration < 0 { + // this should not happen, but let's not get stuck if it ever does + return time.Time{} + } + return exp.Add(-time.Duration(int64(float64(validDuration) * 0.4))) } func (c *imagePullSecretController) Run(ctx context.Context, workers int) { diff --git a/pkg/internalregistry/controllers/image_pull_secret_controller_test.go b/pkg/internalregistry/controllers/image_pull_secret_controller_test.go new file mode 100644 index 000000000..073e15471 --- /dev/null +++ b/pkg/internalregistry/controllers/image_pull_secret_controller_test.go @@ -0,0 +1,267 @@ +package controllers + +import ( + "encoding/base64" + "encoding/json" + "fmt" + "reflect" + "sync/atomic" + "testing" + "time" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + v1 "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" +) + +func TestRefreshThresholdTime(t *testing.T) { + ttime := func(d time.Duration) time.Time { + return time.Time{}.Add(d) + } + testCases := []struct { + name string + nbf time.Time + exp time.Time + want time.Time + }{ + {name: "happya", nbf: ttime(0), exp: ttime(10000 * time.Hour), want: ttime(6000 * time.Hour)}, + {name: "happyb", nbf: ttime(0), exp: ttime(10000 * time.Minute), want: ttime(6000 * time.Minute)}, + {name: "happyc", nbf: ttime(0), exp: ttime(10000 * time.Second), want: ttime(6000 * time.Second)}, + {name: "invalid", nbf: ttime(10), exp: ttime(5), want: ttime(0)}, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + if got := refreshThresholdTime(tc.nbf, tc.exp); !reflect.DeepEqual(got, tc.want) { + t.Errorf("refreshThresholdTime() = %v, want %v", got, tc.want) + } + }) + } +} + +func TestIsSecretRefreshNeededA(t *testing.T) { + type fields struct { + client kubernetes.Interface + secrets v1.SecretLister + serviceAccounts v1.ServiceAccountLister + cacheSyncs []cache.InformerSynced + queue workqueue.RateLimitingInterface + urls *atomic.Pointer[[]string] + urlsC chan []string + kids *atomic.Pointer[[]string] + kidsC chan []string + } + type args struct { + secret *corev1.Secret + urls []string + kids []string + now time.Time + } + tests := []struct { + name string + fields fields + args args + want bool + want1 *time.Time + }{ + // TODO: Add test cases. + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, got1 := isSecretRefreshNeeded(tt.args.secret, tt.args.urls, tt.args.kids, tt.args.now) + if got != tt.want { + t.Errorf("isSecretRefreshNeeded() got = %v, want %v", got, tt.want) + } + if !reflect.DeepEqual(got1, tt.want1) { + t.Errorf("isSecretRefreshNeeded() got1 = %v, want %v", got1, tt.want1) + } + }) + } +} + +func secret(opts ...func(*corev1.Secret)) *corev1.Secret { + s := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "test", + }, + Type: corev1.SecretTypeDockercfg, + } + for _, f := range opts { + f(s) + } + return s +} + +func withType(t corev1.SecretType) func(*corev1.Secret) { + return func(s *corev1.Secret) { + s.Type = t + } +} + +func withDockerConfig(opts ...func(map[string]map[string]string)) func(*corev1.Secret) { + cfg := map[string]map[string]string{} + return func(s *corev1.Secret) { + for _, opt := range opts { + opt(cfg) + } + data, err := json.Marshal(cfg) + if err != nil { + panic(fmt.Sprintf("failed to marshal docker config: %v", err)) + } + fmt.Println(string(data)) + if s.Data == nil { + s.Data = make(map[string][]byte) + } + s.Data[corev1.DockerConfigKey] = data + } +} + +func withCredentials(url, auth string) func(map[string]map[string]string) { + return func(cfg map[string]map[string]string) { + cfg[url] = map[string]string{"auth": auth} + } +} + +func tokenAuth(kid string, nbf, exp time.Time) string { + h, err := json.Marshal(map[string]interface{}{"alg": "none", "kid": kid}) + if err != nil { + panic(err) + } + p, err := json.Marshal(map[string]interface{}{"nbf": nbf.Unix(), "exp": exp.Unix()}) + if err != nil { + panic(err) + } + t := fmt.Sprintf("%s.%s.", base64.RawURLEncoding.EncodeToString(h), base64.RawURLEncoding.EncodeToString(p)) + return base64.StdEncoding.EncodeToString([]byte(":" + t)) +} + +func TestIsSecretRefreshNeeded(t *testing.T) { + now := time.Now().Round(time.Second) + ts := func(d time.Duration) time.Time { + return now.Add(d) + } + tsPtr := func(d time.Duration) *time.Time { + p := ts(d) + return &p + } + tests := []struct { + name string + secret *corev1.Secret + urls []string + kids []string + wantRefreshNow bool + wantRefreshAt *time.Time + }{ + { + name: "wrong secret type", + urls: []string{"registry.internal"}, + kids: []string{"kid1"}, + secret: secret( + withType(corev1.SecretTypeDockerConfigJson), + withDockerConfig( + withCredentials("registry.internal", tokenAuth("kid1", ts(-5*time.Minute), ts(30*time.Minute))), + ), + ), + wantRefreshNow: true, + }, + { + name: "missing docker config key", + urls: []string{"url1"}, + kids: []string{"kid1"}, + secret: secret(), + wantRefreshNow: true, + }, + { + name: "malformed docker config key value", + urls: []string{"url1"}, + kids: []string{"kid1"}, + secret: secret(func(s *corev1.Secret) { + s.Data = map[string][]byte{corev1.DockerConfigKey: []byte("not.a.valid.dockercfg")} + }), + wantRefreshNow: true, + }, + { + name: "incorrect number of entries", + urls: []string{"url1", "url2"}, + kids: []string{"kid1"}, + secret: secret( + withDockerConfig( + withCredentials("url1", tokenAuth("kid1", ts(-5*time.Minute), ts(30*time.Minute))), + )), + wantRefreshNow: true, + }, + { + name: "incorrect list of entries", + urls: []string{"url1", "url2"}, + kids: []string{"kid1"}, + secret: secret( + withDockerConfig( + withCredentials("url1", tokenAuth("kid1", ts(-5*time.Minute), ts(30*time.Minute))), + withCredentials("url3", tokenAuth("kid1", ts(-5*time.Minute), ts(30*time.Minute))), + )), + wantRefreshNow: true, + }, + { + name: "malformed token", + urls: []string{"url1"}, + kids: []string{"kid1"}, + secret: secret( + withDockerConfig( + withCredentials("url1", base64.StdEncoding.EncodeToString([]byte(":not.a.valid.token"))), + ), + ), + wantRefreshNow: true, + }, + { + name: "token signed with wrong kid", + urls: []string{"url1"}, + kids: []string{"kid1"}, // expected kid is "kid1" + secret: secret( + withDockerConfig( + withCredentials("url1", tokenAuth("kid2", ts(-5*time.Minute), ts(30*time.Minute))), + ), + ), + wantRefreshNow: true, + }, + { + name: "valid secret, not near expiry", + urls: []string{"url1"}, + kids: []string{"kid1"}, + secret: secret( + withDockerConfig( + withCredentials("url1", tokenAuth("kid1", ts(-5*time.Minute), ts(30*time.Minute))), + ), + ), + wantRefreshNow: false, + wantRefreshAt: tsPtr((-5 + 21) * time.Minute), + }, + { + name: "token near expiry triggers refresh", + urls: []string{"url1"}, + kids: []string{"kid1"}, + secret: secret( + withDockerConfig( + withCredentials("url1", tokenAuth("kid1", ts(-30*time.Minute), ts(5*time.Minute))), + ), + ), + wantRefreshNow: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + refreshNow, refreshAt := isSecretRefreshNeeded(tc.secret, tc.urls, tc.kids, now) + if refreshNow != tc.wantRefreshNow { + t.Errorf("expected refreshNow=%v, got %v", tc.wantRefreshNow, refreshNow) + } + if refreshAt != tc.wantRefreshAt { + if refreshAt == nil || tc.wantRefreshAt == nil || refreshAt.Compare(*tc.wantRefreshAt) != 0 { + t.Errorf("expected refreshAt=%v, got %v", tc.wantRefreshAt, refreshAt) + } + } + }) + } +}