Skip to content

Commit e4ee018

Browse files
committed
Unified job management so that all controllers work the same
Also added a common status package. A lot of duplicate statuses that will be looked at later.
1 parent 2f2955f commit e4ee018

File tree

10 files changed

+646
-426
lines changed

10 files changed

+646
-426
lines changed

internal/controller/cluster_controller.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333
logf "sigs.k8s.io/controller-runtime/pkg/log"
3434

3535
etosv1alpha1 "github.com/eiffel-community/etos/api/v1alpha1"
36+
"github.com/eiffel-community/etos/internal/controller/status"
3637
"github.com/eiffel-community/etos/internal/etos"
3738
"github.com/eiffel-community/etos/internal/extras"
3839
)
@@ -141,8 +142,8 @@ func (r *ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
141142

142143
// update will set the status condition and update the status of the ETOS cluster.
143144
// if the update fails due to conflict the reconciliation will requeue after one second.
144-
func (r *ClusterReconciler) update(ctx context.Context, cluster *etosv1alpha1.Cluster, status metav1.ConditionStatus, message string) (ctrl.Result, error) {
145-
if meta.SetStatusCondition(&cluster.Status.Conditions, metav1.Condition{Type: StatusReady, Status: status, Reason: "Ready", Message: message}) {
145+
func (r *ClusterReconciler) update(ctx context.Context, cluster *etosv1alpha1.Cluster, clusterStatus metav1.ConditionStatus, message string) (ctrl.Result, error) {
146+
if meta.SetStatusCondition(&cluster.Status.Conditions, metav1.Condition{Type: status.StatusReady, Status: clusterStatus, Reason: status.ReasonReady, Message: message}) {
146147
if err := r.Status().Update(ctx, cluster); err != nil {
147148
if apierrors.IsConflict(err) {
148149
return ctrl.Result{RequeueAfter: time.Second}, nil

internal/controller/environment_controller.go

Lines changed: 100 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,14 @@ import (
2929
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3030
"k8s.io/apimachinery/pkg/runtime"
3131
"k8s.io/apimachinery/pkg/types"
32-
ref "k8s.io/client-go/tools/reference"
3332
ctrl "sigs.k8s.io/controller-runtime"
3433
"sigs.k8s.io/controller-runtime/pkg/client"
3534
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
3635
logf "sigs.k8s.io/controller-runtime/pkg/log"
3736

3837
etosv1alpha1 "github.com/eiffel-community/etos/api/v1alpha1"
38+
"github.com/eiffel-community/etos/internal/controller/jobs"
39+
"github.com/eiffel-community/etos/internal/controller/status"
3940
)
4041

4142
const releaseFinalizer = "etos.eiffel-community.github.io/release"
@@ -63,6 +64,7 @@ func (r *EnvironmentReconciler) Reconcile(ctx context.Context, req ctrl.Request)
6364
if err != nil {
6465
return ctrl.Result{}, client.IgnoreNotFound(err)
6566
}
67+
6668
// If the environment is considered 'Completed', it has been released. Check that the object is
6769
// being deleted and contains the finalizer and remove the finalizer.
6870
if environment.Status.CompletionTime != nil {
@@ -73,7 +75,7 @@ func (r *EnvironmentReconciler) Reconcile(ctx context.Context, req ctrl.Request)
7375
if apierrors.IsConflict(err) {
7476
return ctrl.Result{Requeue: true}, nil
7577
}
76-
return ctrl.Result{}, err
78+
return ctrl.Result{}, client.IgnoreNotFound(err)
7779
}
7880
}
7981
}
@@ -92,11 +94,15 @@ func (r *EnvironmentReconciler) Reconcile(ctx context.Context, req ctrl.Request)
9294

9395
// reconcile an environment resource to its desired state.
9496
func (r *EnvironmentReconciler) reconcile(ctx context.Context, environment *etosv1alpha1.Environment) error {
95-
logger := logf.FromContext(ctx)
96-
9797
// Set initial statuses if not set.
98-
if meta.FindStatusCondition(environment.Status.Conditions, StatusActive) == nil {
99-
meta.SetStatusCondition(&environment.Status.Conditions, metav1.Condition{Status: metav1.ConditionTrue, Type: StatusActive, Message: "Actively being used", Reason: "Active"})
98+
if meta.FindStatusCondition(environment.Status.Conditions, status.StatusActive) == nil {
99+
meta.SetStatusCondition(&environment.Status.Conditions,
100+
metav1.Condition{
101+
Status: metav1.ConditionTrue,
102+
Type: status.StatusActive,
103+
Reason: status.ReasonActive,
104+
Message: "Actively being used",
105+
})
100106
return r.Status().Update(ctx, environment)
101107
}
102108
if environment.ObjectMeta.DeletionTimestamp.IsZero() {
@@ -106,120 +112,80 @@ func (r *EnvironmentReconciler) reconcile(ctx context.Context, environment *etos
106112
}
107113
}
108114

109-
// Get active, finished and failed environment releasers.
110-
releasers, err := jobStatus(ctx, r, environment.Namespace, environment.Name, EnvironmentOwnerKey)
111-
if err != nil {
112-
return err
113-
}
114-
115-
environment.Status.EnvironmentReleasers = nil
116-
for _, activeReleaser := range releasers.activeJobs {
117-
jobRef, err := ref.GetReference(r.Scheme, activeReleaser)
118-
if err != nil {
119-
logger.Error(err, "failed to make reference to active environment releaser", "releaser", activeReleaser)
120-
continue
121-
}
122-
environment.Status.EnvironmentReleasers = append(environment.Status.EnvironmentReleasers, *jobRef)
123-
}
124-
if err := r.Status().Update(ctx, environment); err != nil {
125-
return err
126-
}
127-
logger.V(1).Info("environment releaser count", "active", len(releasers.activeJobs), "successful", len(releasers.successfulJobs), "failed", len(releasers.failedJobs))
128-
129115
// TODO: Provider information does not exist in a deterministic way in the Environment resource
130116
// so either we need to find the EnvironmentRequest or the Environment resource needs an update.
131117
// if err := checkProviders(ctx, r, environment.Namespace, environment.Spec.Providers); err != nil {
132118
// return err
133119
// }
134120

135-
if err := r.reconcileReleaser(ctx, releasers, environment); err != nil {
136-
return err
137-
}
121+
conditions := &environment.Status.Conditions
122+
jobManager := jobs.NewJob(r.Client, EnvironmentOwnerKey, environment.GetName(), environment.GetNamespace())
138123

139-
// There is no explicit retry here as it is not necessarily needed. If releasers is not successful
140-
// then the Job will get deleted after a while. When that job is deleted, a reconcile is called for
141-
// and the Environment will try to get released again.
142-
if releasers.successful() {
143-
environmentCondition := meta.FindStatusCondition(environment.Status.Conditions, StatusActive)
144-
environment.Status.CompletionTime = &environmentCondition.LastTransitionTime
145-
return r.Status().Update(ctx, environment)
124+
jobStatus, err := jobManager.Status(ctx)
125+
if err != nil {
126+
return err
146127
}
147-
148-
return nil
149-
}
150-
151-
// reconcileReleaser will check the status of environment releasers, create new ones if necessary.
152-
func (r *EnvironmentReconciler) reconcileReleaser(ctx context.Context, releasers *jobs, environment *etosv1alpha1.Environment) error {
153-
logger := logf.FromContext(ctx)
154-
155-
// Environment releaser failed, setting status.
156-
if releasers.failed() {
157-
releaser := releasers.failedJobs[0] // TODO: We should allow multiple releaser jobs in the future
158-
result, err := terminationLog(ctx, r, releaser, environment.Name)
159-
if err != nil {
160-
result.Description = err.Error()
161-
}
162-
if result.Description == "" {
163-
result.Description = "Failed to release an environment - Unknown error"
164-
}
165-
if meta.SetStatusCondition(&environment.Status.Conditions, metav1.Condition{Type: StatusActive, Status: metav1.ConditionFalse, Reason: "Failed", Message: result.Description}) {
128+
switch jobStatus {
129+
case jobs.StatusFailed:
130+
result := jobManager.Result(ctx)
131+
if meta.SetStatusCondition(conditions,
132+
metav1.Condition{
133+
Type: status.StatusActive,
134+
Status: metav1.ConditionFalse,
135+
Reason: status.ReasonFailed,
136+
Message: result.Description,
137+
}) {
166138
return r.Status().Update(ctx, environment)
167139
}
168-
}
169-
// Environment releaser successful, setting status.
170-
if releasers.successful() {
171-
releaser := releasers.successfulJobs[0] // TODO: We should allow multiple releaser jobs in the future
172-
result, err := terminationLog(ctx, r, releaser, environment.Name)
173-
if err != nil {
174-
result.Description = err.Error()
175-
}
176-
if result.Conclusion == ConclusionFailed {
177-
if meta.SetStatusCondition(&environment.Status.Conditions, metav1.Condition{Type: StatusActive, Status: metav1.ConditionFalse, Reason: "Failed", Message: result.Description}) {
178-
return r.Status().Update(ctx, environment)
140+
case jobs.StatusSuccessful:
141+
result := jobManager.Result(ctx)
142+
var condition metav1.Condition
143+
if result.Conclusion == jobs.ConclusionFailed {
144+
condition = metav1.Condition{
145+
Type: status.StatusActive,
146+
Status: metav1.ConditionFalse,
147+
Reason: status.ReasonFailed,
148+
Message: result.Description,
179149
}
180-
}
181-
if meta.SetStatusCondition(&environment.Status.Conditions, metav1.Condition{Type: StatusActive, Status: metav1.ConditionFalse, Reason: "Released", Message: result.Description}) {
182-
for _, environmentProvider := range releasers.successfulJobs {
183-
if err := r.Delete(ctx, environmentProvider, client.PropagationPolicy(metav1.DeletePropagationBackground)); err != nil {
184-
if !apierrors.IsNotFound(err) {
185-
return err
186-
}
187-
}
150+
} else {
151+
condition = metav1.Condition{
152+
Type: status.StatusActive,
153+
Status: metav1.ConditionFalse,
154+
Reason: status.ReasonCompleted,
155+
Message: result.Description,
188156
}
189-
return r.Status().Update(ctx, environment)
190157
}
191-
}
192-
// Suite runners active, setting status
193-
if releasers.active() {
194-
if meta.SetStatusCondition(&environment.Status.Conditions, metav1.Condition{Type: StatusActive, Status: metav1.ConditionFalse, Reason: "Releasing", Message: "Environment is being released"}) {
158+
environmentCondition := meta.FindStatusCondition(environment.Status.Conditions, status.StatusActive)
159+
environment.Status.CompletionTime = &environmentCondition.LastTransitionTime
160+
if meta.SetStatusCondition(conditions, condition) {
161+
return errors.Join(r.Status().Update(ctx, environment), jobManager.Delete(ctx))
162+
}
163+
case jobs.StatusActive:
164+
if meta.SetStatusCondition(conditions,
165+
metav1.Condition{
166+
Type: status.StatusActive,
167+
Status: metav1.ConditionFalse,
168+
Reason: status.ReasonRunning,
169+
Message: "Job is running",
170+
}) {
195171
return r.Status().Update(ctx, environment)
196172
}
197-
}
198-
// Environment is being released and no releaser is active, create an environment releaser
199-
if releasers.empty() && !environment.ObjectMeta.DeletionTimestamp.IsZero() {
200-
if controllerutil.ContainsFinalizer(environment, releaseFinalizer) {
201-
logger.Info("Environment is being deleted, release it")
202-
environmentRequest, err := r.environmentRequest(ctx, environment)
203-
if err != nil {
204-
return err
205-
}
206-
clusterName := environment.Labels["etos.eiffel-community.github.io/cluster"]
207-
var cluster *etosv1alpha1.Cluster
208-
if clusterName != "" {
209-
cluster = &etosv1alpha1.Cluster{}
210-
if err := r.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: environment.Namespace}, cluster); err != nil {
211-
logger.Info("Failed to get cluster resource!")
212-
return err
213-
}
214-
}
215-
releaser := r.releaseJob(environment, environmentRequest, cluster)
216-
fmt.Println(releaser)
217-
if err := ctrl.SetControllerReference(environment, releaser, r.Scheme); err != nil {
218-
return err
219-
}
220-
if err := r.Create(ctx, releaser); err != nil {
221-
return err
173+
default:
174+
// Since this is a release job, we don't want to release if we are not deleting.
175+
if environment.GetDeletionTimestamp().IsZero() {
176+
return nil
177+
}
178+
if err := jobManager.Create(ctx, environment, r.releaseJob); err != nil {
179+
if meta.SetStatusCondition(conditions,
180+
metav1.Condition{
181+
Type: status.StatusActive,
182+
Status: metav1.ConditionFalse,
183+
Reason: status.ReasonFailed,
184+
Message: err.Error(),
185+
}) {
186+
return r.Status().Update(ctx, environment)
222187
}
188+
return err
223189
}
224190
}
225191
return nil
@@ -245,27 +211,43 @@ func (r *EnvironmentReconciler) environmentRequest(ctx context.Context, environm
245211
}
246212

247213
// releaseJob is the job definition for an environment releaser.
248-
func (r EnvironmentReconciler) releaseJob(environment *etosv1alpha1.Environment, environmentRequest *etosv1alpha1.EnvironmentRequest, cluster *etosv1alpha1.Cluster) *batchv1.Job {
249-
id := environment.Labels["etos.eiffel-community.github.io/id"]
214+
func (r EnvironmentReconciler) releaseJob(ctx context.Context, obj client.Object) (*batchv1.Job, error) {
215+
logger := logf.FromContext(ctx)
250216
ttl := int32(300)
251217
grace := int64(30)
252218
backoff := int32(0)
253219

254-
clusterName := ""
220+
environment, ok := obj.(*etosv1alpha1.Environment)
221+
if !ok {
222+
return nil, errors.New("object received from job manager is not an Environment")
223+
}
224+
environmentRequest, err := r.environmentRequest(ctx, environment)
225+
if err != nil {
226+
return nil, err
227+
}
228+
clusterName := environment.Labels["etos.eiffel-community.github.io/cluster"]
229+
var cluster *etosv1alpha1.Cluster
230+
if clusterName != "" {
231+
cluster = &etosv1alpha1.Cluster{}
232+
if err := r.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: environment.Namespace}, cluster); err != nil {
233+
logger.Info("Failed to get cluster resource!")
234+
return nil, err
235+
}
236+
}
237+
255238
databaseHost := "etcd-client"
256239
if cluster != nil {
257240
if cluster.Spec.Database.Deploy {
258241
databaseHost = fmt.Sprintf("%s-etcd-client", cluster.Name)
259242
} else {
260243
databaseHost = cluster.Spec.Database.Etcd.Host
261244
}
262-
clusterName = cluster.Name
263245
}
264246

265-
return &batchv1.Job{
247+
jobSpec := &batchv1.Job{
266248
ObjectMeta: metav1.ObjectMeta{
267249
Labels: map[string]string{
268-
"etos.eiffel-community.github.io/id": id,
250+
"etos.eiffel-community.github.io/id": environment.Labels["etos.eiffel-community.github.io/id"],
269251
"etos.eiffel-community.github.io/sub-suite": environment.Name,
270252
"etos.eiffel-community.github.io/cluster": clusterName,
271253
"app.kubernetes.io/name": "environment-releaser",
@@ -281,6 +263,13 @@ func (r EnvironmentReconciler) releaseJob(environment *etosv1alpha1.Environment,
281263
Template: corev1.PodTemplateSpec{
282264
ObjectMeta: metav1.ObjectMeta{
283265
Name: environment.Name,
266+
Labels: map[string]string{
267+
"etos.eiffel-community.github.io/id": environment.Labels["etos.eiffel-community.github.io/id"],
268+
"etos.eiffel-community.github.io/sub-suite": environment.Name,
269+
"etos.eiffel-community.github.io/cluster": clusterName,
270+
"app.kubernetes.io/name": "environment-releaser",
271+
"app.kubernetes.io/part-of": "etos",
272+
},
284273
},
285274
Spec: corev1.PodSpec{
286275
TerminationGracePeriodSeconds: &grace,
@@ -323,6 +312,7 @@ func (r EnvironmentReconciler) releaseJob(environment *etosv1alpha1.Environment,
323312
},
324313
},
325314
}
315+
return jobSpec, ctrl.SetControllerReference(environment, jobSpec, r.Scheme)
326316
}
327317

328318
// registerOwnerIndexForJob will set an index of the jobs that an environment owns.

0 commit comments

Comments
 (0)