26
26
TASK_METRICS_HEARTBEAT_LOCK ,
27
27
TASK_WAKEUP_UNBLOCK ,
28
28
TASK_WAKEUP_HANDLE ,
29
+ TASK_PUBSUB ,
29
30
)
30
31
from pulpcore .metrics import init_otel_meter
31
32
from pulpcore .app .apps import pulp_plugin_configs
32
33
from pulpcore .app .models import Worker , Task , ApiAppStatus , ContentAppStatus
33
34
from pulpcore .app .util import PGAdvisoryLock , get_domain
34
35
from pulpcore .exceptions import AdvisoryLockError
35
36
37
+ from pulpcore .tasking import pubsub
36
38
from pulpcore .tasking .storage import WorkerDirectory
37
39
from pulpcore .tasking ._util import (
38
40
delete_incomplete_resources ,
@@ -72,7 +74,6 @@ def __init__(self, auxiliary=False):
72
74
self .heartbeat_period = timedelta (seconds = settings .WORKER_TTL / 3 )
73
75
self .last_metric_heartbeat = timezone .now ()
74
76
self .versions = {app .label : app .version for app in pulp_plugin_configs ()}
75
- self .cursor = connection .cursor ()
76
77
self .worker = self .handle_worker_heartbeat ()
77
78
# This defaults to immediate task cancellation.
78
79
# It will be set into the future on moderately graceful worker shutdown,
@@ -81,6 +82,9 @@ def __init__(self, auxiliary=False):
81
82
self .worker_cleanup_countdown = random .randint (
82
83
int (WORKER_CLEANUP_INTERVAL / 10 ), WORKER_CLEANUP_INTERVAL
83
84
)
85
+ # Pubsub handling
86
+ self .pubsub_client = pubsub .PostgresPubSub (connection )
87
+ self .pubsub_channel_callback = {}
84
88
85
89
# Add a file descriptor to trigger select on signals
86
90
self .sentinel , sentinel_w = os .pipe ()
@@ -127,25 +131,6 @@ def _signal_handler(self, thesignal, frame):
127
131
)
128
132
self .shutdown_requested = True
129
133
130
- def _pg_notify_handler (self , notification ):
131
- if notification .channel == "pulp_worker_wakeup" :
132
- if notification .payload == TASK_WAKEUP_UNBLOCK :
133
- # Auxiliary workers don't do this.
134
- self .wakeup_unblock = not self .auxiliary
135
- elif notification .payload == TASK_WAKEUP_HANDLE :
136
- self .wakeup_handle = True
137
- else :
138
- _logger .warn ("Unknown wakeup call recieved. Reason: '%s'" , notification .payload )
139
- # We cannot be sure so assume everything happened.
140
- self .wakeup_unblock = not self .auxiliary
141
- self .wakeup_handle = True
142
-
143
- elif notification .channel == "pulp_worker_metrics_heartbeat" :
144
- self .last_metric_heartbeat = datetime .fromisoformat (notification .payload )
145
- elif self .task and notification .channel == "pulp_worker_cancel" :
146
- if notification .payload == str (self .task .pk ):
147
- self .cancel_task = True
148
-
149
134
def handle_worker_heartbeat (self ):
150
135
"""
151
136
Create or update worker heartbeat records.
@@ -205,9 +190,6 @@ def beat(self):
205
190
# to be able to report on a congested tasking system to produce reliable results.
206
191
self .record_unblocked_waiting_tasks_metric ()
207
192
208
- def notify_workers (self , reason = "unknown" ):
209
- self .cursor .execute ("SELECT pg_notify('pulp_worker_wakeup', %s)" , (reason ,))
210
-
211
193
def cancel_abandoned_task (self , task , final_state , reason = None ):
212
194
"""Cancel and clean up an abandoned task.
213
195
@@ -240,7 +222,7 @@ def cancel_abandoned_task(self, task, final_state, reason=None):
240
222
delete_incomplete_resources (task )
241
223
task .set_canceled (final_state = final_state , reason = reason )
242
224
if task .reserved_resources_record :
243
- self .notify_workers ( TASK_WAKEUP_UNBLOCK )
225
+ self .pubsub_client . wakeup_worker ( reason = TASK_WAKEUP_UNBLOCK )
244
226
return True
245
227
246
228
def is_compatible (self , task ):
@@ -280,7 +262,7 @@ def unblock_tasks(self):
280
262
self .wakeup_unblock_tasks = False
281
263
with contextlib .suppress (AdvisoryLockError ), PGAdvisoryLock (TASK_UNBLOCKING_LOCK ):
282
264
if count := self ._unblock_tasks ():
283
- self .notify_workers (TASK_WAKEUP_HANDLE )
265
+ self .pubsub_client . wakeup_worker (TASK_WAKEUP_HANDLE )
284
266
return count
285
267
286
268
def _unblock_tasks (self ):
@@ -405,11 +387,11 @@ def sleep(self):
405
387
_logger .debug (_ ("Worker %s entering sleep state." ), self .name )
406
388
while not self .shutdown_requested and not self .wakeup_handle :
407
389
r , w , x = select .select (
408
- [self .sentinel , connection . connection ], [], [], self .heartbeat_period .seconds
390
+ [self .sentinel , self . pubsub_client ], [], [], self .heartbeat_period .seconds
409
391
)
410
392
self .beat ()
411
- if connection . connection in r :
412
- connection . connection . execute ( "SELECT 1" )
393
+ if self . pubsub_client in r :
394
+ self . pubsub_handle_messages ( self . pubsub_client . fetch () )
413
395
if self .wakeup_unblock :
414
396
self .unblock_tasks ()
415
397
if self .sentinel in r :
@@ -447,14 +429,14 @@ def supervise_task(self, task):
447
429
os .kill (task_process .pid , signal .SIGUSR1 )
448
430
449
431
r , w , x = select .select (
450
- [self .sentinel , connection . connection , task_process .sentinel ],
432
+ [self .sentinel , self . pubsub_client , task_process .sentinel ],
451
433
[],
452
434
[],
453
435
self .heartbeat_period .seconds ,
454
436
)
455
437
self .beat ()
456
- if connection . connection in r :
457
- connection . connection . execute ( "SELECT 1" )
438
+ if self . pubsub_client in r :
439
+ self . pubsub_handle_messages ( self . pubsub_client . fetch () )
458
440
if self .cancel_task :
459
441
_logger .info (
460
442
_ ("Received signal to cancel current task %s in domain: %s." ),
@@ -506,7 +488,7 @@ def supervise_task(self, task):
506
488
if cancel_state :
507
489
self .cancel_abandoned_task (task , cancel_state , cancel_reason )
508
490
if task .reserved_resources_record :
509
- self .notify_workers ( TASK_WAKEUP_UNBLOCK )
491
+ self .pubsub_client . wakeup_worker ( reason = TASK_WAKEUP_UNBLOCK )
510
492
self .task = None
511
493
512
494
def handle_unblocked_tasks (self ):
@@ -559,26 +541,60 @@ def _record_unblocked_waiting_tasks_metric(self):
559
541
unblocked_tasks_stats ["longest_unblocked_waiting_time" ].seconds
560
542
)
561
543
562
- self .cursor .execute (f"NOTIFY pulp_worker_metrics_heartbeat, '{ str (now )} '" )
544
+ self .pubsub_client .record_worker_metrics (now )
545
+
546
def pubsub_handle_messages(self, messages):
    """Dispatch fetched pubsub messages to their per-channel callbacks.

    Args:
        messages: An iterable of message objects exposing ``channel`` and
            ``payload`` attributes (presumably ``pubsub.PubsubMessage``
            instances as returned by ``self.pubsub_client.fetch()`` --
            confirm against the pubsub module).

    Each message's payload is passed to the callback stored in
    ``self.pubsub_channel_callback`` under the message's channel.
    """
    for message in messages:
        callback = self.pubsub_channel_callback.get(message.channel)
        if callback is None:
            # No handler registered for this channel. Skip it instead of
            # raising KeyError, so one stray notification cannot abort the
            # worker loop.
            continue
        callback(message.payload)
550
+
551
def pubsub_setup(self):
    """Subscribe to the tasking pubsub channels and register their callbacks.

    For the wakeup, cancellation and metrics channels this subscribes
    ``self.pubsub_client`` to the channel and stores the matching handler in
    ``self.pubsub_channel_callback`` keyed by that channel. Every handler is
    called with the message payload only.
    """

    def cancellation_callback(message):
        # Only react when the payload names the task this worker currently holds.
        if self.task and message == str(self.task.pk):
            self.cancel_task = True

    def wakeup_callback(message):
        if message == TASK_WAKEUP_UNBLOCK:
            # Auxiliary workers don't do this.
            self.wakeup_unblock = not self.auxiliary
        elif message == TASK_WAKEUP_HANDLE:
            self.wakeup_handle = True
        else:
            # Logger.warn() is a deprecated alias; warning() is the supported name.
            _logger.warning("Unknown wakeup call recieved. Reason: '%s'", message)
            # We cannot be sure so assume everything happened.
            self.wakeup_unblock = not self.auxiliary
            self.wakeup_handle = True

    def metric_callback(message):
        # The payload is parsed as an ISO-format timestamp.
        self.last_metric_heartbeat = datetime.fromisoformat(message)

    channel_callbacks = (
        (TASK_PUBSUB.WAKEUP_WORKER, wakeup_callback),
        (TASK_PUBSUB.CANCEL_TASK, cancellation_callback),
        (TASK_PUBSUB.WORKER_METRICS, metric_callback),
    )
    for channel, callback in channel_callbacks:
        self.pubsub_client.subscribe(channel)
        self.pubsub_channel_callback[channel] = callback
577
+
578
def pubsub_teardown(self):
    """Unsubscribe the pubsub client from the wakeup, cancel and metrics channels."""
    channels = (
        TASK_PUBSUB.WAKEUP_WORKER,
        TASK_PUBSUB.CANCEL_TASK,
        TASK_PUBSUB.WORKER_METRICS,
    )
    for channel in channels:
        self.pubsub_client.unsubscribe(channel)
563
582
564
583
def run (self , burst = False ):
565
584
with WorkerDirectory (self .name ):
566
585
signal .signal (signal .SIGINT , self ._signal_handler )
567
586
signal .signal (signal .SIGTERM , self ._signal_handler )
568
587
signal .signal (signal .SIGHUP , self ._signal_handler )
569
- # Subscribe to pgsql channels
570
- connection .connection .add_notify_handler (self ._pg_notify_handler )
571
- self .cursor .execute ("LISTEN pulp_worker_cancel" )
572
- self .cursor .execute ("LISTEN pulp_worker_metrics_heartbeat" )
588
+ self .pubsub_setup ()
573
589
if burst :
590
+ self .pubsub_client .unsubscribe (self .pubsub_client .WORKER_WAKEUP )
574
591
if not self .auxiliary :
575
592
# Attempt to flush the task queue completely.
576
593
# Stop iteration if no new tasks were found to unblock.
577
594
while self .unblock_tasks ():
578
595
self .handle_unblocked_tasks ()
579
596
self .handle_unblocked_tasks ()
580
597
else :
581
- self .cursor .execute ("LISTEN pulp_worker_wakeup" )
582
598
while not self .shutdown_requested :
583
599
# do work
584
600
if self .shutdown_requested :
@@ -588,7 +604,5 @@ def run(self, burst=False):
588
604
break
589
605
# rest until notified to wakeup
590
606
self .sleep ()
591
- self .cursor .execute ("UNLISTEN pulp_worker_wakeup" )
592
- self .cursor .execute ("UNLISTEN pulp_worker_metrics_heartbeat" )
593
- self .cursor .execute ("UNLISTEN pulp_worker_cancel" )
607
+ self .pubsub_teardown ()
594
608
self .shutdown ()
0 commit comments