13
13
import inspect
14
14
import itertools
15
15
import logging
16
- import random
17
16
from abc import abstractproperty
18
17
19
18
from dataclasses import dataclass
48
47
PythonMessage ,
49
48
PythonMessageKind ,
50
49
)
51
- from monarch ._rust_bindings .monarch_hyperactor .actor_mesh import (
52
- PythonActorMesh ,
53
- PythonActorMeshImpl ,
54
- )
50
+ from monarch ._rust_bindings .monarch_hyperactor .actor_mesh import PythonActorMesh
55
51
from monarch ._rust_bindings .monarch_hyperactor .context import Instance as HyInstance
56
52
from monarch ._rust_bindings .monarch_hyperactor .mailbox import (
57
53
Mailbox ,
58
- OncePortReceiver as HyOncePortReceiver , # noqa: F401
59
54
OncePortRef ,
60
- PortReceiver as HyPortReceiver , # noqa: F401
61
55
PortRef ,
62
56
UndeliverableMessageEnvelope ,
63
57
)
71
65
Region ,
72
66
Shape ,
73
67
)
74
- from monarch ._rust_bindings .monarch_hyperactor .supervision import SupervisionError
75
-
76
68
from monarch ._rust_bindings .monarch_hyperactor .value_mesh import (
77
69
ValueMesh as HyValueMesh ,
78
70
)
@@ -251,10 +243,6 @@ def set(debug_context: "DebugContext") -> None:
251
243
R = TypeVar ("R" )
252
244
A = TypeVar ("A" )
253
245
254
- # keep this load balancing deterministic, but
255
- # equally distributed.
256
- _load_balancing_seed = random .Random (4 )
257
-
258
246
259
247
class _SingletonActorAdapator :
260
248
def __init__ (self , inner : ActorId , region : Optional [Region ] = None ) -> None :
@@ -287,159 +275,6 @@ async def empty():
287
275
return PythonTask .from_coroutine (empty ())
288
276
289
277
290
- # standin class for whatever is the serializable python object we use
291
- # to name an actor mesh. Hacked up today because ActorMesh
292
- # isn't plumbed to non-clients
293
- class _ActorMeshRefImpl :
294
- def __init__ (
295
- self ,
296
- mailbox : Mailbox ,
297
- hy_actor_mesh : Optional [PythonActorMeshImpl ],
298
- proc_mesh : "Optional[ProcMesh]" ,
299
- shape : Shape ,
300
- actor_ids : List [ActorId ],
301
- ) -> None :
302
- self ._mailbox = mailbox
303
- self ._actor_mesh = hy_actor_mesh
304
- # actor meshes do not have a way to look this up at the moment,
305
- # so we fake it here
306
- self ._proc_mesh = proc_mesh
307
- self ._shape = shape
308
- self ._please_replace_me_actor_ids = actor_ids
309
-
310
- @staticmethod
311
- def from_hyperactor_mesh (
312
- mailbox : Mailbox ,
313
- shape : Shape ,
314
- hy_actor_mesh : PythonActorMeshImpl ,
315
- proc_mesh : "ProcMesh" ,
316
- ) -> "_ActorMeshRefImpl" :
317
- return _ActorMeshRefImpl (
318
- mailbox ,
319
- hy_actor_mesh ,
320
- proc_mesh ,
321
- shape ,
322
- [cast (ActorId , hy_actor_mesh .get (i )) for i in range (len (shape ))],
323
- )
324
-
325
- def __getstate__ (
326
- self ,
327
- ) -> Tuple [Shape , List [ActorId ], Mailbox ]:
328
- return self ._shape , self ._please_replace_me_actor_ids , self ._mailbox
329
-
330
- def __setstate__ (
331
- self ,
332
- state : Tuple [Shape , List [ActorId ], Mailbox ],
333
- ) -> None :
334
- self ._actor_mesh = None
335
- self ._shape , self ._please_replace_me_actor_ids , self ._mailbox = state
336
-
337
- def _check_state (self ) -> None :
338
- # This is temporary until we have real cast integration here. We need to actively check
339
- # supervision error here is because all communication is done through direct mailbox sending
340
- # and not through comm actor casting.
341
- # TODO: remove this when casting integration is done.
342
- if self ._actor_mesh is not None :
343
- if self ._actor_mesh .stopped :
344
- raise SupervisionError (
345
- "actor mesh is unhealthy with reason: actor mesh is stopped due to proc mesh shutdown. "
346
- "`PythonActorMesh` has already been stopped."
347
- )
348
-
349
- event = self ._actor_mesh .get_supervision_event ()
350
- if event is not None :
351
- raise SupervisionError (f"actor mesh is unhealthy with reason: { event } " )
352
-
353
- def cast (
354
- self ,
355
- message : PythonMessage ,
356
- selection : str ,
357
- instance : HyInstance ,
358
- ) -> None :
359
- self ._check_state ()
360
-
361
- # TODO: use the actual actor mesh when available. We cannot currently use it
362
- # directly because we risk bifurcating the message delivery paths from the same
363
- # client, since slicing the mesh will produce a reference, which calls actors
364
- # directly. The reason these paths are bifurcated is that actor meshes will
365
- # use multicasting, while direct actor comms do not. Separately we need to decide
366
- # whether actor meshes are ordered with actor references.
367
- #
368
- # The fix is to provide a first-class reference into Python, and always call "cast"
369
- # on it, including for load balanced requests.
370
- if selection == "choose" :
371
- idx = _load_balancing_seed .randrange (len (self ._shape ))
372
- actor_rank = self ._shape .ndslice [idx ]
373
- self ._mailbox .post (self ._please_replace_me_actor_ids [actor_rank ], message )
374
- elif selection == "all" :
375
- # replace me with actual remote actor mesh
376
- call_shape = Shape (
377
- self ._shape .labels , NDSlice .new_row_major (self ._shape .ndslice .sizes )
378
- )
379
- for i , rank in enumerate (self ._shape .ranks ()):
380
- self ._mailbox .post_cast (
381
- self ._please_replace_me_actor_ids [rank ],
382
- i ,
383
- call_shape ,
384
- message ,
385
- )
386
- elif isinstance (selection , int ):
387
- try :
388
- self ._mailbox .post (
389
- self ._please_replace_me_actor_ids [selection ], message
390
- )
391
- except IndexError :
392
- raise IndexError (
393
- f"Tried to send to an out-of-range rank { selection } : "
394
- f"mesh has { len (self ._please_replace_me_actor_ids )} elements."
395
- )
396
- else :
397
- raise ValueError (f"invalid selection: { selection } " )
398
-
399
- def __len__ (self ) -> int :
400
- return len (self ._shape )
401
-
402
- @property
403
- def _name_pid (self ):
404
- actor_id0 = self ._please_replace_me_actor_ids [0 ]
405
- return actor_id0 .actor_name , actor_id0 .pid
406
-
407
- @property
408
- def shape (self ) -> Shape :
409
- return self ._shape
410
-
411
- @property
412
- def proc_mesh (self ) -> Optional ["ProcMesh" ]:
413
- return self ._proc_mesh
414
-
415
- def new_with_region (self , region : Region ) -> "_ActorMeshRefImpl" :
416
- return _ActorMeshRefImpl (
417
- self ._mailbox ,
418
- None ,
419
- None ,
420
- region .as_shape (),
421
- self ._please_replace_me_actor_ids ,
422
- )
423
-
424
- def supervision_event (self ) -> "Optional[Shared[Exception]]" :
425
- if self ._actor_mesh is None :
426
- return None
427
- return self ._actor_mesh .supervision_event ()
428
-
429
- def stop (self ) -> PythonTask [None ]:
430
- async def task ():
431
- if self ._actor_mesh is not None :
432
- self ._actor_mesh .stop ()
433
-
434
- return PythonTask .from_coroutine (task ())
435
-
436
- def initialized (self ) -> PythonTask [None ]:
437
- async def task ():
438
- pass
439
-
440
- return PythonTask .from_coroutine (task ())
441
-
442
-
443
278
class ActorEndpoint (Endpoint [P , R ]):
444
279
def __init__ (
445
280
self ,
@@ -1148,7 +983,7 @@ def _endpoint(
1148
983
def _create (
1149
984
cls ,
1150
985
Class : Type [T ],
1151
- actor_mesh : "PythonActorMesh | PythonActorMeshImpl " ,
986
+ actor_mesh : "PythonActorMesh" ,
1152
987
mailbox : Mailbox ,
1153
988
shape : Shape ,
1154
989
proc_mesh : "ProcMesh" ,
@@ -1158,11 +993,6 @@ def _create(
1158
993
* args : Any ,
1159
994
** kwargs : Any ,
1160
995
) -> "ActorMesh[T]" :
1161
- if isinstance (actor_mesh , PythonActorMeshImpl ):
1162
- actor_mesh = _ActorMeshRefImpl .from_hyperactor_mesh (
1163
- mailbox , shape , actor_mesh , proc_mesh
1164
- )
1165
-
1166
996
mesh = cls (Class , actor_mesh , shape , proc_mesh )
1167
997
1168
998
async def null_func (* _args : Iterable [Any ], ** _kwargs : Dict [str , Any ]) -> None :
0 commit comments