Added choose endpoint latency. (#1771)

vidhyav · meta-codesync[bot] · commit 6273659aa43d · 2025-11-11T13:29:54.000-08:00
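Summary: Pull Request resolved: #1771 Added a latency histogram (endpoint_choose_latency.us) for the endpoint choose operation. Also rewrote the latency measurement as a decorator factory shared by choose, call_one, call, and stream. Reviewed By: pzhan9 Differential Revision: D86236036 fbshipit-source-id: 3892413a09cc81327e2ed946fc04937f04fd530b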
diff --git a/python/monarch/_src/actor/endpoint.py b/python/monarch/_src/actor/endpoint.py
@@ -57,12 +57,18 @@
     description="Latency of endpoint stream operations per yield in microseconds",
 )
 
+# Histogram for measuring endpoint choose latency
+endpoint_choose_latency_histogram: Histogram = METER.create_histogram(
+    name="endpoint_choose_latency.us",
+    description="Latency of endpoint choose operations in microseconds",
+)
+
 T = TypeVar("T")
 
 
 def _measure_latency(
     coro: Coroutine[Any, Any, T],
-    start_time: int,
+    start_time_ns: int,
     histogram: Histogram,
     method_name: str,
     actor_count: int,
@@ -84,7 +90,7 @@ async def _wrapper() -> T:
         try:
             return await coro
         finally:
-            duration_us = int((time.monotonic_ns() - start_time) / 1_000)
+            duration_us = int((time.monotonic_ns() - start_time_ns) / 1_000)
             histogram.record(
                 duration_us,
                 attributes={
@@ -130,6 +136,33 @@ def _get_method_name(self) -> str:
             return method_specifier.name
         return "unknown"
 
+    def _with_latency_measurement(
+        self, start_time_ns: int, histogram: Histogram, actor_count: int
+    ) -> Any:
+        """
+        Decorator factory to add latency measurement to async functions.
+
+        Args:
+            start_time_ns: Start timestamp from time.monotonic_ns()
+            histogram: The histogram to record metrics to
+            actor_count: Number of actors involved in the operation
+
+        Returns: A decorator that wraps async functions with latency measurement
+        """
+        method_name: str = self._get_method_name()
+
+        def decorator(func: Any) -> Any:
+            @functools.wraps(func)
+            def wrapper(*args: Any, **kwargs: Any) -> Any:
+                coro = func(*args, **kwargs)
+                return _measure_latency(
+                    coro, start_time_ns, histogram, method_name, actor_count
+                )
+
+            return wrapper
+
+        return decorator
+
     @abstractmethod
     def _send(
         self,
@@ -178,10 +211,20 @@ def choose(self, *args: P.args, **kwargs: P.kwargs) -> Future[R]:
         Load balanced RPC-style entrypoint for request/response messaging.
         """
 
-        p, r = self._port(once=True)
+        p, r_port = self._port(once=True)
+        r: "PortReceiver[R]" = r_port
+        start_time: int = time.monotonic_ns()
         # pyre-ignore[6]: ParamSpec kwargs is compatible with Dict[str, Any]
         self._send(args, kwargs, port=p, selection="choose")
-        return r.recv()
+
+        @self._with_latency_measurement(
+            start_time, endpoint_choose_latency_histogram, 1
+        )
+        async def process() -> R:
+            result = await r.recv()
+            return result
+
+        return Future(coro=process())
 
     def call_one(self, *args: P.args, **kwargs: P.kwargs) -> Future[R]:
         p, r_port = self._port(once=True)
@@ -194,32 +237,27 @@ def call_one(self, *args: P.args, **kwargs: P.kwargs) -> Future[R]:
                 f"Can only use 'call_one' on a single Actor but this actor has shape {extent}"
             )
 
-        method_name = self._get_method_name()
-
+        @self._with_latency_measurement(
+            start_time, endpoint_call_one_latency_histogram, 1
+        )
         async def process() -> R:
             result = await r.recv()
             return result
 
-        measured_coro = _measure_latency(
-            process(),
-            start_time,
-            endpoint_call_one_latency_histogram,
-            method_name,
-            1,
-        )
-        return Future(coro=measured_coro)
+        return Future(coro=process())
 
     def call(self, *args: P.args, **kwargs: P.kwargs) -> "Future[ValueMesh[R]]":
         from monarch._src.actor.actor_mesh import RankedPortReceiver, ValueMesh
 
+        start_time: int = time.monotonic_ns()
         p, unranked = self._port()
         r: RankedPortReceiver[R] = unranked.ranked()
-        start_time: int = time.monotonic_ns()
         # pyre-ignore[6]: ParamSpec kwargs is compatible with Dict[str, Any]
         extent: Extent = self._send(args, kwargs, port=p)
 
-        method_name = self._get_method_name()
-
+        @self._with_latency_measurement(
+            start_time, endpoint_call_latency_histogram, extent.nelements
+        )
         async def process() -> "ValueMesh[R]":
             from monarch._rust_bindings.monarch_hyperactor.shape import Shape
             from monarch._src.actor.shape import NDSlice
@@ -234,14 +272,7 @@ async def process() -> "ValueMesh[R]":
             )
             return ValueMesh(call_shape, results)
 
-        measured_coro = _measure_latency(
-            process(),
-            start_time,
-            endpoint_call_latency_histogram,
-            method_name,
-            extent.nelements,
-        )
-        return Future(coro=measured_coro)
+        return Future(coro=process())
 
     def stream(
         self, *args: P.args, **kwargs: P.kwargs
@@ -258,18 +289,18 @@ def stream(
         extent: Extent = self._send(args, kwargs, port=p)
         r: "PortReceiver[R]" = r_port
 
-        method_name: str = self._get_method_name()
+        latency_decorator: Any = self._with_latency_measurement(
+            start_time, endpoint_stream_latency_histogram, extent.nelements
+        )
 
         def _stream() -> Generator[Future[R], None, None]:
             for _ in range(extent.nelements):
-                measured_coro = _measure_latency(
-                    r._recv(),
-                    start_time,
-                    endpoint_stream_latency_histogram,
-                    method_name,
-                    extent.nelements,
-                )
-                yield Future(coro=measured_coro)
+
+                @latency_decorator
+                async def receive() -> R:
+                    return await r._recv()
+
+                yield Future(coro=receive())
 
         return _stream()