Skip to content

Commit 3c0c5a4

Browse files
Hiren Patel authored and facebook-github-bot committed
Adding Actor Id to Exceptions in Monarch Tracing Scuba (#1263)
Summary:
Pull Request resolved: #1263

The Actor Id column used to be null for Exceptions: {F1982094709} https://fburl.com/scuba/monarch_tracing/13hx70aj
Actor Id would show up for spans: {F1982094744} https://fburl.com/scuba/monarch_tracing/s2f6glbu
This change allows Exceptions to have the Actor Id column populated as well: {F1982094856} https://fburl.com/scuba/monarch_tracing/757k16xp

Reviewed By: eliothedeman
Differential Revision: D82748854
fbshipit-source-id: af08f202e46928f83736c826c8df3420a4ff9ed5
1 parent 0762d1f commit 3c0c5a4

File tree

3 files changed

+42
-6
lines changed

3 files changed

+42
-6
lines changed

monarch_hyperactor/src/telemetry.rs

Lines changed: 27 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,13 @@ pub fn forward_to_tracing(py: Python, record: PyObject) -> PyResult<()> {
3333
let file = record.getattr(py, "filename")?;
3434
let file: &str = file.extract(py)?;
3535
let level: i32 = record.getattr(py, "levelno")?.extract(py)?;
36+
37+
// Extract actor_id from the Python record object if available
38+
let actor_id = record
39+
.getattr(py, "actor_id")
40+
.ok()
41+
.and_then(|attr| attr.extract::<String>(py).ok());
42+
3643
// Map level number to level name
3744
match level {
3845
40 | 50 => {
@@ -53,21 +60,36 @@ pub fn forward_to_tracing(py: Python, record: PyObject) -> PyResult<()> {
5360
file = file,
5461
lineno = lineno,
5562
stacktrace = traceback,
63+
actor_id = actor_id.as_deref(),
5664
message
5765
);
5866
}
5967
None => {
60-
tracing::error!(file = file, lineno = lineno, message);
68+
tracing::error!(
69+
file = file,
70+
lineno = lineno,
71+
actor_id = actor_id.as_deref(),
72+
message
73+
);
6174
}
6275
}
6376
}
64-
30 => tracing::warn!(target:"log_events", file = file, lineno = lineno, message),
65-
20 => tracing::info!(target:"log_events", file = file, lineno = lineno, message),
66-
10 => tracing::debug!(target:"log_events", file = file, lineno = lineno, message),
67-
_ => tracing::info!(target:"log_events", file = file, lineno = lineno, message),
77+
30 => {
78+
tracing::warn!(target:"log_events", file = file, lineno = lineno, actor_id = actor_id.as_deref(), message)
79+
}
80+
20 => {
81+
tracing::info!(target:"log_events", file = file, lineno = lineno, actor_id = actor_id.as_deref(), message)
82+
}
83+
10 => {
84+
tracing::debug!(target:"log_events", file = file, lineno = lineno, actor_id = actor_id.as_deref(), message)
85+
}
86+
_ => {
87+
tracing::info!(target:"log_events", file = file, lineno = lineno, actor_id = actor_id.as_deref(), message)
88+
}
6889
}
6990
Ok(())
7091
}
92+
7193
#[pyfunction]
7294
pub fn use_real_clock() -> PyResult<()> {
7395
swap_telemetry_clock(ClockKind::Real(RealClock));

python/monarch/_src/actor/actor_mesh.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@
8383
from monarch._src.actor.python_extension_methods import rust_struct
8484
from monarch._src.actor.shape import MeshTrait, NDSlice
8585
from monarch._src.actor.sync_state import fake_sync_state
86-
from monarch._src.actor.telemetry import METER
86+
from monarch._src.actor.telemetry import METER, TracingForwarder
8787
from monarch._src.actor.tensor_engine_shim import actor_rref, actor_send
8888
from typing_extensions import Self
8989

@@ -97,6 +97,7 @@
9797
CallMethod = PythonMessageKind.CallMethod
9898

9999
logger: logging.Logger = logging.getLogger(__name__)
100+
logging.root.addHandler(TracingForwarder(level=logging.DEBUG))
100101

101102
TRACER = get_monarch_tracer()
102103

python/monarch/_src/actor/telemetry/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,19 @@
2828

2929
class TracingForwarder(logging.Handler):
3030
def emit(self, record: logging.LogRecord) -> None:
31+
# Try to add actor_id from the current context to the logging record
32+
try:
33+
from monarch._src.actor.actor_mesh import context
34+
35+
ctx = context()
36+
if ctx and ctx.actor_instance and ctx.actor_instance.actor_id:
37+
# Add actor_id as an attribute to the logging record
38+
setattr(record, "actor_id", str(ctx.actor_instance.actor_id))
39+
except Exception:
40+
# If we can't get the context or actor_id for any reason, just continue
41+
# without adding the actor_id field
42+
pass
43+
3144
forward_to_tracing(record)
3245

3346

0 commit comments

Comments
 (0)