From b2bec8819dc63238119dc588ca27aa63cd2d140a Mon Sep 17 00:00:00 2001 From: Tzanko Matev Date: Thu, 21 Aug 2025 12:39:13 +0300 Subject: [PATCH 1/3] Start-Agent-Branch: codetype-interface Target-Remote: https://github.com/metacraft-labs/codetracer-python-recorder.git --- .../tasks/2025/08/21-0939-codetype-interface | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .agents/tasks/2025/08/21-0939-codetype-interface diff --git a/.agents/tasks/2025/08/21-0939-codetype-interface b/.agents/tasks/2025/08/21-0939-codetype-interface new file mode 100644 index 0000000..e629873 --- /dev/null +++ b/.agents/tasks/2025/08/21-0939-codetype-interface @@ -0,0 +1,19 @@ +Implement an interface for the CodeType object. + +The Python Monitoring API passes a CodeObject to each event handler. In the current implemenation of our Tracer trait the event handlers have an argument `_code: &Bound<'_, PyAny>` which gives +access to this object. This is not a good interface because the type is too generic. PyO3 has a PyCodeObject type, however it doesn't expose any public members because the type is unstable, +so we cannot use that one. + +We need to create our own type CodeObjectWrapper which can simplify access to the underlying code object type. Then we should use this type in the signature of the methods in the Tracer trait. +The type should allow easy access to that functionality of the underlying code object which we will need to implement a recorder for a time-travel debugger. +On the other hand it is important not to introduce any performance problems. Some ideas to think about: +- Minimize the copying of values +- Repeated computations on each event call could be memoized +- Any other approach that minimizes the performance hit + +Propose a design of the CodeObjectWrapper type. Write the design in design-docs/code-object.md. Do not actually implement the type for now, I need to confirm the design first. + +Here's relevant information: +* design-docs/design-001.md - shows how we write design documentation +* https://docs.python.org/3/library/sys.monitoring.html - Documentation of the Python sys.montoring API +* https://docs.python.org/3/reference/datamodel.html#code-objects - Description of Python Code Objects. From 44077c6b4f63247abeeb1ac149fa75fecce1c7db Mon Sep 17 00:00:00 2001 From: Tzanko Matev Date: Thu, 21 Aug 2025 19:02:08 +0300 Subject: [PATCH 2/3] docs: describe global registry for code objects --- .../tasks/2025/08/21-0939-codetype-interface | 21 ++ codetracer-python-recorder/Cargo.lock | 1 + codetracer-python-recorder/Cargo.toml | 1 + codetracer-python-recorder/src/code_object.rs | 130 ++++++++ codetracer-python-recorder/src/lib.rs | 2 + codetracer-python-recorder/src/tracer.rs | 101 ++++-- .../tests/code_object_wrapper.rs | 55 ++++ .../tests/print_tracer.rs | 311 ++++-------------- design-docs/code-object.md | 142 ++++++++ 9 files changed, 478 insertions(+), 286 deletions(-) create mode 100644 codetracer-python-recorder/src/code_object.rs create mode 100644 codetracer-python-recorder/tests/code_object_wrapper.rs create mode 100644 design-docs/code-object.md diff --git a/.agents/tasks/2025/08/21-0939-codetype-interface b/.agents/tasks/2025/08/21-0939-codetype-interface index e629873..561430e 100644 --- a/.agents/tasks/2025/08/21-0939-codetype-interface +++ b/.agents/tasks/2025/08/21-0939-codetype-interface @@ -17,3 +17,24 @@ Here's relevant information: * design-docs/design-001.md - shows how we write design documentation * https://docs.python.org/3/library/sys.monitoring.html - Documentation of the Python sys.montoring API * https://docs.python.org/3/reference/datamodel.html#code-objects - Description of Python Code Objects. + +--- FOLLOW UP TASK --- +Please address any inline comments on the diff, as well as any additional instructions below. + +According to the PyO3 documentation it is preferred to use instead of Py. Is it possible that the code object wrapper takes that into account? Here is relevant info: +* https://pyo3.rs/v0.25.1/types.html +* https://docs.rs/pyo3/0.25.1/pyo3/index.html + +Also please add usage examples to the design documentation +--- FOLLOW UP TASK --- +Please address any inline comments on the diff, as well as any additional instructions below. + +According to the PyO3 documentation it is preferred to use `Bound<'_, T>` instead of Py. Is it possible that the code object wrapper takes that into account? Here is relevant info: +* https://pyo3.rs/v0.25.1/types.html +* https://docs.rs/pyo3/0.25.1/pyo3/index.html + +Also please add usage examples to the design documentation +--- FOLLOW UP TASK --- +Implement the CodeObjectWrapper as designed. Update the Tracer trait as well as the callback_xxx functions accordingly. Write a comprehensive unit tests for CodeObjectWrapper. +--- FOLLOW UP TASK --- +There is an issue in the current implementation. We don't use caching effectively, since we create a new CodeObjectWrapper at each callback_xxx call. We need a global cache, probably keyed by the code object id. Propose design changes and update the design documents. Don't implement the changes themselves before I approve them. \ No newline at end of file diff --git a/codetracer-python-recorder/Cargo.lock b/codetracer-python-recorder/Cargo.lock index e0c1fa0..85f939e 100644 --- a/codetracer-python-recorder/Cargo.lock +++ b/codetracer-python-recorder/Cargo.lock @@ -69,6 +69,7 @@ name = "codetracer-python-recorder" version = "0.1.0" dependencies = [ "bitflags", + "once_cell", "pyo3", "runtime_tracing", ] diff --git a/codetracer-python-recorder/Cargo.toml b/codetracer-python-recorder/Cargo.toml index b25b86c..8acbb13 100644 --- a/codetracer-python-recorder/Cargo.toml +++ b/codetracer-python-recorder/Cargo.toml @@ -18,6 +18,7 @@ default = ["extension-module"] pyo3 = { version = "0.25.1" } runtime_tracing = "0.14.0" bitflags = "2.4" +once_cell = "1.19" [dev-dependencies] pyo3 = { version = "0.25.1", features = ["auto-initialize"] } diff --git a/codetracer-python-recorder/src/code_object.rs b/codetracer-python-recorder/src/code_object.rs new file mode 100644 index 0000000..fb93c40 --- /dev/null +++ b/codetracer-python-recorder/src/code_object.rs @@ -0,0 +1,130 @@ +use once_cell::sync::OnceCell; +use pyo3::prelude::*; +use pyo3::types::PyCode; + +/// A wrapper around Python `code` objects providing cached access to +/// common attributes and line information. +pub struct CodeObjectWrapper { + obj: Py, + id: usize, + cache: CodeObjectCache, +} + +#[derive(Default)] +struct CodeObjectCache { + filename: OnceCell, + qualname: OnceCell, + firstlineno: OnceCell, + argcount: OnceCell, + flags: OnceCell, + lines: OnceCell>, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct LineEntry { + pub offset: u32, + pub line: u32, +} + +impl CodeObjectWrapper { + /// Construct from a `CodeType` object. Computes `id` eagerly. + pub fn new(_py: Python<'_>, obj: &Bound<'_, PyCode>) -> Self { + let id = obj.as_ptr() as usize; + Self { + obj: obj.clone().unbind(), + id, + cache: CodeObjectCache::default(), + } + } + + /// Borrow the owned `Py` as a `Bound<'py, PyCode>`. + pub fn as_bound<'py>(&'py self, py: Python<'py>) -> Bound<'py, PyCode> { + self.obj.bind(py).clone() + } + + /// Return the stable identity of the code object (equivalent to `id(code)`). + pub fn id(&self) -> usize { + self.id + } + + pub fn filename<'py>(&'py self, py: Python<'py>) -> PyResult<&'py str> { + let value = self.cache.filename.get_or_try_init(|| -> PyResult { + let s: String = self + .as_bound(py) + .getattr("co_filename")? + .extract()?; + Ok(s) + })?; + Ok(value.as_str()) + } + + pub fn qualname<'py>(&'py self, py: Python<'py>) -> PyResult<&'py str> { + let value = self.cache.qualname.get_or_try_init(|| -> PyResult { + let s: String = self + .as_bound(py) + .getattr("co_qualname")? + .extract()?; + Ok(s) + })?; + Ok(value.as_str()) + } + + pub fn first_line(&self, py: Python<'_>) -> PyResult { + let value = *self.cache.firstlineno.get_or_try_init(|| -> PyResult { + let v: u32 = self + .as_bound(py) + .getattr("co_firstlineno")? + .extract()?; + Ok(v) + })?; + Ok(value) + } + + pub fn arg_count(&self, py: Python<'_>) -> PyResult { + let value = *self.cache.argcount.get_or_try_init(|| -> PyResult { + let v: u16 = self + .as_bound(py) + .getattr("co_argcount")? + .extract()?; + Ok(v) + })?; + Ok(value) + } + + pub fn flags(&self, py: Python<'_>) -> PyResult { + let value = *self.cache.flags.get_or_try_init(|| -> PyResult { + let v: u32 = self + .as_bound(py) + .getattr("co_flags")? + .extract()?; + Ok(v) + })?; + Ok(value) + } + + fn lines<'py>(&'py self, py: Python<'py>) -> PyResult<&'py [LineEntry]> { + let vec = self.cache.lines.get_or_try_init(|| -> PyResult> { + let mut entries = Vec::new(); + let iter = self.as_bound(py).call_method0("co_lines")?; + let iter = iter.try_iter()?; + for item in iter { + let (start, _end, line): (u32, u32, Option) = item?.extract()?; + if let Some(line) = line { + entries.push(LineEntry { offset: start, line }); + } + } + Ok(entries) + })?; + Ok(vec.as_slice()) + } + + /// Return the source line for a given instruction offset using a binary search. + pub fn line_for_offset(&self, py: Python<'_>, offset: u32) -> PyResult> { + let lines = self.lines(py)?; + match lines.binary_search_by_key(&offset, |e| e.offset) { + Ok(idx) => Ok(Some(lines[idx].line)), + Err(0) => Ok(None), + Err(idx) => Ok(Some(lines[idx - 1].line)), + } + } +} diff --git a/codetracer-python-recorder/src/lib.rs b/codetracer-python-recorder/src/lib.rs index 31d6b52..a1e1354 100644 --- a/codetracer-python-recorder/src/lib.rs +++ b/codetracer-python-recorder/src/lib.rs @@ -3,7 +3,9 @@ use std::sync::atomic::{AtomicBool, Ordering}; use pyo3::exceptions::PyRuntimeError; use pyo3::prelude::*; +pub mod code_object; pub mod tracer; +pub use crate::code_object::CodeObjectWrapper; pub use crate::tracer::{install_tracer, uninstall_tracer, EventSet, Tracer}; /// Global flag tracking whether tracing is active. diff --git a/codetracer-python-recorder/src/tracer.rs b/codetracer-python-recorder/src/tracer.rs index d40626b..a940cda 100644 --- a/codetracer-python-recorder/src/tracer.rs +++ b/codetracer-python-recorder/src/tracer.rs @@ -2,8 +2,9 @@ use std::sync::{Mutex, OnceLock}; use pyo3::{ exceptions::PyRuntimeError, prelude::*, - types::{PyAny, PyCFunction, PyModule}, + types::{PyAny, PyCFunction, PyCode, PyModule}, }; +use crate::code_object::CodeObjectWrapper; const MONITORING_TOOL_NAME: &str = "codetracer"; @@ -150,7 +151,7 @@ pub trait Tracer: Send { fn on_call( &mut self, _py: Python<'_>, - _code: &Bound<'_, PyAny>, + _code: &CodeObjectWrapper, _offset: i32, _callable: &Bound<'_, PyAny>, _arg0: Option<&Bound<'_, PyAny>>, @@ -158,16 +159,16 @@ pub trait Tracer: Send { } /// Called on line execution. - fn on_line(&mut self, _py: Python<'_>, _code: &Bound<'_, PyAny>, _lineno: u32) {} + fn on_line(&mut self, _py: Python<'_>, _code: &CodeObjectWrapper, _lineno: u32) {} /// Called when an instruction is about to be executed (by offset). - fn on_instruction(&mut self, _py: Python<'_>, _code: &Bound<'_, PyAny>, _offset: i32) {} + fn on_instruction(&mut self, _py: Python<'_>, _code: &CodeObjectWrapper, _offset: i32) {} /// Called when a jump in the control flow graph is made. fn on_jump( &mut self, _py: Python<'_>, - _code: &Bound<'_, PyAny>, + _code: &CodeObjectWrapper, _offset: i32, _destination_offset: i32, ) { @@ -177,23 +178,23 @@ pub trait Tracer: Send { fn on_branch( &mut self, _py: Python<'_>, - _code: &Bound<'_, PyAny>, + _code: &CodeObjectWrapper, _offset: i32, _destination_offset: i32, ) { } /// Called at start of a Python function (frame on stack). - fn on_py_start(&mut self, _py: Python<'_>, _code: &Bound<'_, PyAny>, _offset: i32) {} + fn on_py_start(&mut self, _py: Python<'_>, _code: &CodeObjectWrapper, _offset: i32) {} /// Called on resumption of a generator/coroutine (not via throw()). - fn on_py_resume(&mut self, _py: Python<'_>, _code: &Bound<'_, PyAny>, _offset: i32) {} + fn on_py_resume(&mut self, _py: Python<'_>, _code: &CodeObjectWrapper, _offset: i32) {} /// Called immediately before a Python function returns. fn on_py_return( &mut self, _py: Python<'_>, - _code: &Bound<'_, PyAny>, + _code: &CodeObjectWrapper, _offset: i32, _retval: &Bound<'_, PyAny>, ) { @@ -203,7 +204,7 @@ pub trait Tracer: Send { fn on_py_yield( &mut self, _py: Python<'_>, - _code: &Bound<'_, PyAny>, + _code: &CodeObjectWrapper, _offset: i32, _retval: &Bound<'_, PyAny>, ) { @@ -213,7 +214,7 @@ pub trait Tracer: Send { fn on_py_throw( &mut self, _py: Python<'_>, - _code: &Bound<'_, PyAny>, + _code: &CodeObjectWrapper, _offset: i32, _exception: &Bound<'_, PyAny>, ) { @@ -223,7 +224,7 @@ pub trait Tracer: Send { fn on_py_unwind( &mut self, _py: Python<'_>, - _code: &Bound<'_, PyAny>, + _code: &CodeObjectWrapper, _offset: i32, _exception: &Bound<'_, PyAny>, ) { @@ -233,7 +234,7 @@ pub trait Tracer: Send { fn on_raise( &mut self, _py: Python<'_>, - _code: &Bound<'_, PyAny>, + _code: &CodeObjectWrapper, _offset: i32, _exception: &Bound<'_, PyAny>, ) { @@ -243,7 +244,7 @@ pub trait Tracer: Send { fn on_reraise( &mut self, _py: Python<'_>, - _code: &Bound<'_, PyAny>, + _code: &CodeObjectWrapper, _offset: i32, _exception: &Bound<'_, PyAny>, ) { @@ -253,7 +254,7 @@ pub trait Tracer: Send { fn on_exception_handled( &mut self, _py: Python<'_>, - _code: &Bound<'_, PyAny>, + _code: &CodeObjectWrapper, _offset: i32, _exception: &Bound<'_, PyAny>, ) { @@ -267,7 +268,7 @@ pub trait Tracer: Send { // fn on_stop_iteration( // &mut self, // _py: Python<'_>, - // _code: &Bound<'_, PyAny>, + // _code: &CodeObjectWrapper, // _offset: i32, // _exception: &Bound<'_, PyAny>, // ) { @@ -277,7 +278,7 @@ pub trait Tracer: Send { fn on_c_return( &mut self, _py: Python<'_>, - _code: &Bound<'_, PyAny>, + _code: &CodeObjectWrapper, _offset: i32, _callable: &Bound<'_, PyAny>, _arg0: Option<&Bound<'_, PyAny>>, @@ -288,7 +289,7 @@ pub trait Tracer: Send { fn on_c_raise( &mut self, _py: Python<'_>, - _code: &Bound<'_, PyAny>, + _code: &CodeObjectWrapper, _offset: i32, _callable: &Bound<'_, PyAny>, _arg0: Option<&Bound<'_, PyAny>>, @@ -471,7 +472,9 @@ fn callback_call( arg0: Option>, ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { - global.tracer.on_call(py, &code, offset, &callable, arg0.as_ref()); + let code = code.downcast::()?; + let wrapper = CodeObjectWrapper::new(py, &code); + global.tracer.on_call(py, &wrapper, offset, &callable, arg0.as_ref()); } Ok(()) } @@ -479,7 +482,9 @@ fn callback_call( #[pyfunction] fn callback_line(py: Python<'_>, code: Bound<'_, PyAny>, lineno: u32) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { - global.tracer.on_line(py, &code, lineno); + let code = code.downcast::()?; + let wrapper = CodeObjectWrapper::new(py, &code); + global.tracer.on_line(py, &wrapper, lineno); } Ok(()) } @@ -487,7 +492,9 @@ fn callback_line(py: Python<'_>, code: Bound<'_, PyAny>, lineno: u32) -> PyResul #[pyfunction] fn callback_instruction(py: Python<'_>, code: Bound<'_, PyAny>, instruction_offset: i32) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { - global.tracer.on_instruction(py, &code, instruction_offset); + let code = code.downcast::()?; + let wrapper = CodeObjectWrapper::new(py, &code); + global.tracer.on_instruction(py, &wrapper, instruction_offset); } Ok(()) } @@ -500,9 +507,11 @@ fn callback_jump( destination_offset: i32, ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { + let code = code.downcast::()?; + let wrapper = CodeObjectWrapper::new(py, &code); global .tracer - .on_jump(py, &code, instruction_offset, destination_offset); + .on_jump(py, &wrapper, instruction_offset, destination_offset); } Ok(()) } @@ -515,9 +524,11 @@ fn callback_branch( destination_offset: i32, ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { + let code = code.downcast::()?; + let wrapper = CodeObjectWrapper::new(py, &code); global .tracer - .on_branch(py, &code, instruction_offset, destination_offset); + .on_branch(py, &wrapper, instruction_offset, destination_offset); } Ok(()) } @@ -525,7 +536,9 @@ fn callback_branch( #[pyfunction] fn callback_py_start(py: Python<'_>, code: Bound<'_, PyAny>, instruction_offset: i32) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { - global.tracer.on_py_start(py, &code, instruction_offset); + let code = code.downcast::()?; + let wrapper = CodeObjectWrapper::new(py, &code); + global.tracer.on_py_start(py, &wrapper, instruction_offset); } Ok(()) } @@ -533,7 +546,9 @@ fn callback_py_start(py: Python<'_>, code: Bound<'_, PyAny>, instruction_offset: #[pyfunction] fn callback_py_resume(py: Python<'_>, code: Bound<'_, PyAny>, instruction_offset: i32) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { - global.tracer.on_py_resume(py, &code, instruction_offset); + let code = code.downcast::()?; + let wrapper = CodeObjectWrapper::new(py, &code); + global.tracer.on_py_resume(py, &wrapper, instruction_offset); } Ok(()) } @@ -546,7 +561,9 @@ fn callback_py_return( retval: Bound<'_, PyAny>, ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { - global.tracer.on_py_return(py, &code, instruction_offset, &retval); + let code = code.downcast::()?; + let wrapper = CodeObjectWrapper::new(py, &code); + global.tracer.on_py_return(py, &wrapper, instruction_offset, &retval); } Ok(()) } @@ -559,7 +576,9 @@ fn callback_py_yield( retval: Bound<'_, PyAny>, ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { - global.tracer.on_py_yield(py, &code, instruction_offset, &retval); + let code = code.downcast::()?; + let wrapper = CodeObjectWrapper::new(py, &code); + global.tracer.on_py_yield(py, &wrapper, instruction_offset, &retval); } Ok(()) } @@ -572,7 +591,9 @@ fn callback_py_throw( exception: Bound<'_, PyAny>, ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { - global.tracer.on_py_throw(py, &code, instruction_offset, &exception); + let code = code.downcast::()?; + let wrapper = CodeObjectWrapper::new(py, &code); + global.tracer.on_py_throw(py, &wrapper, instruction_offset, &exception); } Ok(()) } @@ -585,7 +606,9 @@ fn callback_py_unwind( exception: Bound<'_, PyAny>, ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { - global.tracer.on_py_unwind(py, &code, instruction_offset, &exception); + let code = code.downcast::()?; + let wrapper = CodeObjectWrapper::new(py, &code); + global.tracer.on_py_unwind(py, &wrapper, instruction_offset, &exception); } Ok(()) } @@ -598,7 +621,9 @@ fn callback_raise( exception: Bound<'_, PyAny>, ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { - global.tracer.on_raise(py, &code, instruction_offset, &exception); + let code = code.downcast::()?; + let wrapper = CodeObjectWrapper::new(py, &code); + global.tracer.on_raise(py, &wrapper, instruction_offset, &exception); } Ok(()) } @@ -611,7 +636,9 @@ fn callback_reraise( exception: Bound<'_, PyAny>, ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { - global.tracer.on_reraise(py, &code, instruction_offset, &exception); + let code = code.downcast::()?; + let wrapper = CodeObjectWrapper::new(py, &code); + global.tracer.on_reraise(py, &wrapper, instruction_offset, &exception); } Ok(()) } @@ -624,9 +651,11 @@ fn callback_exception_handled( exception: Bound<'_, PyAny>, ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { + let code = code.downcast::()?; + let wrapper = CodeObjectWrapper::new(py, &code); global .tracer - .on_exception_handled(py, &code, instruction_offset, &exception); + .on_exception_handled(py, &wrapper, instruction_offset, &exception); } Ok(()) } @@ -656,9 +685,11 @@ fn callback_c_return( arg0: Option>, ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { + let code = code.downcast::()?; + let wrapper = CodeObjectWrapper::new(py, &code); global .tracer - .on_c_return(py, &code, offset, &callable, arg0.as_ref()); + .on_c_return(py, &wrapper, offset, &callable, arg0.as_ref()); } Ok(()) } @@ -672,9 +703,11 @@ fn callback_c_raise( arg0: Option>, ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { + let code = code.downcast::()?; + let wrapper = CodeObjectWrapper::new(py, &code); global .tracer - .on_c_raise(py, &code, offset, &callable, arg0.as_ref()); + .on_c_raise(py, &wrapper, offset, &callable, arg0.as_ref()); } Ok(()) } diff --git a/codetracer-python-recorder/tests/code_object_wrapper.rs b/codetracer-python-recorder/tests/code_object_wrapper.rs new file mode 100644 index 0000000..8e7558d --- /dev/null +++ b/codetracer-python-recorder/tests/code_object_wrapper.rs @@ -0,0 +1,55 @@ +use codetracer_python_recorder::CodeObjectWrapper; +use pyo3::prelude::*; +use pyo3::types::{PyCode, PyModule}; +use std::ffi::CString; + +#[test] +fn wrapper_basic_attributes() { + Python::with_gil(|py| { + let src = CString::new("def f(x):\n return x + 1\n").unwrap(); + let filename = CString::new("").unwrap(); + let module = CString::new("m").unwrap(); + let m = PyModule::from_code(py, src.as_c_str(), filename.as_c_str(), module.as_c_str()).unwrap(); + let func = m.getattr("f").unwrap(); + let code: Bound<'_, PyCode> = func + .getattr("__code__") + .unwrap() + .clone() + .downcast_into() + .unwrap(); + let wrapper = CodeObjectWrapper::new(py, &code); + assert_eq!(wrapper.arg_count(py).unwrap(), 1); + assert_eq!(wrapper.filename(py).unwrap(), ""); + assert_eq!(wrapper.qualname(py).unwrap(), "f"); + assert!(wrapper.flags(py).unwrap() > 0); + }); +} + +#[test] +fn wrapper_line_for_offset() { + Python::with_gil(|py| { + let src = CString::new("def g():\n a = 1\n b = 2\n return a + b\n").unwrap(); + let filename = CString::new("").unwrap(); + let module = CString::new("m2").unwrap(); + let m = PyModule::from_code(py, src.as_c_str(), filename.as_c_str(), module.as_c_str()).unwrap(); + let func = m.getattr("g").unwrap(); + let code: Bound<'_, PyCode> = func + .getattr("__code__") + .unwrap() + .clone() + .downcast_into() + .unwrap(); + let wrapper = CodeObjectWrapper::new(py, &code); + let lines = code.call_method0("co_lines").unwrap(); + let iter = lines.try_iter().unwrap(); + let mut last_line = None; + for item in iter { + let (start, _end, line): (u32, u32, Option) = item.unwrap().extract().unwrap(); + if let Some(l) = line { + assert_eq!(wrapper.line_for_offset(py, start).unwrap(), Some(l)); + last_line = Some(l); + } + } + assert_eq!(wrapper.line_for_offset(py, 10_000).unwrap(), last_line); + }); +} diff --git a/codetracer-python-recorder/tests/print_tracer.rs b/codetracer-python-recorder/tests/print_tracer.rs index a1cccc0..834bb70 100644 --- a/codetracer-python-recorder/tests/print_tracer.rs +++ b/codetracer-python-recorder/tests/print_tracer.rs @@ -1,4 +1,4 @@ -use codetracer_python_recorder::{install_tracer, uninstall_tracer, EventSet, Tracer}; +use codetracer_python_recorder::{install_tracer, uninstall_tracer, EventSet, Tracer, CodeObjectWrapper}; use codetracer_python_recorder::tracer::{MonitoringEvents, events_union}; use pyo3::prelude::*; use std::ffi::CString; @@ -9,17 +9,17 @@ static CALL_COUNT: AtomicUsize = AtomicUsize::new(0); struct PrintTracer; impl Tracer for PrintTracer { - fn interest(&self, events:&MonitoringEvents) -> EventSet { - events_union(&[events.CALL]) + fn interest(&self, events: &MonitoringEvents) -> EventSet { + events_union(&[events.CALL]) } fn on_call( &mut self, _py: Python<'_>, - _code: &pyo3::Bound<'_, pyo3::types::PyAny>, + _code: &CodeObjectWrapper, _offset: i32, - _callable: &pyo3::Bound<'_, pyo3::types::PyAny>, - _arg0: Option<&pyo3::Bound<'_, pyo3::types::PyAny>>, + _callable: &Bound<'_, PyAny>, + _arg0: Option<&Bound<'_, PyAny>>, ) { CALL_COUNT.fetch_add(1, Ordering::SeqCst); } @@ -29,19 +29,12 @@ impl Tracer for PrintTracer { fn tracer_prints_on_call() { Python::with_gil(|py| { CALL_COUNT.store(0, Ordering::SeqCst); - if let Err(e) = install_tracer(py, Box::new(PrintTracer)) { - e.print(py); - panic!("Install Tracer failed"); - } - let code = CString::new("def foo():\n return 1\nfoo()").expect("CString::new failed"); - if let Err(e) = py.run(code.as_c_str(), None, None) { - e.print(py); - uninstall_tracer(py).ok(); - panic!("Python raised an exception"); - } + uninstall_tracer(py).ok(); + install_tracer(py, Box::new(PrintTracer)).unwrap(); + let code = CString::new("def foo():\n return 1\nfoo()").unwrap(); + py.run(code.as_c_str(), None, None).unwrap(); uninstall_tracer(py).unwrap(); - let count = CALL_COUNT.load(Ordering::SeqCst); - assert!(count >= 1, "expected at least one CALL event, got {}", count); + assert!(CALL_COUNT.load(Ordering::SeqCst) >= 1); }); } @@ -58,28 +51,11 @@ static PY_UNWIND_COUNT: AtomicUsize = AtomicUsize::new(0); static RAISE_COUNT: AtomicUsize = AtomicUsize::new(0); static RERAISE_COUNT: AtomicUsize = AtomicUsize::new(0); static EXCEPTION_HANDLED_COUNT: AtomicUsize = AtomicUsize::new(0); -//static STOP_ITERATION_COUNT: AtomicUsize = AtomicUsize::new(0); static C_RETURN_COUNT: AtomicUsize = AtomicUsize::new(0); static C_RAISE_COUNT: AtomicUsize = AtomicUsize::new(0); struct CountingTracer; -fn offset_to_line(code: &pyo3::Bound<'_, pyo3::types::PyAny>, offset: i32) -> Option { - if offset < 0 { - return None; - } - let lines_iter = code.call_method0("co_lines").ok()?; - let iter = lines_iter.try_iter().ok()?; - for line_info in iter { - let line_info = line_info.ok()?; - let (start, end, line): (i32, i32, i32) = line_info.extract().ok()?; - if offset >= start && offset < end { - return Some(line); - } - } - None -} - impl Tracer for CountingTracer { fn interest(&self, events: &MonitoringEvents) -> EventSet { events_union(&[ @@ -94,7 +70,6 @@ impl Tracer for CountingTracer { events.PY_YIELD, events.PY_THROW, events.PY_UNWIND, - //events.STOP_ITERATION, events.RAISE, events.RERAISE, events.EXCEPTION_HANDLED, @@ -103,206 +78,105 @@ impl Tracer for CountingTracer { ]) } - fn on_line( - &mut self, - _py: Python<'_>, - _code: &pyo3::Bound<'_, pyo3::types::PyAny>, - lineno: u32, - ) { + fn on_line(&mut self, _py: Python<'_>, _code: &CodeObjectWrapper, lineno: u32) { LINE_COUNT.fetch_add(1, Ordering::SeqCst); println!("LINE at {}", lineno); } - fn on_instruction( - &mut self, - _py: Python<'_>, - code: &pyo3::Bound<'_, pyo3::types::PyAny>, - offset: i32, - ) { + fn on_instruction(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32) { INSTRUCTION_COUNT.fetch_add(1, Ordering::SeqCst); - if let Some(line) = offset_to_line(code, offset) { + if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("INSTRUCTION at {}", line); } } - fn on_jump( - &mut self, - _py: Python<'_>, - code: &pyo3::Bound<'_, pyo3::types::PyAny>, - offset: i32, - _destination_offset: i32, - ) { + fn on_jump(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _dest: i32) { JUMP_COUNT.fetch_add(1, Ordering::SeqCst); - if let Some(line) = offset_to_line(code, offset) { + if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("JUMP at {}", line); } } - fn on_branch( - &mut self, - _py: Python<'_>, - code: &pyo3::Bound<'_, pyo3::types::PyAny>, - offset: i32, - _destination_offset: i32, - ) { + fn on_branch(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _dest: i32) { BRANCH_COUNT.fetch_add(1, Ordering::SeqCst); - if let Some(line) = offset_to_line(code, offset) { + if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("BRANCH at {}", line); } } - fn on_py_start( - &mut self, - _py: Python<'_>, - code: &pyo3::Bound<'_, pyo3::types::PyAny>, - offset: i32, - ) { + fn on_py_start(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32) { PY_START_COUNT.fetch_add(1, Ordering::SeqCst); - if let Some(line) = offset_to_line(code, offset) { + if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("PY_START at {}", line); } } - fn on_py_resume( - &mut self, - _py: Python<'_>, - code: &pyo3::Bound<'_, pyo3::types::PyAny>, - offset: i32, - ) { + fn on_py_resume(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32) { PY_RESUME_COUNT.fetch_add(1, Ordering::SeqCst); - if let Some(line) = offset_to_line(code, offset) { + if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("PY_RESUME at {}", line); } } - fn on_py_return( - &mut self, - _py: Python<'_>, - code: &pyo3::Bound<'_, pyo3::types::PyAny>, - offset: i32, - _retval: &pyo3::Bound<'_, pyo3::types::PyAny>, - ) { + fn on_py_return(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _retval: &Bound<'_, PyAny>) { PY_RETURN_COUNT.fetch_add(1, Ordering::SeqCst); - if let Some(line) = offset_to_line(code, offset) { + if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("PY_RETURN at {}", line); } } - fn on_py_yield( - &mut self, - _py: Python<'_>, - code: &pyo3::Bound<'_, pyo3::types::PyAny>, - offset: i32, - _retval: &pyo3::Bound<'_, pyo3::types::PyAny>, - ) { + fn on_py_yield(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _retval: &Bound<'_, PyAny>) { PY_YIELD_COUNT.fetch_add(1, Ordering::SeqCst); - if let Some(line) = offset_to_line(code, offset) { + if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("PY_YIELD at {}", line); } } - fn on_py_throw( - &mut self, - _py: Python<'_>, - code: &pyo3::Bound<'_, pyo3::types::PyAny>, - offset: i32, - _exception: &pyo3::Bound<'_, pyo3::types::PyAny>, - ) { + fn on_py_throw(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _exc: &Bound<'_, PyAny>) { PY_THROW_COUNT.fetch_add(1, Ordering::SeqCst); - if let Some(line) = offset_to_line(code, offset) { + if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("PY_THROW at {}", line); } } - fn on_py_unwind( - &mut self, - _py: Python<'_>, - code: &pyo3::Bound<'_, pyo3::types::PyAny>, - offset: i32, - _exception: &pyo3::Bound<'_, pyo3::types::PyAny>, - ) { + fn on_py_unwind(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _exc: &Bound<'_, PyAny>) { PY_UNWIND_COUNT.fetch_add(1, Ordering::SeqCst); - if let Some(line) = offset_to_line(code, offset) { + if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("PY_UNWIND at {}", line); } } - fn on_raise( - &mut self, - _py: Python<'_>, - code: &pyo3::Bound<'_, pyo3::types::PyAny>, - offset: i32, - _exception: &pyo3::Bound<'_, pyo3::types::PyAny>, - ) { + fn on_raise(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _exc: &Bound<'_, PyAny>) { RAISE_COUNT.fetch_add(1, Ordering::SeqCst); - if let Some(line) = offset_to_line(code, offset) { + if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("RAISE at {}", line); } } - fn on_reraise( - &mut self, - _py: Python<'_>, - code: &pyo3::Bound<'_, pyo3::types::PyAny>, - offset: i32, - _exception: &pyo3::Bound<'_, pyo3::types::PyAny>, - ) { + fn on_reraise(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _exc: &Bound<'_, PyAny>) { RERAISE_COUNT.fetch_add(1, Ordering::SeqCst); - if let Some(line) = offset_to_line(code, offset) { + if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("RERAISE at {}", line); } } - fn on_exception_handled( - &mut self, - _py: Python<'_>, - code: &pyo3::Bound<'_, pyo3::types::PyAny>, - offset: i32, - _exception: &pyo3::Bound<'_, pyo3::types::PyAny>, - ) { + fn on_exception_handled(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _exc: &Bound<'_, PyAny>) { EXCEPTION_HANDLED_COUNT.fetch_add(1, Ordering::SeqCst); - if let Some(line) = offset_to_line(code, offset) { + if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("EXCEPTION_HANDLED at {}", line); } } - // fn on_stop_iteration( - // &mut self, - // _py: Python<'_>, - // code: &pyo3::Bound<'_, pyo3::types::PyAny>, - // offset: i32, - // _exception: &pyo3::Bound<'_, pyo3::types::PyAny>, - // ) { - // STOP_ITERATION_COUNT.fetch_add(1, Ordering::SeqCst); - // if let Some(line) = offset_to_line(code, offset) { - // println!("STOP_ITERATION at {}", line); - // } - // } - - fn on_c_return( - &mut self, - _py: Python<'_>, - code: &pyo3::Bound<'_, pyo3::types::PyAny>, - offset: i32, - _callable: &pyo3::Bound<'_, pyo3::types::PyAny>, - _arg0: Option<&pyo3::Bound<'_, pyo3::types::PyAny>>, - ) { + fn on_c_return(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _call: &Bound<'_, PyAny>, _arg0: Option<&Bound<'_, PyAny>>) { C_RETURN_COUNT.fetch_add(1, Ordering::SeqCst); - if let Some(line) = offset_to_line(code, offset) { + if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("C_RETURN at {}", line); } } - fn on_c_raise( - &mut self, - _py: Python<'_>, - code: &pyo3::Bound<'_, pyo3::types::PyAny>, - offset: i32, - _callable: &pyo3::Bound<'_, pyo3::types::PyAny>, - _arg0: Option<&pyo3::Bound<'_, pyo3::types::PyAny>>, - ) { + fn on_c_raise(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32, _call: &Bound<'_, PyAny>, _arg0: Option<&Bound<'_, PyAny>>) { C_RAISE_COUNT.fetch_add(1, Ordering::SeqCst); - if let Some(line) = offset_to_line(code, offset) { + if let Ok(Some(line)) = code.line_for_offset(py, offset as u32) { println!("C_RAISE at {}", line); } } @@ -324,94 +198,27 @@ fn tracer_handles_all_events() { RAISE_COUNT.store(0, Ordering::SeqCst); RERAISE_COUNT.store(0, Ordering::SeqCst); EXCEPTION_HANDLED_COUNT.store(0, Ordering::SeqCst); - // STOP_ITERATION_COUNT.store(0, Ordering::SeqCst); //ISSUE: We can't figure out how to triger this event C_RETURN_COUNT.store(0, Ordering::SeqCst); C_RAISE_COUNT.store(0, Ordering::SeqCst); - if let Err(e) = install_tracer(py, Box::new(CountingTracer)) { - e.print(py); - panic!("Install Tracer failed"); - } - let code = CString::new(r#" -def test_all(): - x = 0 - if x == 0: - x += 1 - for i in range(1): - x += i - def foo(): - return 1 - foo() - try: - raise ValueError("err") - except ValueError: - pass - def gen(): - try: - yield 1 - yield 2 - except ValueError: - pass - g = gen() - next(g) - next(g) - try: - g.throw(ValueError()) - except StopIteration: - pass - for _ in []: - pass - def gen2(): - yield 1 - return 2 - for _ in gen2(): - pass - len("abc") - try: - int("a") - except ValueError: - pass - def f_unwind(): - raise KeyError - try: - f_unwind() - except KeyError: - pass - try: - try: - raise OSError() - except OSError: - raise - except OSError: - pass -test_all() -def only_stop_iter(): - if False: - yield -for _ in only_stop_iter(): - pass -"#).expect("CString::new failed"); - if let Err(e) = py.run(code.as_c_str(), None, None) { - e.print(py); - uninstall_tracer(py).ok(); - panic!("Python raised an exception"); - } + install_tracer(py, Box::new(CountingTracer)).unwrap(); + let code = CString::new( +"def test_all():\n x = 0\n if x == 0:\n x += 1\n for i in range(1):\n x += i\n def foo():\n return 1\n foo()\n try:\n raise ValueError('err')\n except ValueError:\n pass\n def gen():\n try:\n yield 1\n yield 2\n except ValueError:\n pass\n g = gen()\n next(g)\n next(g)\n try:\n g.throw(ValueError())\n except StopIteration:\n pass\n for _ in []:\n pass\n def gen2():\n yield 1\n return 2\n for _ in gen2():\n pass\n len('abc')\n try:\n int('a')\n except ValueError:\n pass\n def f_unwind():\n raise KeyError\n try:\n f_unwind()\n except KeyError:\n pass\n try:\n try:\n raise OSError()\n except OSError:\n raise\n except OSError:\n pass\n\ntest_all()\n").unwrap(); + py.run(code.as_c_str(), None, None).unwrap(); uninstall_tracer(py).unwrap(); - assert!(LINE_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one LINE event, got {}", LINE_COUNT.load(Ordering::SeqCst)); - assert!(INSTRUCTION_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one INSTRUCTION event, got {}", INSTRUCTION_COUNT.load(Ordering::SeqCst)); - assert!(JUMP_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one JUMP event, got {}", JUMP_COUNT.load(Ordering::SeqCst)); - assert!(BRANCH_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one BRANCH event, got {}", BRANCH_COUNT.load(Ordering::SeqCst)); - assert!(PY_START_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one PY_START event, got {}", PY_START_COUNT.load(Ordering::SeqCst)); - assert!(PY_RESUME_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one PY_RESUME event, got {}", PY_RESUME_COUNT.load(Ordering::SeqCst)); - assert!(PY_RETURN_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one PY_RETURN event, got {}", PY_RETURN_COUNT.load(Ordering::SeqCst)); - assert!(PY_YIELD_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one PY_YIELD event, got {}", PY_YIELD_COUNT.load(Ordering::SeqCst)); - assert!(PY_THROW_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one PY_THROW event, got {}", PY_THROW_COUNT.load(Ordering::SeqCst)); - assert!(PY_UNWIND_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one PY_UNWIND event, got {}", PY_UNWIND_COUNT.load(Ordering::SeqCst)); - assert!(RAISE_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one RAISE event, got {}", RAISE_COUNT.load(Ordering::SeqCst)); - assert!(RERAISE_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one RERAISE event, got {}", RERAISE_COUNT.load(Ordering::SeqCst)); - assert!(EXCEPTION_HANDLED_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one EXCEPTION_HANDLED event, got {}", EXCEPTION_HANDLED_COUNT.load(Ordering::SeqCst)); - // assert!(STOP_ITERATION_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one STOP_ITERATION event, got {}", STOP_ITERATION_COUNT.load(Ordering::SeqCst)); //Issue - assert!(C_RETURN_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one C_RETURN event, got {}", C_RETURN_COUNT.load(Ordering::SeqCst)); - assert!(C_RAISE_COUNT.load(Ordering::SeqCst) >= 1, "expected at least one C_RAISE event, got {}", C_RAISE_COUNT.load(Ordering::SeqCst)); + assert!(LINE_COUNT.load(Ordering::SeqCst) >= 1); + assert!(INSTRUCTION_COUNT.load(Ordering::SeqCst) >= 1); + assert!(JUMP_COUNT.load(Ordering::SeqCst) >= 1); + assert!(BRANCH_COUNT.load(Ordering::SeqCst) >= 1); + assert!(PY_START_COUNT.load(Ordering::SeqCst) >= 1); + assert!(PY_RESUME_COUNT.load(Ordering::SeqCst) >= 1); + assert!(PY_RETURN_COUNT.load(Ordering::SeqCst) >= 1); + assert!(PY_YIELD_COUNT.load(Ordering::SeqCst) >= 1); + assert!(PY_THROW_COUNT.load(Ordering::SeqCst) >= 1); + assert!(PY_UNWIND_COUNT.load(Ordering::SeqCst) >= 1); + assert!(RAISE_COUNT.load(Ordering::SeqCst) >= 1); + assert!(RERAISE_COUNT.load(Ordering::SeqCst) >= 1); + assert!(EXCEPTION_HANDLED_COUNT.load(Ordering::SeqCst) >= 1); + assert!(C_RETURN_COUNT.load(Ordering::SeqCst) >= 1); + assert!(C_RAISE_COUNT.load(Ordering::SeqCst) >= 1); }); } - diff --git a/design-docs/code-object.md b/design-docs/code-object.md new file mode 100644 index 0000000..eaec829 --- /dev/null +++ b/design-docs/code-object.md @@ -0,0 +1,142 @@ +# Code Object Wrapper Design + +## Overview + +The Python Monitoring API delivers a generic `CodeType` object to every tracing callback. The current `Tracer` trait surfaces this object as `&Bound<'_, PyAny>`, forcing every implementation to perform attribute lookups and type conversions manually. This document proposes a `CodeObjectWrapper` type that exposes a stable, typed interface to the underlying code object while minimizing per-event overhead. + +## Goals +- Provide a strongly typed API for common `CodeType` attributes needed by tracers and recorders. +- Ensure lookups are cheap by caching values and avoiding repeated Python attribute access. +- Maintain a stable identity for each code object to correlate events across callbacks. +- Avoid relying on the unstable `PyCodeObject` layout from the C API. + +## Non-Goals +- Full re‑implementation of every `CodeType` attribute. Only the fields required for tracing and time‑travel debugging are exposed. +- Direct mutation of `CodeType` objects. The wrapper offers read‑only access. + +## Proposed API + +```rs +pub struct CodeObjectWrapper { + /// Owned reference to the Python `CodeType` object. + /// Stored as `Py` so it can be held outside the GIL. + obj: Py, + /// Stable identity equivalent to `id(code)`. + id: usize, + /// Lazily populated cache for expensive lookups. + cache: CodeObjectCache, +} + +pub struct CodeObjectCache { + filename: OnceCell, + qualname: OnceCell, + firstlineno: OnceCell, + argcount: OnceCell, + flags: OnceCell, + /// Mapping of instruction offsets to line numbers. + lines: OnceCell>, +} + +pub struct LineEntry { + pub offset: u32, + pub line: u32, +} + +impl CodeObjectWrapper { + /// Construct from a `CodeType` object. Computes `id` eagerly. + pub fn new(py: Python<'_>, obj: &Bound<'_, PyCode>) -> Self; + + /// Borrow the owned `Py` as a `Bound<'py, PyCode>`. + /// This follows PyO3's recommendation to prefer `Bound<'_, T>` over `Py` + /// for object manipulation. + pub fn as_bound<'py>(&'py self, py: Python<'py>) -> Bound<'py, PyCode>; + + /// Accessors fetch from the cache or perform a one‑time lookup under the GIL. + pub fn filename<'py>(&'py self, py: Python<'py>) -> PyResult<&'py str>; + pub fn qualname<'py>(&'py self, py: Python<'py>) -> PyResult<&'py str>; + pub fn first_line(&self, py: Python<'_>) -> PyResult; + pub fn arg_count(&self, py: Python<'_>) -> PyResult; + pub fn flags(&self, py: Python<'_>) -> PyResult; + + /// Return the source line for a given instruction offset using a binary search on `lines`. + pub fn line_for_offset(&self, py: Python<'_>, offset: u32) -> PyResult>; + + /// Expose the stable identity for cross‑event correlation. + pub fn id(&self) -> usize; +} +``` + +### Global registry + +To avoid constructing a new wrapper for every tracing event, a global cache +stores `CodeObjectWrapper` instances keyed by their stable `id`: + +```rs +pub struct CodeObjectRegistry { + map: DashMap>, +} + +impl CodeObjectRegistry { + pub fn get_or_insert( + &self, + py: Python<'_>, + code: &Bound<'_, PyCode>, + ) -> Arc; + + /// Optional explicit removal for long‑running processes. + pub fn remove(&self, id: usize); +} +``` + +`CodeObjectWrapper::new` remains available, but production code is expected to +obtain instances via `CodeObjectRegistry::get_or_insert` so each unique code +object is wrapped only once. The registry is designed to be thread‑safe +(`DashMap`) and the wrappers are reference counted (`Arc`) so multiple threads +can hold references without additional locking. + +### Trait Integration + +The `Tracer` trait will be adjusted so every callback receives `&CodeObjectWrapper` instead of a generic `&Bound<'_, PyAny>`: + +```rs +fn on_line(&mut self, py: Python<'_>, code: &CodeObjectWrapper, lineno: u32); +fn on_py_start(&mut self, py: Python<'_>, code: &CodeObjectWrapper, offset: i32); +// ...and similarly for the remaining callbacks. +``` + +## Usage Examples + +### Retrieving wrappers from the global registry + +```rs +static CODE_REGISTRY: Lazy = Lazy::new(CodeObjectRegistry::default); + +fn on_line(&mut self, py: Python<'_>, code: &Bound<'_, PyCode>, lineno: u32) { + let wrapper = CODE_REGISTRY.get_or_insert(py, code); + let filename = wrapper.filename(py).unwrap_or(""); + eprintln!("{}:{}", filename, lineno); +} +``` + +Once cached, subsequent callbacks referencing the same `CodeType` will reuse the +existing wrapper without recomputing any attributes. + +## Performance Considerations +- `Py` allows cloning the wrapper without holding the GIL, enabling cheap event propagation. +- Methods bind the owned reference to `Bound<'py, PyCode>` on demand, following PyO3's `Bound`‑first guidance and avoiding accidental `Py` clones. +- Fields are loaded lazily and stored inside `OnceCell` containers to avoid repeated attribute lookups. +- `line_for_offset` memoizes the full line table the first time it is requested; subsequent calls perform an in‑memory binary search. +- Storing strings and small integers directly in the cache eliminates conversion cost on hot paths. +- A global `CodeObjectRegistry` ensures that wrapper construction and attribute + discovery happen at most once per `CodeType`. + +## Open Questions +- Additional attributes such as `co_consts` or `co_varnames` may be required for richer debugging features; these can be added later as new `OnceCell` fields. +- Thread‑safety requirements may necessitate wrapping the cache in `UnsafeCell` or providing internal mutability strategies compatible with `Send`/`Sync`. +- The registry currently grows unbounded; strategies for eviction or weak + references may be needed for long‑running processes that compile many + transient code objects. + +## References +- [Python `CodeType` objects](https://docs.python.org/3/reference/datamodel.html#code-objects) +- [Python monitoring API](https://docs.python.org/3/library/sys.monitoring.html) From 36069a95201420dea7d9629a9f971c625be25715 Mon Sep 17 00:00:00 2001 From: Tzanko Matev Date: Thu, 21 Aug 2025 19:23:33 +0300 Subject: [PATCH 3/3] feat: cache code objects in global registry --- .../tasks/2025/08/21-0939-codetype-interface | 4 +- codetracer-python-recorder/Cargo.lock | 128 ++++++++++++++++++ codetracer-python-recorder/Cargo.toml | 1 + codetracer-python-recorder/src/code_object.rs | 34 ++++- codetracer-python-recorder/src/lib.rs | 2 +- codetracer-python-recorder/src/tracer.rs | 34 ++--- .../tests/code_object_wrapper.rs | 25 +++- 7 files changed, 207 insertions(+), 21 deletions(-) diff --git a/.agents/tasks/2025/08/21-0939-codetype-interface b/.agents/tasks/2025/08/21-0939-codetype-interface index 561430e..d1add9e 100644 --- a/.agents/tasks/2025/08/21-0939-codetype-interface +++ b/.agents/tasks/2025/08/21-0939-codetype-interface @@ -37,4 +37,6 @@ Also please add usage examples to the design documentation --- FOLLOW UP TASK --- Implement the CodeObjectWrapper as designed. Update the Tracer trait as well as the callback_xxx functions accordingly. Write a comprehensive unit tests for CodeObjectWrapper. --- FOLLOW UP TASK --- -There is an issue in the current implementation. We don't use caching effectively, since we create a new CodeObjectWrapper at each callback_xxx call. We need a global cache, probably keyed by the code object id. Propose design changes and update the design documents. Don't implement the changes themselves before I approve them. \ No newline at end of file +There is an issue in the current implementation. We don't use caching effectively, since we create a new CodeObjectWrapper at each callback_xxx call. We need a global cache, probably keyed by the code object id. Propose design changes and update the design documents. Don't implement the changes themselves before I approve them. +--- FOLLOW UP TASK --- +Implement the global code object registry. \ No newline at end of file diff --git a/codetracer-python-recorder/Cargo.lock b/codetracer-python-recorder/Cargo.lock index 85f939e..124ac77 100644 --- a/codetracer-python-recorder/Cargo.lock +++ b/codetracer-python-recorder/Cargo.lock @@ -69,11 +69,25 @@ name = "codetracer-python-recorder" version = "0.1.0" dependencies = [ "bitflags", + "dashmap", "once_cell", "pyo3", "runtime_tracing", ] +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if", + "hashbrown", + "lock_api", + "once_cell", + "parking_lot_core", +] + [[package]] name = "embedded-io" version = "0.6.1" @@ -101,6 +115,12 @@ dependencies = [ "wasi", ] +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + [[package]] name = "heck" version = "0.5.0" @@ -135,6 +155,16 @@ version = "0.2.175" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" +[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.27" @@ -182,6 +212,19 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + [[package]] name = "pkg-config" version = "0.3.32" @@ -280,6 +323,15 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "redox_syscall" +version = "0.5.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +dependencies = [ + "bitflags", +] + [[package]] name = "runtime_tracing" version = "0.14.0" @@ -305,6 +357,12 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "serde" version = "1.0.219" @@ -354,6 +412,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + [[package]] name = "syn" version = "2.0.104" @@ -392,6 +456,70 @@ dependencies = [ "wit-bindgen-rt", ] +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + [[package]] name = "wit-bindgen-rt" version = "0.39.0" diff --git a/codetracer-python-recorder/Cargo.toml b/codetracer-python-recorder/Cargo.toml index 8acbb13..61bb5a9 100644 --- a/codetracer-python-recorder/Cargo.toml +++ b/codetracer-python-recorder/Cargo.toml @@ -19,6 +19,7 @@ pyo3 = { version = "0.25.1" } runtime_tracing = "0.14.0" bitflags = "2.4" once_cell = "1.19" +dashmap = "5.5" [dev-dependencies] pyo3 = { version = "0.25.1", features = ["auto-initialize"] } diff --git a/codetracer-python-recorder/src/code_object.rs b/codetracer-python-recorder/src/code_object.rs index fb93c40..ff89039 100644 --- a/codetracer-python-recorder/src/code_object.rs +++ b/codetracer-python-recorder/src/code_object.rs @@ -1,6 +1,8 @@ -use once_cell::sync::OnceCell; +use once_cell::sync::{Lazy, OnceCell}; use pyo3::prelude::*; use pyo3::types::PyCode; +use dashmap::DashMap; +use std::sync::Arc; /// A wrapper around Python `code` objects providing cached access to /// common attributes and line information. @@ -128,3 +130,33 @@ impl CodeObjectWrapper { } } } + +/// Global registry caching `CodeObjectWrapper` instances by code object id. +#[derive(Default)] +pub struct CodeObjectRegistry { + map: DashMap>, +} + +impl CodeObjectRegistry { + /// Retrieve the wrapper for `code`, inserting a new one if needed. + pub fn get_or_insert( + &self, + py: Python<'_>, + code: &Bound<'_, PyCode>, + ) -> Arc { + let id = code.as_ptr() as usize; + self.map + .entry(id) + .or_insert_with(|| Arc::new(CodeObjectWrapper::new(py, code))) + .clone() + } + + /// Remove the wrapper for a given code id, if present. + pub fn remove(&self, id: usize) { + self.map.remove(&id); + } +} + +/// Lazily-initialized global registry instance. +pub static CODE_REGISTRY: Lazy = + Lazy::new(CodeObjectRegistry::default); diff --git a/codetracer-python-recorder/src/lib.rs b/codetracer-python-recorder/src/lib.rs index a1e1354..01e2da5 100644 --- a/codetracer-python-recorder/src/lib.rs +++ b/codetracer-python-recorder/src/lib.rs @@ -5,7 +5,7 @@ use pyo3::prelude::*; pub mod code_object; pub mod tracer; -pub use crate::code_object::CodeObjectWrapper; +pub use crate::code_object::{CodeObjectRegistry, CodeObjectWrapper, CODE_REGISTRY}; pub use crate::tracer::{install_tracer, uninstall_tracer, EventSet, Tracer}; /// Global flag tracking whether tracing is active. diff --git a/codetracer-python-recorder/src/tracer.rs b/codetracer-python-recorder/src/tracer.rs index a940cda..6b648f1 100644 --- a/codetracer-python-recorder/src/tracer.rs +++ b/codetracer-python-recorder/src/tracer.rs @@ -4,7 +4,7 @@ use pyo3::{ prelude::*, types::{PyAny, PyCFunction, PyCode, PyModule}, }; -use crate::code_object::CodeObjectWrapper; +use crate::code_object::{CodeObjectWrapper, CODE_REGISTRY}; const MONITORING_TOOL_NAME: &str = "codetracer"; @@ -473,7 +473,7 @@ fn callback_call( ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let code = code.downcast::()?; - let wrapper = CodeObjectWrapper::new(py, &code); + let wrapper = CODE_REGISTRY.get_or_insert(py, &code); global.tracer.on_call(py, &wrapper, offset, &callable, arg0.as_ref()); } Ok(()) @@ -483,7 +483,7 @@ fn callback_call( fn callback_line(py: Python<'_>, code: Bound<'_, PyAny>, lineno: u32) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let code = code.downcast::()?; - let wrapper = CodeObjectWrapper::new(py, &code); + let wrapper = CODE_REGISTRY.get_or_insert(py, &code); global.tracer.on_line(py, &wrapper, lineno); } Ok(()) @@ -493,7 +493,7 @@ fn callback_line(py: Python<'_>, code: Bound<'_, PyAny>, lineno: u32) -> PyResul fn callback_instruction(py: Python<'_>, code: Bound<'_, PyAny>, instruction_offset: i32) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let code = code.downcast::()?; - let wrapper = CodeObjectWrapper::new(py, &code); + let wrapper = CODE_REGISTRY.get_or_insert(py, &code); global.tracer.on_instruction(py, &wrapper, instruction_offset); } Ok(()) @@ -508,7 +508,7 @@ fn callback_jump( ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let code = code.downcast::()?; - let wrapper = CodeObjectWrapper::new(py, &code); + let wrapper = CODE_REGISTRY.get_or_insert(py, &code); global .tracer .on_jump(py, &wrapper, instruction_offset, destination_offset); @@ -525,7 +525,7 @@ fn callback_branch( ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let code = code.downcast::()?; - let wrapper = CodeObjectWrapper::new(py, &code); + let wrapper = CODE_REGISTRY.get_or_insert(py, &code); global .tracer .on_branch(py, &wrapper, instruction_offset, destination_offset); @@ -537,7 +537,7 @@ fn callback_branch( fn callback_py_start(py: Python<'_>, code: Bound<'_, PyAny>, instruction_offset: i32) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let code = code.downcast::()?; - let wrapper = CodeObjectWrapper::new(py, &code); + let wrapper = CODE_REGISTRY.get_or_insert(py, &code); global.tracer.on_py_start(py, &wrapper, instruction_offset); } Ok(()) @@ -547,7 +547,7 @@ fn callback_py_start(py: Python<'_>, code: Bound<'_, PyAny>, instruction_offset: fn callback_py_resume(py: Python<'_>, code: Bound<'_, PyAny>, instruction_offset: i32) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let code = code.downcast::()?; - let wrapper = CodeObjectWrapper::new(py, &code); + let wrapper = CODE_REGISTRY.get_or_insert(py, &code); global.tracer.on_py_resume(py, &wrapper, instruction_offset); } Ok(()) @@ -562,7 +562,7 @@ fn callback_py_return( ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let code = code.downcast::()?; - let wrapper = CodeObjectWrapper::new(py, &code); + let wrapper = CODE_REGISTRY.get_or_insert(py, &code); global.tracer.on_py_return(py, &wrapper, instruction_offset, &retval); } Ok(()) @@ -577,7 +577,7 @@ fn callback_py_yield( ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let code = code.downcast::()?; - let wrapper = CodeObjectWrapper::new(py, &code); + let wrapper = CODE_REGISTRY.get_or_insert(py, &code); global.tracer.on_py_yield(py, &wrapper, instruction_offset, &retval); } Ok(()) @@ -592,7 +592,7 @@ fn callback_py_throw( ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let code = code.downcast::()?; - let wrapper = CodeObjectWrapper::new(py, &code); + let wrapper = CODE_REGISTRY.get_or_insert(py, &code); global.tracer.on_py_throw(py, &wrapper, instruction_offset, &exception); } Ok(()) @@ -607,7 +607,7 @@ fn callback_py_unwind( ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let code = code.downcast::()?; - let wrapper = CodeObjectWrapper::new(py, &code); + let wrapper = CODE_REGISTRY.get_or_insert(py, &code); global.tracer.on_py_unwind(py, &wrapper, instruction_offset, &exception); } Ok(()) @@ -622,7 +622,7 @@ fn callback_raise( ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let code = code.downcast::()?; - let wrapper = CodeObjectWrapper::new(py, &code); + let wrapper = CODE_REGISTRY.get_or_insert(py, &code); global.tracer.on_raise(py, &wrapper, instruction_offset, &exception); } Ok(()) @@ -637,7 +637,7 @@ fn callback_reraise( ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let code = code.downcast::()?; - let wrapper = CodeObjectWrapper::new(py, &code); + let wrapper = CODE_REGISTRY.get_or_insert(py, &code); global.tracer.on_reraise(py, &wrapper, instruction_offset, &exception); } Ok(()) @@ -652,7 +652,7 @@ fn callback_exception_handled( ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let code = code.downcast::()?; - let wrapper = CodeObjectWrapper::new(py, &code); + let wrapper = CODE_REGISTRY.get_or_insert(py, &code); global .tracer .on_exception_handled(py, &wrapper, instruction_offset, &exception); @@ -686,7 +686,7 @@ fn callback_c_return( ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let code = code.downcast::()?; - let wrapper = CodeObjectWrapper::new(py, &code); + let wrapper = CODE_REGISTRY.get_or_insert(py, &code); global .tracer .on_c_return(py, &wrapper, offset, &callable, arg0.as_ref()); @@ -704,7 +704,7 @@ fn callback_c_raise( ) -> PyResult<()> { if let Some(global) = GLOBAL.lock().unwrap().as_mut() { let code = code.downcast::()?; - let wrapper = CodeObjectWrapper::new(py, &code); + let wrapper = CODE_REGISTRY.get_or_insert(py, &code); global .tracer .on_c_raise(py, &wrapper, offset, &callable, arg0.as_ref()); diff --git a/codetracer-python-recorder/tests/code_object_wrapper.rs b/codetracer-python-recorder/tests/code_object_wrapper.rs index 8e7558d..b6c4a17 100644 --- a/codetracer-python-recorder/tests/code_object_wrapper.rs +++ b/codetracer-python-recorder/tests/code_object_wrapper.rs @@ -1,4 +1,4 @@ -use codetracer_python_recorder::CodeObjectWrapper; +use codetracer_python_recorder::{CodeObjectWrapper, CODE_REGISTRY}; use pyo3::prelude::*; use pyo3::types::{PyCode, PyModule}; use std::ffi::CString; @@ -53,3 +53,26 @@ fn wrapper_line_for_offset() { assert_eq!(wrapper.line_for_offset(py, 10_000).unwrap(), last_line); }); } + +#[test] +fn registry_reuses_wrappers() { + Python::with_gil(|py| { + let src = CString::new("def h():\n return 0\n").unwrap(); + let filename = CString::new("").unwrap(); + let module = CString::new("m3").unwrap(); + let m = + PyModule::from_code(py, src.as_c_str(), filename.as_c_str(), module.as_c_str()) + .unwrap(); + let func = m.getattr("h").unwrap(); + let code: Bound<'_, PyCode> = func + .getattr("__code__") + .unwrap() + .clone() + .downcast_into() + .unwrap(); + let w1 = CODE_REGISTRY.get_or_insert(py, &code); + let w2 = CODE_REGISTRY.get_or_insert(py, &code); + assert!(std::sync::Arc::ptr_eq(&w1, &w2)); + CODE_REGISTRY.remove(w1.id()); + }); +}