-
Notifications
You must be signed in to change notification settings - Fork 2
Added support for traceCallback #28
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
e99954c
d68d201
5870dbd
19ffd6b
6f81ae8
1c34a9a
ffbdfb9
f05c0f3
d3fecdb
1520740
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ pub(crate) enum FunctionType { | |
pub enum FunctionKind { | ||
Sync, | ||
Async, | ||
Callback, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that the After this change, the callback position config can only be passed when you're actually setting up a callback hook. How it currently stands you can pass a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It'd be ignored.
Sorry, I don't understand any of that. As far as I can tell, the |
||
} | ||
|
||
impl FunctionKind { | ||
|
@@ -25,11 +26,17 @@ impl FunctionKind { | |
matches!(self, FunctionKind::Async) | ||
} | ||
|
||
#[must_use] | ||
pub fn is_callback(&self) -> bool { | ||
matches!(self, FunctionKind::Callback) | ||
} | ||
|
||
#[must_use] | ||
pub fn tracing_operator(&self) -> &'static str { | ||
match self { | ||
FunctionKind::Sync => "traceSync", | ||
FunctionKind::Async => "tracePromise", | ||
FunctionKind::Callback => "traceCallback", | ||
} | ||
} | ||
} | ||
|
@@ -43,7 +50,7 @@ impl FunctionKind { | |
#[derive(Debug, Clone)] | ||
pub enum FunctionQuery { | ||
// The order here matters because this enum is untagged, serde will try | ||
// choose the first variant that matches the data. | ||
// to choose the first variant that matches the data. | ||
ClassMethod { | ||
class_name: String, | ||
method_name: String, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -119,19 +119,20 @@ impl Instrumentation { | |
ctxt: SyntaxContext::empty(), | ||
stmts: vec![ | ||
quote!("const __apm$wrapped = $wrapped;" as Stmt, wrapped: Expr = wrapped_fn.into()), | ||
quote!("return __apm$wrapped.apply(null, __apm$original_args);" as Stmt), | ||
// TODO: instead of a static `this` we should be able to declare the value | ||
quote!("return __apm$wrapped.apply(this, __apm$original_args);" as Stmt), | ||
Comment on lines
+122
to
+123
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe should've been a separate PR, but binding to |
||
], | ||
}; | ||
|
||
let traced_fn = self.new_fn(traced_body, vec![]); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so i was testing all of this out and I found that This is a test that asserts it's doing the right thing: const diagnostics_channel = require('node:diagnostics_channel');
const channels = diagnostics_channel.tracingChannel('my-channel');
const { AsyncLocalStorage } = require('node:async_hooks');
const assert = require('node:assert')
const myStore = new AsyncLocalStorage();
let expectedCalls = 0
// The start channel sets the initial store data to something
// and stores that store data value on the trace context object
channels.start.bindStore(myStore, (data) => {
console.log('calling start store')
expectedCalls++
return data
});
channels.asyncStart.bindStore(myStore, (data) => {
console.log('calling async start store')
expectedCalls++
return data
});
channels.subscribe({
start() {
console.log('calling start')
expectedCalls++
},
end() {
console.log('calling end')
expectedCalls++
},
error() {
console.log('calling error')
expectedCalls++
},
asyncStart() {
console.log('calling asyncStart')
expectedCalls++
},
asyncEnd() {
console.log('calling asyncEnd')
expectedCalls++
}
})
function test() {
const origArgs = arguments
function traced() {
function wrapped(name, obj, cb) {
console.log('cb name should be wrapped', cb.name)
assert.equal(cb.name, 'wrappedCallback')
cb(null, 'test')
}
return wrapped.apply(this, arguments)
}
return channels.traceCallback(traced, -1, { some: 'thing'}, this, ...origArgs)
}
test('hi', { key: 'value'}, (err, data) => {
assert.deepEqual(err, null)
assert.equal(data, 'test')
setImmediate(() => {
assert.equal(expectedCalls, 6)
})
}) but if you change the wraps to arrow functions, like this repo does, they fail. |
||
|
||
let id_name = self.config.get_identifier_name(); | ||
let ch_ident = ident!(format!("tr_ch_apm${}", &id_name)); | ||
let trace_ident = ident!(format!( | ||
"tr_ch_apm${}.{}", | ||
let trace_statement = construct_trace_statement( | ||
&self.config, | ||
&id_name, | ||
self.config.function_query.kind().tracing_operator() | ||
)); | ||
self.module_version.clone().unwrap_or_default().as_str(), | ||
); | ||
|
||
body.stmts = vec![ | ||
quote!("const __apm$original_args = arguments" as Stmt), | ||
|
@@ -140,18 +141,7 @@ impl Instrumentation { | |
"if (!$ch.hasSubscribers) return __apm$traced();" as Stmt, | ||
ch = ch_ident | ||
), | ||
match &self.module_version { | ||
Some(version) => quote!( | ||
"return $trace(__apm$traced, { arguments, self: this, moduleVersion: $version } );" | ||
as Stmt, | ||
trace = trace_ident, | ||
version: Expr = version.as_str().into(), | ||
), | ||
None => quote!( | ||
"return $trace(__apm$traced, { arguments, self: this } );" as Stmt, | ||
trace = trace_ident, | ||
), | ||
}, | ||
trace_statement, | ||
]; | ||
|
||
self.has_injected = true; | ||
|
@@ -235,7 +225,7 @@ impl Instrumentation { | |
} | ||
|
||
// The rest of these functions are from `VisitMut`, except they return a boolean to indicate | ||
// whether recusrsing through the tree is necessary, rather than calling | ||
// whether recursing through the tree is necessary, rather than calling | ||
// `visit_mut_children_with`. | ||
|
||
pub fn visit_mut_module(&mut self, node: &mut Module) -> bool { | ||
|
@@ -356,7 +346,7 @@ impl Instrumentation { | |
|
||
pub fn visit_mut_assign_expr(&mut self, node: &mut AssignExpr) -> bool { | ||
// TODO(bengl) This is by far the hardest bit. We're trying to infer a name for this | ||
// function expresion using the surrounding code, but it's not always possible, and even | ||
// function expression using the surrounding code, but it's not always possible, and even | ||
// where it is, there are so many ways to give a function expression a "name", that the | ||
// code paths here can get pretty hairy. Right now this is only covering some basic cases. | ||
// The following cases are missing: | ||
|
@@ -398,3 +388,33 @@ pub fn get_script_start_index(script: &Script) -> usize { | |
} | ||
0 | ||
} | ||
|
||
/// Builds a tracing channel trace invocation. For example, the result may | ||
/// look like: | ||
/// | ||
/// ```js | ||
/// return tr_ch_amp$myChannel.traceSync(__apm$traced, { arguments, self: this, moduleVersion: "1.0.0" }) | ||
/// ``` | ||
#[rustfmt::skip] | ||
fn construct_trace_statement(config: &InstrumentationConfig, channel_name: &str, mod_version: &str) -> Stmt { | ||
let mut ctx = "{ arguments, self: this }".to_string(); | ||
if mod_version.is_empty() == false { | ||
ctx = ["{ arguments, self: this, moduleVersion: \"", mod_version, "\" }"].join(""); | ||
} | ||
|
||
let operator = ["tr_ch_apm$", channel_name, ".", config.function_query.kind().tracing_operator()].join(""); | ||
#[allow(clippy::needless_late_init)] | ||
let stmt_str; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In Rust all statements return a value so it's much cleaner to just: let stmt_str = if config.function_query.kind().is_callback() {
// TODO: figure out how we can pass a `this` argument
let pos = config.callback_config.position.to_string();
["return ", &*operator, "(__apm$traced, ", pos.as_str(), ", ", ctx.as_str(), ", this, ...arguments)"].join("")
} else {
["return ", &*operator, "(__apm$traced, ", ctx.as_str(), ")"].join("")
} If the callback config gets moved to let stmt_str = match config.function_query.kind() {
FunctionKind::Callback { position } => {
format!("return {operator}(__apm$traced, {position}, {ctx}, this, ...arguments)")
},
_ => format!("return {operator}(__apm$traced, {ctx})")
}; Prefer There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think all of that is way more difficult to read. The code in the PR is as legible as I can make it in this language. |
||
if config.function_query.kind().is_callback() == true { | ||
// TODO: figure out how we can pass a `this` argument | ||
let pos = config.callback_config.position.to_string(); | ||
stmt_str = ["return ", &*operator, "(__apm$traced, ", pos.as_str(), ", ", ctx.as_str(), ", this, ...arguments)"].join(""); | ||
} else { | ||
stmt_str = ["return ", &*operator, "(__apm$traced, ", ctx.as_str(), ")"].join(""); | ||
} | ||
|
||
return quote!( | ||
"$stmt" as Stmt, | ||
stmt = Ident::new(Atom::from(stmt_str), Span::default(), SyntaxContext::empty()) | ||
) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,8 @@ | |
#![deny(clippy::complexity)] | ||
#![deny(clippy::correctness)] | ||
#![deny(clippy::unwrap_used)] | ||
#![allow(clippy::bool_comparison)] | ||
#![allow(clippy::needless_return)] | ||
Comment on lines
+16
to
+17
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree about However, it's hard to be "completely against implicit returns" in Rust because every statement in Rust implicitly returns. Even if you don't use the return keyword or return anything, the statement will implicitly return the unit type (). For example adding return to these three functions adds nothing but noise: impl FunctionKind {
#[must_use]
pub fn is_async(&self) -> bool {
matches!(self, FunctionKind::Async)
}
#[must_use]
pub fn is_callback(&self) -> bool {
matches!(self, FunctionKind::Callback { .. })
}
#[must_use]
pub fn tracing_operator(&self) -> &'static str {
match self {
FunctionKind::Sync => "traceSync",
FunctionKind::Async => "tracePromise",
FunctionKind::Callback { .. } => "traceCallback",
}
}
} You could write pub fn tracing_operator(&self) -> &'static str {
return match self {
FunctionKind::Sync => return "traceSync",
FunctionKind::Async => return "tracePromise",
FunctionKind::Callback { .. } => return "traceCallback",
};
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because an implicit return is mystical. I do not agree with your assessment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The code I would write is: impl FunctionKind {
#[must_use]
pub fn is_async(&self) -> bool {
return matches!(self, FunctionKind::Async)
}
#[must_use]
pub fn is_callback(&self) -> bool {
return matches!(self, FunctionKind::Callback { .. })
}
#[must_use]
pub fn tracing_operator(&self) -> &'static str {
return match self {
FunctionKind::Sync => "traceSync",
FunctionKind::Async => "tracePromise",
FunctionKind::Callback { .. } => "traceCallback",
}
}
} A lambda function returning it's single statement is okay. I don't know how to put it except that for the others, it's akin to leaving off the braces on conditional statements. That is, people would rightly balk at: if (something == true)
foo We can illustrate further: function isSomething(input) {
if (input == 'some test')
true
false
} In my opinion, it's much clearer and less prone to error in the future to write: function isSomething(input) {
if (input == 'some test') {
return true
}
return false
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Either way, this PR is not the right place to propose changing major lints and coding idioms of this repository. That should be proposed in another PR. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This PR doesn't propose anything that requires modifications to existing source code. |
||
|
||
/** | ||
* Unless explicitly stated otherwise all files in this repository are licensed under the Apache-2.0 License. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# Unit Tests | ||
|
||
Unit tests in this project follow a specific structure: | ||
|
||
1. A directory named the same as the unit test. For example, | ||
the `index_cjs` test will be within the `index_cjs/` directory. | ||
2. A `mod.rs` file that contains the unit test. | ||
3. A `mod.{js,mjs}` file that contains a JavaScript script to apply | ||
the test against, i.e. this replicates a potential module from | ||
npmjs.com. | ||
4. A `test.js` file that exercises the patched code. | ||
5. A new `mod` line in the [`instrumentor_test.rs`](./instrumentor_test.rs) | ||
file. | ||
|
||
To run a specific test from the command line: | ||
|
||
```sh | ||
# To run the index_cjs test only: | ||
cargo test index_cjs | ||
``` | ||
|
||
Each unit test should utilize the `transpile_and_test` function | ||
exported by [`common/mod.rs`](./common/mod.rs). This function: | ||
|
||
1. Reads in the unit test file to get the base path to the test. | ||
2. Reads in the JavaScript module file. | ||
3. Transforms the read-in JavaScript according to the defined configuration. | ||
4. Generates a new `instrumented.js` file within the test directory. This | ||
file contains the `mod.{js,mjs}` code patched by our tool. | ||
5. Runs `node test.js` within the test directory. The result of this test | ||
should be a `0` exit code for the process. | ||
|
||
## Running In A Debugger | ||
|
||
### RustRover | ||
|
||
To run a specific test through the [RustRover](https://www.jetbrains.com/rust/) | ||
debugger: | ||
|
||
1. Create a new run configuration of type "Cargo". | ||
2. For the "command", enter (e.g. for the `index_cjs` test): | ||
|
||
test --package orchestrion-js --test instrumentor_test index_cjs::index_cjs -- --exact | ||
3. Set breakpoints and run the profile. | ||
|
||
For a shortcut, open the desired `mod.rs` test file and click the "run" | ||
button in the gutter. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
'use strict' | ||
|
||
module.exports = Foo | ||
|
||
function Foo() {} | ||
|
||
Foo.prototype.doWork = function doWork(input, callback) { | ||
if (input === 'fail') { | ||
return callback(Error('boom')) | ||
} | ||
return callback(null, 'done') | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
use crate::common::*; | ||
use orchestrion_js::*; | ||
|
||
#[test] | ||
fn callback_cjs() { | ||
transpile_and_test_with_imports( | ||
file!(), | ||
false, | ||
Config::new_single(InstrumentationConfig::new_with_callback_config( | ||
"foo_ch", | ||
ModuleMatcher::new("foo", ">=1.0.0", "index.js").unwrap(), | ||
FunctionQuery::FunctionExpression { | ||
expression_name: "doWork".to_string(), | ||
kind: FunctionKind::Callback, | ||
index: 0, | ||
}, | ||
CallbackConfig { position: 1 }, | ||
)), | ||
&[PackageImport { | ||
module_name: "foo".to_string(), | ||
module_version: "1.1.1".to_string(), | ||
file: "index.js".to_string(), | ||
}], | ||
) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
'use strict' | ||
|
||
const assert = require('node:assert') | ||
const { getContext } = require('../common/preamble.js'); | ||
const context = getContext('orchestrion:foo:foo_ch'); | ||
const Foo = require('./instrumented.js') | ||
|
||
const start = process.hrtime.bigint() | ||
const timer1 = setInterval(timeout, 1_000) | ||
const timer2 = setInterval(timeout, 1_000) | ||
const foo = new Foo() | ||
|
||
foo.doWork('success', (error, result) => { | ||
assert.equal(error, undefined) | ||
assert.equal(result, 'done') | ||
|
||
assert.deepStrictEqual(context, { | ||
start: true | ||
}) | ||
delete context.start | ||
|
||
|
||
clearInterval(timer1) | ||
}) | ||
|
||
foo.doWork('fail', (error, result) => { | ||
assert.equal(error.message, 'boom') | ||
assert.equal(undefined, result) | ||
|
||
assert.deepStrictEqual(context, { | ||
end: true, | ||
start: true | ||
}) | ||
delete context.end | ||
delete context.start | ||
|
||
clearInterval(timer2) | ||
}) | ||
|
||
function timeout() { | ||
const current = process.hrtime.bigint() | ||
if ((current - start) > 3e+10) { | ||
// Exceeded 30 seconds, terminate it all. | ||
clearInterval(timer1) | ||
clearInterval(timer2) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,14 +12,43 @@ use swc::config::IsModule; | |
static TEST_MODULE_NAME: &str = "undici"; | ||
static TEST_MODULE_PATH: &str = "index.mjs"; | ||
|
||
/// PackageImport represents metadata around a module that we are pretending | ||
/// was imported from a npm module. So, if we are pretending to test | ||
/// instrumentation of a module `@foo/bar`, the `module_name` would be | ||
/// `@foo/bar`, the `module_version` would be a simple version string like | ||
/// `0.1.0`, and the `file` would be some filename within that package like | ||
/// `index.js` or `lib/utils.js`. | ||
/// | ||
/// This information will be used to match a unit test to the instrumentation. | ||
pub struct PackageImport { | ||
pub module_name: String, | ||
pub module_version: String, | ||
pub file: String, | ||
} | ||
|
||
pub fn transpile_and_test(test_file: &str, mjs: bool, config: Config) { | ||
transpile_and_test_with_imports(test_file, mjs, config, &[]); | ||
} | ||
|
||
#[rustfmt::skip] | ||
pub fn transpile_and_test_with_imports(test_file: &str, mjs: bool, config: Config, imports: &[PackageImport]) { | ||
let test_file = PathBuf::from(test_file); | ||
let test_dir = test_file.parent().expect("Couldn't find test directory"); | ||
|
||
let file_path = PathBuf::from("index.mjs"); | ||
let instrumentor = Instrumentor::new(config); | ||
let mut instrumentations = | ||
let mut instrumentations; | ||
if imports.len() > 0 { | ||
let import = &imports[0]; | ||
instrumentations = instrumentor.get_matching_instrumentations( | ||
import.module_name.as_str(), | ||
import.module_version.as_str(), | ||
&PathBuf::from(&import.file) | ||
); | ||
Comment on lines
+42
to
+47
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should really have a loop here to support more than one instrumentation at a time. But all of the existing tests utilize a single instrumentation. So I have left this as an exercise for later. |
||
} else { | ||
instrumentations = | ||
instrumentor.get_matching_instrumentations(TEST_MODULE_NAME, "0.0.1", &file_path); | ||
} | ||
|
||
let extension = if mjs { "mjs" } else { "js" }; | ||
let instrumentable = test_dir.join(format!("mod.{extension}")); | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should probably be an
Option<CallbackConfig>
.Option
can beNone
orSome<T>
. That way if it's not supplied, deserialisation will automatically make itNone
and you don't need the serde default.Having said that, see my other comment below because I'm not convinced this config should be here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I expect that would make dealing with the value more difficult in the code where it is actually used, e.g. when building the JavaScript string to inject into the script being instrumented. In fact, I am confident of it because I encountered the difficulty when dealing with
module_version
:There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should not be taking the default of
self.module_version
anyway as this will be an empty string.You should change
construct_trace_statement
to takemod_version: Option<&str>
and then construct the JavaScript code differently depending on whether themodule_version
actually exists:Then you can call
construct_trace_statement
like this:There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Checking for the empty string is way easier all around.