diff options
author | Matt Mastracci <matthew@mastracci.com> | 2024-02-16 14:22:12 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-16 21:22:12 +0000 |
commit | 67a4231bb62b7839f39bafc9e2e656f43093619b (patch) | |
tree | 428c3db10857a5c1785af56ea8997dff8b2adb0b /cli/tools | |
parent | c92717a1a43d31951cd8a1161afb766ef339aa21 (diff) |
refactor(cli): move op sanitizer to Rust (#22413)
The format of the sanitizers will change a little bit:
- If multiple async ops leak and traces are on, we repeat the async op
header once per stack trace.
- All leaks are aggregated under a "Leaks detected:" banner as the new
timers are eventually going to be added, and these are neither ops nor
resources.
- `1 async op` is now `An async op`
- If ops and resources leak, we show both (rather than op leaks masking
resources)
Follow-on to https://github.com/denoland/deno/pull/22226
Diffstat (limited to 'cli/tools')
-rw-r--r-- | cli/tools/test/fmt.rs | 113 | ||||
-rw-r--r-- | cli/tools/test/mod.rs | 210 |
2 files changed, 260 insertions, 63 deletions
diff --git a/cli/tools/test/fmt.rs b/cli/tools/test/fmt.rs index 931caf147..a185bd04f 100644 --- a/cli/tools/test/fmt.rs +++ b/cli/tools/test/fmt.rs @@ -82,17 +82,23 @@ pub fn format_test_error(js_error: &JsError) -> String { format_js_error(&js_error) } -pub fn format_sanitizer_diff(diff: RuntimeActivityDiff) -> Vec<String> { - let mut output = format_sanitizer_accum(diff.appeared, true); - output.extend(format_sanitizer_accum(diff.disappeared, false)); - output.sort(); - output +pub fn format_sanitizer_diff( + diff: RuntimeActivityDiff, +) -> (Vec<String>, Vec<String>) { + let (mut messages, trailers) = format_sanitizer_accum(diff.appeared, true); + let disappeared = format_sanitizer_accum(diff.disappeared, false); + messages.extend(disappeared.0); + messages.sort(); + let mut trailers = BTreeSet::from_iter(trailers); + trailers.extend(disappeared.1); + (messages, trailers.into_iter().collect::<Vec<_>>()) } fn format_sanitizer_accum( activities: Vec<RuntimeActivity>, appeared: bool, -) -> Vec<String> { +) -> (Vec<String>, Vec<String>) { + // Aggregate the sanitizer information let mut accum = HashMap::new(); for activity in activities { let item = format_sanitizer_accum_item(activity); @@ -100,21 +106,46 @@ fn format_sanitizer_accum( } let mut output = vec![]; - for ((item_type, item_name), count) in accum.into_iter() { + let mut needs_trace_ops = false; + for ((item_type, item_name, trace), count) in accum.into_iter() { if item_type == RuntimeActivityType::Resource { - // TODO(mmastrac): until we implement the new timers and op sanitization, these must be ignored in this path - if item_name == "timer" { - continue; - } let (name, action1, action2) = pretty_resource_name(&item_name); let hint = resource_close_hint(&item_name); - if appeared { - output.push(format!("{name} was {action1} during the test, but not {action2} during the test. {hint}")); + let value = if appeared { + format!("{name} was {action1} during the test, but not {action2} during the test. {hint}") } else { - output.push(format!("{name} was {action1} before the test started, but was {action2} during the test. \ - Do not close resources in a test that were not created during that test.")); - } + format!("{name} was {action1} before the test started, but was {action2} during the test. \ + Do not close resources in a test that were not created during that test.") + }; + output.push(value); + } else if item_type == RuntimeActivityType::AsyncOp { + let (count_str, plural, tense) = if count == 1 { + (Cow::Borrowed("An"), "", "was") + } else { + (Cow::Owned(count.to_string()), "s", "were") + }; + let phrase = if appeared { + "started in this test, but never completed" + } else { + "started before the test, but completed during the test. Async operations should not complete in a test if they were not started in that test" + }; + let mut value = if let Some([operation, hint]) = + OP_DETAILS.get(&item_name) + { + format!("{count_str} async operation{plural} to {operation} {tense} {phrase}. This is often caused by not {hint}.") + } else { + format!( + "{count_str} async call{plural} to {item_name} {tense} {phrase}." + ) + }; + value += &if let Some(trace) = trace { + format!(" The operation {tense} started here:\n{trace}") + } else { + needs_trace_ops = true; + String::new() + }; + output.push(value); } else { // TODO(mmastrac): this will be done in a later PR unimplemented!( @@ -125,18 +156,25 @@ fn format_sanitizer_accum( ); } } - output + if needs_trace_ops { + (output, vec!["To get more details where ops were leaked, run again with --trace-ops flag.".to_owned()]) + } else { + (output, vec![]) + } } fn format_sanitizer_accum_item( activity: RuntimeActivity, -) -> (RuntimeActivityType, Cow<'static, str>) { +) -> (RuntimeActivityType, Cow<'static, str>, Option<String>) { let activity_type = activity.activity(); match activity { - RuntimeActivity::AsyncOp(_, name, _) => (activity_type, name.into()), - RuntimeActivity::Interval(_) => (activity_type, "".into()), - RuntimeActivity::Resource(_, name) => (activity_type, name.into()), - RuntimeActivity::Timer(_) => (activity_type, "".into()), + // TODO(mmastrac): OpCallTrace needs to be Eq + RuntimeActivity::AsyncOp(_, name, trace) => { + (activity_type, name.into(), trace.map(|x| x.to_string())) + } + RuntimeActivity::Interval(_) => (activity_type, "".into(), None), + RuntimeActivity::Resource(_, name) => (activity_type, name.into(), None), + RuntimeActivity::Timer(_) => (activity_type, "".into(), None), } } @@ -215,9 +253,6 @@ fn resource_close_hint(name: &str) -> &'static str { } } -// An async operation to $0 was started in this test, but never completed. This is often caused by not $1. -// An async operation to $0 was started in this test, but never completed. Async operations should not complete in a test if they were not started in that test. -// deno-fmt-ignore pub const OP_DETAILS: phf::Map<&'static str, [&'static str; 2]> = phf_map! { "op_blob_read_part" => ["read from a Blob or File", "awaiting the result of a Blob or File read"], "op_broadcast_recv" => ["receive a message from a BroadcastChannel", "closing the BroadcastChannel"], @@ -295,3 +330,31 @@ pub const OP_DETAILS: phf::Map<&'static str, [&'static str; 2]> = phf_map! { "op_ws_send_ping" => ["send a message on a WebSocket", "closing a `WebSocket` or `WebSocketStream`"], "op_spawn_wait" => ["wait for a subprocess to exit", "awaiting the result of a `Deno.Process#status` call"], }; + +#[cfg(test)] +mod tests { + use deno_core::stats::RuntimeActivity; + + macro_rules! leak_format_test { + ($name:ident, $appeared:literal, [$($activity:expr),*], $expected:literal) => { + #[test] + fn $name() { + let (leaks, trailer_notes) = super::format_sanitizer_accum(vec![$($activity),*], $appeared); + let mut output = String::new(); + for leak in leaks { + output += &format!(" - {leak}\n"); + } + for trailer in trailer_notes { + output += &format!("{trailer}\n"); + } + assert_eq!(output, $expected); + } + } + } + + // https://github.com/denoland/deno/issues/13729 + // https://github.com/denoland/deno/issues/13938 + leak_format_test!(op_unknown, true, [RuntimeActivity::AsyncOp(0, "op_unknown", None)], + " - An async call to op_unknown was started in this test, but never completed.\n\ + To get more details where ops were leaked, run again with --trace-ops flag.\n"); +} diff --git a/cli/tools/test/mod.rs b/cli/tools/test/mod.rs index 2cf663b5f..6e22b894a 100644 --- a/cli/tools/test/mod.rs +++ b/cli/tools/test/mod.rs @@ -34,13 +34,15 @@ use deno_core::error::AnyError; use deno_core::error::JsError; use deno_core::futures::future; use deno_core::futures::stream; -use deno_core::futures::task::noop_waker; use deno_core::futures::FutureExt; use deno_core::futures::StreamExt; use deno_core::located_script_name; use deno_core::parking_lot::Mutex; use deno_core::serde_v8; +use deno_core::stats::RuntimeActivity; +use deno_core::stats::RuntimeActivityDiff; use deno_core::stats::RuntimeActivityStats; +use deno_core::stats::RuntimeActivityStatsFactory; use deno_core::stats::RuntimeActivityStatsFilter; use deno_core::unsync::spawn; use deno_core::unsync::spawn_blocking; @@ -68,6 +70,7 @@ use std::collections::BTreeSet; use std::collections::HashMap; use std::collections::HashSet; use std::fmt::Write as _; +use std::future::poll_fn; use std::io::Read; use std::io::Write; use std::num::NonZeroUsize; @@ -76,7 +79,7 @@ use std::sync::atomic::AtomicBool; use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering; use std::sync::Arc; -use std::task::Context; +use std::task::Poll; use std::time::Duration; use std::time::Instant; use std::time::SystemTime; @@ -98,6 +101,9 @@ use reporters::PrettyTestReporter; use reporters::TapTestReporter; use reporters::TestReporter; +/// How many times we're allowed to spin the event loop before considering something a leak. +const MAX_SANITIZER_LOOP_SPINS: usize = 16; + /// The test mode is used to determine how a specifier is to be tested. #[derive(Debug, Clone, Eq, PartialEq)] pub enum TestMode { @@ -210,8 +216,7 @@ pub enum TestFailure { JsError(Box<JsError>), FailedSteps(usize), IncompleteSteps, - LeakedOps(Vec<String>, bool), // Details, isOpCallTracingEnabled - LeakedResources(Vec<String>), // Details + Leaked(Vec<String>, Vec<String>), // Details, trailer notes // The rest are for steps only. Incomplete, OverlapsWithSanitizers(IndexSet<String>), // Long names of overlapped tests @@ -226,20 +231,13 @@ impl ToString for TestFailure { TestFailure::FailedSteps(n) => format!("{} test steps failed.", n), TestFailure::IncompleteSteps => "Completed while steps were still running. Ensure all steps are awaited with `await t.step(...)`.".to_string(), TestFailure::Incomplete => "Didn't complete before parent. Await step with `await t.step(...)`.".to_string(), - TestFailure::LeakedOps(details, is_op_call_tracing_enabled) => { - let mut string = "Leaking async ops:".to_string(); + TestFailure::Leaked(details, trailer_notes) => { + let mut string = "Leaks detected:".to_string(); for detail in details { - string.push_str(&format!("\n - {}", detail)); + string.push_str(&format!("\n - {detail}")); } - if !is_op_call_tracing_enabled { - string.push_str("\nTo get more details where ops were leaked, run again with --trace-ops flag."); - } - string - } - TestFailure::LeakedResources(details) => { - let mut string = "Leaking resources:".to_string(); - for detail in details { - string.push_str(&format!("\n - {}", detail)); + for trailer in trailer_notes { + string.push_str(&format!("\n{trailer}")); } string } @@ -540,6 +538,25 @@ pub fn worker_has_tests(worker: &mut MainWorker) -> bool { !state.borrow::<ops::testing::TestContainer>().0.is_empty() } +/// Yields to tokio to allow async work to process, and then polls +/// the event loop once. +#[must_use = "The event loop result should be checked"] +pub async fn poll_event_loop(worker: &mut MainWorker) -> Result<(), AnyError> { + // Allow any ops that to do work in the tokio event loop to do so + tokio::task::yield_now().await; + // Spin the event loop once + poll_fn(|cx| { + if let Poll::Ready(Err(err)) = worker + .js_runtime + .poll_event_loop(cx, PollEventLoopOptions::default()) + { + return Poll::Ready(Err(err)); + } + Poll::Ready(Ok(())) + }) + .await +} + pub async fn run_tests_for_worker( worker: &mut MainWorker, specifier: &ModuleSpecifier, @@ -574,11 +591,43 @@ pub async fn run_tests_for_worker( }))?; let mut had_uncaught_error = false; let stats = worker.js_runtime.runtime_activity_stats_factory(); + let ops = worker.js_runtime.op_names(); + + // These particular ops may start and stop independently of tests, so we just filter them out + // completely. + let op_id_host_recv_message = ops + .iter() + .position(|op| *op == "op_host_recv_message") + .unwrap(); + let op_id_host_recv_ctrl = ops + .iter() + .position(|op| *op == "op_host_recv_ctrl") + .unwrap(); + + // For consistency between tests with and without sanitizers, we _always_ include + // the actual sanitizer capture before and after a test, but a test that ignores resource + // or op sanitization simply doesn't throw if one of these constraints is violated. + let mut filter = RuntimeActivityStatsFilter::default(); + filter = filter.with_resources(); + filter = filter.with_ops(); + filter = filter.omit_op(op_id_host_recv_ctrl as _); + filter = filter.omit_op(op_id_host_recv_message as _); for (desc, function) in tests { if fail_fast_tracker.should_stop() { break; } + + // Each test needs a fresh reqwest connection pool to avoid inter-test weirdness with connections + // failing. If we don't do this, a connection to a test server we just tore down might be re-used in + // the next test. + // TODO(mmastrac): this should be some sort of callback that we can implement for any subsystem + worker + .js_runtime + .op_state() + .borrow_mut() + .try_take::<deno_runtime::deno_fetch::reqwest::Client>(); + if desc.ignore { sender.send(TestEvent::Result(desc.id, TestResult::Ignored, 0))?; continue; @@ -593,24 +642,10 @@ pub async fn run_tests_for_worker( // responded to settle. // TODO(mmastrac): we should provide an API to poll the event loop until no futher // progress is made. - { - let waker = noop_waker(); - let mut cx = Context::from_waker(&waker); - let _ = worker - .js_runtime - .poll_event_loop(&mut cx, PollEventLoopOptions::default()); - } + poll_event_loop(worker).await?; - let mut filter = RuntimeActivityStatsFilter::default(); - if desc.sanitize_resources { - filter = filter.with_resources(); - } - - let before = if !filter.is_empty() { - Some(stats.clone().capture(&filter)) - } else { - None - }; + // We always capture stats, regardless of sanitization state + let before = stats.clone().capture(&filter); let earlier = SystemTime::now(); let call = worker.js_runtime.call(&function); @@ -635,12 +670,21 @@ pub async fn run_tests_for_worker( } } }; - if let Some(before) = before { - let after = stats.clone().capture(&filter); - let diff = RuntimeActivityStats::diff(&before, &after); - let formatted = format_sanitizer_diff(diff); + + // Await activity stabilization + if let Some(diff) = wait_for_activity_to_stabilize( + worker, + &stats, + &filter, + before, + desc.sanitize_ops, + desc.sanitize_resources, + ) + .await? + { + let (formatted, trailer_notes) = format_sanitizer_diff(diff); if !formatted.is_empty() { - let failure = TestFailure::LeakedResources(formatted); + let failure = TestFailure::Leaked(formatted, trailer_notes); let elapsed = SystemTime::now().duration_since(earlier)?.as_millis(); sender.send(TestEvent::Result( desc.id, @@ -663,6 +707,96 @@ pub async fn run_tests_for_worker( Ok(()) } +/// Removes timer resources and op_sleep_interval calls. When an interval is started before a test +/// and resolves during a test, there's a false alarm. +fn preprocess_timer_activity(activities: &mut Vec<RuntimeActivity>) { + // TODO(mmastrac): Once we get to the new timer implementation, all of this + // code can go away and be replaced by a proper timer sanitizer. + let mut timer_resource_leaked = false; + + // First, search for any timer resources which will indicate that we have an interval leak + activities.retain(|activity| { + if let RuntimeActivity::Resource(_, name) = activity { + if name == "timer" { + timer_resource_leaked = true; + return false; + } + } + true + }); + + // If we've leaked a timer resource, we un-mute op_sleep_interval calls. Otherwise, we remove + // them. + if !timer_resource_leaked { + activities.retain(|activity| { + if let RuntimeActivity::AsyncOp(_, op, _) = activity { + *op != "op_sleep_interval" + } else { + true + } + }) + } +} + +async fn wait_for_activity_to_stabilize( + worker: &mut MainWorker, + stats: &RuntimeActivityStatsFactory, + filter: &RuntimeActivityStatsFilter, + before: RuntimeActivityStats, + sanitize_ops: bool, + sanitize_resources: bool, +) -> Result<Option<RuntimeActivityDiff>, AnyError> { + // First, check to see if there's any diff at all. If not, just continue. + let after = stats.clone().capture(filter); + let mut diff = RuntimeActivityStats::diff(&before, &after); + preprocess_timer_activity(&mut diff.appeared); + preprocess_timer_activity(&mut diff.disappeared); + if diff.appeared.is_empty() && diff.disappeared.is_empty() { + // No activity, so we return early + return Ok(None); + } + + // We allow for up to MAX_SANITIZER_LOOP_SPINS to get to a point where there is no difference. + // TODO(mmastrac): We could be much smarter about this if we had the concept of "progress" in + // an event loop tick. Ideally we'd be able to tell if we were spinning and doing nothing, or + // spinning and resolving ops. + for _ in 0..MAX_SANITIZER_LOOP_SPINS { + // There was a diff, so let the event loop run once + poll_event_loop(worker).await?; + + let after = stats.clone().capture(filter); + diff = RuntimeActivityStats::diff(&before, &after); + preprocess_timer_activity(&mut diff.appeared); + preprocess_timer_activity(&mut diff.disappeared); + if diff.appeared.is_empty() && diff.disappeared.is_empty() { + return Ok(None); + } + } + + if !sanitize_ops { + diff + .appeared + .retain(|activity| !matches!(activity, RuntimeActivity::AsyncOp(..))); + diff + .disappeared + .retain(|activity| !matches!(activity, RuntimeActivity::AsyncOp(..))); + } + if !sanitize_resources { + diff + .appeared + .retain(|activity| !matches!(activity, RuntimeActivity::Resource(..))); + diff + .disappeared + .retain(|activity| !matches!(activity, RuntimeActivity::Resource(..))); + } + + Ok(if diff.appeared.is_empty() && diff.disappeared.is_empty() { + None + } else { + Some(diff) + }) +} + fn extract_files_from_regex_blocks( specifier: &ModuleSpecifier, source: &str, |