feat(cli): evaluate code snippets in JSDoc and markdown (#25220)

This commit lets the `deno test --doc` command actually evaluate code snippets in JSDoc and markdown files.

## How it works

1. Extract code snippets from JSDoc or code fences
2. Convert them into pseudo files by wrapping them in `Deno.test(...)`
3. Register the pseudo files as in-memory files
4. Run type-check and evaluation

We apply some magic at step 2 - let's say we have the following file named `mod.ts` as an input:

````ts
/**
 * ```ts
 * import { assertEquals } from "jsr:@std/assert/equals";
 *
 * assertEquals(add(1, 2), 3);
 * ```
 */
export function add(a: number, b: number) {
  return a + b;
}
````

This is virtually transformed into:

```ts
import { assertEquals } from "jsr:@std/assert/equals";
import { add } from "file:///path/to/mod.ts";

Deno.test("mod.ts$2-7.ts", async () => {
  assertEquals(add(1, 2), 3);
});
```

Note that a new import statement is inserted here to make the `add` function available. In a nutshell, all items exported from `mod.ts` become available in the generated pseudo file with this automatic import insertion.

The intention behind this design is that, from a library user's standpoint, it should be very obvious that this `add` function is what this example code is attached to. Also, if there is an explicit import statement like `import { add } from "./mod.ts"`, this import path `./mod.ts` is not helpful for doc readers because they will need to import it in a different way.

The automatic import insertion has some edge cases, in particular where there is a local variable in a snippet with the same name as one of the exported items. This case is addressed by employing swc's scope analysis (see test cases for more details).

## "type-checking only" mode stays around

This change will likely impact a lot of existing doc tests in the ecosystem because some doc tests rely on the fact that they are not evaluated - some cause side effects if executed, some throw errors at runtime although they do pass the type check, etc.
To help those tests gradually transition to the ones runnable with the new `deno test --doc`, we will keep providing the ability to run type-checking only via `deno check --doc`. Additionally, there is a `--doc-only` option added to the `check` subcommand too, which is useful when you want to type-check code snippets in markdown files, as the normal `deno check` command doesn't accept markdown.

## Demo

https://github.com/user-attachments/assets/47e9af73-d16e-472d-b09e-1853b9e8f5ce

---

Closes #4716
author: Yusuke Tanaka <yusuktan@maguro.dev> 2024-09-18 13:35:48 +0900
committer: GitHub <noreply@github.com> 2024-09-17 21:35:48 -0700
commit: d5c00ef50e6519fccde54a577e038f0ebb1282e9 (patch)
tree: 9429c5f09c6969fd8236041f48b354a9b0841e1f /cli/tools
parent: 37315917625179063cb5653e2edd4ee0e5de99c5 (diff)
2 files changed, 123 insertions, 259 deletions
diff --git a/cli/tools/check.rs b/cli/tools/check.rs
index d50af5230..9c464fa16 100644
--- a/cli/tools/check.rs
+++ b/cli/tools/check.rs
@@ -15,7 +15,9 @@ use once_cell::sync::Lazy;
 use regex::Regex;
 
 use crate::args::check_warn_tsconfig;
+use crate::args::CheckFlags;
 use crate::args::CliOptions;
+use crate::args::Flags;
 use crate::args::TsConfig;
 use crate::args::TsConfigType;
 use crate::args::TsTypeLib;
@@ -24,13 +26,57 @@ use crate::cache::CacheDBHash;
 use crate::cache::Caches;
 use crate::cache::FastInsecureHasher;
 use crate::cache::TypeCheckCache;
+use crate::factory::CliFactory;
 use crate::graph_util::BuildFastCheckGraphOptions;
 use crate::graph_util::ModuleGraphBuilder;
 use crate::npm::CliNpmResolver;
 use crate::tsc;
 use crate::tsc::Diagnostics;
+use crate::util::extract;
 use crate::util::path::to_percent_decoded_str;
 
+pub async fn check(
+  flags: Arc<Flags>,
+  check_flags: CheckFlags,
+) -> Result<(), AnyError> {
+  let factory = CliFactory::from_flags(flags);
+
+  let main_graph_container = factory.main_module_graph_container().await?;
+
+  let specifiers =
+    main_graph_container.collect_specifiers(&check_flags.files)?;
+  if specifiers.is_empty() {
+    log::warn!("{} No matching files found.", colors::yellow("Warning"));
+  }
+
+  let specifiers_for_typecheck = if check_flags.doc || check_flags.doc_only {
+    let file_fetcher = factory.file_fetcher()?;
+
+    let mut specifiers_for_typecheck = if check_flags.doc {
+      specifiers.clone()
+    } else {
+      vec![]
+    };
+
+    for s in specifiers {
+      let file = file_fetcher.fetch_bypass_permissions(&s).await?;
+      let snippet_files = extract::extract_snippet_files(file)?;
+      for snippet_file in snippet_files {
+        specifiers_for_typecheck.push(snippet_file.specifier.clone());
+        file_fetcher.insert_memory_files(snippet_file);
+      }
+    }
+
+    specifiers_for_typecheck
+  } else {
+    specifiers
+  };
+
+  main_graph_container
+    .check_specifiers(&specifiers_for_typecheck)
+    .await
+}
+
 /// Options for performing a check of a module graph. Note that the decision to
 /// emit or not is determined by the `ts_config` settings.
 pub struct CheckOptions {
diff --git a/cli/tools/test/mod.rs b/cli/tools/test/mod.rs
index 63382ffc6..d043ffcba 100644
--- a/cli/tools/test/mod.rs
+++ b/cli/tools/test/mod.rs
@@ -9,21 +9,18 @@ use crate::display;
 use crate::factory::CliFactory;
 use crate::file_fetcher::File;
 use crate::file_fetcher::FileFetcher;
-use crate::graph_container::MainModuleGraphContainer;
 use crate::graph_util::has_graph_root_local_dependent_changed;
 use crate::ops;
+use crate::util::extract::extract_doc_tests;
 use crate::util::file_watcher;
 use crate::util::fs::collect_specifiers;
 use crate::util::path::get_extension;
 use crate::util::path::is_script_ext;
-use crate::util::path::mapped_specifier_for_tsc;
 use crate::util::path::matches_pattern_or_exact_path;
 use crate::worker::CliMainWorkerFactory;
 use crate::worker::CoverageCollector;
 
-use deno_ast::swc::common::comments::CommentKind;
 use deno_ast::MediaType;
-use deno_ast::SourceRangedForSpanned;
 use deno_config::glob::FilePatterns;
 use deno_config::glob::WalkEntry;
 use deno_core::anyhow;
@@ -151,6 +148,20 @@ pub enum TestMode {
   Both,
 }
 
+impl TestMode {
+  /// Returns `true` if the test mode indicates that code snippet extraction is
+  /// needed.
+  fn needs_test_extraction(&self) -> bool {
+    matches!(self, Self::Documentation | Self::Both)
+  }
+
+  /// Returns `true` if the test mode indicates that the test should be
+  /// type-checked and run.
+  fn needs_test_run(&self) -> bool {
+    matches!(self, Self::Executable | Self::Both)
+  }
+}
+
 #[derive(Clone, Debug, Default)]
 pub struct TestFilter {
   pub substring: Option<String>,
@@ -1174,233 +1185,6 @@ async fn wait_for_activity_to_stabilize(
   })
 }
 
-fn extract_files_from_regex_blocks(
-  specifier: &ModuleSpecifier,
-  source: &str,
-  media_type: MediaType,
-  file_line_index: usize,
-  blocks_regex: &Regex,
-  lines_regex: &Regex,
-) -> Result<Vec<File>, AnyError> {
-  let files = blocks_regex
-    .captures_iter(source)
-    .filter_map(|block| {
-      block.get(1)?;
-
-      let maybe_attributes: Option<Vec<_>> = block
-        .get(1)
-        .map(|attributes| attributes.as_str().split(' ').collect());
-
-      let file_media_type = if let Some(attributes) = maybe_attributes {
-        if attributes.contains(&"ignore") {
-          return None;
-        }
-
-        match attributes.first() {
-          Some(&"js") => MediaType::JavaScript,
-          Some(&"javascript") => MediaType::JavaScript,
-          Some(&"mjs") => MediaType::Mjs,
-          Some(&"cjs") => MediaType::Cjs,
-          Some(&"jsx") => MediaType::Jsx,
-          Some(&"ts") => MediaType::TypeScript,
-          Some(&"typescript") => MediaType::TypeScript,
-          Some(&"mts") => MediaType::Mts,
-          Some(&"cts") => MediaType::Cts,
-          Some(&"tsx") => MediaType::Tsx,
-          _ => MediaType::Unknown,
-        }
-      } else {
-        media_type
-      };
-
-      if file_media_type == MediaType::Unknown {
-        return None;
-      }
-
-      let line_offset = source[0..block.get(0).unwrap().start()]
-        .chars()
-        .filter(|c| *c == '\n')
-        .count();
-
-      let line_count = block.get(0).unwrap().as_str().split('\n').count();
-
-      let body = block.get(2).unwrap();
-      let text = body.as_str();
-
-      // TODO(caspervonb) generate an inline source map
-      let mut file_source = String::new();
-      for line in lines_regex.captures_iter(text) {
-        let text = line.get(1).unwrap();
-        writeln!(file_source, "{}", text.as_str()).unwrap();
-      }
-
-      let file_specifier = ModuleSpecifier::parse(&format!(
-        "{}${}-{}",
-        specifier,
-        file_line_index + line_offset + 1,
-        file_line_index + line_offset + line_count + 1,
-      ))
-      .unwrap();
-      let file_specifier =
-        mapped_specifier_for_tsc(&file_specifier, file_media_type)
-          .map(|s| ModuleSpecifier::parse(&s).unwrap())
-          .unwrap_or(file_specifier);
-
-      Some(File {
-        specifier: file_specifier,
-        maybe_headers: None,
-        source: file_source.into_bytes().into(),
-      })
-    })
-    .collect();
-
-  Ok(files)
-}
-
-fn extract_files_from_source_comments(
-  specifier: &ModuleSpecifier,
-  source: Arc<str>,
-  media_type: MediaType,
-) -> Result<Vec<File>, AnyError> {
-  let parsed_source = deno_ast::parse_module(deno_ast::ParseParams {
-    specifier: specifier.clone(),
-    text: source,
-    media_type,
-    capture_tokens: false,
-    maybe_syntax: None,
-    scope_analysis: false,
-  })?;
-  let comments = parsed_source.comments().get_vec();
-  let blocks_regex = lazy_regex::regex!(r"```([^\r\n]*)\r?\n([\S\s]*?)```");
-  let lines_regex = lazy_regex::regex!(r"(?:\* ?)(?:\# ?)?(.*)");
-
-  let files = comments
-    .iter()
-    .filter(|comment| {
-      if comment.kind != CommentKind::Block || !comment.text.starts_with('*') {
-        return false;
-      }
-
-      true
-    })
-    .flat_map(|comment| {
-      extract_files_from_regex_blocks(
-        specifier,
-        &comment.text,
-        media_type,
-        parsed_source.text_info_lazy().line_index(comment.start()),
-        blocks_regex,
-        lines_regex,
-      )
-    })
-    .flatten()
-    .collect();
-
-  Ok(files)
-}
-
-fn extract_files_from_fenced_blocks(
-  specifier: &ModuleSpecifier,
-  source: &str,
-  media_type: MediaType,
-) -> Result<Vec<File>, AnyError> {
-  // The pattern matches code blocks as well as anything in HTML comment syntax,
-  // but it stores the latter without any capturing groups. This way, a simple
-  // check can be done to see if a block is inside a comment (and skip typechecking)
-  // or not by checking for the presence of capturing groups in the matches.
-  let blocks_regex =
-    lazy_regex::regex!(r"(?s)<!--.*?-->|```([^\r\n]*)\r?\n([\S\s]*?)```");
-  let lines_regex = lazy_regex::regex!(r"(?:\# ?)?(.*)");
-
-  extract_files_from_regex_blocks(
-    specifier,
-    source,
-    media_type,
-    /* file line index */ 0,
-    blocks_regex,
-    lines_regex,
-  )
-}
-
-async fn fetch_inline_files(
-  file_fetcher: &FileFetcher,
-  specifiers: Vec<ModuleSpecifier>,
-) -> Result<Vec<File>, AnyError> {
-  let mut files = Vec::new();
-  for specifier in specifiers {
-    let file = file_fetcher
-      .fetch_bypass_permissions(&specifier)
-      .await?
-      .into_text_decoded()?;
-
-    let inline_files = if file.media_type == MediaType::Unknown {
-      extract_files_from_fenced_blocks(
-        &file.specifier,
-        &file.source,
-        file.media_type,
-      )
-    } else {
-      extract_files_from_source_comments(
-        &file.specifier,
-        file.source,
-        file.media_type,
-      )
-    };
-
-    files.extend(inline_files?);
-  }
-
-  Ok(files)
-}
-
-/// Type check a collection of module and document specifiers.
-pub async fn check_specifiers(
-  file_fetcher: &FileFetcher,
-  main_graph_container: &Arc<MainModuleGraphContainer>,
-  specifiers: Vec<(ModuleSpecifier, TestMode)>,
-) -> Result<(), AnyError> {
-  let inline_files = fetch_inline_files(
-    file_fetcher,
-    specifiers
-      .iter()
-      .filter_map(|(specifier, mode)| {
-        if *mode != TestMode::Executable {
-          Some(specifier.clone())
-        } else {
-          None
-        }
-      })
-      .collect(),
-  )
-  .await?;
-
-  let mut module_specifiers = specifiers
-    .into_iter()
-    .filter_map(|(specifier, mode)| {
-      if mode != TestMode::Documentation {
-        Some(specifier)
-      } else {
-        None
-      }
-    })
-    .collect::<Vec<_>>();
-
-  if !inline_files.is_empty() {
-    module_specifiers
-      .extend(inline_files.iter().map(|file| file.specifier.clone()));
-
-    for file in inline_files {
-      file_fetcher.insert_memory_files(file);
-    }
-  }
-
-  main_graph_container
-    .check_specifiers(&module_specifiers)
-    .await?;
-
-  Ok(())
-}
-
 static HAS_TEST_RUN_SIGINT_HANDLER: AtomicBool = AtomicBool::new(false);
 
 /// Test a collection of specifiers with test modes concurrently.
@@ -1788,14 +1572,19 @@ pub async fn run_tests(
     return Err(generic_error("No test modules found"));
   }
 
+  let doc_tests = get_doc_tests(&specifiers_with_mode, file_fetcher).await?;
+  let specifiers_for_typecheck_and_test =
+    get_target_specifiers(specifiers_with_mode, &doc_tests);
+  for doc_test in doc_tests {
+    file_fetcher.insert_memory_files(doc_test);
+  }
+
   let main_graph_container = factory.main_module_graph_container().await?;
 
-  check_specifiers(
-    file_fetcher,
-    main_graph_container,
-    specifiers_with_mode.clone(),
-  )
-  .await?;
+  // Typecheck
+  main_graph_container
+    .check_specifiers(&specifiers_for_typecheck_and_test)
+    .await?;
 
   if workspace_test_options.no_run {
     return Ok(());
@@ -1804,17 +1593,12 @@ pub async fn run_tests(
   let worker_factory =
     Arc::new(factory.create_cli_main_worker_factory().await?);
 
+  // Run tests
   test_specifiers(
     worker_factory,
     &permissions,
     permission_desc_parser,
-    specifiers_with_mode
-      .into_iter()
-      .filter_map(|(s, m)| match m {
-        TestMode::Documentation => None,
-        _ => Some(s),
-      })
-      .collect(),
+    specifiers_for_typecheck_and_test,
     TestSpecifiersOptions {
       cwd: Url::from_directory_path(cli_options.initial_cwd()).map_err(
         |_| {
@@ -1949,8 +1733,6 @@ pub async fn run_tests_with_watch(
           test_modules.clone()
         };
 
-        let worker_factory =
-          Arc::new(factory.create_cli_main_worker_factory().await?);
         let specifiers_with_mode = fetch_specifiers_with_test_mode(
           &cli_options,
           file_fetcher,
@@ -1962,30 +1744,34 @@ pub async fn run_tests_with_watch(
         .filter(|(specifier, _)| test_modules_to_reload.contains(specifier))
         .collect::<Vec<(ModuleSpecifier, TestMode)>>();
 
+        let doc_tests =
+          get_doc_tests(&specifiers_with_mode, file_fetcher).await?;
+        let specifiers_for_typecheck_and_test =
+          get_target_specifiers(specifiers_with_mode, &doc_tests);
+        for doc_test in doc_tests {
+          file_fetcher.insert_memory_files(doc_test);
+        }
+
         let main_graph_container =
           factory.main_module_graph_container().await?;
-        check_specifiers(
-          file_fetcher,
-          main_graph_container,
-          specifiers_with_mode.clone(),
-        )
-        .await?;
+
+        // Typecheck
+        main_graph_container
+          .check_specifiers(&specifiers_for_typecheck_and_test)
+          .await?;
 
         if workspace_test_options.no_run {
           return Ok(());
         }
 
+        let worker_factory =
+          Arc::new(factory.create_cli_main_worker_factory().await?);
+
         test_specifiers(
           worker_factory,
           &permissions,
           permission_desc_parser,
-          specifiers_with_mode
-            .into_iter()
-            .filter_map(|(s, m)| match m {
-              TestMode::Documentation => None,
-              _ => Some(s),
-            })
-            .collect(),
+          specifiers_for_typecheck_and_test,
           TestSpecifiersOptions {
             cwd: Url::from_directory_path(cli_options.initial_cwd()).map_err(
               |_| {
@@ -2020,6 +1806,38 @@ pub async fn run_tests_with_watch(
   Ok(())
 }
 
+/// Extracts doc tests from files specified by the given specifiers.
+async fn get_doc_tests(
+  specifiers_with_mode: &[(Url, TestMode)],
+  file_fetcher: &FileFetcher,
+) -> Result<Vec<File>, AnyError> {
+  let specifiers_needing_extraction = specifiers_with_mode
+    .iter()
+    .filter(|(_, mode)| mode.needs_test_extraction())
+    .map(|(s, _)| s);
+
+  let mut doc_tests = Vec::new();
+  for s in specifiers_needing_extraction {
+    let file = file_fetcher.fetch_bypass_permissions(s).await?;
+    doc_tests.extend(extract_doc_tests(file)?);
+  }
+
+  Ok(doc_tests)
+}
+
+/// Get a list of specifiers that we need to perform typecheck and run tests on.
+/// The result includes "pseudo specifiers" for doc tests.
+fn get_target_specifiers(
+  specifiers_with_mode: Vec<(Url, TestMode)>,
+  doc_tests: &[File],
+) -> Vec<Url> {
+  specifiers_with_mode
+    .into_iter()
+    .filter_map(|(s, mode)| mode.needs_test_run().then_some(s))
+    .chain(doc_tests.iter().map(|d| d.specifier.clone()))
+    .collect()
+}
+
 /// Tracks failures for the `--fail-fast` argument in
 /// order to tell when to stop running tests.
 #[derive(Clone, Default)]
author	Yusuke Tanaka <yusuktan@maguro.dev>	2024-09-18 13:35:48 +0900
committer	GitHub <noreply@github.com>	2024-09-17 21:35:48 -0700
commit	d5c00ef50e6519fccde54a577e038f0ebb1282e9 (patch)
tree	9429c5f09c6969fd8236041f48b354a9b0841e1f /cli/tools
parent	37315917625179063cb5653e2edd4ee0e5de99c5 (diff)