summary | refs | log | tree | commit | diff
path: root/cli/cache
diff options
context:
space:
mode:
author: David Sherret <dsherret@users.noreply.github.com> 2022-07-19 11:58:18 -0400
committer: GitHub <noreply@github.com> 2022-07-19 11:58:18 -0400
commit: 0ab262b901348e9251262a02bef17d14ed13b997 (patch)
tree: fc5a6e3926ea7480714cbc844098eca6c43c1ab5 /cli/cache
parent: e99d64acedb6e111d33f53599da494865978f1aa (diff)
feat: emit files on demand and fix racy emit (#15220)
Diffstat (limited to 'cli/cache')
-rw-r--r-- cli/cache/check.rs       |   6
-rw-r--r-- cli/cache/common.rs      |  35
-rw-r--r-- cli/cache/disk_cache.rs  |  78
-rw-r--r-- cli/cache/emit.rs        | 232
-rw-r--r-- cli/cache/incremental.rs |  15
-rw-r--r-- cli/cache/mod.rs         |  53
6 files changed, 232 insertions, 187 deletions
diff --git a/cli/cache/check.rs b/cli/cache/check.rs
index 4e0f8d912..6f3c41950 100644
--- a/cli/cache/check.rs
+++ b/cli/cache/check.rs
@@ -22,7 +22,7 @@ impl TypeCheckCache {
Err(err) => {
log::debug!(
concat!(
- "Failed creating internal type checking cache. ",
+ "Failed loading internal type checking cache. ",
"Recreating...\n\nError details:\n{:#}",
),
err
@@ -35,7 +35,7 @@ impl TypeCheckCache {
Err(err) => {
log::debug!(
concat!(
- "Unable to create internal cache for type checking. ",
+ "Unable to load internal cache for type checking. ",
"This will reduce the performance of type checking.\n\n",
"Error details:\n{:#}",
),
@@ -233,7 +233,7 @@ mod test {
cache.set_tsbuildinfo(&specifier1, "test");
assert_eq!(cache.get_tsbuildinfo(&specifier1), Some("test".to_string()));
- // recreating the cache should not remove the data because the CLI version and state hash is the same
+ // recreating the cache should not remove the data because the CLI version is the same
let conn = cache.0.unwrap();
let cache =
TypeCheckCache::from_connection(conn, "2.0.0".to_string()).unwrap();
diff --git a/cli/cache/common.rs b/cli/cache/common.rs
index c01c1ab9a..b536d6cb2 100644
--- a/cli/cache/common.rs
+++ b/cli/cache/common.rs
@@ -1,16 +1,37 @@
// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license.
+use std::hash::Hasher;
+
use deno_core::error::AnyError;
use deno_runtime::deno_webstorage::rusqlite::Connection;
-/// Very fast non-cryptographically secure hash.
-pub fn fast_insecure_hash(bytes: &[u8]) -> u64 {
- use std::hash::Hasher;
- use twox_hash::XxHash64;
+/// A very fast insecure hasher that uses the xxHash algorithm.
+#[derive(Default)]
+pub struct FastInsecureHasher(twox_hash::XxHash64);
+
+impl FastInsecureHasher {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ pub fn write_str(&mut self, text: &str) -> &mut Self {
+ self.write(text.as_bytes());
+ self
+ }
+
+ pub fn write(&mut self, bytes: &[u8]) -> &mut Self {
+ self.0.write(bytes);
+ self
+ }
+
+ pub fn write_u64(&mut self, value: u64) -> &mut Self {
+ self.0.write_u64(value);
+ self
+ }
- let mut hasher = XxHash64::default();
- hasher.write(bytes);
- hasher.finish()
+ pub fn finish(&self) -> u64 {
+ self.0.finish()
+ }
}
/// Runs the common sqlite pragma.
diff --git a/cli/cache/disk_cache.rs b/cli/cache/disk_cache.rs
index 01352c398..5a2f11e3c 100644
--- a/cli/cache/disk_cache.rs
+++ b/cli/cache/disk_cache.rs
@@ -3,13 +3,6 @@
use crate::fs_util;
use crate::http_cache::url_to_filename;
-use super::CacheType;
-use super::Cacher;
-use super::EmitMetadata;
-
-use deno_ast::ModuleSpecifier;
-use deno_core::error::AnyError;
-use deno_core::serde_json;
use deno_core::url::Host;
use deno_core::url::Url;
use std::ffi::OsStr;
@@ -154,77 +147,6 @@ impl DiskCache {
fs_util::atomic_write_file(&path, data, crate::http_cache::CACHE_PERM)
.map_err(|e| with_io_context(&e, format!("{:#?}", &path)))
}
-
- fn get_emit_metadata(
- &self,
- specifier: &ModuleSpecifier,
- ) -> Option<EmitMetadata> {
- let filename = self.get_cache_filename_with_extension(specifier, "meta")?;
- let bytes = self.get(&filename).ok()?;
- serde_json::from_slice(&bytes).ok()
- }
-
- fn set_emit_metadata(
- &self,
- specifier: &ModuleSpecifier,
- data: EmitMetadata,
- ) -> Result<(), AnyError> {
- let filename = self
- .get_cache_filename_with_extension(specifier, "meta")
- .unwrap();
- let bytes = serde_json::to_vec(&data)?;
- self.set(&filename, &bytes).map_err(|e| e.into())
- }
-}
-
-// todo(13302): remove and replace with sqlite database
-impl Cacher for DiskCache {
- fn get(
- &self,
- cache_type: CacheType,
- specifier: &ModuleSpecifier,
- ) -> Option<String> {
- let extension = match cache_type {
- CacheType::Emit => "js",
- CacheType::SourceMap => "js.map",
- CacheType::Version => {
- return self.get_emit_metadata(specifier).map(|d| d.version_hash)
- }
- };
- let filename =
- self.get_cache_filename_with_extension(specifier, extension)?;
- self
- .get(&filename)
- .ok()
- .and_then(|b| String::from_utf8(b).ok())
- }
-
- fn set(
- &self,
- cache_type: CacheType,
- specifier: &ModuleSpecifier,
- value: String,
- ) -> Result<(), AnyError> {
- let extension = match cache_type {
- CacheType::Emit => "js",
- CacheType::SourceMap => "js.map",
- CacheType::Version => {
- let data = if let Some(mut data) = self.get_emit_metadata(specifier) {
- data.version_hash = value;
- data
- } else {
- EmitMetadata {
- version_hash: value,
- }
- };
- return self.set_emit_metadata(specifier, data);
- }
- };
- let filename = self
- .get_cache_filename_with_extension(specifier, extension)
- .unwrap();
- self.set(&filename, value.as_bytes()).map_err(|e| e.into())
- }
}
#[cfg(test)]
diff --git a/cli/cache/emit.rs b/cli/cache/emit.rs
index e1469b862..61039a966 100644
--- a/cli/cache/emit.rs
+++ b/cli/cache/emit.rs
@@ -1,71 +1,209 @@
// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license.
+use std::path::PathBuf;
+
use deno_ast::ModuleSpecifier;
+use deno_core::anyhow::anyhow;
use deno_core::error::AnyError;
+use deno_core::serde_json;
+use serde::Deserialize;
+use serde::Serialize;
-use super::CacheType;
-use super::Cacher;
+use super::DiskCache;
+use super::FastInsecureHasher;
-/// Emit cache for a single file.
-#[derive(Debug, Clone, PartialEq)]
-pub struct SpecifierEmitCacheData {
+#[derive(Debug, Deserialize, Serialize)]
+struct EmitMetadata {
pub source_hash: String,
- pub text: String,
- pub map: Option<String>,
+ pub emit_hash: String,
+ // purge the cache between cli versions
+ pub cli_version: String,
}
-pub trait EmitCache {
- /// Gets the emit data from the cache.
- fn get_emit_data(
- &self,
- specifier: &ModuleSpecifier,
- ) -> Option<SpecifierEmitCacheData>;
- /// Sets the emit data in the cache.
- fn set_emit_data(
- &self,
- specifier: ModuleSpecifier,
- data: SpecifierEmitCacheData,
- ) -> Result<(), AnyError>;
- /// Gets the stored hash of the source of the provider specifier
- /// to tell if the emit is out of sync with the source.
- /// TODO(13302): this is actually not reliable and should be removed
- /// once switching to an sqlite db
- fn get_source_hash(&self, specifier: &ModuleSpecifier) -> Option<String>;
- /// Gets the emitted JavaScript of the TypeScript source.
- /// TODO(13302): remove this once switching to an sqlite db
- fn get_emit_text(&self, specifier: &ModuleSpecifier) -> Option<String>;
+/// The cache that stores previously emitted files.
+#[derive(Clone)]
+pub struct EmitCache {
+ disk_cache: DiskCache,
+ cli_version: String,
}
-impl<T: Cacher> EmitCache for T {
- fn get_emit_data(
+impl EmitCache {
+ pub fn new(disk_cache: DiskCache) -> Self {
+ Self {
+ disk_cache,
+ cli_version: crate::version::deno(),
+ }
+ }
+
+ /// Gets the emitted code with embedded sourcemap from the cache.
+ ///
+ /// The expected source hash is used in order to verify
+ /// that you're getting a value from the cache that is
+ /// for the provided source.
+ ///
+ /// Cached emits from previous CLI releases will not be returned
+ /// or emits that do not match the source.
+ pub fn get_emit_code(
&self,
specifier: &ModuleSpecifier,
- ) -> Option<SpecifierEmitCacheData> {
- Some(SpecifierEmitCacheData {
- source_hash: self.get_source_hash(specifier)?,
- text: self.get_emit_text(specifier)?,
- map: self.get(CacheType::SourceMap, specifier),
- })
+ expected_source_hash: u64,
+ ) -> Option<String> {
+ let meta_filename = self.get_meta_filename(specifier)?;
+ let emit_filename = self.get_emit_filename(specifier)?;
+
+ // load and verify the meta data file is for this source and CLI version
+ let bytes = self.disk_cache.get(&meta_filename).ok()?;
+ let meta: EmitMetadata = serde_json::from_slice(&bytes).ok()?;
+ if meta.source_hash != expected_source_hash.to_string()
+ || meta.cli_version != self.cli_version
+ {
+ return None;
+ }
+
+ // load and verify the emit is for the meta data
+ let emit_bytes = self.disk_cache.get(&emit_filename).ok()?;
+ if meta.emit_hash != compute_emit_hash(&emit_bytes) {
+ return None;
+ }
+
+ // everything looks good, return it
+ let emit_text = String::from_utf8(emit_bytes).ok()?;
+ Some(emit_text)
}
- fn get_source_hash(&self, specifier: &ModuleSpecifier) -> Option<String> {
- self.get(CacheType::Version, specifier)
+ /// Gets the filepath which stores the emit.
+ pub fn get_emit_filepath(
+ &self,
+ specifier: &ModuleSpecifier,
+ ) -> Option<PathBuf> {
+ Some(
+ self
+ .disk_cache
+ .location
+ .join(self.get_emit_filename(specifier)?),
+ )
}
- fn get_emit_text(&self, specifier: &ModuleSpecifier) -> Option<String> {
- self.get(CacheType::Emit, specifier)
+ /// Sets the emit code in the cache.
+ pub fn set_emit_code(
+ &self,
+ specifier: &ModuleSpecifier,
+ source_hash: u64,
+ code: &str,
+ ) {
+ if let Err(err) = self.set_emit_code_result(specifier, source_hash, code) {
+ // should never error here, but if it ever does don't fail
+ if cfg!(debug_assertions) {
+ panic!("Error saving emit data ({}): {}", specifier, err);
+ } else {
+ log::debug!("Error saving emit data({}): {}", specifier, err);
+ }
+ }
}
- fn set_emit_data(
+ fn set_emit_code_result(
&self,
- specifier: ModuleSpecifier,
- data: SpecifierEmitCacheData,
+ specifier: &ModuleSpecifier,
+ source_hash: u64,
+ code: &str,
) -> Result<(), AnyError> {
- self.set(CacheType::Version, &specifier, data.source_hash)?;
- self.set(CacheType::Emit, &specifier, data.text)?;
- if let Some(map) = data.map {
- self.set(CacheType::SourceMap, &specifier, map)?;
- }
+ let meta_filename = self
+ .get_meta_filename(specifier)
+ .ok_or_else(|| anyhow!("Could not get meta filename."))?;
+ let emit_filename = self
+ .get_emit_filename(specifier)
+ .ok_or_else(|| anyhow!("Could not get emit filename."))?;
+
+ // save the metadata
+ let metadata = EmitMetadata {
+ cli_version: self.cli_version.to_string(),
+ source_hash: source_hash.to_string(),
+ emit_hash: compute_emit_hash(code.as_bytes()),
+ };
+ self
+ .disk_cache
+ .set(&meta_filename, &serde_json::to_vec(&metadata)?)?;
+
+ // save the emit source
+ self.disk_cache.set(&emit_filename, code.as_bytes())?;
+
Ok(())
}
+
+ fn get_meta_filename(&self, specifier: &ModuleSpecifier) -> Option<PathBuf> {
+ self
+ .disk_cache
+ .get_cache_filename_with_extension(specifier, "meta")
+ }
+
+ fn get_emit_filename(&self, specifier: &ModuleSpecifier) -> Option<PathBuf> {
+ self
+ .disk_cache
+ .get_cache_filename_with_extension(specifier, "js")
+ }
+}
+
+fn compute_emit_hash(bytes: &[u8]) -> String {
+ // it's ok to use an insecure hash here because
+ // if someone can change the emit source then they
+ // can also change the version hash
+ FastInsecureHasher::new().write(bytes).finish().to_string()
+}
+
+#[cfg(test)]
+mod test {
+ use test_util::TempDir;
+
+ use super::*;
+
+ #[test]
+ pub fn emit_cache_general_use() {
+ let temp_dir = TempDir::new();
+ let disk_cache = DiskCache::new(temp_dir.path());
+ let cache = EmitCache {
+ disk_cache: disk_cache.clone(),
+ cli_version: "1.0.0".to_string(),
+ };
+
+ let specifier1 =
+ ModuleSpecifier::from_file_path(temp_dir.path().join("file1.ts"))
+ .unwrap();
+ let specifier2 =
+ ModuleSpecifier::from_file_path(temp_dir.path().join("file2.ts"))
+ .unwrap();
+ assert_eq!(cache.get_emit_code(&specifier1, 1), None);
+ let emit_code1 = "text1".to_string();
+ let emit_code2 = "text2".to_string();
+ cache.set_emit_code(&specifier1, 10, &emit_code1);
+ cache.set_emit_code(&specifier2, 2, &emit_code2);
+ // providing the incorrect source hash
+ assert_eq!(cache.get_emit_code(&specifier1, 5), None);
+ // providing the correct source hash
+ assert_eq!(
+ cache.get_emit_code(&specifier1, 10),
+ Some(emit_code1.clone()),
+ );
+ assert_eq!(cache.get_emit_code(&specifier2, 2), Some(emit_code2),);
+
+ // try changing the cli version (should not load previous ones)
+ let cache = EmitCache {
+ disk_cache: disk_cache.clone(),
+ cli_version: "2.0.0".to_string(),
+ };
+ assert_eq!(cache.get_emit_code(&specifier1, 10), None);
+ cache.set_emit_code(&specifier1, 5, &emit_code1);
+
+ // recreating the cache should still load the data because the CLI version is the same
+ let cache = EmitCache {
+ disk_cache,
+ cli_version: "2.0.0".to_string(),
+ };
+ assert_eq!(cache.get_emit_code(&specifier1, 5), Some(emit_code1));
+
+ // adding when already exists should not cause issue
+ let emit_code3 = "asdf".to_string();
+ cache.set_emit_code(&specifier1, 20, &emit_code3);
+ assert_eq!(cache.get_emit_code(&specifier1, 5), None);
+ assert_eq!(cache.get_emit_code(&specifier1, 20), Some(emit_code3));
+ }
}
diff --git a/cli/cache/incremental.rs b/cli/cache/incremental.rs
index b5fff0734..da832a8b5 100644
--- a/cli/cache/incremental.rs
+++ b/cli/cache/incremental.rs
@@ -12,8 +12,8 @@ use deno_runtime::deno_webstorage::rusqlite::Connection;
use serde::Serialize;
use tokio::task::JoinHandle;
-use super::common::fast_insecure_hash;
use super::common::run_sqlite_pragma;
+use super::common::FastInsecureHasher;
/// Cache used to skip formatting/linting a file again when we
/// know it is already formatted or has no lint diagnostics.
@@ -79,8 +79,9 @@ impl IncrementalCacheInner {
state: &TState,
initial_file_paths: &[PathBuf],
) -> Result<Self, AnyError> {
- let state_hash =
- fast_insecure_hash(serde_json::to_string(state).unwrap().as_bytes());
+ let state_hash = FastInsecureHasher::new()
+ .write_str(&serde_json::to_string(state).unwrap())
+ .finish();
let sql_cache = SqlIncrementalCache::new(db_file_path, state_hash)?;
Ok(Self::from_sql_incremental_cache(
sql_cache,
@@ -123,13 +124,15 @@ impl IncrementalCacheInner {
pub fn is_file_same(&self, file_path: &Path, file_text: &str) -> bool {
match self.previous_hashes.get(file_path) {
- Some(hash) => *hash == fast_insecure_hash(file_text.as_bytes()),
+ Some(hash) => {
+ *hash == FastInsecureHasher::new().write_str(file_text).finish()
+ }
None => false,
}
}
pub fn update_file(&self, file_path: &Path, file_text: &str) {
- let hash = fast_insecure_hash(file_text.as_bytes());
+ let hash = FastInsecureHasher::new().write_str(file_text).finish();
if let Some(previous_hash) = self.previous_hashes.get(file_path) {
if *previous_hash == hash {
return; // do not bother updating the db file because nothing has changed
@@ -334,7 +337,7 @@ mod test {
.unwrap();
let file_path = PathBuf::from("/mod.ts");
let file_text = "test";
- let file_hash = fast_insecure_hash(file_text.as_bytes());
+ let file_hash = FastInsecureHasher::new().write_str(file_text).finish();
sql_cache.set_source_hash(&file_path, file_hash).unwrap();
let cache = IncrementalCacheInner::from_sql_incremental_cache(
sql_cache,
diff --git a/cli/cache/mod.rs b/cli/cache/mod.rs
index f363d8fa8..7482826cf 100644
--- a/cli/cache/mod.rs
+++ b/cli/cache/mod.rs
@@ -3,10 +3,7 @@
use crate::errors::get_error_class_name;
use crate::file_fetcher::FileFetcher;
-use deno_core::error::AnyError;
use deno_core::futures::FutureExt;
-use deno_core::serde::Deserialize;
-use deno_core::serde::Serialize;
use deno_core::ModuleSpecifier;
use deno_graph::source::CacheInfo;
use deno_graph::source::LoadFuture;
@@ -22,44 +19,15 @@ mod emit;
mod incremental;
pub use check::TypeCheckCache;
+pub use common::FastInsecureHasher;
pub use disk_cache::DiskCache;
pub use emit::EmitCache;
-pub use emit::SpecifierEmitCacheData;
pub use incremental::IncrementalCache;
-#[derive(Debug, Deserialize, Serialize)]
-pub struct EmitMetadata {
- pub version_hash: String,
-}
-
-pub enum CacheType {
- Emit,
- SourceMap,
- Version,
-}
-
-/// A trait which provides a concise implementation to getting and setting
-/// values in a cache.
-pub trait Cacher {
- /// Get a value from the cache.
- fn get(
- &self,
- cache_type: CacheType,
- specifier: &ModuleSpecifier,
- ) -> Option<String>;
- /// Set a value in the cache.
- fn set(
- &self,
- cache_type: CacheType,
- specifier: &ModuleSpecifier,
- value: String,
- ) -> Result<(), AnyError>;
-}
-
/// A "wrapper" for the FileFetcher and DiskCache for the Deno CLI that provides
/// a concise interface to the DENO_DIR when building module graphs.
pub struct FetchCacher {
- disk_cache: DiskCache,
+ emit_cache: EmitCache,
dynamic_permissions: Permissions,
file_fetcher: Arc<FileFetcher>,
root_permissions: Permissions,
@@ -67,7 +35,7 @@ pub struct FetchCacher {
impl FetchCacher {
pub fn new(
- disk_cache: DiskCache,
+ emit_cache: EmitCache,
file_fetcher: FileFetcher,
root_permissions: Permissions,
dynamic_permissions: Permissions,
@@ -75,7 +43,7 @@ impl FetchCacher {
let file_fetcher = Arc::new(file_fetcher);
Self {
- disk_cache,
+ emit_cache,
dynamic_permissions,
file_fetcher,
root_permissions,
@@ -87,21 +55,14 @@ impl Loader for FetchCacher {
fn get_cache_info(&self, specifier: &ModuleSpecifier) -> Option<CacheInfo> {
let local = self.file_fetcher.get_local_path(specifier)?;
if local.is_file() {
- let location = &self.disk_cache.location;
let emit = self
- .disk_cache
- .get_cache_filename_with_extension(specifier, "js")
- .map(|p| location.join(p))
- .filter(|p| p.is_file());
- let map = self
- .disk_cache
- .get_cache_filename_with_extension(specifier, "js.map")
- .map(|p| location.join(p))
+ .emit_cache
+ .get_emit_filepath(specifier)
.filter(|p| p.is_file());
Some(CacheInfo {
local: Some(local),
emit,
- map,
+ map: None,
})
} else {
None