diff options
author | David Sherret <dsherret@users.noreply.github.com> | 2023-08-01 20:49:09 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-02 00:49:09 +0000 |
commit | 1cefa831fd74b14121494045a347024502d74e34 (patch) | |
tree | cc7791cf674e427fe4165262db416e6c537e99a3 /cli/cache | |
parent | 36ae37604a0ddab4349df6eb6fafb8ae39fd20fc (diff) |
feat(unstable): optional `deno_modules` directory (#19977)
Closes #15633
Diffstat (limited to 'cli/cache')
-rw-r--r-- | cli/cache/deno_dir.rs | 1 | ||||
-rw-r--r-- | cli/cache/disk_cache.rs | 63 | ||||
-rw-r--r-- | cli/cache/http_cache.rs | 330 | ||||
-rw-r--r-- | cli/cache/http_cache/common.rs | 42 | ||||
-rw-r--r-- | cli/cache/http_cache/global.rs | 296 | ||||
-rw-r--r-- | cli/cache/http_cache/local.rs | 872 | ||||
-rw-r--r-- | cli/cache/http_cache/mod.rs | 76 | ||||
-rw-r--r-- | cli/cache/mod.rs | 33 |
8 files changed, 1325 insertions, 388 deletions
diff --git a/cli/cache/deno_dir.rs b/cli/cache/deno_dir.rs index 07bd4a61d..9b12dc5b3 100644 --- a/cli/cache/deno_dir.rs +++ b/cli/cache/deno_dir.rs @@ -71,7 +71,6 @@ impl DenoDir { root, gen_cache: DiskCache::new(&gen_path), }; - deno_dir.gen_cache.ensure_dir_exists(&gen_path)?; Ok(deno_dir) } diff --git a/cli/cache/disk_cache.rs b/cli/cache/disk_cache.rs index 456b59912..6950c056a 100644 --- a/cli/cache/disk_cache.rs +++ b/cli/cache/disk_cache.rs @@ -8,7 +8,6 @@ use deno_core::url::Host; use deno_core::url::Url; use std::ffi::OsStr; use std::fs; -use std::io; use std::path::Component; use std::path::Path; use std::path::PathBuf; @@ -20,13 +19,6 @@ pub struct DiskCache { pub location: PathBuf, } -fn with_io_context<T: AsRef<str>>( - e: &std::io::Error, - context: T, -) -> std::io::Error { - std::io::Error::new(e.kind(), format!("{} (for '{}')", e, context.as_ref())) -} - impl DiskCache { /// `location` must be an absolute path. pub fn new(location: &Path) -> Self { @@ -36,27 +28,6 @@ impl DiskCache { } } - /// Ensures the location of the cache. 
- pub fn ensure_dir_exists(&self, path: &Path) -> io::Result<()> { - if path.is_dir() { - return Ok(()); - } - fs::create_dir_all(path).map_err(|e| { - io::Error::new( - e.kind(), - format!( - concat!( - "Could not create TypeScript compiler cache location: {}\n", - "Check the permission of the directory.\n", - "{:#}", - ), - path.display(), - e - ), - ) - }) - } - fn get_cache_filename(&self, url: &Url) -> Option<PathBuf> { let mut out = PathBuf::new(); @@ -78,7 +49,7 @@ impl DiskCache { out.push(path_seg); } } - "http" | "https" | "data" | "blob" => out = url_to_filename(url)?, + "http" | "https" | "data" | "blob" => out = url_to_filename(url).ok()?, "file" => { let path = match url.to_file_path() { Ok(path) => path, @@ -149,12 +120,7 @@ impl DiskCache { pub fn set(&self, filename: &Path, data: &[u8]) -> std::io::Result<()> { let path = self.location.join(filename); - match path.parent() { - Some(parent) => self.ensure_dir_exists(parent), - None => Ok(()), - }?; atomic_write_file(&path, data, CACHE_PERM) - .map_err(|e| with_io_context(&e, format!("{:#?}", &path))) } } @@ -164,28 +130,13 @@ mod tests { use test_util::TempDir; #[test] - fn test_create_cache_if_dir_exits() { - let cache_location = TempDir::new(); - let cache_path = cache_location.path().join("foo"); - let cache = DiskCache::new(cache_path.as_path()); - cache - .ensure_dir_exists(&cache.location) - .expect("Testing expect:"); - assert!(cache_path.is_dir()); - } - - #[test] - fn test_create_cache_if_dir_not_exits() { + fn test_set_get_cache_file() { let temp_dir = TempDir::new(); - let cache_location = temp_dir.path(); - cache_location.remove_dir_all(); - let cache_location = cache_location.join("foo"); - assert!(!cache_location.is_dir()); - let cache = DiskCache::new(cache_location.as_path()); - cache - .ensure_dir_exists(&cache.location) - .expect("Testing expect:"); - assert!(cache_location.is_dir()); + let sub_dir = temp_dir.path().join("sub_dir"); + let cache = 
DiskCache::new(&sub_dir.to_path_buf()); + let path = PathBuf::from("foo/bar.txt"); + cache.set(&path, b"hello").unwrap(); + assert_eq!(cache.get(&path).unwrap(), b"hello"); } #[test] diff --git a/cli/cache/http_cache.rs b/cli/cache/http_cache.rs deleted file mode 100644 index c65960086..000000000 --- a/cli/cache/http_cache.rs +++ /dev/null @@ -1,330 +0,0 @@ -// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. -//! This module is meant to eventually implement HTTP cache -//! as defined in RFC 7234 (<https://tools.ietf.org/html/rfc7234>). -//! Currently it's a very simplified version to fulfill Deno needs -//! at hand. -use crate::http_util::HeadersMap; -use crate::util; -use deno_core::error::generic_error; -use deno_core::error::AnyError; -use deno_core::serde::Deserialize; -use deno_core::serde::Serialize; -use deno_core::serde_json; -use deno_core::url::Url; -use std::fs; -use std::io; -use std::path::Path; -use std::path::PathBuf; -use std::time::SystemTime; - -use super::CACHE_PERM; - -/// Turn base of url (scheme, hostname, port) into a valid filename. -/// This method replaces port part with a special string token (because -/// ":" cannot be used in filename on some platforms). -/// Ex: $DENO_DIR/deps/https/deno.land/ -fn base_url_to_filename(url: &Url) -> Option<PathBuf> { - let mut out = PathBuf::new(); - - let scheme = url.scheme(); - out.push(scheme); - - match scheme { - "http" | "https" => { - let host = url.host_str().unwrap(); - let host_port = match url.port() { - Some(port) => format!("{host}_PORT{port}"), - None => host.to_string(), - }; - out.push(host_port); - } - "data" | "blob" => (), - scheme => { - log::debug!("Don't know how to create cache name for scheme: {}", scheme); - return None; - } - }; - - Some(out) -} - -/// Turn provided `url` into a hashed filename. 
-/// URLs can contain a lot of characters that cannot be used -/// in filenames (like "?", "#", ":"), so in order to cache -/// them properly they are deterministically hashed into ASCII -/// strings. -/// -/// NOTE: this method is `pub` because it's used in integration_tests -pub fn url_to_filename(url: &Url) -> Option<PathBuf> { - let mut cache_filename = base_url_to_filename(url)?; - - let mut rest_str = url.path().to_string(); - if let Some(query) = url.query() { - rest_str.push('?'); - rest_str.push_str(query); - } - // NOTE: fragment is omitted on purpose - it's not taken into - // account when caching - it denotes parts of webpage, which - // in case of static resources doesn't make much sense - let hashed_filename = util::checksum::gen(&[rest_str.as_bytes()]); - cache_filename.push(hashed_filename); - Some(cache_filename) -} - -/// Cached metadata about a url. -#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] -pub struct CachedUrlMetadata { - pub headers: HeadersMap, - pub url: String, - #[serde(default = "SystemTime::now", rename = "now")] - pub time: SystemTime, -} - -// DO NOT make the path public. The fact that this is stored in a file -// is an implementation detail. -pub struct MaybeHttpCacheItem(PathBuf); - -impl MaybeHttpCacheItem { - #[cfg(test)] - pub fn read_to_string(&self) -> Result<Option<String>, AnyError> { - let Some(bytes) = self.read_to_bytes()? 
else { - return Ok(None); - }; - Ok(Some(String::from_utf8(bytes)?)) - } - - pub fn read_to_bytes(&self) -> Result<Option<Vec<u8>>, AnyError> { - match std::fs::read(&self.0) { - Ok(s) => Ok(Some(s)), - Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None), - Err(err) => Err(err.into()), - } - } - - pub fn read_metadata(&self) -> Result<Option<CachedUrlMetadata>, AnyError> { - let metadata_filepath = self.0.with_extension("metadata.json"); - match fs::read_to_string(metadata_filepath) { - Ok(metadata) => Ok(Some(serde_json::from_str(&metadata)?)), - Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None), - Err(err) => Err(err.into()), - } - } -} - -#[derive(Debug, Clone, Default)] -pub struct HttpCache { - pub location: PathBuf, -} - -impl HttpCache { - /// Returns a new instance. - /// - /// `location` must be an absolute path. - pub fn new(location: PathBuf) -> Self { - assert!(location.is_absolute()); - Self { location } - } - - /// Ensures the location of the cache. - fn ensure_dir_exists(&self, path: &Path) -> io::Result<()> { - if path.is_dir() { - return Ok(()); - } - fs::create_dir_all(path).map_err(|e| { - io::Error::new( - e.kind(), - format!( - "Could not create remote modules cache location: {path:?}\nCheck the permission of the directory." - ), - ) - }) - } - - pub fn get_modified_time( - &self, - url: &Url, - ) -> Result<Option<SystemTime>, AnyError> { - let filepath = self.get_cache_filepath_internal(url)?; - match fs::metadata(filepath) { - Ok(metadata) => Ok(Some(metadata.modified()?)), - Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(None), - Err(err) => Err(err.into()), - } - } - - // DEPRECATED: Where the file is stored and how it's stored should be an implementation - // detail of the cache. 
- #[deprecated(note = "Do not assume the cache will be stored at a file path.")] - pub fn get_cache_filepath(&self, url: &Url) -> Result<PathBuf, AnyError> { - self.get_cache_filepath_internal(url) - } - - fn get_cache_filepath_internal( - &self, - url: &Url, - ) -> Result<PathBuf, AnyError> { - Ok( - self.location.join( - url_to_filename(url) - .ok_or_else(|| generic_error("Can't convert url to filename."))?, - ), - ) - } - - #[cfg(test)] - pub fn write_metadata( - &self, - url: &Url, - meta_data: &CachedUrlMetadata, - ) -> Result<(), AnyError> { - let cache_path = self.get_cache_filepath_internal(url)?; - self.write_metadata_at_path(&cache_path, meta_data) - } - - fn write_metadata_at_path( - &self, - path: &Path, - meta_data: &CachedUrlMetadata, - ) -> Result<(), AnyError> { - let cache_path = path.with_extension("metadata.json"); - let json = serde_json::to_string_pretty(meta_data)?; - util::fs::atomic_write_file(&cache_path, json, CACHE_PERM)?; - Ok(()) - } - - // TODO(bartlomieju): this method should check headers file - // and validate against ETAG/Last-modified-as headers. - // ETAG check is currently done in `cli/file_fetcher.rs`. 
- pub fn get(&self, url: &Url) -> Result<MaybeHttpCacheItem, AnyError> { - let cache_filepath = self.get_cache_filepath_internal(url)?; - Ok(MaybeHttpCacheItem(cache_filepath)) - } - - pub fn set( - &self, - url: &Url, - headers_map: HeadersMap, - content: &[u8], - ) -> Result<(), AnyError> { - let cache_filepath = self.get_cache_filepath_internal(url)?; - // Create parent directory - let parent_filename = cache_filepath - .parent() - .expect("Cache filename should have a parent dir"); - self.ensure_dir_exists(parent_filename)?; - // Cache content - util::fs::atomic_write_file(&cache_filepath, content, CACHE_PERM)?; - - let metadata = CachedUrlMetadata { - time: SystemTime::now(), - url: url.to_string(), - headers: headers_map, - }; - self.write_metadata_at_path(&cache_filepath, &metadata)?; - - Ok(()) - } - - pub fn contains(&self, url: &Url) -> bool { - let Ok(cache_filepath) = self.get_cache_filepath_internal(url) else { - return false - }; - cache_filepath.is_file() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::collections::HashMap; - use test_util::TempDir; - - #[test] - fn test_create_cache() { - let dir = TempDir::new(); - let cache_path = dir.path().join("foobar"); - // HttpCache should be created lazily on first use: - // when zipping up a local project with no external dependencies - // "$DENO_DIR/deps" is empty. When unzipping such project - // "$DENO_DIR/deps" might not get restored and in situation - // when directory is owned by root we might not be able - // to create that directory. However if it's not needed it - // doesn't make sense to return error in such specific scenarios. 
- // For more details check issue: - // https://github.com/denoland/deno/issues/5688 - let cache = HttpCache::new(cache_path.to_path_buf()); - assert!(!cache.location.exists()); - cache - .set( - &Url::parse("http://example.com/foo/bar.js").unwrap(), - HeadersMap::new(), - b"hello world", - ) - .expect("Failed to add to cache"); - assert!(cache.ensure_dir_exists(&cache.location).is_ok()); - assert!(cache_path.is_dir()); - } - - #[test] - fn test_get_set() { - let dir = TempDir::new(); - let cache = HttpCache::new(dir.path().to_path_buf()); - let url = Url::parse("https://deno.land/x/welcome.ts").unwrap(); - let mut headers = HashMap::new(); - headers.insert( - "content-type".to_string(), - "application/javascript".to_string(), - ); - headers.insert("etag".to_string(), "as5625rqdsfb".to_string()); - let content = b"Hello world"; - let r = cache.set(&url, headers, content); - eprintln!("result {r:?}"); - assert!(r.is_ok()); - let cache_item = cache.get(&url).unwrap(); - let content = cache_item.read_to_string().unwrap().unwrap(); - let headers = cache_item.read_metadata().unwrap().unwrap().headers; - assert_eq!(content, "Hello world"); - assert_eq!( - headers.get("content-type").unwrap(), - "application/javascript" - ); - assert_eq!(headers.get("etag").unwrap(), "as5625rqdsfb"); - assert_eq!(headers.get("foobar"), None); - } - - #[test] - fn test_url_to_filename() { - let test_cases = [ - ("https://deno.land/x/foo.ts", "https/deno.land/2c0a064891b9e3fbe386f5d4a833bce5076543f5404613656042107213a7bbc8"), - ( - "https://deno.land:8080/x/foo.ts", - "https/deno.land_PORT8080/2c0a064891b9e3fbe386f5d4a833bce5076543f5404613656042107213a7bbc8", - ), - ("https://deno.land/", "https/deno.land/8a5edab282632443219e051e4ade2d1d5bbc671c781051bf1437897cbdfea0f1"), - ( - "https://deno.land/?asdf=qwer", - "https/deno.land/e4edd1f433165141015db6a823094e6bd8f24dd16fe33f2abd99d34a0a21a3c0", - ), - // should be the same as case above, fragment (#qwer) is ignored - // when hashing - ( - 
"https://deno.land/?asdf=qwer#qwer", - "https/deno.land/e4edd1f433165141015db6a823094e6bd8f24dd16fe33f2abd99d34a0a21a3c0", - ), - ( - "data:application/typescript;base64,ZXhwb3J0IGNvbnN0IGEgPSAiYSI7CgpleHBvcnQgZW51bSBBIHsKICBBLAogIEIsCiAgQywKfQo=", - "data/c21c7fc382b2b0553dc0864aa81a3acacfb7b3d1285ab5ae76da6abec213fb37", - ), - ( - "data:text/plain,Hello%2C%20Deno!", - "data/967374e3561d6741234131e342bf5c6848b70b13758adfe23ee1a813a8131818", - ) - ]; - - for (url, expected) in test_cases.iter() { - let u = Url::parse(url).unwrap(); - let p = url_to_filename(&u).unwrap(); - assert_eq!(p, PathBuf::from(expected)); - } - } -} diff --git a/cli/cache/http_cache/common.rs b/cli/cache/http_cache/common.rs new file mode 100644 index 000000000..690412293 --- /dev/null +++ b/cli/cache/http_cache/common.rs @@ -0,0 +1,42 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. + +use std::path::Path; + +use deno_core::url::Url; + +pub fn base_url_to_filename_parts( + url: &Url, + port_separator: &str, +) -> Option<Vec<String>> { + let mut out = Vec::with_capacity(2); + + let scheme = url.scheme(); + out.push(scheme.to_string()); + + match scheme { + "http" | "https" => { + let host = url.host_str().unwrap(); + let host_port = match url.port() { + // underscores are not allowed in domains, so adding one here is fine + Some(port) => format!("{host}{port_separator}{port}"), + None => host.to_string(), + }; + out.push(host_port); + } + "data" | "blob" => (), + scheme => { + log::debug!("Don't know how to create cache name for scheme: {}", scheme); + return None; + } + }; + + Some(out) +} + +pub fn read_file_bytes(path: &Path) -> std::io::Result<Option<Vec<u8>>> { + match std::fs::read(path) { + Ok(s) => Ok(Some(s)), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None), + Err(err) => Err(err), + } +} diff --git a/cli/cache/http_cache/global.rs b/cli/cache/http_cache/global.rs new file mode 100644 index 000000000..5c77553a8 --- /dev/null +++ 
b/cli/cache/http_cache/global.rs @@ -0,0 +1,296 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. + +use std::io; +use std::path::Path; +use std::path::PathBuf; +use std::time::SystemTime; + +use deno_core::error::AnyError; +use deno_core::serde_json; +use deno_core::url::Url; +use thiserror::Error; + +use crate::cache::CACHE_PERM; +use crate::http_util::HeadersMap; +use crate::util; +use crate::util::fs::atomic_write_file; + +use super::common::base_url_to_filename_parts; +use super::common::read_file_bytes; +use super::CachedUrlMetadata; +use super::HttpCache; +use super::HttpCacheItemKey; + +#[derive(Debug, Error)] +#[error("Can't convert url (\"{}\") to filename.", .url)] +pub struct UrlToFilenameConversionError { + pub(super) url: String, +} + +/// Turn provided `url` into a hashed filename. +/// URLs can contain a lot of characters that cannot be used +/// in filenames (like "?", "#", ":"), so in order to cache +/// them properly they are deterministically hashed into ASCII +/// strings. +pub fn url_to_filename( + url: &Url, +) -> Result<PathBuf, UrlToFilenameConversionError> { + let Some(mut cache_filename) = base_url_to_filename(url) else { + return Err(UrlToFilenameConversionError { url: url.to_string() }); + }; + + let mut rest_str = url.path().to_string(); + if let Some(query) = url.query() { + rest_str.push('?'); + rest_str.push_str(query); + } + // NOTE: fragment is omitted on purpose - it's not taken into + // account when caching - it denotes parts of webpage, which + // in case of static resources doesn't make much sense + let hashed_filename = util::checksum::gen(&[rest_str.as_bytes()]); + cache_filename.push(hashed_filename); + Ok(cache_filename) +} + +// Turn base of url (scheme, hostname, port) into a valid filename. +/// This method replaces port part with a special string token (because +/// ":" cannot be used in filename on some platforms). 
+/// Ex: $DENO_DIR/deps/https/deno.land/ +fn base_url_to_filename(url: &Url) -> Option<PathBuf> { + base_url_to_filename_parts(url, "_PORT").map(|parts| { + let mut out = PathBuf::new(); + for part in parts { + out.push(part); + } + out + }) +} + +#[derive(Debug)] +pub struct GlobalHttpCache(PathBuf); + +impl GlobalHttpCache { + pub fn new(path: PathBuf) -> Self { + assert!(path.is_absolute()); + Self(path) + } + + // Deprecated to discourage using this as where the file is stored and + // how it's stored should be an implementation detail of the cache. + #[deprecated(note = "Should only be used for deno info.")] + pub fn get_global_cache_location(&self) -> &PathBuf { + &self.0 + } + + // DEPRECATED: Where the file is stored and how it's stored should be an implementation + // detail of the cache. + #[deprecated(note = "Do not assume the cache will be stored at a file path.")] + pub fn get_global_cache_filepath( + &self, + url: &Url, + ) -> Result<PathBuf, AnyError> { + Ok(self.0.join(url_to_filename(url)?)) + } + + fn get_cache_filepath(&self, url: &Url) -> Result<PathBuf, AnyError> { + Ok(self.0.join(url_to_filename(url)?)) + } + + #[inline] + fn key_file_path<'a>(&self, key: &'a HttpCacheItemKey) -> &'a PathBuf { + // The key file path is always set for the global cache because + // the file will always exist, unlike the local cache, which won't + // have this for redirects. 
+ key.file_path.as_ref().unwrap() + } +} + +impl HttpCache for GlobalHttpCache { + fn cache_item_key<'a>( + &self, + url: &'a Url, + ) -> Result<HttpCacheItemKey<'a>, AnyError> { + Ok(HttpCacheItemKey { + #[cfg(debug_assertions)] + is_local_key: false, + url, + file_path: Some(self.get_cache_filepath(url)?), + }) + } + + fn contains(&self, url: &Url) -> bool { + let Ok(cache_filepath) = self.get_cache_filepath(url) else { + return false + }; + cache_filepath.is_file() + } + + fn read_modified_time( + &self, + key: &HttpCacheItemKey, + ) -> Result<Option<SystemTime>, AnyError> { + #[cfg(debug_assertions)] + debug_assert!(!key.is_local_key); + + match std::fs::metadata(self.key_file_path(key)) { + Ok(metadata) => Ok(Some(metadata.modified()?)), + Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(None), + Err(err) => Err(err.into()), + } + } + + fn set( + &self, + url: &Url, + headers: HeadersMap, + content: &[u8], + ) -> Result<(), AnyError> { + let cache_filepath = self.get_cache_filepath(url)?; + // Cache content + atomic_write_file(&cache_filepath, content, CACHE_PERM)?; + + let metadata = CachedUrlMetadata { + time: SystemTime::now(), + url: url.to_string(), + headers, + }; + write_metadata(&cache_filepath, &metadata)?; + + Ok(()) + } + + fn read_file_bytes( + &self, + key: &HttpCacheItemKey, + ) -> Result<Option<Vec<u8>>, AnyError> { + #[cfg(debug_assertions)] + debug_assert!(!key.is_local_key); + + Ok(read_file_bytes(self.key_file_path(key))?) + } + + fn read_metadata( + &self, + key: &HttpCacheItemKey, + ) -> Result<Option<CachedUrlMetadata>, AnyError> { + #[cfg(debug_assertions)] + debug_assert!(!key.is_local_key); + + match read_metadata(self.key_file_path(key))? { + Some(metadata) => Ok(Some(metadata)), + None => Ok(None), + } + } +} + +fn read_metadata(path: &Path) -> Result<Option<CachedUrlMetadata>, AnyError> { + let path = path.with_extension("metadata.json"); + match read_file_bytes(&path)? 
{ + Some(metadata) => Ok(Some(serde_json::from_slice(&metadata)?)), + None => Ok(None), + } +} + +fn write_metadata( + path: &Path, + meta_data: &CachedUrlMetadata, +) -> Result<(), AnyError> { + let path = path.with_extension("metadata.json"); + let json = serde_json::to_string_pretty(meta_data)?; + atomic_write_file(&path, json, CACHE_PERM)?; + Ok(()) +} + +#[cfg(test)] +mod test { + use super::*; + use std::collections::HashMap; + use test_util::TempDir; + + #[test] + fn test_url_to_filename() { + let test_cases = [ + ("https://deno.land/x/foo.ts", "https/deno.land/2c0a064891b9e3fbe386f5d4a833bce5076543f5404613656042107213a7bbc8"), + ( + "https://deno.land:8080/x/foo.ts", + "https/deno.land_PORT8080/2c0a064891b9e3fbe386f5d4a833bce5076543f5404613656042107213a7bbc8", + ), + ("https://deno.land/", "https/deno.land/8a5edab282632443219e051e4ade2d1d5bbc671c781051bf1437897cbdfea0f1"), + ( + "https://deno.land/?asdf=qwer", + "https/deno.land/e4edd1f433165141015db6a823094e6bd8f24dd16fe33f2abd99d34a0a21a3c0", + ), + // should be the same as case above, fragment (#qwer) is ignored + // when hashing + ( + "https://deno.land/?asdf=qwer#qwer", + "https/deno.land/e4edd1f433165141015db6a823094e6bd8f24dd16fe33f2abd99d34a0a21a3c0", + ), + ( + "data:application/typescript;base64,ZXhwb3J0IGNvbnN0IGEgPSAiYSI7CgpleHBvcnQgZW51bSBBIHsKICBBLAogIEIsCiAgQywKfQo=", + "data/c21c7fc382b2b0553dc0864aa81a3acacfb7b3d1285ab5ae76da6abec213fb37", + ), + ( + "data:text/plain,Hello%2C%20Deno!", + "data/967374e3561d6741234131e342bf5c6848b70b13758adfe23ee1a813a8131818", + ) + ]; + + for (url, expected) in test_cases.iter() { + let u = Url::parse(url).unwrap(); + let p = url_to_filename(&u).unwrap(); + assert_eq!(p, PathBuf::from(expected)); + } + } + + #[test] + fn test_create_cache() { + let dir = TempDir::new(); + let cache_path = dir.path().join("foobar"); + // HttpCache should be created lazily on first use: + // when zipping up a local project with no external dependencies + // "$DENO_DIR/deps" 
is empty. When unzipping such project + // "$DENO_DIR/deps" might not get restored and in situation + // when directory is owned by root we might not be able + // to create that directory. However if it's not needed it + // doesn't make sense to return error in such specific scenarios. + // For more details check issue: + // https://github.com/denoland/deno/issues/5688 + let cache = GlobalHttpCache::new(cache_path.to_path_buf()); + assert!(!cache.0.exists()); + let url = Url::parse("http://example.com/foo/bar.js").unwrap(); + cache + .set(&url, HeadersMap::new(), b"hello world") + .expect("Failed to add to cache"); + assert!(cache_path.is_dir()); + assert!(cache.get_cache_filepath(&url).unwrap().is_file()); + } + + #[test] + fn test_get_set() { + let dir = TempDir::new(); + let cache = GlobalHttpCache::new(dir.path().to_path_buf()); + let url = Url::parse("https://deno.land/x/welcome.ts").unwrap(); + let mut headers = HashMap::new(); + headers.insert( + "content-type".to_string(), + "application/javascript".to_string(), + ); + headers.insert("etag".to_string(), "as5625rqdsfb".to_string()); + let content = b"Hello world"; + let r = cache.set(&url, headers, content); + eprintln!("result {r:?}"); + assert!(r.is_ok()); + let key = cache.cache_item_key(&url).unwrap(); + let content = + String::from_utf8(cache.read_file_bytes(&key).unwrap().unwrap()).unwrap(); + let headers = cache.read_metadata(&key).unwrap().unwrap().headers; + assert_eq!(content, "Hello world"); + assert_eq!( + headers.get("content-type").unwrap(), + "application/javascript" + ); + assert_eq!(headers.get("etag").unwrap(), "as5625rqdsfb"); + assert_eq!(headers.get("foobar"), None); + } +} diff --git a/cli/cache/http_cache/local.rs b/cli/cache/http_cache/local.rs new file mode 100644 index 000000000..016118c3f --- /dev/null +++ b/cli/cache/http_cache/local.rs @@ -0,0 +1,872 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. 
+ +use std::borrow::Cow; +use std::collections::HashMap; +use std::collections::HashSet; +use std::path::Path; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::SystemTime; + +use deno_ast::MediaType; +use deno_core::error::AnyError; +use deno_core::parking_lot::RwLock; +use deno_core::serde_json; +use deno_core::url::Url; +use indexmap::IndexMap; +use once_cell::sync::Lazy; +use serde::Deserialize; +use serde::Serialize; + +use crate::cache::CACHE_PERM; +use crate::http_util::HeadersMap; +use crate::util; +use crate::util::fs::atomic_write_file; + +use super::common::base_url_to_filename_parts; +use super::common::read_file_bytes; +use super::global::GlobalHttpCache; +use super::global::UrlToFilenameConversionError; +use super::CachedUrlMetadata; +use super::HttpCache; +use super::HttpCacheItemKey; + +#[derive(Debug)] +pub struct LocalHttpCache { + path: PathBuf, + manifest: LocalCacheManifest, + global_cache: Arc<GlobalHttpCache>, +} + +impl LocalHttpCache { + pub fn new(path: PathBuf, global_cache: Arc<GlobalHttpCache>) -> Self { + assert!(path.is_absolute()); + let manifest = LocalCacheManifest::new(path.join("manifest.json")); + Self { + path, + manifest, + global_cache, + } + } + + fn get_cache_filepath( + &self, + url: &Url, + headers: &HeadersMap, + ) -> Result<PathBuf, AnyError> { + Ok(url_to_local_sub_path(url, headers)?.as_path_from_root(&self.path)) + } + + /// Copies the file from the global cache to the local cache returning + /// if the data was successfully copied to the local cache. + fn check_copy_global_to_local(&self, url: &Url) -> Result<bool, AnyError> { + let global_key = self.global_cache.cache_item_key(url)?; + let Some(metadata) = self.global_cache.read_metadata(&global_key)? else { + return Ok(false); + }; + + if !metadata.is_redirect() { + let Some(cached_bytes) = self.global_cache.read_file_bytes(&global_key)? 
else { + return Ok(false); + }; + + let local_file_path = self.get_cache_filepath(url, &metadata.headers)?; + // if we're here, then this will be set + atomic_write_file(&local_file_path, cached_bytes, CACHE_PERM)?; + } + self.manifest.insert_data( + url_to_local_sub_path(url, &metadata.headers)?, + url.clone(), + metadata.headers, + ); + + Ok(true) + } + + fn get_url_metadata_checking_global_cache( + &self, + url: &Url, + ) -> Result<Option<CachedUrlMetadata>, AnyError> { + if let Some(metadata) = self.manifest.get_metadata(url) { + Ok(Some(metadata)) + } else if self.check_copy_global_to_local(url)? { + // try again now that it's saved + Ok(self.manifest.get_metadata(url)) + } else { + Ok(None) + } + } +} + +impl HttpCache for LocalHttpCache { + fn cache_item_key<'a>( + &self, + url: &'a Url, + ) -> Result<HttpCacheItemKey<'a>, AnyError> { + Ok(HttpCacheItemKey { + #[cfg(debug_assertions)] + is_local_key: true, + url, + file_path: None, // need to compute this every time + }) + } + + fn contains(&self, url: &Url) -> bool { + self.manifest.get_metadata(url).is_some() + } + + fn read_modified_time( + &self, + key: &HttpCacheItemKey, + ) -> Result<Option<SystemTime>, AnyError> { + #[cfg(debug_assertions)] + debug_assert!(key.is_local_key); + + self + .get_url_metadata_checking_global_cache(key.url) + .map(|m| m.map(|m| m.time)) + } + + fn set( + &self, + url: &Url, + headers: crate::http_util::HeadersMap, + content: &[u8], + ) -> Result<(), AnyError> { + let is_redirect = headers.contains_key("location"); + if !is_redirect { + let cache_filepath = self.get_cache_filepath(url, &headers)?; + // Cache content + atomic_write_file(&cache_filepath, content, CACHE_PERM)?; + } + + let sub_path = url_to_local_sub_path(url, &headers)?; + self.manifest.insert_data(sub_path, url.clone(), headers); + + Ok(()) + } + + fn read_file_bytes( + &self, + key: &HttpCacheItemKey, + ) -> Result<Option<Vec<u8>>, AnyError> { + #[cfg(debug_assertions)] + debug_assert!(key.is_local_key); + + 
let metadata = self.get_url_metadata_checking_global_cache(key.url)?; + match metadata { + Some(data) => { + if data.is_redirect() { + // return back an empty file for redirect + Ok(Some(Vec::new())) + } else { + // if it's not a redirect, then it should have a file path + let cache_filepath = + self.get_cache_filepath(key.url, &data.headers)?; + Ok(read_file_bytes(&cache_filepath)?) + } + } + None => Ok(None), + } + } + + fn read_metadata( + &self, + key: &HttpCacheItemKey, + ) -> Result<Option<CachedUrlMetadata>, AnyError> { + #[cfg(debug_assertions)] + debug_assert!(key.is_local_key); + + self.get_url_metadata_checking_global_cache(key.url) + } +} + +struct LocalCacheSubPath { + pub has_hash: bool, + pub parts: Vec<String>, +} + +impl LocalCacheSubPath { + pub fn as_path_from_root(&self, root_path: &Path) -> PathBuf { + let mut path = root_path.to_path_buf(); + for part in &self.parts { + path.push(part); + } + path + } +} + +fn url_to_local_sub_path( + url: &Url, + headers: &HeadersMap, +) -> Result<LocalCacheSubPath, UrlToFilenameConversionError> { + // https://stackoverflow.com/a/31976060/188246 + static FORBIDDEN_CHARS: Lazy<HashSet<char>> = Lazy::new(|| { + HashSet::from(['?', '<', '>', ':', '*', '|', '\\', ':', '"', '\'', '/']) + }); + + fn has_forbidden_chars(segment: &str) -> bool { + segment.chars().any(|c| { + let is_uppercase = c.is_ascii_alphabetic() && !c.is_ascii_lowercase(); + FORBIDDEN_CHARS.contains(&c) + // do not allow uppercase letters in order to make this work + // the same on case insensitive file systems + || is_uppercase + }) + } + + fn has_known_extension(path: &str) -> bool { + let path = path.to_lowercase(); + path.ends_with(".js") + || path.ends_with(".ts") + || path.ends_with(".jsx") + || path.ends_with(".tsx") + || path.ends_with(".mts") + || path.ends_with(".mjs") + || path.ends_with(".json") + || path.ends_with(".wasm") + } + + fn get_extension(url: &Url, headers: &HeadersMap) -> &'static str { + 
MediaType::from_specifier_and_headers(url, Some(headers)).as_ts_extension() + } + + fn short_hash(data: &str, last_ext: Option<&str>) -> String { + // This function is a bit of a balancing act between readability + // and avoiding collisions. + let hash = util::checksum::gen(&[data.as_bytes()]); + // keep the paths short because of windows path limit + const MAX_LENGTH: usize = 20; + let mut sub = String::with_capacity(MAX_LENGTH); + for c in data.chars().take(MAX_LENGTH) { + // don't include the query string (only use it in the hash) + if c == '?' { + break; + } + if FORBIDDEN_CHARS.contains(&c) { + sub.push('_'); + } else { + sub.extend(c.to_lowercase()); + } + } + let sub = match last_ext { + Some(ext) => sub.strip_suffix(ext).unwrap_or(&sub), + None => &sub, + }; + let ext = last_ext.unwrap_or(""); + if sub.is_empty() { + format!("#{}{}", &hash[..7], ext) + } else { + format!("#{}_{}{}", &sub, &hash[..5], ext) + } + } + + fn should_hash_part(part: &str, last_ext: Option<&str>) -> bool { + if part.is_empty() || part.len() > 30 { + // keep short due to windows path limit + return true; + } + let hash_context_specific = if let Some(last_ext) = last_ext { + // if the last part does not have a known extension, hash it in order to + // prevent collisions with a directory of the same name + !has_known_extension(part) || !part.ends_with(last_ext) + } else { + // if any non-ending path part has a known extension, hash it in order to + // prevent collisions where a filename has the same name as a directory name + has_known_extension(part) + }; + + // the hash symbol at the start designates a hash for the url part + hash_context_specific || part.starts_with('#') || has_forbidden_chars(part) + } + + // get the base url + let port_separator = "_"; // make this shorter with just an underscore + let Some(mut base_parts) = base_url_to_filename_parts(url, port_separator) else { + return Err(UrlToFilenameConversionError { url: url.to_string() }); + }; + + if base_parts[0] == 
"https" { + base_parts.remove(0); + } else { + let scheme = base_parts.remove(0); + base_parts[0] = format!("{}_{}", scheme, base_parts[0]); + } + + // first, try to get the filename of the path + let path_segments = url + .path() + .strip_prefix('/') + .unwrap_or(url.path()) + .split('/'); + let mut parts = base_parts + .into_iter() + .chain(path_segments.map(|s| s.to_string())) + .collect::<Vec<_>>(); + + // push the query parameter onto the last part + if let Some(query) = url.query() { + let last_part = parts.last_mut().unwrap(); + last_part.push('?'); + last_part.push_str(query); + } + + let mut has_hash = false; + let parts_len = parts.len(); + let parts = parts + .into_iter() + .enumerate() + .map(|(i, part)| { + let is_last = i == parts_len - 1; + let last_ext = if is_last { + Some(get_extension(url, headers)) + } else { + None + }; + if should_hash_part(&part, last_ext) { + has_hash = true; + short_hash(&part, last_ext) + } else { + part + } + }) + .collect::<Vec<_>>(); + + Ok(LocalCacheSubPath { has_hash, parts }) +} + +#[derive(Debug, Default, Clone)] +struct LocalCacheManifestData { + serialized: SerializedLocalCacheManifestData, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +struct SerializedLocalCacheManifestDataModule { + #[serde(skip_serializing_if = "Option::is_none")] + pub path: Option<String>, + #[serde( + default = "IndexMap::new", + skip_serializing_if = "IndexMap::is_empty" + )] + pub headers: IndexMap<String, String>, +} + +#[derive(Debug, Default, Clone, Serialize, Deserialize)] +struct SerializedLocalCacheManifestData { + pub modules: IndexMap<Url, SerializedLocalCacheManifestDataModule>, +} + +#[derive(Debug)] +struct LocalCacheManifest { + file_path: PathBuf, + data: RwLock<LocalCacheManifestData>, +} + +impl LocalCacheManifest { + pub fn new(file_path: PathBuf) -> Self { + let serialized: SerializedLocalCacheManifestData = + std::fs::read(&file_path) + .ok() + .and_then(|data| match serde_json::from_slice(&data) { 
+ Ok(data) => Some(data), + Err(err) => { + log::debug!("Failed deserializing local cache manifest: {:#}", err); + None + } + }) + .unwrap_or_default(); + Self { + data: RwLock::new(LocalCacheManifestData { serialized }), + file_path, + } + } + + pub fn insert_data( + &self, + sub_path: LocalCacheSubPath, + url: Url, + mut original_headers: HashMap<String, String>, + ) { + fn should_keep_content_type_header( + url: &Url, + headers: &HashMap<String, String>, + ) -> bool { + // only keep the content-type header if it can't be derived from the url + MediaType::from_specifier(url) + != MediaType::from_specifier_and_headers(url, Some(headers)) + } + + let mut headers_subset = IndexMap::new(); + + const HEADER_KEYS_TO_KEEP: [&str; 4] = [ + // keep alphabetical for cleanliness in the output + "content-type", + "location", + "x-deno-warning", + "x-typescript-types", + ]; + for key in HEADER_KEYS_TO_KEEP { + if key == "content-type" + && !should_keep_content_type_header(&url, &original_headers) + { + continue; + } + if let Some((k, v)) = original_headers.remove_entry(key) { + headers_subset.insert(k, v); + } + } + + let mut data = self.data.write(); + let is_empty = headers_subset.is_empty() && !sub_path.has_hash; + let has_changed = if is_empty { + data.serialized.modules.remove(&url).is_some() + } else { + let new_data = SerializedLocalCacheManifestDataModule { + path: if headers_subset.contains_key("location") { + None + } else { + Some(sub_path.parts.join("/")) + }, + headers: headers_subset, + }; + if data.serialized.modules.get(&url) == Some(&new_data) { + false + } else { + data.serialized.modules.insert(url.clone(), new_data); + true + } + }; + + if has_changed { + // don't bother ensuring the directory here because it will + // eventually be created by files being added to the cache + let result = atomic_write_file( + &self.file_path, + serde_json::to_string_pretty(&data.serialized).unwrap(), + CACHE_PERM, + ); + if let Err(err) = result { + log::debug!("Failed saving 
local cache manifest: {:#}", err); + } + } + } + + pub fn get_metadata(&self, url: &Url) -> Option<CachedUrlMetadata> { + let data = self.data.read(); + match data.serialized.modules.get(url) { + Some(module) => { + let headers = module + .headers + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect::<HashMap<_, _>>(); + let sub_path = match &module.path { + Some(sub_path) => { + Cow::Owned(self.file_path.parent().unwrap().join(sub_path)) + } + None => Cow::Borrowed(&self.file_path), + }; + + let Ok(metadata) = sub_path.metadata() else { + return None; + }; + + Some(CachedUrlMetadata { + headers, + url: url.to_string(), + time: metadata.modified().unwrap_or_else(|_| SystemTime::now()), + }) + } + None => { + let folder_path = self.file_path.parent().unwrap(); + let sub_path = url_to_local_sub_path(url, &Default::default()).ok()?; + if sub_path.has_hash { + // only paths without a hash are considered as in the cache + // when they don't have a metadata entry + return None; + } + let file_path = sub_path.as_path_from_root(folder_path); + if let Ok(metadata) = file_path.metadata() { + Some(CachedUrlMetadata { + headers: Default::default(), + url: url.to_string(), + time: metadata.modified().unwrap_or_else(|_| SystemTime::now()), + }) + } else { + None + } + } + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + use deno_core::serde_json::json; + use pretty_assertions::assert_eq; + use test_util::TempDir; + + #[test] + fn test_url_to_local_sub_path() { + run_test("https://deno.land/x/mod.ts", &[], "deno.land/x/mod.ts"); + run_test( + "http://deno.land/x/mod.ts", + &[], + // http gets added to the folder name, but not https + "http_deno.land/x/mod.ts", + ); + run_test( + // capital letter in filename + "https://deno.land/x/MOD.ts", + &[], + "deno.land/x/#mod_fa860.ts", + ); + run_test( + // query string + "https://deno.land/x/mod.ts?testing=1", + &[], + "deno.land/x/#mod_2eb80.ts", + ); + run_test( + // capital letter in directory + 
"https://deno.land/OTHER/mod.ts", + &[], + "deno.land/#other_1c55d/mod.ts", + ); + run_test( + // under max of 30 chars + "https://deno.land/x/012345678901234567890123456.js", + &[], + "deno.land/x/012345678901234567890123456.js", + ); + run_test( + // max 30 chars + "https://deno.land/x/0123456789012345678901234567.js", + &[], + "deno.land/x/#01234567890123456789_836de.js", + ); + run_test( + // forbidden char + "https://deno.land/x/mod's.js", + &[], + "deno.land/x/#mod_s_44fc8.js", + ); + run_test( + // no extension + "https://deno.land/x/mod", + &[("content-type", "application/typescript")], + "deno.land/x/#mod_e55cf.ts", + ); + run_test( + // known extension in directory is not allowed + // because it could conflict with a file of the same name + "https://deno.land/x/mod.js/mod.js", + &[], + "deno.land/x/#mod.js_59c58/mod.js", + ); + run_test( + // slash slash in path + "http://localhost//mod.js", + &[], + "http_localhost/#e3b0c44/mod.js", + ); + run_test( + // headers same extension + "https://deno.land/x/mod.ts", + &[("content-type", "application/typescript")], + "deno.land/x/mod.ts", + ); + run_test( + // headers different extension... 
We hash this because + // if someone deletes the manifest file, then we don't want + // https://deno.land/x/mod.ts to resolve as a typescript file + "https://deno.land/x/mod.ts", + &[("content-type", "application/javascript")], + "deno.land/x/#mod.ts_e8c36.js", + ); + + #[track_caller] + fn run_test(url: &str, headers: &[(&str, &str)], expected: &str) { + let url = Url::parse(url).unwrap(); + let headers = headers + .iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + let result = url_to_local_sub_path(&url, &headers).unwrap(); + let parts = result.parts.join("/"); + assert_eq!(parts, expected); + assert_eq!( + result.parts.iter().any(|p| p.starts_with('#')), + result.has_hash + ) + } + } + + #[test] + fn test_local_global_cache() { + let temp_dir = TempDir::new(); + let global_cache_path = temp_dir.path().join("global"); + let local_cache_path = temp_dir.path().join("local"); + let global_cache = + Arc::new(GlobalHttpCache::new(global_cache_path.to_path_buf())); + let local_cache = + LocalHttpCache::new(local_cache_path.to_path_buf(), global_cache.clone()); + + let manifest_file = local_cache_path.join("manifest.json"); + // mapped url + { + let url = Url::parse("https://deno.land/x/mod.ts").unwrap(); + let content = "export const test = 5;"; + global_cache + .set( + &url, + HashMap::from([( + "content-type".to_string(), + "application/typescript".to_string(), + )]), + content.as_bytes(), + ) + .unwrap(); + let key = local_cache.cache_item_key(&url).unwrap(); + assert_eq!( + String::from_utf8(local_cache.read_file_bytes(&key).unwrap().unwrap()) + .unwrap(), + content + ); + let metadata = local_cache.read_metadata(&key).unwrap().unwrap(); + // won't have any headers because the content-type is derivable from the url + assert_eq!(metadata.headers, HashMap::new()); + assert_eq!(metadata.url, url.to_string()); + // no manifest file yet + assert!(!manifest_file.exists()); + + // now try deleting the global cache and we should still be able to load 
it + global_cache_path.remove_dir_all(); + assert_eq!( + String::from_utf8(local_cache.read_file_bytes(&key).unwrap().unwrap()) + .unwrap(), + content + ); + } + + // file that's directly mappable to a url + { + let content = "export const a = 1;"; + local_cache_path + .join("deno.land") + .join("main.js") + .write(content); + + // now we should be able to read this file because it's directly mappable to a url + let url = Url::parse("https://deno.land/main.js").unwrap(); + let key = local_cache.cache_item_key(&url).unwrap(); + assert_eq!( + String::from_utf8(local_cache.read_file_bytes(&key).unwrap().unwrap()) + .unwrap(), + content + ); + let metadata = local_cache.read_metadata(&key).unwrap().unwrap(); + assert_eq!(metadata.headers, HashMap::new()); + assert_eq!(metadata.url, url.to_string()); + } + + // now try a file with a different content-type header + { + let url = + Url::parse("https://deno.land/x/different_content_type.ts").unwrap(); + let content = "export const test = 5;"; + global_cache + .set( + &url, + HashMap::from([( + "content-type".to_string(), + "application/javascript".to_string(), + )]), + content.as_bytes(), + ) + .unwrap(); + let key = local_cache.cache_item_key(&url).unwrap(); + assert_eq!( + String::from_utf8(local_cache.read_file_bytes(&key).unwrap().unwrap()) + .unwrap(), + content + ); + let metadata = local_cache.read_metadata(&key).unwrap().unwrap(); + assert_eq!( + metadata.headers, + HashMap::from([( + "content-type".to_string(), + "application/javascript".to_string(), + )]) + ); + assert_eq!(metadata.url, url.to_string()); + assert_eq!( + manifest_file.read_json_value(), + json!({ + "modules": { + "https://deno.land/x/different_content_type.ts": { + "path": "deno.land/x/#different_content_ty_f15dc.js", + "headers": { + "content-type": "application/javascript" + } + } + } + }) + ); + // delete the manifest file + manifest_file.remove_file(); + + // Now try resolving the key again and the content type should still be 
application/javascript. + // This is maintained because we hash the filename when the headers don't match the extension. + let metadata = local_cache.read_metadata(&key).unwrap().unwrap(); + assert_eq!( + metadata.headers, + HashMap::from([( + "content-type".to_string(), + "application/javascript".to_string(), + )]) + ); + } + + // reset the local cache + local_cache_path.remove_dir_all(); + let local_cache = + LocalHttpCache::new(local_cache_path.to_path_buf(), global_cache.clone()); + + // now try caching a file with many headers + { + let url = Url::parse("https://deno.land/x/my_file.ts").unwrap(); + let content = "export const test = 5;"; + global_cache + .set( + &url, + HashMap::from([ + ( + "content-type".to_string(), + "application/typescript".to_string(), + ), + ("x-typescript-types".to_string(), "./types.d.ts".to_string()), + ("x-deno-warning".to_string(), "Stop right now.".to_string()), + ( + "x-other-header".to_string(), + "Thank you very much.".to_string(), + ), + ]), + content.as_bytes(), + ) + .unwrap(); + let check_output = |local_cache: &LocalHttpCache| { + let key = local_cache.cache_item_key(&url).unwrap(); + assert_eq!( + String::from_utf8( + local_cache.read_file_bytes(&key).unwrap().unwrap() + ) + .unwrap(), + content + ); + let metadata = local_cache.read_metadata(&key).unwrap().unwrap(); + assert_eq!( + metadata.headers, + HashMap::from([ + ("x-typescript-types".to_string(), "./types.d.ts".to_string(),), + ("x-deno-warning".to_string(), "Stop right now.".to_string(),) + ]) + ); + assert_eq!(metadata.url, url.to_string()); + assert_eq!( + manifest_file.read_json_value(), + json!({ + "modules": { + "https://deno.land/x/my_file.ts": { + "path": "deno.land/x/my_file.ts", + "headers": { + "x-deno-warning": "Stop right now.", + "x-typescript-types": "./types.d.ts" + } + } + } + }) + ); + }; + check_output(&local_cache); + // now ensure it's the same when re-creating the cache + check_output(&LocalHttpCache::new( + local_cache_path.to_path_buf(), + 
global_cache.clone(), + )); + } + + // reset the local cache + local_cache_path.remove_dir_all(); + let local_cache = + LocalHttpCache::new(local_cache_path.to_path_buf(), global_cache.clone()); + + // try a file that can't be mapped to the file system + { + let url = Url::parse("https://deno.land/INVALID/Module.ts?dev").unwrap(); + let content = "export const test = 5;"; + global_cache + .set(&url, HashMap::new(), content.as_bytes()) + .unwrap(); + let key = local_cache.cache_item_key(&url).unwrap(); + assert_eq!( + String::from_utf8(local_cache.read_file_bytes(&key).unwrap().unwrap()) + .unwrap(), + content + ); + let metadata = local_cache.read_metadata(&key).unwrap().unwrap(); + // won't have any headers because the content-type is derivable from the url + assert_eq!(metadata.headers, HashMap::new()); + assert_eq!(metadata.url, url.to_string()); + + assert_eq!( + manifest_file.read_json_value(), + json!({ + "modules": { + "https://deno.land/INVALID/Module.ts?dev": { + "path": "deno.land/#invalid_1ee01/#module_b8d2b.ts" + } + } + }) + ); + } + + // reset the local cache + local_cache_path.remove_dir_all(); + let local_cache = + LocalHttpCache::new(local_cache_path.to_path_buf(), global_cache.clone()); + + // now try a redirect + { + let url = Url::parse("https://deno.land/redirect.ts").unwrap(); + global_cache + .set( + &url, + HashMap::from([("location".to_string(), "./x/mod.ts".to_string())]), + "Redirecting to other url...".as_bytes(), + ) + .unwrap(); + let key = local_cache.cache_item_key(&url).unwrap(); + let metadata = local_cache.read_metadata(&key).unwrap().unwrap(); + assert_eq!( + metadata.headers, + HashMap::from([("location".to_string(), "./x/mod.ts".to_string())]) + ); + assert_eq!(metadata.url, url.to_string()); + assert_eq!( + manifest_file.read_json_value(), + json!({ + "modules": { + "https://deno.land/redirect.ts": { + "headers": { + "location": "./x/mod.ts" + } + } + } + }) + ); + } + } +} diff --git a/cli/cache/http_cache/mod.rs 
b/cli/cache/http_cache/mod.rs new file mode 100644 index 000000000..eb5c38bbd --- /dev/null +++ b/cli/cache/http_cache/mod.rs @@ -0,0 +1,76 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. + +use deno_core::error::AnyError; +use deno_core::serde::Deserialize; +use deno_core::serde::Serialize; +use deno_core::url::Url; +use std::path::PathBuf; +use std::time::SystemTime; + +use crate::http_util::HeadersMap; + +mod common; +mod global; +mod local; + +pub use global::url_to_filename; +pub use global::GlobalHttpCache; +pub use local::LocalHttpCache; + +/// Cached metadata about a url. +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct CachedUrlMetadata { + pub headers: HeadersMap, + pub url: String, + #[serde(default = "SystemTime::now", rename = "now")] + pub time: SystemTime, +} + +impl CachedUrlMetadata { + pub fn is_redirect(&self) -> bool { + self.headers.contains_key("location") + } +} + +/// Computed cache key, which can help reduce the work of computing the cache key multiple times. +pub struct HttpCacheItemKey<'a> { + // The key is specific to the implementation of HttpCache, + // so keep these private to the module. For example, the + // fact that these may be stored in a file is an implementation + // detail. + #[cfg(debug_assertions)] + pub(super) is_local_key: bool, + pub(super) url: &'a Url, + /// This will be set all the time for the global cache, but it + /// won't ever be set for the local cache because that also needs + /// header information to determine the final path. + pub(super) file_path: Option<PathBuf>, +} + +pub trait HttpCache: Send + Sync + std::fmt::Debug { + /// A pre-computed key for looking up items in the cache. 
+ fn cache_item_key<'a>( + &self, + url: &'a Url, + ) -> Result<HttpCacheItemKey<'a>, AnyError>; + + fn contains(&self, url: &Url) -> bool; + fn set( + &self, + url: &Url, + headers: HeadersMap, + content: &[u8], + ) -> Result<(), AnyError>; + fn read_modified_time( + &self, + key: &HttpCacheItemKey, + ) -> Result<Option<SystemTime>, AnyError>; + fn read_file_bytes( + &self, + key: &HttpCacheItemKey, + ) -> Result<Option<Vec<u8>>, AnyError>; + fn read_metadata( + &self, + key: &HttpCacheItemKey, + ) -> Result<Option<CachedUrlMetadata>, AnyError>; +} diff --git a/cli/cache/mod.rs b/cli/cache/mod.rs index 94ccb42e4..7903a9665 100644 --- a/cli/cache/mod.rs +++ b/cli/cache/mod.rs @@ -12,6 +12,7 @@ use deno_graph::source::LoadResponse; use deno_graph::source::Loader; use deno_runtime::permissions::PermissionsContainer; use std::collections::HashMap; +use std::path::PathBuf; use std::sync::Arc; mod cache_db; @@ -34,7 +35,9 @@ pub use deno_dir::DenoDirProvider; pub use disk_cache::DiskCache; pub use emit::EmitCache; pub use http_cache::CachedUrlMetadata; +pub use http_cache::GlobalHttpCache; pub use http_cache::HttpCache; +pub use http_cache::LocalHttpCache; pub use incremental::IncrementalCache; pub use node::NodeAnalysisCache; pub use parsed_source::ParsedSourceCache; @@ -48,6 +51,7 @@ pub struct FetchCacher { emit_cache: EmitCache, file_fetcher: Arc<FileFetcher>, file_header_overrides: HashMap<ModuleSpecifier, HashMap<String, String>>, + global_http_cache: Arc<GlobalHttpCache>, permissions: PermissionsContainer, cache_info_enabled: bool, maybe_local_node_modules_url: Option<ModuleSpecifier>, @@ -58,6 +62,7 @@ impl FetchCacher { emit_cache: EmitCache, file_fetcher: Arc<FileFetcher>, file_header_overrides: HashMap<ModuleSpecifier, HashMap<String, String>>, + global_http_cache: Arc<GlobalHttpCache>, permissions: PermissionsContainer, maybe_local_node_modules_url: Option<ModuleSpecifier>, ) -> Self { @@ -65,6 +70,7 @@ impl FetchCacher { emit_cache, file_fetcher, 
file_header_overrides, + global_http_cache, permissions, cache_info_enabled: false, maybe_local_node_modules_url, @@ -76,6 +82,31 @@ impl FetchCacher { pub fn enable_loading_cache_info(&mut self) { self.cache_info_enabled = true; } + + // DEPRECATED: Where the file is stored and how it's stored should be an implementation + // detail of the cache. + // + // todo(dsherret): remove once implementing + // * https://github.com/denoland/deno/issues/17707 + // * https://github.com/denoland/deno/issues/17703 + #[deprecated( + note = "There should not be a way to do this because the file may not be cached at a local path in the future." + )] + fn get_local_path(&self, specifier: &ModuleSpecifier) -> Option<PathBuf> { + // TODO(@kitsonk) fix when deno_graph does not query cache for synthetic + // modules + if specifier.scheme() == "flags" { + None + } else if specifier.scheme() == "file" { + specifier.to_file_path().ok() + } else { + #[allow(deprecated)] + self + .global_http_cache + .get_global_cache_filepath(specifier) + .ok() + } + } } impl Loader for FetchCacher { @@ -85,7 +116,7 @@ impl Loader for FetchCacher { } #[allow(deprecated)] - let local = self.file_fetcher.get_local_path(specifier)?; + let local = self.get_local_path(specifier)?; if local.is_file() { let emit = self .emit_cache |