summaryrefslogtreecommitdiff
path: root/ext/url/01_urlpattern.js
diff options
context:
space:
mode:
authorLuca Casonato <hello@lcas.dev>2023-12-08 12:02:52 +0100
committerGitHub <noreply@github.com>2023-12-08 12:02:52 +0100
commite15c735edee6a6faf17f618b245908f635a3d9a7 (patch)
tree4de9d660757abccfa39b6e72fadb00e37516a525 /ext/url/01_urlpattern.js
parentb24356d9b9827062bee35e720b5889a403ae89f3 (diff)
perf(ext/url): improve URLPattern perf (#21488)
This significantly optimizes URLPattern in the case where the same URL is matched against many patterns (like in a router). Also minor speedups to other use-cases.
Diffstat (limited to 'ext/url/01_urlpattern.js')
-rw-r--r--ext/url/01_urlpattern.js189
1 files changed, 146 insertions, 43 deletions
diff --git a/ext/url/01_urlpattern.js b/ext/url/01_urlpattern.js
index e0f63b707..2ab39f51f 100644
--- a/ext/url/01_urlpattern.js
+++ b/ext/url/01_urlpattern.js
@@ -12,17 +12,20 @@ const ops = core.ops;
import * as webidl from "ext:deno_webidl/00_webidl.js";
import { createFilteredInspectProxy } from "ext:deno_console/01_console.js";
const {
- ArrayPrototypePop,
+ ArrayPrototypePush,
+ MathRandom,
+ ObjectAssign,
+ ObjectCreate,
+ ObjectPrototypeIsPrototypeOf,
RegExpPrototypeExec,
RegExpPrototypeTest,
- ObjectPrototypeIsPrototypeOf,
+ SafeMap,
SafeRegExp,
Symbol,
SymbolFor,
TypeError,
} = primordials;
-const EMPTY_MATCH = [""];
const _components = Symbol("components");
/**
@@ -54,10 +57,88 @@ const COMPONENTS_KEYS = [
* @property {string[]} groupNameList
*/
+/**
+ * This implements a least-recently-used cache that has a pseudo-"young
+ * generation" by using sampling. The idea is that we want to keep the most
+ * recently used items in the cache, but we don't want to pay the cost of
+ * updating the cache on every access. This relies on the fact that the data
+ * we're caching is not uniformly distributed, and that the most recently used
+ * items are more likely to be used again soon (long tail distribution).
+ *
+ * The LRU cache is implemented as a Map, with the key being the cache key and
+ * the value being the cache value. When an item is accessed, it is moved to the
+ * end of the Map. When an item is inserted, if the Map is at capacity, the
+ * first item in the Map is deleted. Because maps iterate using insertion order,
+ * this means that the oldest item is always the first.
+ *
+ * The sampling is implemented by using a random number generator to decide
+ * whether to update the cache on each access. This means that the cache will
+ * not be updated on every access, but will be updated on a random subset of
+ * accesses.
+ *
+ * @template K
+ * @template V
+ */
+class SampledLRUCache {
+ /** @type {SafeMap<K, V>} */
+ #map = new SafeMap();
+ #capacity = 0;
+ #sampleRate = 0.1;
+
+ /** @type {K} */
+ #lastUsedKey = undefined;
+ /** @type {V} */
+ #lastUsedValue = undefined;
+
+ /** @param {number} capacity */
+ constructor(capacity) {
+ this.#capacity = capacity;
+ }
+
+ /**
+ * @param {K} key
+ * @param {(key: K) => V} factory
+ * @return {V}
+ */
+ getOrInsert(key, factory) {
+ if (this.#lastUsedKey === key) return this.#lastUsedValue;
+ const value = this.#map.get(key);
+ if (value !== undefined) {
+ if (MathRandom() < this.#sampleRate) {
+ // put the item into the map
+ this.#map.delete(key);
+ this.#map.set(key, value);
+ }
+ this.#lastUsedKey = key;
+ this.#lastUsedValue = value;
+ return value;
+ } else {
+ // value doesn't exist yet, create
+ const value = factory(key);
+ if (MathRandom() < this.#sampleRate) {
+ // if the map is at capacity, delete the oldest (first) element
+ if (this.#map.size > this.#capacity) {
+ // deno-lint-ignore prefer-primordials
+ this.#map.delete(this.#map.keys().next().value);
+ }
+ // insert the new value
+ this.#map.set(key, value);
+ }
+ this.#lastUsedKey = key;
+ this.#lastUsedValue = value;
+ return value;
+ }
+ }
+}
+
+const matchInputCache = new SampledLRUCache(4096);
+
class URLPattern {
/** @type {Components} */
[_components];
+ #reusedResult;
+
/**
* @param {URLPatternInput} input
* @param {string} [baseURL]
@@ -80,9 +161,6 @@ class URLPattern {
components[key].regexpString,
"u",
);
- // used for fast path
- components[key].matchOnEmptyInput =
- components[key].regexpString === "^$";
} catch (e) {
throw new TypeError(`${prefix}: ${key} is invalid; ${e.message}`);
}
@@ -144,20 +222,28 @@ class URLPattern {
baseURL = webidl.converters.USVString(baseURL, prefix, "Argument 2");
}
- const res = ops.op_urlpattern_process_match_input(
- input,
- baseURL,
- );
- if (res === null) {
- return false;
- }
+ const res = baseURL === undefined
+ ? matchInputCache.getOrInsert(
+ input,
+ ops.op_urlpattern_process_match_input,
+ )
+ : ops.op_urlpattern_process_match_input(input, baseURL);
+ if (res === null) return false;
const values = res[0];
for (let i = 0; i < COMPONENTS_KEYS.length; ++i) {
const key = COMPONENTS_KEYS[i];
- if (!RegExpPrototypeTest(this[_components][key].regexp, values[key])) {
- return false;
+ const component = this[_components][key];
+ switch (component.regexpString) {
+ case "^$":
+ if (values[key] !== "") return false;
+ break;
+ case "^(.*)$":
+ break;
+ default: {
+ if (!RegExpPrototypeTest(component.regexp, values[key])) return false;
+ }
}
}
@@ -178,48 +264,65 @@ class URLPattern {
baseURL = webidl.converters.USVString(baseURL, prefix, "Argument 2");
}
- const res = ops.op_urlpattern_process_match_input(
- input,
- baseURL,
- );
+ const res = baseURL === undefined
+ ? matchInputCache.getOrInsert(
+ input,
+ ops.op_urlpattern_process_match_input,
+ )
+ : ops.op_urlpattern_process_match_input(input, baseURL);
if (res === null) {
return null;
}
- const { 0: values, 1: inputs } = res;
- if (inputs[1] === null) {
- ArrayPrototypePop(inputs);
- }
+ const { 0: values, 1: inputs } = res; /** @type {URLPatternResult} */
+
+ // globalThis.allocAttempt++;
+ this.#reusedResult ??= { inputs: [undefined] };
+ const result = this.#reusedResult;
+ // We don't construct the `inputs` until after the matching is done under
+ // the assumption that most patterns do not match.
- /** @type {URLPatternResult} */
- const result = { inputs };
+ const components = this[_components];
for (let i = 0; i < COMPONENTS_KEYS.length; ++i) {
const key = COMPONENTS_KEYS[i];
/** @type {Component} */
- const component = this[_components][key];
- const input = values[key];
-
- const match = component.matchOnEmptyInput && input === ""
- ? EMPTY_MATCH // fast path
- : RegExpPrototypeExec(component.regexp, input);
+ const component = components[key];
- if (match === null) {
- return null;
- }
+ const res = result[key] ??= {
+ input: values[key],
+ groups: component.regexpString === "^(.*)$" ? { "0": values[key] } : {},
+ };
- const groups = {};
- const groupList = component.groupNameList;
- for (let i = 0; i < groupList.length; ++i) {
- groups[groupList[i]] = match[i + 1] ?? "";
+ switch (component.regexpString) {
+ case "^$":
+ if (values[key] !== "") return null;
+ break;
+ case "^(.*)$":
+ res.groups["0"] = values[key];
+ break;
+ default: {
+ const match = RegExpPrototypeExec(component.regexp, values[key]);
+ if (match === null) return null;
+ const groupList = component.groupNameList;
+ const groups = res.groups;
+ for (let i = 0; i < groupList.length; ++i) {
+ // TODO(lucacasonato): this is vulnerable to override mistake
+ groups[groupList[i]] = match[i + 1] ?? "";
+ }
+ break;
+ }
}
-
- result[key] = {
- input,
- groups,
- };
+ res.input = values[key];
}
+ // Now populate result.inputs
+ result.inputs[0] = typeof inputs[0] === "string"
+ ? inputs[0]
+ : ObjectAssign(ObjectCreate(null), inputs[0]);
+ if (inputs[1] !== null) ArrayPrototypePush(result.inputs, inputs[1]);
+
+ this.#reusedResult = undefined;
return result;
}