summary refs log tree commit diff
path: root/cli/js/web/text_encoding.ts
diff options
context:
space:
mode:
Diffstat (limited to 'cli/js/web/text_encoding.ts')
-rw-r--r-- cli/js/web/text_encoding.ts 581
1 files changed, 0 insertions, 581 deletions
diff --git a/cli/js/web/text_encoding.ts b/cli/js/web/text_encoding.ts
deleted file mode 100644
index 97848cb77..000000000
--- a/cli/js/web/text_encoding.ts
+++ /dev/null
@@ -1,581 +0,0 @@
-// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
-
-// The following code is based off of text-encoding at:
-// https://github.com/inexorabletash/text-encoding
-//
-// Anyone is free to copy, modify, publish, use, compile, sell, or
-// distribute this software, either in source code form or as a compiled
-// binary, for any purpose, commercial or non-commercial, and by any
-// means.
-//
-// In jurisdictions that recognize copyright laws, the author or authors
-// of this software dedicate any and all copyright interest in the
-// software to the public domain. We make this dedication for the benefit
-// of the public at large and to the detriment of our heirs and
-// successors. We intend this dedication to be an overt act of
-// relinquishment in perpetuity of all present and future rights to this
-// software under copyright law.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-import { DOMExceptionImpl as DOMException } from "./dom_exception.ts";
-import * as base64 from "./base64.ts";
-import { decodeUtf8 } from "./decode_utf8.ts";
-import { core } from "../core.ts";
-
-const CONTINUE = null; // Decoder handler result meaning "nothing to emit for this byte"; TextDecoder.decode() skips it.
-const END_OF_STREAM = -1; // Sentinel token that Stream.read() returns once no tokens remain.
-const FINISHED = -1; // Returned by a decoder handler when it is given END_OF_STREAM; ends the decode loop.
-
-/**
- * Signals an undecodable byte.
- *
- * @param fatal When true the error is raised instead of being recovered from.
- * @returns U+FFFD (REPLACEMENT CHARACTER) when `fatal` is false.
- * @throws TypeError when `fatal` is true.
- */
-function decoderError(fatal: boolean): number | never {
-  if (!fatal) {
-    return 0xfffd; // replacement code point
-  }
-  throw new TypeError("Decoder error.");
-}
-
-/** Returns true when `a` lies in the closed interval [`min`, `max`]. */
-function inRange(a: number, min: number, max: number): boolean {
-  return a >= min && a <= max;
-}
-
-/** Returns true when `a` is within the ASCII range 0x00-0x7F (inclusive). */
-function isASCIIByte(a: number): boolean {
-  return a >= 0x00 && a <= 0x7f;
-}
-
-/**
- * Converts a string into its sequence of Unicode scalar values.
- *
- * Surrogate pairs are combined into a single code point. A lone (unpaired)
- * surrogate cannot be represented in UTF-8, so it is replaced with U+FFFD
- * (REPLACEMENT CHARACTER) rather than being passed through to the encoder.
- *
- * @param input The string to split.
- * @returns One entry per code point of `input`, in order.
- */
-function stringToCodePoints(input: string): number[] {
-  const codePoints: number[] = [];
-  // String iteration yields whole code points; an unpaired surrogate is
-  // yielded on its own, so it is the only way a value in 0xD800-0xDFFF can
-  // show up here.
-  for (const ch of input) {
-    const cp = ch.codePointAt(0)!;
-    const isLoneSurrogate = cp >= 0xd800 && cp <= 0xdfff;
-    codePoints.push(isLoneSurrogate ? 0xfffd : cp);
-  }
-  return codePoints;
-}
-
-/** Encodes Unicode code points as UTF-8, one code point per `handler` call. */
-class UTF8Encoder implements Encoder {
-  /**
-   * @param codePoint The code point to encode, or END_OF_STREAM once the
-   *   input is exhausted.
-   * @returns "finished" for END_OF_STREAM, otherwise the bytes that encode
-   *   `codePoint`.
-   * @throws TypeError when `codePoint` is outside 0x00-0x10FFFF.
-   */
-  handler(codePoint: number): "finished" | number[] {
-    if (codePoint === END_OF_STREAM) {
-      return "finished";
-    }
-    if (inRange(codePoint, 0x00, 0x7f)) {
-      return [codePoint]; // ASCII encodes as itself
-    }
-
-    // How many continuation bytes follow the lead byte, and the marker bits
-    // added to the lead byte for that sequence length.
-    let trailing: number;
-    let leadMarker: number;
-    if (inRange(codePoint, 0x0080, 0x07ff)) {
-      [trailing, leadMarker] = [1, 0xc0];
-    } else if (inRange(codePoint, 0x0800, 0xffff)) {
-      [trailing, leadMarker] = [2, 0xe0];
-    } else if (inRange(codePoint, 0x10000, 0x10ffff)) {
-      [trailing, leadMarker] = [3, 0xf0];
-    } else {
-      throw new TypeError(
-        `Code point out of range: \\x${codePoint.toString(16)}`,
-      );
-    }
-
-    const encoded = [(codePoint >> (6 * trailing)) + leadMarker];
-    // Emit the remaining bits six at a time, most significant group first.
-    for (let shift = 6 * (trailing - 1); shift >= 0; shift -= 6) {
-      encoded.push(0x80 | ((codePoint >> shift) & 0x3f));
-    }
-    return encoded;
-  }
-}
-
-/**
- * Decodes a base64 string into a "binary string": one character per decoded
- * byte, each with a char code in 0x00-0xFF.
- *
- * ASCII whitespace (tab, LF, FF, CR, space) is removed first, and one or two
- * trailing "=" are dropped when the remaining length is a multiple of 4.
- *
- * @param s The base64 text; it is converted with `String()` first.
- * @returns The decoded bytes as a string.
- * @throws DOMException named "InvalidCharacterError" when, after the
- *   clean-up above, the length modulo 4 is 1 or a character outside the
- *   base64 alphabet remains.
- */
-export function atob(s: string): string {
-  s = String(s);
-  s = s.replace(/[\t\n\f\r ]/g, "");
-
-  if (s.length % 4 === 0) {
-    s = s.replace(/==?$/, "");
-  }
-
-  const rem = s.length % 4;
-  if (rem === 1 || /[^+/0-9A-Za-z]/.test(s)) {
-    // "InvalidCharacterError" is the name the HTML Standard assigns to this
-    // failure; callers that branch on `error.name` rely on it.
-    throw new DOMException(
-      "The string to be decoded is not correctly encoded",
-      "InvalidCharacterError",
-    );
-  }
-
-  // base64.toByteArray needs a length that is a multiple of 4, so put back
-  // the padding that was stripped (or never supplied).
-  if (rem > 0) {
-    s = s.padEnd(s.length + (4 - rem), "=");
-  }
-
-  const byteArray: Uint8Array = base64.toByteArray(s);
-  let result = "";
-  for (let i = 0; i < byteArray.length; i++) {
-    result += String.fromCharCode(byteArray[i]);
-  }
-  return result;
-}
-
-/**
- * Encodes a "binary string" (every char code in 0x00-0xFF) as base64.
- *
- * @param s The text to encode; it is converted with `String()` first, the
- *   same way `atob` treats its argument.
- * @returns The base64 representation of the char codes of `s`.
- * @throws TypeError when `s` contains a character whose code is above 0xFF.
- */
-export function btoa(s: string): string {
-  // Untyped callers may pass a non-string; without this, a value that has no
-  // `length` would be silently encoded as the empty string.
-  s = String(s);
-
-  const bytes = new Uint8Array(s.length);
-  for (let i = 0; i < s.length; i++) {
-    const charCode = s.charCodeAt(i);
-    if (charCode > 0xff) {
-      throw new TypeError(
-        "The string to be encoded contains characters " +
-          "outside of the Latin1 range.",
-      );
-    }
-    bytes[i] = charCode;
-  }
-  return base64.fromByteArray(bytes);
-}
-
-interface DecoderOptions { // Options handed to an internal decoder when it is constructed.
- fatal?: boolean; // Forwarded from TextDecoder.fatal; selects throwing over U+FFFD substitution in decoderError().
- ignoreBOM?: boolean; // Forwarded from TextDecoder.ignoreBOM.
-}
-
-interface Decoder { // A byte decoder that TextDecoder.decode() drives one token at a time.
- handler(stream: Stream, byte: number): number | null; // Returns a code point to emit, FINISHED to stop, or null (CONTINUE) to emit nothing.
-}
-
-interface Encoder { // A code point encoder that is driven one code point at a time.
- handler(codePoint: number): "finished" | number[]; // Returns the encoded bytes, or "finished" when given END_OF_STREAM.
-}
-
-/**
- * Decoder for single-byte encodings: ASCII bytes decode to themselves and
- * bytes 0x80-0xFF are looked up in a per-encoding table.
- */
-class SingleByteDecoder implements Decoder {
-  readonly #index: number[];
-  readonly #fatal: boolean;
-
-  /**
-   * @param index Code points for the upper half of the byte range; entry `i`
-   *   is the code point for byte `0x80 + i`.
-   * @param options `fatal` makes an unmapped byte throw instead of decoding
-   *   to U+FFFD. `ignoreBOM` is accepted and has no effect here; rejecting
-   *   it would make a decoder that was constructed successfully fail on
-   *   every `decode()` call.
-   */
-  constructor(index: number[], { fatal = false }: DecoderOptions = {}) {
-    this.#fatal = fatal;
-    this.#index = index;
-  }
-
-  /**
-   * @param _stream Unused; single-byte decoding never looks ahead.
-   * @param byte The byte to decode, or END_OF_STREAM.
-   * @returns FINISHED for END_OF_STREAM, otherwise the decoded code point
-   *   (U+FFFD for an unmapped byte when not fatal).
-   * @throws TypeError for an unmapped byte when the decoder is fatal.
-   */
-  handler(_stream: Stream, byte: number): number {
-    if (byte === END_OF_STREAM) {
-      return FINISHED;
-    }
-    if (isASCIIByte(byte)) {
-      return byte;
-    }
-
-    const codePoint = this.#index[byte - 0x80];
-    // `== null` also catches `undefined`, i.e. an index past the table end.
-    if (codePoint == null) {
-      return decoderError(this.#fatal);
-    }
-    return codePoint;
-  }
-}
-
-// Every accepted label, grouped under the canonical encoding name it
-// resolves to.
-const encodingMap: { [key: string]: string[] } = {
-  "windows-1252": [
-    "ansi_x3.4-1968",
-    "ascii",
-    "cp1252",
-    "cp819",
-    "csisolatin1",
-    "ibm819",
-    "iso-8859-1",
-    "iso-ir-100",
-    "iso8859-1",
-    "iso88591",
-    "iso_8859-1",
-    "iso_8859-1:1987",
-    "l1",
-    "latin1",
-    "us-ascii",
-    "windows-1252",
-    "x-cp1252",
-  ],
-  "utf-8": ["unicode-1-1-utf-8", "utf-8", "utf8"],
-};
-// The inverse lookup used at runtime: label -> canonical encoding name.
-const encodings = new Map<string, string>();
-for (const [canonical, labels] of Object.entries(encodingMap)) {
-  labels.forEach((label) => {
-    encodings.set(label, canonical);
-  });
-}
-
-// Decoder factories keyed by canonical encoding name; each call builds a
-// fresh decoder instance.
-const decoders = new Map<string, (options: DecoderOptions) => Decoder>();
-
-// Lookup tables for the single-byte encodings. Each table holds the code
-// points for bytes 0x80-0xFF, in byte order.
-const encodingIndexes = new Map<string, number[]>();
-// deno-fmt-ignore
-encodingIndexes.set("windows-1252", [
-  8364, 129, 8218, 402, 8222, 8230, 8224, 8225,
-  710, 8240, 352, 8249, 338, 141, 381, 143,
-  144, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
-  732, 8482, 353, 8250, 339, 157, 382, 376,
-  160, 161, 162, 163, 164, 165, 166, 167,
-  168, 169, 170, 171, 172, 173, 174, 175,
-  176, 177, 178, 179, 180, 181, 182, 183,
-  184, 185, 186, 187, 188, 189, 190, 191,
-  192, 193, 194, 195, 196, 197, 198, 199,
-  200, 201, 202, 203, 204, 205, 206, 207,
-  208, 209, 210, 211, 212, 213, 214, 215,
-  216, 217, 218, 219, 220, 221, 222, 223,
-  224, 225, 226, 227, 228, 229, 230, 231,
-  232, 233, 234, 235, 236, 237, 238, 239,
-  240, 241, 242, 243, 244, 245, 246, 247,
-  248, 249, 250, 251, 252, 253, 254, 255,
-]);
-// Register one SingleByteDecoder factory per lookup table.
-encodingIndexes.forEach((index, key) => {
-  decoders.set(
-    key,
-    (options: DecoderOptions): SingleByteDecoder =>
-      new SingleByteDecoder(index, options),
-  );
-});
-
-/**
- * Builds a string from a sequence of Unicode code points.
- *
- * @param codePoints The code points, in output order.
- * @returns The concatenation of the characters for each code point.
- */
-function codePointsToString(codePoints: number[]): string {
-  return Array.from(codePoints, (cp) => String.fromCodePoint(cp)).join("");
-}
-
-/**
- * A queue of numeric tokens (bytes or code points) that is consumed from the
- * front and can be extended at either end.
- */
-class Stream {
-  // Held in reverse order so the next token to read is the last element,
-  // which makes read() and prepend() cheap pop/push operations.
-  #tokens: number[];
-
-  /** @param tokens The initial tokens, in reading order; they are copied. */
-  constructor(tokens: number[] | Uint8Array) {
-    this.#tokens = Array.from(tokens).reverse();
-  }
-
-  /** Returns true when no tokens remain. */
-  endOfStream(): boolean {
-    return this.#tokens.length === 0;
-  }
-
-  /** Removes and returns the next token, or END_OF_STREAM when empty. */
-  read(): number {
-    return this.#tokens.length === 0 ? END_OF_STREAM : this.#tokens.pop()!;
-  }
-
-  /**
-   * Puts one or more tokens at the front of the stream. An array is read
-   * back in its own order, and the caller's array is left unmodified.
-   */
-  prepend(token: number | number[]): void {
-    if (Array.isArray(token)) {
-      // Push last-to-first so token[0] ends up as the next token read.
-      for (let i = token.length - 1; i >= 0; i--) {
-        this.#tokens.push(token[i]);
-      }
-    } else {
-      this.#tokens.push(token);
-    }
-  }
-
-  /**
-   * Adds one or more tokens at the back of the stream. An array is read
-   * back in its own order, and the caller's array is left unmodified.
-   */
-  push(token: number | number[]): void {
-    if (Array.isArray(token)) {
-      // One linear rebuild instead of an unshift per element.
-      this.#tokens = [...token].reverse().concat(this.#tokens);
-    } else {
-      this.#tokens.unshift(token);
-    }
-  }
-}
-
-export interface TextDecodeOptions { // Per-call options for TextDecoder.decode().
- stream?: false; // Only false is allowed by the type; decode() throws "Stream not supported." for a truthy value.
-}
-
-export interface TextDecoderOptions { // Constructor options for TextDecoder.
- fatal?: boolean; // A truthy value sets TextDecoder.fatal to true; otherwise it stays false.
- ignoreBOM?: boolean; // A truthy value sets TextDecoder.ignoreBOM to true; otherwise it stays false.
-}
-
-type EitherArrayBuffer = SharedArrayBuffer | ArrayBuffer;
-
-/**
- * Type guard for raw buffers.
- *
- * @param x Any value.
- * @returns true when `x` is an ArrayBuffer or a SharedArrayBuffer instance.
- */
-function isEitherArrayBuffer(x: unknown): x is EitherArrayBuffer {
-  return x instanceof SharedArrayBuffer || x instanceof ArrayBuffer;
-}
-
-/**
- * Decodes bytes into a string using the encoding chosen at construction
- * time.
- */
-export class TextDecoder {
-  readonly #encoding: string;
-
-  /** Canonical name of the encoding this decoder was created for. */
-  get encoding(): string {
-    return this.#encoding;
-  }
-  readonly fatal: boolean = false;
-  readonly ignoreBOM: boolean = false;
-
-  /**
-   * @param label An encoding label; it is stringified, trimmed and
-   *   lower-cased before lookup.
-   * @param options Truthy `fatal` / `ignoreBOM` switch the matching property
-   *   on.
-   * @throws RangeError when `label` is not a known label.
-   * @throws TypeError when the label is known but no decoder is registered
-   *   for its encoding.
-   */
-  constructor(label = "utf-8", options: TextDecoderOptions = { fatal: false }) {
-    if (options.ignoreBOM) {
-      this.ignoreBOM = true;
-    }
-    if (options.fatal) {
-      this.fatal = true;
-    }
-
-    label = String(label).trim().toLowerCase();
-    const canonical = encodings.get(label);
-    if (!canonical) {
-      throw new RangeError(
-        `The encoding label provided ('${label}') is invalid.`,
-      );
-    }
-    // utf-8 has dedicated fast paths in decode() and needs no registry entry.
-    const supported = canonical === "utf-8" || decoders.has(canonical);
-    if (!supported) {
-      throw new TypeError(`Internal decoder ('${canonical}') not found.`);
-    }
-    this.#encoding = canonical;
-  }
-
-  /**
-   * @param input The bytes to decode: a Uint8Array, an (Shared)ArrayBuffer,
-   *   or an object exposing such a buffer through `buffer`, `byteOffset` and
-   *   `byteLength`. Anything else is decoded as zero bytes.
-   * @param options `stream` must not be truthy.
-   * @returns The decoded text.
-   * @throws TypeError when `options.stream` is truthy.
-   */
-  decode(
-    input?: BufferSource,
-    options: TextDecodeOptions = { stream: false },
-  ): string {
-    if (options.stream) {
-      throw new TypeError("Stream not supported.");
-    }
-
-    let bytes: Uint8Array;
-    if (input instanceof Uint8Array) {
-      bytes = input;
-    } else if (isEitherArrayBuffer(input)) {
-      bytes = new Uint8Array(input);
-    } else if (
-      typeof input === "object" &&
-      "buffer" in input &&
-      isEitherArrayBuffer(input.buffer)
-    ) {
-      bytes = new Uint8Array(input.buffer, input.byteOffset, input.byteLength);
-    } else {
-      bytes = new Uint8Array(0);
-    }
-
-    if (this.#encoding === "utf-8") {
-      // With default settings "Deno.core.decode" is used for performance;
-      // otherwise the optimised utf-8 decoder that honours both flags.
-      const defaults = this.fatal === false && this.ignoreBOM === false;
-      return defaults
-        ? core.decode(bytes)
-        : decodeUtf8(bytes, this.fatal, this.ignoreBOM);
-    }
-
-    // General path: feed the registered decoder one token at a time.
-    const createDecoder = decoders.get(this.#encoding)!;
-    const decoder = createDecoder({
-      fatal: this.fatal,
-      ignoreBOM: this.ignoreBOM,
-    });
-    const inputStream = new Stream(bytes);
-    const codePoints: number[] = [];
-    for (
-      let result = decoder.handler(inputStream, inputStream.read());
-      result !== FINISHED;
-      result = decoder.handler(inputStream, inputStream.read())
-    ) {
-      if (result !== CONTINUE) {
-        codePoints.push(result);
-      }
-    }
-
-    // Drop a leading U+FEFF from the decoded output.
-    if (codePoints[0] === 0xfeff) {
-      codePoints.shift();
-    }
-
-    return codePointsToString(codePoints);
-  }
-
-  get [Symbol.toStringTag](): string {
-    return "TextDecoder";
-  }
-}
-
-interface TextEncoderEncodeIntoResult { // Result of TextEncoder.encodeInto().
- read: number; // Incremented once per code point written, and once more when that code point took 4 bytes.
- written: number; // Number of bytes stored into the destination array.
-}
-
-/** Encodes strings as UTF-8. */
-export class TextEncoder {
-  readonly encoding = "utf-8";
-
-  /**
-   * @param input The text to encode; defaults to the empty string.
-   * @returns The UTF-8 bytes of `input`.
-   */
-  encode(input = ""): Uint8Array {
-    // Deno.core.encode() provides very efficient utf-8 encoding. `encoding`
-    // is always "utf-8", so no other code path is needed.
-    return core.encode(input);
-  }
-
-  /**
-   * Encodes as much of `input` as fits into `dest`, never splitting the
-   * bytes of a single code point.
-   *
-   * @param input The text to encode.
-   * @param dest The array that receives the bytes, starting at index 0.
-   * @returns `read`, the number of UTF-16 code units of `input` consumed,
-   *   and `written`, the number of bytes stored in `dest`.
-   */
-  encodeInto(input: string, dest: Uint8Array): TextEncoderEncodeIntoResult {
-    const encoder = new UTF8Encoder();
-
-    let written = 0;
-    let read = 0;
-    for (const codePoint of stringToCodePoints(input)) {
-      const result = encoder.handler(codePoint);
-      if (result === "finished") {
-        break;
-      }
-      if (dest.length - written < result.length) {
-        // The next code point does not fit; stop before writing part of it.
-        break;
-      }
-      dest.set(result, written);
-      written += result.length;
-      // A 4-byte sequence is a code point above U+FFFF, which occupies two
-      // UTF-16 code units in the input string.
-      read += result.length > 3 ? 2 : 1;
-    }
-
-    return {
-      read,
-      written,
-    };
-  }
-
-  get [Symbol.toStringTag](): string {
-    return "TextEncoder";
-  }
-}