diff options
Diffstat (limited to 'js/text_encoding.ts')
-rw-r--r-- | js/text_encoding.ts | 330 |
1 files changed, 312 insertions, 18 deletions
diff --git a/js/text_encoding.ts b/js/text_encoding.ts index 72a355d5f..7cd324c72 100644 --- a/js/text_encoding.ts +++ b/js/text_encoding.ts @@ -1,5 +1,29 @@ -// Copyright 2018 the Deno authors. All rights reserved. MIT license. +// The following code is based off of text-encoding at: +// https://github.com/inexorabletash/text-encoding +// +// Anyone is free to copy, modify, publish, use, compile, sell, or +// distribute this software, either in source code form or as a compiled +// binary, for any purpose, commercial or non-commercial, and by any +// means. +// +// In jurisdictions that recognize copyright laws, the author or authors +// of this software dedicate any and all copyright interest in the +// software to the public domain. We make this dedication for the benefit +// of the public at large and to the detriment of our heirs and +// successors. We intend this dedication to be an overt act of +// relinquishment in perpetuity of all present and future rights to this +// software under copyright law. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + import * as base64 from "base64-js"; +import * as domTypes from "./dom_types"; import { DenoError, ErrorKind } from "./errors"; /** Decodes a string of data which has been encoded using base-64. */ @@ -43,29 +67,299 @@ export function btoa(s: string): string { return result; } -// @types/text-encoding relies on lib.dom.d.ts for some interfaces. We do not -// want to include lib.dom.d.ts (due to size) into deno's global type scope. -// Therefore this hack: add a few of the missing interfaces in -// @types/text-encoding to the global scope before importing. +interface Decoder { + handler(stream: Stream, byte: number): number | number[] | null; +} + +interface Encoder { + handler(codePoint: number): number | number[]; +} + +const CONTINUE = null; +const END_OF_STREAM = -1; +const FINISHED = -1; + +function codePointsToString(codePoints: number[]): string { + let s = ""; + for (const cp of codePoints) { + s += String.fromCodePoint(cp); + } + return s; +} + +function decoderError(fatal: boolean): number | never { + if (fatal) { + throw new TypeError("Decoder error."); + } + return 0xfffd; // default code point +} + +function inRange(a: number, min: number, max: number) { + return min <= a && a <= max; +} + +function stringToCodePoints(input: string): number[] { + const u: number[] = []; + for (const c of input) { + u.push(c.codePointAt(0)!); + } + return u; +} + +class Stream { + private _tokens: number[]; + constructor(tokens: number[] | Uint8Array) { + this._tokens = [].slice.call(tokens); + this._tokens.reverse(); + } + + endOfStream(): boolean { + return !this._tokens.length; + } + + read(): number { + return !this._tokens.length ? END_OF_STREAM : this._tokens.pop()!; + } + + prepend(token: number | number[]): void { + if (Array.isArray(token)) { + while (token.length) { + this._tokens.push(token.pop()!); + } + } else { + this._tokens.push(token); + } + } + + push(token: number | number[]): void { + if (Array.isArray(token)) { + while (token.length) { + this._tokens.unshift(token.shift()!); + } + } else { + this._tokens.unshift(token); + } + } +} + +class UTF8Decoder implements Decoder { + private _codePoint = 0; + private _bytesSeen = 0; + private _bytesNeeded = 0; + private _fatal: boolean; + private _lowerBoundary = 0x80; + private _upperBoundary = 0xbf; + + constructor(options = { fatal: false }) { + this._fatal = options.fatal; + } + + handler(stream: Stream, byte: number): number | null { + if (byte === END_OF_STREAM && this._bytesNeeded !== 0) { + this._bytesNeeded = 0; + return decoderError(this._fatal); + } + + if (byte === END_OF_STREAM) { + return FINISHED; + } + + if (this._bytesNeeded === 0) { + if (inRange(byte, 0x00, 0x7f)) { + // Single byte code point + return byte; + } else if (inRange(byte, 0xc2, 0xdf)) { + // Two byte code point + this._bytesNeeded = 1; + this._codePoint = byte & 0x1f; + } else if (inRange(byte, 0xe0, 0xef)) { + // Three byte code point + if (byte === 0xe0) { + this._lowerBoundary = 0xa0; + } else if (byte === 0xed) { + this._upperBoundary = 0x9f; + } + this._bytesNeeded = 2; + this._codePoint = byte & 0xf; + } else if (inRange(byte, 0xf0, 0xf4)) { + if (byte === 0xf0) { + this._lowerBoundary = 0x90; + } else if (byte === 0xf4) { + this._upperBoundary = 0x8f; + } + this._bytesNeeded = 3; + this._codePoint = byte & 0x7; + } else { + return decoderError(this._fatal); + } + return CONTINUE; + } + + if (!inRange(byte, this._lowerBoundary, this._upperBoundary)) { + // Byte out of range, so encoding error + this._codePoint = 0; + this._bytesNeeded = 0; + this._bytesSeen = 0; + stream.prepend(byte); + return decoderError(this._fatal); + } + + this._lowerBoundary = 0x80; + this._upperBoundary = 0xbf; + + this._codePoint = (this._codePoint << 6) | (byte & 0x3f); + + this._bytesSeen++; + + if (this._bytesSeen !== this._bytesNeeded) { + return CONTINUE; + } + + const codePoint = this._codePoint; + + this._codePoint = 0; + this._bytesNeeded = 0; + this._bytesSeen = 0; + + return codePoint; + } +} + +class UTF8Encoder implements Encoder { + handler(codePoint: number): number | number[] { + if (codePoint === END_OF_STREAM) { + return FINISHED; + } -declare global { - type BufferSource = ArrayBufferView | ArrayBuffer; + if (inRange(codePoint, 0x00, 0x7f)) { + return codePoint; + } + + let count: number; + let offset: number; + if (inRange(codePoint, 0x0080, 0x07ff)) { + count = 1; + offset = 0xc0; + } else if (inRange(codePoint, 0x0800, 0xffff)) { + count = 2; + offset = 0xe0; + } else if (inRange(codePoint, 0x10000, 0x10ffff)) { + count = 3; + offset = 0xf0; + } else { + throw TypeError(`Code point out of range: \\x${codePoint.toString(16)}`); + } + + const bytes = [(codePoint >> (6 * count)) + offset]; + + while (count > 0) { + const temp = codePoint >> (6 * (count - 1)); + bytes.push(0x80 | (temp & 0x3f)); + count--; + } - interface TextDecodeOptions { - stream?: boolean; + return bytes; } +} - interface TextDecoderOptions { - fatal?: boolean; - ignoreBOM?: boolean; +export interface TextDecodeOptions { + stream?: false; +} + +export interface TextDecoderOptions { + fatal?: boolean; + ignoreBOM?: false; +} + +export class TextDecoder { + /** Returns encoding's name, lowercased. */ + readonly encoding = "utf-8"; + /** Returns `true` if error mode is "fatal", and `false` otherwise. */ + readonly fatal: boolean = false; + /** Returns `true` if ignore BOM flag is set, and `false` otherwise. */ + readonly ignoreBOM = false; + + constructor( + label: "utf-8" = "utf-8", + options: TextDecoderOptions = { fatal: false } + ) { + if (label !== "utf-8") { + throw new TypeError("Only UTF8 decoding supported."); + } + if (options.ignoreBOM) { + throw new TypeError("Ignoring the BOM not supported."); + } + if (options.fatal) { + this.fatal = true; + } } - interface TextDecoder { - readonly encoding: string; - readonly fatal: boolean; - readonly ignoreBOM: boolean; - decode(input?: BufferSource, options?: TextDecodeOptions): string; + /** Returns the result of running encoding's decoder. */ + decode( + input?: domTypes.BufferSource, + options: TextDecodeOptions = { stream: false } + ): string { + if (options.stream) { + throw new TypeError("Stream not supported."); + } + + let bytes: Uint8Array; + if (typeof input === "object" && input instanceof ArrayBuffer) { + bytes = new Uint8Array(input); + } else if ( + typeof input === "object" && + "buffer" in input && + input.buffer instanceof ArrayBuffer + ) { + bytes = new Uint8Array(input.buffer, input.byteOffset, input.byteLength); + } else { + bytes = new Uint8Array(0); + } + + const decoder = new UTF8Decoder({ fatal: this.fatal }); + const inputStream = new Stream(bytes); + const output: number[] = []; + + while (true) { + const result = decoder.handler(inputStream, inputStream.read()); + if (result === FINISHED) { + break; + } + + if (result !== CONTINUE) { + output.push(result); + } + } + + if (output.length > 0 && output[0] === 0xfeff) { + output.shift(); + } + + return codePointsToString(output); } } -export { TextEncoder, TextDecoder } from "text-encoding"; +export class TextEncoder { + /** Returns "utf-8". */ + readonly encoding = "utf-8"; + /** Returns the result of running UTF-8's encoder. */ + encode(input = ""): Uint8Array { + const encoder = new UTF8Encoder(); + const inputStream = new Stream(stringToCodePoints(input)); + const output: number[] = []; + + while (true) { + const result = encoder.handler(inputStream.read()); + if (result === FINISHED) { + break; + } + if (Array.isArray(result)) { + output.push.apply(output, result); + } else { + output.push(result); + } + } + + return new Uint8Array(output); + } +} |