summaryrefslogtreecommitdiff
path: root/std/encoding/csv.ts
diff options
context:
space:
mode:
Diffstat (limited to 'std/encoding/csv.ts')
-rw-r--r--std/encoding/csv.ts462
1 files changed, 0 insertions, 462 deletions
diff --git a/std/encoding/csv.ts b/std/encoding/csv.ts
deleted file mode 100644
index bda28c1d9..000000000
--- a/std/encoding/csv.ts
+++ /dev/null
@@ -1,462 +0,0 @@
-// Ported from Go:
-// https://github.com/golang/go/blob/go1.12.5/src/encoding/csv/
-// Copyright 2011 The Go Authors. All rights reserved. BSD license.
-// https://github.com/golang/go/blob/master/LICENSE
-// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license.
-
-import { BufReader } from "../io/bufio.ts";
-import { TextProtoReader } from "../textproto/mod.ts";
-import { StringReader } from "../io/readers.ts";
-import { assert } from "../_util/assert.ts";
-
-export { NEWLINE, stringify, StringifyError } from "./csv_stringify.ts";
-
-export type {
- Column,
- ColumnDetails,
- DataItem,
- StringifyOptions,
-} from "./csv_stringify.ts";
-
-// Characters rejected by chkOptions as a separator or as a comment marker.
-const INVALID_RUNE = ["\r", "\n", '"'];
-
-// Messages carried by the errors raised in this module.
-// Quote found inside a field that did not start with a quote.
-export const ERR_BARE_QUOTE = 'bare " in non-quoted-field';
-// Quoted field that is malformed or never closed.
-export const ERR_QUOTE = 'extraneous or missing " in quoted-field';
-// Separator / comment option rejected by chkOptions.
-export const ERR_INVALID_DELIM = "Invalid Delimiter";
-// Record whose number of fields differs from the expected one.
-export const ERR_FIELD_COUNT = "wrong number of fields";
-
-/**
- * Error describing a failure to parse CSV input.
- * Lines are counted from 1 and columns from 0.
- */
-export class ParseError extends Error {
-  /** Line on which the offending record begins. */
-  startLine: number;
-  /** Line on which the problem was detected. */
-  line: number;
-  /** Rune index of the problem within the line; null when no column is given. */
-  column: number | null;
-
-  /**
-   * @param start line where the record starts
-   * @param line line where the error occurred
-   * @param column rune index where the error occurred, or null
-   * @param message one of the ERR_* messages; it selects the wording of `this.message`
-   */
-  constructor(
-    start: number,
-    line: number,
-    column: number | null,
-    message: string,
-  ) {
-    super();
-    this.startLine = start;
-    this.line = line;
-    this.column = column;
-
-    // Field-count errors name only the record line; every other error names
-    // the position, prefixed by the record's first line when the two differ.
-    const position = `parse error on line ${line}, column ${column}: ${message}`;
-    this.message = message === ERR_FIELD_COUNT
-      ? `record on line ${line}: ${message}`
-      : start !== line
-      ? `record on line ${start}; ${position}`
-      : position;
-  }
-}
-
-/**
- * Options controlling how records are read.
- *
- * @property separator - Character which separates values. Default: ','
- * @property comment - Character which marks a comment line when it is the
- *           first character of the line. The option check applies no default:
- *           unless this is set, no line is treated as a comment.
- * @property trimLeadingSpace - Flag to trim the leading space of each value.
- *           Default: 'false'
- * @property lazyQuotes - Allow a quote inside a non-quoted field and a
- *           non-doubled quote inside a quoted field. Default: 'false'
- * @property fieldsPerRecord - Enables the check of the number of fields of
- *           each row. If undefined, no check is made. If == 0, the first row
- *           is used as the reference for the number of fields. Any other value
- *           is the number of fields every row must have.
- */
-export interface ReadOptions {
-  separator?: string;
-  comment?: string;
-  trimLeadingSpace?: boolean;
-  lazyQuotes?: boolean;
-  fieldsPerRecord?: number;
-}
-
-/**
- * Fills in the defaults of `opt` in place and validates its delimiters.
- *
- * @param opt options to complete; `separator` and `trimLeadingSpace` are
- *            written back when they were missing or falsy
- * @throws Error with message ERR_INVALID_DELIM when the separator or the
- *         comment marker is "\r", "\n" or '"', or when both are the same
- */
-function chkOptions(opt: ReadOptions): void {
-  opt.separator ||= ",";
-  opt.trimLeadingSpace ||= false;
-
-  const isForbidden = (rune: string): boolean => INVALID_RUNE.includes(rune);
-
-  if (isForbidden(opt.separator)) {
-    throw new Error(ERR_INVALID_DELIM);
-  }
-  if (typeof opt.comment === "string" && isForbidden(opt.comment)) {
-    throw new Error(ERR_INVALID_DELIM);
-  }
-  if (opt.separator === opt.comment) {
-    throw new Error(ERR_INVALID_DELIM);
-  }
-}
-
-/**
- * Reads one CSV record from `reader` and splits it into its fields.
- *
- * A record normally occupies one line, but a quoted field may continue over
- * several lines; those lines are consumed as well.
- *
- * @param startLine `startLine + 1` is the line number reported for the start
- *                  of the record in any ParseError
- * @param reader source of the CSV text
- * @param opt reading options; `opt.separator` must be set (asserted below)
- * @returns null when no line could be read, an empty array for an empty line
- *          or a comment line, otherwise the fields of the record
- * @throws ParseError with ERR_BARE_QUOTE or ERR_QUOTE on malformed quoting
- */
-async function readRecord(
-  startLine: number,
-  reader: BufReader,
-  opt: ReadOptions = { separator: ",", trimLeadingSpace: false },
-): Promise<string[] | null> {
-  const tp = new TextProtoReader(reader);
-  let line = await readLine(tp);
-  // Line number of the line currently being parsed, used in error reports.
-  let lineIndex = startLine + 1;
-
-  if (line === null) return null;
-  if (line.length === 0) {
-    return [];
-  }
-  // line starting with comment character is ignored
-  if (opt.comment && line[0] === opt.comment) {
-    return [];
-  }
-
-  assert(opt.separator != null);
-
-  // `line` shrinks as fields are consumed; `fullLine` keeps the whole current
-  // line so that error columns can be computed from it.
-  let fullLine = line;
-  let quoteError: ParseError | null = null;
-  const quote = '"';
-  const quoteLen = quote.length;
-  const separatorLen = opt.separator.length;
-  // All field texts are appended to `recordBuffer`; `fieldIndexes` records the
-  // end offset of each field so the buffer can be cut up at the end.
-  let recordBuffer = "";
-  const fieldIndexes = [] as number[];
-  parseField:
-  for (;;) {
-    if (opt.trimLeadingSpace) {
-      line = line.trimLeft();
-    }
-
-    if (line.length === 0 || !line.startsWith(quote)) {
-      // Non-quoted string field
-      const i = line.indexOf(opt.separator);
-      let field = line;
-      if (i >= 0) {
-        field = field.substring(0, i);
-      }
-      // Check to make sure a quote does not appear in field.
-      if (!opt.lazyQuotes) {
-        const j = field.indexOf(quote);
-        if (j >= 0) {
-          // Column = number of runes of the full line that precede the quote.
-          const col = runeCount(
-            fullLine.slice(0, fullLine.length - line.slice(j).length),
-          );
-          quoteError = new ParseError(
-            startLine + 1,
-            lineIndex,
-            col,
-            ERR_BARE_QUOTE,
-          );
-          break parseField;
-        }
-      }
-      recordBuffer += field;
-      fieldIndexes.push(recordBuffer.length);
-      if (i >= 0) {
-        // A separator was found: skip it and parse the next field.
-        line = line.substring(i + separatorLen);
-        continue parseField;
-      }
-      // No separator left: this was the last field of the record.
-      break parseField;
-    } else {
-      // Quoted string field
-      line = line.substring(quoteLen);
-      for (;;) {
-        const i = line.indexOf(quote);
-        if (i >= 0) {
-          // Hit next quote.
-          recordBuffer += line.substring(0, i);
-          line = line.substring(i + quoteLen);
-          if (line.startsWith(quote)) {
-            // `""` sequence (append quote).
-            recordBuffer += quote;
-            line = line.substring(quoteLen);
-          } else if (line.startsWith(opt.separator)) {
-            // `","` sequence (end of field).
-            line = line.substring(separatorLen);
-            fieldIndexes.push(recordBuffer.length);
-            continue parseField;
-          } else if (0 === line.length) {
-            // `"\n` sequence (end of line).
-            fieldIndexes.push(recordBuffer.length);
-            break parseField;
-          } else if (opt.lazyQuotes) {
-            // `"` sequence (bare quote).
-            recordBuffer += quote;
-          } else {
-            // `"*` sequence (invalid non-escaped quote).
-            // Column = number of runes preceding the quote just consumed.
-            const col = runeCount(
-              fullLine.slice(0, fullLine.length - line.length - quoteLen),
-            );
-            quoteError = new ParseError(
-              startLine + 1,
-              lineIndex,
-              col,
-              ERR_QUOTE,
-            );
-            break parseField;
-          }
-        } else if (line.length > 0 || !(await isEOF(tp))) {
-          // Hit end of line (copy all data so far).
-          // The quoted field continues on the next line, which is read here.
-          recordBuffer += line;
-          const r = await readLine(tp);
-          lineIndex++;
-          line = r ?? ""; // This is a workaround for making this module behave similarly to the encoding/csv/reader.go.
-          fullLine = line;
-          if (r === null) {
-            // Abrupt end of file (EOF or error).
-            if (!opt.lazyQuotes) {
-              const col = runeCount(fullLine);
-              quoteError = new ParseError(
-                startLine + 1,
-                lineIndex,
-                col,
-                ERR_QUOTE,
-              );
-              break parseField;
-            }
-            // With lazyQuotes the unterminated field is accepted as is.
-            fieldIndexes.push(recordBuffer.length);
-            break parseField;
-          }
-          recordBuffer += "\n"; // preserve line feed (This is because TextProtoReader removes it.)
-        } else {
-          // Abrupt end of file (EOF or error).
-          if (!opt.lazyQuotes) {
-            const col = runeCount(fullLine);
-            quoteError = new ParseError(
-              startLine + 1,
-              lineIndex,
-              col,
-              ERR_QUOTE,
-            );
-            break parseField;
-          }
-          // With lazyQuotes the unterminated field is accepted as is.
-          fieldIndexes.push(recordBuffer.length);
-          break parseField;
-        }
-      }
-    }
-  }
-  if (quoteError) {
-    throw quoteError;
-  }
-  // Cut the buffer at the recorded offsets to obtain the individual fields.
-  const result = [] as string[];
-  let preIdx = 0;
-  for (const i of fieldIndexes) {
-    result.push(recordBuffer.slice(preIdx, i));
-    preIdx = i;
-  }
-  return result;
-}
-
-/**
- * Resolves to true when `tp.r.peek(0)` yields null, which this module treats
- * as the end of the input.
- */
-async function isEOF(tp: TextProtoReader): Promise<boolean> {
-  const peeked = await tp.r.peek(0);
-  return peeked === null;
-}
-
-/** Counts the code points of `s`; a surrogate pair counts as a single rune. */
-function runeCount(s: string): number {
-  let runes = 0;
-  // String iteration yields whole code points, not UTF-16 code units.
-  for (const _rune of s) {
-    runes++;
-  }
-  return runes;
-}
-
-/**
- * Reads one line through `tp` and normalizes its ending.
- *
- * @returns null when `tp.readLine()` yields null, otherwise the line with a
- *          trailing "\r" dropped if it is the last line before EOF and a
- *          trailing "\r\n" replaced by "\n"
- */
-async function readLine(tp: TextProtoReader): Promise<string | null> {
-  const raw = await tp.readLine();
-  if (raw === null) return null;
-
-  let line = raw;
-
-  // For backwards compatibility, drop trailing \r before EOF.
-  if ((await isEOF(tp)) && line.endsWith("\r")) {
-    line = line.slice(0, -1);
-  }
-
-  // Normalize \r\n to \n on all input lines.
-  if (line.endsWith("\r\n")) {
-    line = line.slice(0, -2) + "\n";
-  }
-
-  return line;
-}
-
-/**
- * Parse the CSV from the `reader` with the options provided and return `string[][]`.
- *
- * Empty lines and comment lines do not produce a row. When
- * `opt.fieldsPerRecord` is set, every row must have the expected number of
- * fields; with `fieldsPerRecord: 0` the expected number is the field count of
- * the first row that actually holds data.
- *
- * @param reader provides the CSV data to parse
- * @param opt controls the parsing behavior; missing defaults are written back
- *            to this object by the option check
- * @returns the rows of the input, each row being the list of its fields
- * @throws Error with message ERR_INVALID_DELIM when the options are rejected
- * @throws ParseError on malformed quoting or on a wrong number of fields
- */
-export async function readMatrix(
-  reader: BufReader,
-  opt: ReadOptions = {
-    separator: ",",
-    trimLeadingSpace: false,
-    lazyQuotes: false,
-  },
-): Promise<string[][]> {
-  chkOptions(opt);
-
-  const result: string[][] = [];
-  let expectedFields: number | undefined;
-  let sawFirstRow = false;
-  let lineIndex = 0;
-
-  for (;;) {
-    const record = await readRecord(lineIndex, reader, opt);
-    if (record === null) break;
-    lineIndex++;
-
-    // readRecord reports empty lines and comment lines as an empty record.
-    if (record.length === 0) continue;
-
-    // The reference count is taken from the first row that holds data. Taking
-    // it from a leading empty/comment line would yield 0 and silently turn
-    // the field-count check off for the whole input.
-    if (!sawFirstRow) {
-      sawFirstRow = true;
-      if (opt.fieldsPerRecord !== undefined) {
-        expectedFields = opt.fieldsPerRecord === 0
-          ? record.length
-          : opt.fieldsPerRecord;
-      }
-    }
-
-    if (expectedFields !== undefined && expectedFields !== record.length) {
-      throw new ParseError(lineIndex, lineIndex, null, ERR_FIELD_COUNT);
-    }
-    result.push(record);
-  }
-  return result;
-}
-
-/**
- * Definition of one column of the parsed output.
- *
- * ColumnOptions provides the name under which the values of the column are
- * stored and an optional parse function applied to each of those values.
- */
-export interface ColumnOptions {
-  /**
-   * Name of the column to be used as property
-   */
-  name: string;
-  /**
-   * Parse function for the column.
-   * This is executed on the value of this column in each row, and the
-   * returned value is stored under `name`.
-   * This can be combined with the Parse function of the rows.
-   */
-  parse?: (input: string) => unknown;
-}
-
-/** Options of `parse`: the reading options plus header and row handling. */
-export interface ParseOptions extends ReadOptions {
-  /**
-   * If you provide `skipFirstRow: true` and `columns`, the first line will be skipped.
-   * If you provide `skipFirstRow: true` but not `columns`, the first line will be skipped and used as header definitions.
-   */
-  skipFirstRow?: boolean;
-
-  /**
-   * If you provide `string[]` or `ColumnOptions[]`, those names will be used for header definition.
-   */
-  columns?: string[] | ColumnOptions[];
-
-  /** Parse function for rows.
-   * It receives the object built for the row when `skipFirstRow` or `columns`
-   * is provided, and the raw `string[]` of the row otherwise.
-   * Example:
-   *     const r = await parse('a,b,c\ne,f,g\n', {
-   *      columns: ["this", "is", "sparta"],
-   *       parse: (e: Record<string, unknown>) => {
-   *         return { super: e.this, street: e.is, fighter: e.sparta };
-   *       }
-   *     });
-   * // output
-   * [
-   *   { super: "a", street: "b", fighter: "c" },
-   *   { super: "e", street: "f", fighter: "g" }
-   * ]
-   */
-  parse?: (input: unknown) => unknown;
-}
-
-/**
- * Csv parse helper to manipulate data.
- * Provides an auto/custom mapper for columns and parse function
- * for columns and rows.
- * @param input Input to parse. Can be a string or BufReader.
- * @param opt options of the parser.
- * @returns If you don't provide `opt.skipFirstRow`, `opt.parse`, and `opt.columns`, it returns `string[][]`.
- *   If you provide `opt.skipFirstRow` or `opt.columns` but not `opt.parse`, it returns `object[]`.
- *   If you provide `opt.parse`, it returns an array where each element is the value returned from `opt.parse`.
- * @throws Error when a row does not have as many fields as there are headers.
- * @throws ParseError when the input itself is malformed.
- */
-export async function parse(
-  input: string | BufReader,
-  opt: ParseOptions = {
-    skipFirstRow: false,
-  },
-): Promise<unknown[]> {
-  const reader = input instanceof BufReader
-    ? input
-    : new BufReader(new StringReader(input));
-  const rows = await readMatrix(reader, opt);
-
-  if (opt.skipFirstRow || opt.columns) {
-    let headers: ColumnOptions[] = [];
-    // Index of the row being mapped, used in the error message below.
-    let i = 0;
-
-    if (opt.skipFirstRow) {
-      // The first row is removed from the data and used as header names.
-      const head = rows.shift();
-      assert(head != null);
-      headers = head.map((name): ColumnOptions => ({ name }));
-      i++;
-    }
-
-    if (opt.columns) {
-      // Explicit columns take precedence over the header row. The kind of the
-      // array is decided from its first element.
-      if (typeof opt.columns[0] !== "string") {
-        headers = opt.columns as ColumnOptions[];
-      } else {
-        const names = opt.columns as string[];
-        headers = names.map((name): ColumnOptions => ({ name }));
-      }
-    }
-
-    return rows.map((fields): unknown => {
-      if (fields.length !== headers.length) {
-        // Throw a real Error (not a bare string) so callers get a stack trace
-        // and can rely on `instanceof Error`.
-        throw new Error(`Error number of fields line:${i}`);
-      }
-      i++;
-      const out: Record<string, unknown> = {};
-      for (let j = 0; j < fields.length; j++) {
-        const header = headers[j];
-        out[header.name] = header.parse ? header.parse(fields[j]) : fields[j];
-      }
-      return opt.parse ? opt.parse(out) : out;
-    });
-  }
-
-  if (opt.parse) {
-    return rows.map((fields: string[]): unknown => {
-      assert(opt.parse, "opt.parse must be set");
-      return opt.parse(fields);
-    });
-  }
-  return rows;
-}