diff options
Diffstat (limited to 'std/encoding/csv.ts')
-rw-r--r-- | std/encoding/csv.ts | 462 |
1 files changed, 0 insertions, 462 deletions
diff --git a/std/encoding/csv.ts b/std/encoding/csv.ts deleted file mode 100644 index bda28c1d9..000000000 --- a/std/encoding/csv.ts +++ /dev/null @@ -1,462 +0,0 @@ -// Ported from Go: -// https://github.com/golang/go/blob/go1.12.5/src/encoding/csv/ -// Copyright 2011 The Go Authors. All rights reserved. BSD license. -// https://github.com/golang/go/blob/master/LICENSE -// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license. - -import { BufReader } from "../io/bufio.ts"; -import { TextProtoReader } from "../textproto/mod.ts"; -import { StringReader } from "../io/readers.ts"; -import { assert } from "../_util/assert.ts"; - -export { NEWLINE, stringify, StringifyError } from "./csv_stringify.ts"; - -export type { - Column, - ColumnDetails, - DataItem, - StringifyOptions, -} from "./csv_stringify.ts"; - -const INVALID_RUNE = ["\r", "\n", '"']; - -export const ERR_BARE_QUOTE = 'bare " in non-quoted-field'; -export const ERR_QUOTE = 'extraneous or missing " in quoted-field'; -export const ERR_INVALID_DELIM = "Invalid Delimiter"; -export const ERR_FIELD_COUNT = "wrong number of fields"; - -/** - * A ParseError is returned for parsing errors. - * Line numbers are 1-indexed and columns are 0-indexed. - */ -export class ParseError extends Error { - /** Line where the record starts*/ - startLine: number; - /** Line where the error occurred */ - line: number; - /** Column (rune index) where the error occurred */ - column: number | null; - - constructor( - start: number, - line: number, - column: number | null, - message: string, - ) { - super(); - this.startLine = start; - this.column = column; - this.line = line; - - if (message === ERR_FIELD_COUNT) { - this.message = `record on line ${line}: ${message}`; - } else if (start !== line) { - this.message = - `record on line ${start}; parse error on line ${line}, column ${column}: ${message}`; - } else { - this.message = - `parse error on line ${line}, column ${column}: ${message}`; - } - } -} - -/** - * @property separator - Character which separates values. Default: ',' - * @property comment - Character to start a comment. Default: '#' - * @property trimLeadingSpace - Flag to trim the leading space of the value. - * Default: 'false' - * @property lazyQuotes - Allow unquoted quote in a quoted field or non double - * quoted quotes in quoted field. Default: 'false' - * @property fieldsPerRecord - Enabling the check of fields for each row. - * If == 0, first row is used as referral for the number of fields. - */ -export interface ReadOptions { - separator?: string; - comment?: string; - trimLeadingSpace?: boolean; - lazyQuotes?: boolean; - fieldsPerRecord?: number; -} - -function chkOptions(opt: ReadOptions): void { - if (!opt.separator) { - opt.separator = ","; - } - if (!opt.trimLeadingSpace) { - opt.trimLeadingSpace = false; - } - if ( - INVALID_RUNE.includes(opt.separator) || - (typeof opt.comment === "string" && INVALID_RUNE.includes(opt.comment)) || - opt.separator === opt.comment - ) { - throw new Error(ERR_INVALID_DELIM); - } -} - -async function readRecord( - startLine: number, - reader: BufReader, - opt: ReadOptions = { separator: ",", trimLeadingSpace: false }, -): Promise<string[] | null> { - const tp = new TextProtoReader(reader); - let line = await readLine(tp); - let lineIndex = startLine + 1; - - if (line === null) return null; - if (line.length === 0) { - return []; - } - // line starting with comment character is ignored - if (opt.comment && line[0] === opt.comment) { - return []; - } - - assert(opt.separator != null); - - let fullLine = line; - let quoteError: ParseError | null = null; - const quote = '"'; - const quoteLen = quote.length; - const separatorLen = opt.separator.length; - let recordBuffer = ""; - const fieldIndexes = [] as number[]; - parseField: - for (;;) { - if (opt.trimLeadingSpace) { - line = line.trimLeft(); - } - - if (line.length === 0 || !line.startsWith(quote)) { - // Non-quoted string field - const i = line.indexOf(opt.separator); - let field = line; - if (i >= 0) { - field = field.substring(0, i); - } - // Check to make sure a quote does not appear in field. - if (!opt.lazyQuotes) { - const j = field.indexOf(quote); - if (j >= 0) { - const col = runeCount( - fullLine.slice(0, fullLine.length - line.slice(j).length), - ); - quoteError = new ParseError( - startLine + 1, - lineIndex, - col, - ERR_BARE_QUOTE, - ); - break parseField; - } - } - recordBuffer += field; - fieldIndexes.push(recordBuffer.length); - if (i >= 0) { - line = line.substring(i + separatorLen); - continue parseField; - } - break parseField; - } else { - // Quoted string field - line = line.substring(quoteLen); - for (;;) { - const i = line.indexOf(quote); - if (i >= 0) { - // Hit next quote. - recordBuffer += line.substring(0, i); - line = line.substring(i + quoteLen); - if (line.startsWith(quote)) { - // `""` sequence (append quote). - recordBuffer += quote; - line = line.substring(quoteLen); - } else if (line.startsWith(opt.separator)) { - // `","` sequence (end of field). - line = line.substring(separatorLen); - fieldIndexes.push(recordBuffer.length); - continue parseField; - } else if (0 === line.length) { - // `"\n` sequence (end of line). - fieldIndexes.push(recordBuffer.length); - break parseField; - } else if (opt.lazyQuotes) { - // `"` sequence (bare quote). - recordBuffer += quote; - } else { - // `"*` sequence (invalid non-escaped quote). - const col = runeCount( - fullLine.slice(0, fullLine.length - line.length - quoteLen), - ); - quoteError = new ParseError( - startLine + 1, - lineIndex, - col, - ERR_QUOTE, - ); - break parseField; - } - } else if (line.length > 0 || !(await isEOF(tp))) { - // Hit end of line (copy all data so far). - recordBuffer += line; - const r = await readLine(tp); - lineIndex++; - line = r ?? ""; // This is a workaround for making this module behave similarly to the encoding/csv/reader.go. - fullLine = line; - if (r === null) { - // Abrupt end of file (EOF or error). - if (!opt.lazyQuotes) { - const col = runeCount(fullLine); - quoteError = new ParseError( - startLine + 1, - lineIndex, - col, - ERR_QUOTE, - ); - break parseField; - } - fieldIndexes.push(recordBuffer.length); - break parseField; - } - recordBuffer += "\n"; // preserve line feed (This is because TextProtoReader removes it.) - } else { - // Abrupt end of file (EOF on error). - if (!opt.lazyQuotes) { - const col = runeCount(fullLine); - quoteError = new ParseError( - startLine + 1, - lineIndex, - col, - ERR_QUOTE, - ); - break parseField; - } - fieldIndexes.push(recordBuffer.length); - break parseField; - } - } - } - } - if (quoteError) { - throw quoteError; - } - const result = [] as string[]; - let preIdx = 0; - for (const i of fieldIndexes) { - result.push(recordBuffer.slice(preIdx, i)); - preIdx = i; - } - return result; -} - -async function isEOF(tp: TextProtoReader): Promise<boolean> { - return (await tp.r.peek(0)) === null; -} - -function runeCount(s: string): number { - // Array.from considers the surrogate pair. - return Array.from(s).length; -} - -async function readLine(tp: TextProtoReader): Promise<string | null> { - let line: string; - const r = await tp.readLine(); - if (r === null) return null; - line = r; - - // For backwards compatibility, drop trailing \r before EOF. - if ((await isEOF(tp)) && line.length > 0 && line[line.length - 1] === "\r") { - line = line.substring(0, line.length - 1); - } - - // Normalize \r\n to \n on all input lines. - if ( - line.length >= 2 && - line[line.length - 2] === "\r" && - line[line.length - 1] === "\n" - ) { - line = line.substring(0, line.length - 2); - line = line + "\n"; - } - - return line; -} - -/** - * Parse the CSV from the `reader` with the options provided and return `string[][]`. - * - * @param reader provides the CSV data to parse - * @param opt controls the parsing behavior - */ -export async function readMatrix( - reader: BufReader, - opt: ReadOptions = { - separator: ",", - trimLeadingSpace: false, - lazyQuotes: false, - }, -): Promise<string[][]> { - const result: string[][] = []; - let _nbFields: number | undefined; - let lineResult: string[]; - let first = true; - let lineIndex = 0; - chkOptions(opt); - - for (;;) { - const r = await readRecord(lineIndex, reader, opt); - if (r === null) break; - lineResult = r; - lineIndex++; - // If fieldsPerRecord is 0, Read sets it to - // the number of fields in the first record - if (first) { - first = false; - if (opt.fieldsPerRecord !== undefined) { - if (opt.fieldsPerRecord === 0) { - _nbFields = lineResult.length; - } else { - _nbFields = opt.fieldsPerRecord; - } - } - } - - if (lineResult.length > 0) { - if (_nbFields && _nbFields !== lineResult.length) { - throw new ParseError(lineIndex, lineIndex, null, ERR_FIELD_COUNT); - } - result.push(lineResult); - } - } - return result; -} - -/** - * Parse the CSV string/buffer with the options provided. - * - * ColumnOptions provides the column definition - * and the parse function for each entry of the - * column. - */ -export interface ColumnOptions { - /** - * Name of the column to be used as property - */ - name: string; - /** - * Parse function for the column. - * This is executed on each entry of the header. - * This can be combined with the Parse function of the rows. - */ - parse?: (input: string) => unknown; -} - -export interface ParseOptions extends ReadOptions { - /** - * If you provide `skipFirstRow: true` and `columns`, the first line will be skipped. - * If you provide `skipFirstRow: true` but not `columns`, the first line will be skipped and used as header definitions. - */ - skipFirstRow?: boolean; - - /** - * If you provide `string[]` or `ColumnOptions[]`, those names will be used for header definition. - */ - columns?: string[] | ColumnOptions[]; - - /** Parse function for rows. - * Example: - * const r = await parseFile('a,b,c\ne,f,g\n', { - * columns: ["this", "is", "sparta"], - * parse: (e: Record<string, unknown>) => { - * return { super: e.this, street: e.is, fighter: e.sparta }; - * } - * }); - * // output - * [ - * { super: "a", street: "b", fighter: "c" }, - * { super: "e", street: "f", fighter: "g" } - * ] - */ - parse?: (input: unknown) => unknown; -} - -/** - * Csv parse helper to manipulate data. - * Provides an auto/custom mapper for columns and parse function - * for columns and rows. - * @param input Input to parse. Can be a string or BufReader. - * @param opt options of the parser. - * @returns If you don't provide `opt.skipFirstRow`, `opt.parse`, and `opt.columns`, it returns `string[][]`. - * If you provide `opt.skipFirstRow` or `opt.columns` but not `opt.parse`, it returns `object[]`. - * If you provide `opt.parse`, it returns an array where each element is the value returned from `opt.parse`. - */ -export async function parse( - input: string | BufReader, - opt: ParseOptions = { - skipFirstRow: false, - }, -): Promise<unknown[]> { - let r: string[][]; - if (input instanceof BufReader) { - r = await readMatrix(input, opt); - } else { - r = await readMatrix(new BufReader(new StringReader(input)), opt); - } - if (opt.skipFirstRow || opt.columns) { - let headers: ColumnOptions[] = []; - let i = 0; - - if (opt.skipFirstRow) { - const head = r.shift(); - assert(head != null); - headers = head.map( - (e): ColumnOptions => { - return { - name: e, - }; - }, - ); - i++; - } - - if (opt.columns) { - if (typeof opt.columns[0] !== "string") { - headers = opt.columns as ColumnOptions[]; - } else { - const h = opt.columns as string[]; - headers = h.map( - (e): ColumnOptions => { - return { - name: e, - }; - }, - ); - } - } - return r.map((e): unknown => { - if (e.length !== headers.length) { - throw `Error number of fields line:${i}`; - } - i++; - const out: Record<string, unknown> = {}; - for (let j = 0; j < e.length; j++) { - const h = headers[j]; - if (h.parse) { - out[h.name] = h.parse(e[j]); - } else { - out[h.name] = e[j]; - } - } - if (opt.parse) { - return opt.parse(out); - } - return out; - }); - } - if (opt.parse) { - return r.map((e: string[]): unknown => { - assert(opt.parse, "opt.parse must be set"); - return opt.parse(e); - }); - } - return r; -} |