summary refs log tree commit diff
path: root/std/encoding/csv.ts
diff options
context:
space:
mode:
author Ryan Dahl <ry@tinyclouds.org> 2019-10-09 17:10:09 -0400
committer Ryan Dahl <ry@tinyclouds.org> 2019-10-09 17:10:09 -0400
commit 151ce0266eb4de2c8fc600c81c192a5f791b6169 (patch)
tree 7cb04016a1c7ee88adde83814548d7a9409dcde3 /std/encoding/csv.ts
parent a355f7c807686918734416d91b79c26c21effba9 (diff)
Move everything into std subdir
Diffstat (limited to 'std/encoding/csv.ts')
-rw-r--r-- std/encoding/csv.ts 251
1 files changed, 251 insertions, 0 deletions
diff --git a/std/encoding/csv.ts b/std/encoding/csv.ts
new file mode 100644
index 000000000..10d72a8a5
--- /dev/null
+++ b/std/encoding/csv.ts
@@ -0,0 +1,251 @@
+// Ported from Go:
+// https://github.com/golang/go/blob/go1.12.5/src/encoding/csv/
+// Copyright 2018-2019 the Deno authors. All rights reserved. MIT license.
+
+import { BufReader } from "../io/bufio.ts";
+import { TextProtoReader } from "../textproto/mod.ts";
+import { StringReader } from "../io/readers.ts";
+
// Characters that may be used neither as the field delimiter nor as the
// comment marker: carriage return, line feed and the double quote.
const INVALID_RUNE: string[] = ["\r", "\n", "\""];
+
/**
 * Error raised when a CSV record cannot be parsed.
 *
 * The capitalised property names appear to follow Go's `csv.ParseError`
 * (this file is a port of Go's encoding/csv); they are kept as-is because
 * callers read them.
 */
export class ParseError extends Error {
  /** Line number supplied as the start of the offending record. */
  StartLine: number;
  /** Line number at which the problem was detected. */
  Line: number;

  /**
   * @param start Line on which the record started.
   * @param line Line on which the error was detected.
   * @param message Human-readable description of the problem.
   */
  constructor(start: number, line: number, message: string) {
    super(message);
    // Without an explicit name the instance reports itself as a plain
    // "Error" in stack traces and toString(), hiding where it came from.
    this.name = "ParseError";
    this.StartLine = start;
    this.Line = line;
  }
}
+
/**
 * Options understood by the CSV reading functions in this file.
 * Every member is optional.
 */
export interface ParseOptions {
  /**
   * Character which separates values. A missing or empty value is replaced
   * by `,` during option validation.
   */
  comma?: string;
  /**
   * Character that starts a comment: a line whose first non-blank character
   * equals it is skipped. No default is applied in this file, so when it is
   * omitted no line is treated as a comment.
   */
  comment?: string;
  /** Trim leading white space from each field. Default: `false`. */
  trimLeadingSpace?: boolean;
  /**
   * Tolerate stray double quotes in a field instead of raising a
   * `ParseError`. Default: `false`.
   */
  lazyQuotes?: boolean;
  /**
   * Enables the per-record field count check. When left undefined no check
   * is made. When `0`, the number of fields in the first record is used as
   * the reference for the following ones. Otherwise every record must have
   * exactly this many fields.
   */
  fieldsPerRecord?: number;
}
+
/**
 * Fills in missing defaults on `opt` (in place) and validates the delimiter
 * configuration.
 *
 * @param opt Options object to normalise; `comma` and `trimLeadingSpace` are
 *   assigned when they are missing or falsy.
 * @throws Error("Invalid Delimiter") when `comma` or `comment` is one of the
 *   characters in INVALID_RUNE, or when both are the same string.
 */
function chkOptions(opt: ParseOptions): void {
  if (!opt.comma) {
    opt.comma = ",";
  }
  if (!opt.trimLeadingSpace) {
    opt.trimLeadingSpace = false;
  }

  const { comma, comment } = opt;
  const delimitersAreValid =
    !INVALID_RUNE.includes(comma!) &&
    !INVALID_RUNE.includes(comment!) &&
    comma !== comment;

  if (!delimitersAreValid) {
    throw new Error("Invalid Delimiter");
  }
}
+
/**
 * Reads a single CSV record from `reader`.
 *
 * Fields are scanned with quote awareness, following the rules of Go's
 * encoding/csv reader that this file is ported from:
 *  - a field that starts with `"` is a quoted field; inside it the delimiter
 *    is ordinary text and `""` stands for one literal quote;
 *  - a quoted field may continue over several input lines, which are joined
 *    with "\n";
 *  - with `lazyQuotes` disabled, a quote inside an unquoted field, a quote
 *    followed by unexpected text, or a quoted field left open at end of
 *    input raises a ParseError.
 *
 * @param Startline Line number reported as `StartLine` in any ParseError.
 * @param reader Buffered source the record is read from.
 * @param opt Parsing options; `comma` falls back to "," when unset.
 * @returns The fields of the record, an empty array for a blank or comment
 *   line, or Deno.EOF when no more input is available.
 * @throws ParseError on a malformed record (see above).
 */
async function read(
  Startline: number,
  reader: BufReader,
  opt: ParseOptions = { comma: ",", trimLeadingSpace: false }
): Promise<string[] | Deno.EOF> {
  const tp = new TextProtoReader(reader);
  const firstLine = await tp.readLine();
  if (firstLine === Deno.EOF) return Deno.EOF;

  // NOTE(review): readLine() is expected to return the line without its
  // terminator; one is stripped defensively in case it does not - confirm.
  const stripEol = (s: string): string => s.replace(/\r?\n$/, "");

  let rest = stripEol(firstLine);
  // Advances only when a quoted field continues on following lines.
  let lineIndex = Startline;

  const leading = rest.trimLeft();
  // Blank line: reported as an empty record.
  if (leading.length === 0) {
    return [];
  }
  // A line starting with the comment character is ignored.
  if (opt.comment && leading[0] === opt.comment) {
    return [];
  }

  // An empty delimiter would never advance the scan below.
  const comma = opt.comma || ",";
  const fields: string[] = [];
  let recordDone = false;

  while (!recordDone) {
    if (opt.trimLeadingSpace) {
      rest = rest.trimLeft();
    }

    if (rest[0] !== '"') {
      // Unquoted field: everything up to the next delimiter or end of line.
      const end = rest.indexOf(comma);
      const field = end === -1 ? rest : rest.substring(0, end);
      if (!opt.lazyQuotes && field.indexOf('"') !== -1) {
        throw new ParseError(
          Startline,
          lineIndex,
          'bare " in non-quoted-field'
        );
      }
      fields.push(field);
      if (end === -1) {
        recordDone = true;
      } else {
        rest = rest.substring(end + comma.length);
      }
      continue;
    }

    // Quoted field: skip the opening quote, then consume up to the closing
    // one, decoding "" and pulling in more lines when needed.
    rest = rest.substring(1);
    let field = "";
    for (;;) {
      const quoteAt = rest.indexOf('"');

      if (quoteAt === -1) {
        // No closing quote on this line: the field continues on the next.
        field += rest;
        const next = await tp.readLine();
        if (next === Deno.EOF) {
          if (!opt.lazyQuotes) {
            throw new ParseError(
              Startline,
              lineIndex,
              'extraneous or missing " in quoted-field'
            );
          }
          recordDone = true;
          break;
        }
        lineIndex++;
        field += "\n";
        rest = stripEol(next);
        continue;
      }

      field += rest.substring(0, quoteAt);
      rest = rest.substring(quoteAt + 1);

      if (rest[0] === '"') {
        // "" inside a quoted field is an escaped literal quote.
        field += '"';
        rest = rest.substring(1);
      } else if (rest.startsWith(comma)) {
        // Closing quote followed by the delimiter: the field is complete.
        rest = rest.substring(comma.length);
        break;
      } else if (rest.length === 0) {
        // Closing quote at end of line: the record is complete.
        recordDone = true;
        break;
      } else if (opt.lazyQuotes) {
        // Stray quote tolerated: keep it as part of the field.
        field += '"';
      } else {
        throw new ParseError(
          Startline,
          lineIndex,
          'extraneous or missing " in quoted-field'
        );
      }
    }
    fields.push(field);
  }

  return fields;
}
+
/**
 * Reads every remaining CSV record from `reader`.
 *
 * Blank and comment lines (reported by `read` as empty records) are skipped.
 * The field-count check is driven by `opt.fieldsPerRecord`:
 *  - undefined or negative: no check;
 *  - 0: the first non-empty record fixes the expected field count;
 *  - positive: every record must have exactly that many fields.
 *
 * @param reader Buffered source to read from.
 * @param opt Parsing options; validated and completed by `chkOptions`.
 * @returns All non-empty records, in input order.
 * @throws ParseError("wrong number of fields") when a record does not have
 *   the expected number of fields, plus anything `read`/`chkOptions` throw.
 */
export async function readAll(
  reader: BufReader,
  opt: ParseOptions = {
    comma: ",",
    trimLeadingSpace: false,
    lazyQuotes: false
  }
): Promise<string[][]> {
  chkOptions(opt);

  const wanted = opt.fieldsPerRecord;
  // Only a positive setting fixes the count up front; a negative one means
  // "no check" rather than an impossible count that rejects every record.
  let expectedFields: number | undefined =
    wanted !== undefined && wanted > 0 ? wanted : undefined;
  const inferFromFirstRecord = wanted === 0;

  const records: string[][] = [];
  let lineIndex = 0;

  for (;;) {
    const record = await read(lineIndex, reader, opt);
    if (record === Deno.EOF) break;
    lineIndex++;

    // Blank or comment line: nothing to store, and it must not be taken as
    // the reference record for the field count.
    if (record.length === 0) continue;

    if (expectedFields === undefined) {
      if (inferFromFirstRecord) {
        expectedFields = record.length;
      }
    } else if (record.length !== expectedFields) {
      throw new ParseError(lineIndex, lineIndex, "wrong number of fields");
    }
    records.push(record);
  }
  return records;
}
+
/**
 * Describes one output column: the property name it is stored under and,
 * optionally, how its raw text is converted.
 */
export interface HeaderOption {
  /** Property name used for this column in each produced row object. */
  name: string;
  /**
   * Converter applied to every raw value of the column. When absent the raw
   * string is kept.
   */
  parse?: (input: string) => unknown;
}
+
/**
 * Options for `parse`: the reader options plus column mapping and an
 * optional per-row transform.
 */
export interface ExtendedParseOptions extends ParseOptions {
  /**
   * Column definition. `false` keeps rows as string arrays; `true` takes the
   * column names from the first row of the input; an array supplies the
   * names (and optional per-column converters) explicitly.
   */
  header: boolean | string[] | HeaderOption[];
  /**
   * Transform applied to each row before it is returned. It receives the
   * mapped row object when `header` is set, otherwise the raw string array.
   */
  parse?: (input: unknown) => unknown;
}
+
/**
 * Csv parse helper to manipulate data.
 * Provides an auto/custom mapper for columns and parse function
 * for columns and rows.
 * @param input Input to parse. Can be a string or BufReader.
 * @param opt options of the parser.
 * @param [opt.header=false] Column definition: `true` uses the first row of
 * the input as column names, an array supplies them explicitly.
 * @param [opt.parse] Parse function applied to every row.
 * @returns The rows as string arrays when no header is configured, as
 * objects keyed by column name otherwise, or whatever `opt.parse` returns
 * for each of them. An empty array when `header` is `true` and the input
 * holds no record at all.
 * @throws ParseError when a row does not have as many fields as there are
 * columns, plus anything thrown by `readAll`.
 * Example:
 *       const r = await parse('a,b,c\ne,f,g\n', {
 *         header: ["this", "is", "sparta"],
 *         parse: (e: Record<string, unknown>) => {
 *           return { super: e.this, street: e.is, fighter: e.sparta };
 *         }
 *       });
 *       // output
 *       [
 *         { super: "a", street: "b", fighter: "c" },
 *         { super: "e", street: "f", fighter: "g" }
 *       ]
 */
export async function parse(
  input: string | BufReader,
  opt: ExtendedParseOptions = {
    header: false
  }
): Promise<unknown[]> {
  const source =
    input instanceof BufReader ? input : new BufReader(new StringReader(input));
  const rows = await readAll(source, opt);
  // Captured in a const so the narrowing survives inside the callbacks.
  const rowParser = opt.parse;

  if (!opt.header) {
    if (rowParser) {
      return rows.map((row: string[]): unknown => rowParser(row));
    }
    return rows;
  }

  let headers: HeaderOption[];
  // Index reported in the error message; starts at 1 when the first input
  // row was consumed as the header.
  let rowIndex = 0;
  if (Array.isArray(opt.header)) {
    if (typeof opt.header[0] !== "string") {
      headers = opt.header as HeaderOption[];
    } else {
      headers = (opt.header as string[]).map(
        (name): HeaderOption => ({ name })
      );
    }
  } else {
    const headerRow = rows.shift();
    // Empty input: there is no header row and therefore nothing to map.
    if (headerRow === undefined) {
      return [];
    }
    headers = headerRow.map((name): HeaderOption => ({ name }));
    rowIndex++;
  }

  return rows.map((row): unknown => {
    if (row.length !== headers.length) {
      // Raised as an Error subclass (not a bare string) so callers get a
      // stack trace and can use instanceof.
      throw new ParseError(
        rowIndex,
        rowIndex,
        `Error number of fields line:${rowIndex}`
      );
    }
    rowIndex++;

    const out: Record<string, unknown> = {};
    row.forEach((value, column): void => {
      const header = headers[column];
      out[header.name] = header.parse ? header.parse(value) : value;
    });
    return rowParser ? rowParser(out) : out;
  });
}