summary | refs | log | tree | commit | diff
path: root/encoding
diff options
context:
space:
mode:
Diffstat (limited to 'encoding')
-rw-r--r-- encoding/csv.ts 151
-rw-r--r-- encoding/csv_test.ts 460
-rw-r--r-- encoding/test.ts 1
3 files changed, 612 insertions, 0 deletions
diff --git a/encoding/csv.ts b/encoding/csv.ts
new file mode 100644
index 000000000..3d50180cc
--- /dev/null
+++ b/encoding/csv.ts
@@ -0,0 +1,151 @@
+// Ported from Go:
+// https://github.com/golang/go/blob/go1.12.5/src/encoding/csv/
+// Copyright 2018-2019 the Deno authors. All rights reserved. MIT license.
+
+import { BufReader, BufState } from "../io/bufio.ts";
+import { TextProtoReader } from "../textproto/mod.ts";
+
+// Characters that may not be used as the field delimiter or the comment
+// marker; chkOptions rejects options whose comma or comment is one of these.
+const INVALID_RUNE = ["\r", "\n", '"'];
+
+/**
+ * Error produced when a CSV line cannot be parsed or has the wrong number
+ * of fields.
+ *
+ * `StartLine` and `Line` hold the two line numbers passed to the constructor.
+ * NOTE(review): the names mirror Go's csv.ParseError (presumably the line
+ * where the record starts and the line where the error was detected); the
+ * callers in this file pass the same value for both.
+ */
+export class ParseError extends Error {
+  StartLine: number;
+  Line: number;
+  /**
+   * @param start value stored in `StartLine`
+   * @param line value stored in `Line`
+   * @param message human-readable description, forwarded to `Error`
+   */
+  constructor(start: number, line: number, message: string) {
+    super(message);
+    // Without this the name is inherited from Error.prototype, so the error
+    // would be reported as a plain "Error".
+    this.name = "ParseError";
+    this.StartLine = start;
+    this.Line = line;
+  }
+}
+
+/** Options accepted by read and readAll. */
+export interface ParseOptions {
+ // Field delimiter; each line is split on this string.
+ comma: string;
+ // Comment marker; a line whose first non-whitespace character equals it
+ // yields no record.
+ comment?: string;
+ // When true, leading whitespace is stripped from every field.
+ trimLeadingSpace: boolean;
+ // When falsy, read reports a ParseError for a field that still contains a
+ // `"` (and does not start with one) once its surrounding quotes are stripped.
+ lazyQuotes?: boolean;
+ // Used by readAll only: undefined disables the field-count check, 0 takes
+ // the expected count from the first record, any other value is the
+ // expected count itself.
+ fieldsPerRecord?: number;
+}
+
+/**
+ * Validates the delimiter-related parse options.
+ *
+ * The options are rejected when `comma` or `comment` is one of the
+ * characters in INVALID_RUNE, or when both are the same string.
+ *
+ * @param opt options to validate
+ * @returns an Error with message "Invalid Delimiter" when the options are
+ *          rejected, otherwise null
+ */
+function chkOptions(opt: ParseOptions): Error | null {
+  const { comma, comment } = opt;
+  const delimitersOk =
+    !INVALID_RUNE.includes(comma) &&
+    !INVALID_RUNE.includes(comment) &&
+    comma !== comment;
+  return delimitersOk ? null : new Error("Invalid Delimiter");
+}
+
+/**
+ * Reads a single line from `reader` and splits it into CSV fields.
+ *
+ * @param Startline line number recorded in the ParseError created on a quote
+ *        error (used for both of its line fields)
+ * @param reader buffered reader the line is taken from
+ * @param opt parse options; note that the default used here enables "#" as
+ *        the comment marker
+ * @returns a `[fields, state]` pair: `fields` is empty for a blank line or a
+ *          comment line; `state` is whatever `tp.readLine()` reported, or a
+ *          ParseError (with empty `fields`) when a stray quote is found
+ *
+ * NOTE(review): the line is split on `opt.comma` before quotes are looked
+ * at, so a delimiter inside a quoted field still splits the field.
+ */
+export async function read(
+ Startline: number,
+ reader: BufReader,
+ opt: ParseOptions = { comma: ",", comment: "#", trimLeadingSpace: false }
+): Promise<[string[], BufState]> {
+ const tp = new TextProtoReader(reader);
+ let err: BufState;
+ let line: string;
+ let result: string[] = [];
+ // Copy of Startline; it is never advanced inside this function.
+ let lineIndex = Startline;
+
+ [line, err] = await tp.readLine();
+
+ // Normalize \r\n to \n on all input lines.
+ // NOTE(review): this only has an effect if tp.readLine() keeps the line
+ // terminator in the returned string - confirm against TextProtoReader.
+ if (
+ line.length >= 2 &&
+ line[line.length - 2] === "\r" &&
+ line[line.length - 1] === "\n"
+ ) {
+ line = line.substring(0, line.length - 2);
+ line = line + "\n";
+ }
+
+ // A line that is empty or contains only whitespace yields no record.
+ const trimmedLine = line.trimLeft();
+ if (trimmedLine.length === 0) {
+ return [[], err];
+ }
+
+ // A line whose first non-whitespace character is the comment marker is
+ // ignored (result is still the empty array at this point).
+ if (opt.comment && trimmedLine[0] === opt.comment) {
+ return [result, err];
+ }
+
+ // The untrimmed line is what gets split, so leading whitespace stays in the
+ // first field unless trimLeadingSpace is set.
+ result = line.split(opt.comma);
+
+ let quoteError = false;
+ result = result.map(
+ (r): string => {
+ if (opt.trimLeadingSpace) {
+ r = r.trimLeft();
+ }
+ // Strip the surrounding quotes, or only the leading one when the field
+ // does not end with a quote.
+ if (r[0] === '"' && r[r.length - 1] === '"') {
+ r = r.substring(1, r.length - 1);
+ } else if (r[0] === '"') {
+ r = r.substring(1, r.length);
+ }
+
+ // In strict mode any quote still present in a field that does not start
+ // with a quote is an error; the whole line is rejected below.
+ if (!opt.lazyQuotes) {
+ if (r[0] !== '"' && r.indexOf('"') !== -1) {
+ quoteError = true;
+ }
+ }
+ return r;
+ }
+ );
+ if (quoteError) {
+ return [
+ [],
+ new ParseError(Startline, lineIndex, 'bare " in non-quoted-field')
+ ];
+ }
+ return [result, err];
+}
+
+/**
+ * Reads all remaining records from `reader`.
+ *
+ * Lines are read one at a time with `read` until it reports a non-null
+ * state. Blank lines and comment lines produce no record.
+ *
+ * @param reader buffered reader holding the CSV text
+ * @param opt parse options; validated with chkOptions before anything is read
+ * @returns `[records, null]` when the input ends with "EOF";
+ *          `[records, state]` with the records read so far when the options
+ *          are invalid or reading stops for any other reason;
+ *          `[null, ParseError]` when a record has the wrong number of fields
+ */
+export async function readAll(
+  reader: BufReader,
+  opt: ParseOptions = {
+    comma: ",",
+    trimLeadingSpace: false,
+    lazyQuotes: false
+  }
+): Promise<[string[][], BufState]> {
+  const result: string[][] = [];
+  // Expected number of fields per record; 0 means "do not check".
+  let nbFields = 0;
+  let err: BufState;
+  let lineResult: string[];
+  let first = true;
+  let lineIndex = 0;
+  err = chkOptions(opt);
+  if (err) return [result, err];
+
+  for (;;) {
+    [lineResult, err] = await read(lineIndex, reader, opt);
+    if (err) break;
+    lineIndex++;
+
+    // Blank and comment lines carry no record. They must not count as the
+    // "first record", otherwise fieldsPerRecord === 0 would pick up a count
+    // of 0 and silently disable the check for the rest of the input.
+    if (lineResult.length === 0) continue;
+
+    // If fieldsPerRecord is 0, the expected count is the number of fields
+    // in the first record.
+    if (first) {
+      first = false;
+      if (opt.fieldsPerRecord !== undefined) {
+        nbFields =
+          opt.fieldsPerRecord === 0 ? lineResult.length : opt.fieldsPerRecord;
+      }
+    }
+
+    if (nbFields && nbFields !== lineResult.length) {
+      return [
+        null,
+        new ParseError(lineIndex, lineIndex, "wrong number of fields")
+      ];
+    }
+    result.push(lineResult);
+  }
+  if (err !== "EOF") {
+    return [result, err];
+  }
+  return [result, null];
+}
diff --git a/encoding/csv_test.ts b/encoding/csv_test.ts
new file mode 100644
index 000000000..1ca68ea16
--- /dev/null
+++ b/encoding/csv_test.ts
@@ -0,0 +1,460 @@
+// Test ported from Golang
+// https://github.com/golang/go/blob/2cc15b1/src/encoding/csv/reader_test.go
+import { test, runIfMain } from "../testing/mod.ts";
+import { assertEquals, assert } from "../testing/asserts.ts";
+import { readAll } from "./csv.ts";
+import { StringReader } from "../io/readers.ts";
+import { BufReader } from "../io/bufio.ts";
+
+// Error messages the test cases expect in the `message` of the error
+// returned by readAll.
+const ErrInvalidDelim = "Invalid Delimiter";
+const ErrFieldCount = "wrong number of fields";
+const ErrBareQuote = 'bare " in non-quoted-field';
+
+// Table of test cases; each entry is run through readAll by the loop that
+// follows the table. Fields read by that loop:
+//   Name               - suffix of the registered test name
+//   Input              - CSV text fed to the reader
+//   Output             - expected records when no error is expected
+//   Error              - expected error message; when set, Output is not checked
+//   Comma / Comment    - override the `comma` / `comment` parse options
+//   TrimLeadingSpace / LazyQuotes - forwarded to the matching parse options
+//   UseFieldsPerRecord - when truthy, FieldsPerRecord is passed as `fieldsPerRecord`
+// ReuseRecord is not read by the loop (presumably kept from the Go test table).
+// TODO(zekth): Activate remaining tests
+const testCases = [
+ {
+ Name: "Simple",
+ Input: "a,b,c\n",
+ Output: [["a", "b", "c"]]
+ },
+ {
+ Name: "CRLF",
+ Input: "a,b\r\nc,d\r\n",
+ Output: [["a", "b"], ["c", "d"]]
+ },
+ {
+ Name: "BareCR",
+ Input: "a,b\rc,d\r\n",
+ Output: [["a", "b\rc", "d"]]
+ },
+ // {
+ // Name: "RFC4180test",
+ // Input: `#field1,field2,field3
+ // "aaa","bbb","ccc"
+ // "a,a","bbb","ccc"
+ // zzz,yyy,xxx`,
+ // UseFieldsPerRecord: true,
+ // FieldsPerRecord: 0,
+ // Output: [
+ // ["#field1", "field2", "field3"],
+ // ["aaa", "bbb", "ccc"],
+ // ["a,a", `bbb`, "ccc"],
+ // ["zzz", "yyy", "xxx"]
+ // ]
+ // },
+ {
+ Name: "NoEOLTest",
+ Input: "a,b,c",
+ Output: [["a", "b", "c"]]
+ },
+ {
+ Name: "Semicolon",
+ Input: "a;b;c\n",
+ Output: [["a", "b", "c"]],
+ Comma: ";"
+ },
+ // {
+ // Name: "MultiLine",
+ // Input: `"two
+ // line","one line","three
+ // line
+ // field"`,
+ // Output: [["two\nline"], ["one line"], ["three\nline\nfield"]]
+ // },
+ {
+ Name: "BlankLine",
+ Input: "a,b,c\n\nd,e,f\n\n",
+ Output: [["a", "b", "c"], ["d", "e", "f"]]
+ },
+ {
+ Name: "BlankLineFieldCount",
+ Input: "a,b,c\n\nd,e,f\n\n",
+ Output: [["a", "b", "c"], ["d", "e", "f"]],
+ UseFieldsPerRecord: true,
+ FieldsPerRecord: 0
+ },
+ {
+ Name: "TrimSpace",
+ Input: " a, b, c\n",
+ Output: [["a", "b", "c"]],
+ TrimLeadingSpace: true
+ },
+ {
+ Name: "LeadingSpace",
+ Input: " a, b, c\n",
+ Output: [[" a", " b", " c"]]
+ },
+ {
+ Name: "Comment",
+ Input: "#1,2,3\na,b,c\n#comment",
+ Output: [["a", "b", "c"]],
+ Comment: "#"
+ },
+ {
+ Name: "NoComment",
+ Input: "#1,2,3\na,b,c",
+ Output: [["#1", "2", "3"], ["a", "b", "c"]]
+ },
+ {
+ Name: "LazyQuotes",
+ Input: `a "word","1"2",a","b`,
+ Output: [[`a "word"`, `1"2`, `a"`, `b`]],
+ LazyQuotes: true
+ },
+ {
+ Name: "BareQuotes",
+ Input: `a "word","1"2",a"`,
+ Output: [[`a "word"`, `1"2`, `a"`]],
+ LazyQuotes: true
+ },
+ {
+ Name: "BareDoubleQuotes",
+ Input: `a""b,c`,
+ Output: [[`a""b`, `c`]],
+ LazyQuotes: true
+ },
+ {
+ Name: "BadDoubleQuotes",
+ Input: `a""b,c`,
+ Error: ErrBareQuote
+ // Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote},
+ },
+ {
+ Name: "TrimQuote",
+ Input: ` "a"," b",c`,
+ Output: [["a", " b", "c"]],
+ TrimLeadingSpace: true
+ },
+ {
+ Name: "BadBareQuote",
+ Input: `a "word","b"`,
+ Error: ErrBareQuote
+ // Error: true //&ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote},
+ },
+ {
+ Name: "BadTrailingQuote",
+ Input: `"a word",b"`,
+ Error: ErrBareQuote
+ },
+ {
+ Name: "ExtraneousQuote",
+ Input: `"a "word","b"`,
+ Error: ErrBareQuote
+ },
+ {
+ Name: "BadFieldCount",
+ Input: "a,b,c\nd,e",
+ Error: ErrFieldCount,
+ UseFieldsPerRecord: true,
+ FieldsPerRecord: 0
+ },
+ {
+ Name: "BadFieldCount1",
+ Input: `a,b,c`,
+ // Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount},
+ UseFieldsPerRecord: true,
+ FieldsPerRecord: 2,
+ Error: ErrFieldCount
+ },
+ {
+ Name: "FieldCount",
+ Input: "a,b,c\nd,e",
+ Output: [["a", "b", "c"], ["d", "e"]]
+ },
+ {
+ Name: "TrailingCommaEOF",
+ Input: "a,b,c,",
+ Output: [["a", "b", "c", ""]]
+ },
+ {
+ Name: "TrailingCommaEOL",
+ Input: "a,b,c,\n",
+ Output: [["a", "b", "c", ""]]
+ },
+ {
+ Name: "TrailingCommaSpaceEOF",
+ Input: "a,b,c, ",
+ Output: [["a", "b", "c", ""]],
+ TrimLeadingSpace: true
+ },
+ {
+ Name: "TrailingCommaSpaceEOL",
+ Input: "a,b,c, \n",
+ Output: [["a", "b", "c", ""]],
+ TrimLeadingSpace: true
+ },
+ {
+ Name: "TrailingCommaLine3",
+ Input: "a,b,c\nd,e,f\ng,hi,",
+ Output: [["a", "b", "c"], ["d", "e", "f"], ["g", "hi", ""]],
+ TrimLeadingSpace: true
+ },
+ {
+ Name: "NotTrailingComma3",
+ Input: "a,b,c, \n",
+ Output: [["a", "b", "c", " "]]
+ },
+ {
+ Name: "CommaFieldTest",
+ Input: `x,y,z,w
+x,y,z,
+x,y,,
+x,,,
+,,,
+"x","y","z","w"
+"x","y","z",""
+"x","y","",""
+"x","","",""
+"","","",""
+`,
+ Output: [
+ ["x", "y", "z", "w"],
+ ["x", "y", "z", ""],
+ ["x", "y", "", ""],
+ ["x", "", "", ""],
+ ["", "", "", ""],
+ ["x", "y", "z", "w"],
+ ["x", "y", "z", ""],
+ ["x", "y", "", ""],
+ ["x", "", "", ""],
+ ["", "", "", ""]
+ ]
+ },
+ {
+ Name: "TrailingCommaIneffective1",
+ Input: "a,b,\nc,d,e",
+ Output: [["a", "b", ""], ["c", "d", "e"]],
+ TrimLeadingSpace: true
+ },
+ {
+ Name: "ReadAllReuseRecord",
+ Input: "a,b\nc,d",
+ Output: [["a", "b"], ["c", "d"]],
+ ReuseRecord: true
+ },
+ // {
+ // Name: "StartLine1", // Issue 19019
+ // Input: 'a,"b\nc"d,e',
+ // Error: true
+ // // Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote},
+ // },
+ // {
+ // Name: "StartLine2",
+ // Input: 'a,b\n"d\n\n,e',
+ // Error: true
+ // // Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote},
+ // },
+ // {
+ // Name: "CRLFInQuotedField", // Issue 21201
+ // Input: 'A,"Hello\r\nHi",B\r\n',
+ // Output: [["A", "Hello\nHi", "B"]]
+ // },
+ {
+ Name: "BinaryBlobField", // Issue 19410
+ Input: "x09\x41\xb4\x1c,aktau",
+ Output: [["x09A\xb4\x1c", "aktau"]]
+ },
+ // {
+ // Name: "TrailingCR",
+ // Input: "field1,field2\r",
+ // Output: [["field1", "field2"]]
+ // },
+ // {
+ // Name: "QuotedTrailingCR",
+ // Input: '"field"\r',
+ // Output: [['"field"']]
+ // },
+ // {
+ // Name: "QuotedTrailingCRCR",
+ // Input: '"field"\r\r',
+ // Error: true,
+ // // Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote},
+ // },
+ // {
+ // Name: "FieldCR",
+ // Input: "field\rfield\r",
+ // Output: [["field\rfield"]]
+ // },
+ // {
+ // Name: "FieldCRCR",
+ // Input: "field\r\rfield\r\r",
+ // Output: [["field\r\rfield\r"]]
+ // },
+ {
+ Name: "FieldCRCRLF",
+ Input: "field\r\r\nfield\r\r\n",
+ Output: [["field\r"], ["field\r"]]
+ },
+ {
+ Name: "FieldCRCRLFCR",
+ Input: "field\r\r\n\rfield\r\r\n\r",
+ Output: [["field\r"], ["\rfield\r"]]
+ },
+ // {
+ // Name: "FieldCRCRLFCRCR",
+ // Input: "field\r\r\n\r\rfield\r\r\n\r\r",
+ // Output: [["field\r"], ["\r\rfield\r"], ["\r"]]
+ // },
+ // {
+ // Name: "MultiFieldCRCRLFCRCR",
+ // Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",
+ // Output: [["field1", "field2\r"], ["\r\rfield1", "field2\r"], ["\r\r", ""]]
+ // },
+ {
+ Name: "NonASCIICommaAndComment",
+ Input: "a£b,c£ \td,e\n€ comment\n",
+ Output: [["a", "b,c", "d,e"]],
+ TrimLeadingSpace: true,
+ Comma: "£",
+ Comment: "€"
+ },
+ {
+ Name: "NonASCIICommaAndCommentWithQuotes",
+ Input: 'a€" b,"€ c\nλ comment\n',
+ Output: [["a", " b,", " c"]],
+ Comma: "€",
+ Comment: "λ"
+ },
+ {
+ // λ and θ start with the same byte.
+ // This tests that the parser doesn't confuse such characters.
+ Name: "NonASCIICommaConfusion",
+ Input: '"abθcd"λefθgh',
+ Output: [["abθcd", "efθgh"]],
+ Comma: "λ",
+ Comment: "€"
+ },
+ {
+ Name: "NonASCIICommentConfusion",
+ Input: "λ\nλ\nθ\nλ\n",
+ Output: [["λ"], ["λ"], ["λ"]],
+ Comment: "θ"
+ },
+ // {
+ // Name: "QuotedFieldMultipleLF",
+ // Input: '"\n\n\n\n"',
+ // Output: [["\n\n\n\n"]]
+ // },
+ // {
+ // Name: "MultipleCRLF",
+ // Input: "\r\n\r\n\r\n\r\n"
+ // },
+ // {
+ // // The implementation may read each line in several chunks if it doesn't fit entirely
+ // // in the read buffer, so we should test the code to handle that condition.
+ // Name: "HugeLines",
+ // Input: strings.Repeat("#ignore\n", 10000) + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000),
+ // Output: [[strings.Repeat("@", 5000), strings.Repeat("*", 5000)]],
+ // Comment: '#',
+ // },
+ {
+ Name: "QuoteWithTrailingCRLF",
+ Input: '"foo"bar"\r\n',
+ Error: ErrBareQuote
+ // Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote},
+ },
+ {
+ Name: "LazyQuoteWithTrailingCRLF",
+ Input: '"foo"bar"\r\n',
+ Output: [[`foo"bar`]],
+ LazyQuotes: true
+ },
+ // {
+ // Name: "DoubleQuoteWithTrailingCRLF",
+ // Input: '"foo""bar"\r\n',
+ // Output: [[`foo"bar`]]
+ // },
+ // {
+ // Name: "EvenQuotes",
+ // Input: `""""""""`,
+ // Output: [[`"""`]]
+ // },
+ // {
+ // Name: "OddQuotes",
+ // Input: `"""""""`,
+ // Error: true
+ // // Error:" &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote}",
+ // },
+ // {
+ // Name: "LazyOddQuotes",
+ // Input: `"""""""`,
+ // Output: [[`"""`]],
+ // LazyQuotes: true
+ // },
+ {
+ Name: "BadComma1",
+ Comma: "\n",
+ Error: ErrInvalidDelim
+ },
+ {
+ Name: "BadComma2",
+ Comma: "\r",
+ Error: ErrInvalidDelim
+ },
+ {
+ Name: "BadComma3",
+ Comma: '"',
+ Error: ErrInvalidDelim
+ },
+ {
+ Name: "BadComment1",
+ Comment: "\n",
+ Error: ErrInvalidDelim
+ },
+ {
+ Name: "BadComment2",
+ Comment: "\r",
+ Error: ErrInvalidDelim
+ },
+ {
+ Name: "BadCommaComment",
+ Comma: "X",
+ Comment: "X",
+ Error: ErrInvalidDelim
+ }
+];
+// Register one test per table entry: build the parse options from the entry,
+// run readAll over the entry's input, then compare against either the
+// expected error message or the expected records.
+for (const t of testCases) {
+  test({
+    name: `[CSV] ${t.Name}`,
+    async fn(): Promise<void> {
+      // Fields missing from (or falsy in) the entry fall back to these defaults.
+      const comma = t.Comma ? t.Comma : ",";
+      const comment = t.Comment ? t.Comment : undefined;
+      const fieldsPerRec = t.UseFieldsPerRecord
+        ? t.FieldsPerRecord
+        : undefined;
+      const trim = t.TrimLeadingSpace ? true : false;
+      const lazyquote = t.LazyQuotes ? t.LazyQuotes : false;
+
+      const source = new BufReader(new StringReader(t.Input));
+      const actual = await readAll(source, {
+        comma,
+        comment,
+        trimLeadingSpace: trim,
+        fieldsPerRecord: fieldsPerRec,
+        lazyQuotes: lazyquote
+      });
+
+      if (!t.Error) {
+        // Success case: exact records and a null state.
+        assertEquals(actual, [t.Output, null]);
+        return;
+      }
+
+      // Failure case: only the presence of an error and its message are checked.
+      assert(!!actual[1]);
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const e: any = actual[1];
+      assertEquals(e.message, t.Error);
+    }
+  });
+}
+
+runIfMain(import.meta);
diff --git a/encoding/test.ts b/encoding/test.ts
index 4ee03572d..e7f779c86 100644
--- a/encoding/test.ts
+++ b/encoding/test.ts
@@ -1,2 +1,3 @@
// Copyright 2018-2019 the Deno authors. All rights reserved. MIT license.
import "./toml_test.ts";
+import "./csv_test.ts";