summaryrefslogtreecommitdiff
path: root/std/encoding/csv.ts
diff options
context:
space:
mode:
Diffstat (limited to 'std/encoding/csv.ts')
-rw-r--r--std/encoding/csv.ts172
1 files changed, 134 insertions, 38 deletions
diff --git a/std/encoding/csv.ts b/std/encoding/csv.ts
index c8c7719ca..711e27772 100644
--- a/std/encoding/csv.ts
+++ b/std/encoding/csv.ts
@@ -1,5 +1,7 @@
// Ported from Go:
// https://github.com/golang/go/blob/go1.12.5/src/encoding/csv/
+// Copyright 2011 The Go Authors. All rights reserved. BSD license.
+// https://github.com/golang/go/blob/master/LICENSE
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
import { BufReader } from "../io/bufio.ts";
@@ -9,6 +11,11 @@ import { assert } from "../testing/asserts.ts";
const INVALID_RUNE = ["\r", "\n", '"'];
+export const ERR_BARE_QUOTE = 'bare " in non-quoted-field';
+export const ERR_QUOTE = 'extraneous or missing " in quoted-field';
+export const ERR_INVALID_DELIM = "Invalid Delimiter";
+export const ERR_FIELD_COUNT = "wrong number of fields";
+
export class ParseError extends Error {
StartLine: number;
Line: number;
@@ -49,70 +56,159 @@ function chkOptions(opt: ReadOptions): void {
(typeof opt.comment === "string" && INVALID_RUNE.includes(opt.comment)) ||
opt.comma === opt.comment
) {
- throw new Error("Invalid Delimiter");
+ throw new Error(ERR_INVALID_DELIM);
}
}
-async function read(
+async function readRecord(
Startline: number,
reader: BufReader,
opt: ReadOptions = { comma: ",", trimLeadingSpace: false }
): Promise<string[] | Deno.EOF> {
const tp = new TextProtoReader(reader);
- let line: string;
- let result: string[] = [];
const lineIndex = Startline;
+ let line = await readLine(tp);
- const r = await tp.readLine();
- if (r === Deno.EOF) return Deno.EOF;
- line = r;
- // Normalize \r\n to \n on all input lines.
- if (
- line.length >= 2 &&
- line[line.length - 2] === "\r" &&
- line[line.length - 1] === "\n"
- ) {
- line = line.substring(0, line.length - 2);
- line = line + "\n";
- }
-
- const trimmedLine = line.trimLeft();
- if (trimmedLine.length === 0) {
+ if (line === Deno.EOF) return Deno.EOF;
+ if (line.length === 0) {
return [];
}
-
// line starting with comment character is ignored
- if (opt.comment && trimmedLine[0] === opt.comment) {
+ if (opt.comment && line[0] === opt.comment) {
return [];
}
assert(opt.comma != null);
- result = line.split(opt.comma);
- let quoteError = false;
- result = result.map((r): string => {
+ let quoteError: string | null = null;
+ const quote = '"';
+ const quoteLen = quote.length;
+ const commaLen = opt.comma.length;
+ let recordBuffer = "";
+ const fieldIndexes = [] as number[];
+ parseField: for (;;) {
if (opt.trimLeadingSpace) {
- r = r.trimLeft();
- }
- if (r[0] === '"' && r[r.length - 1] === '"') {
- r = r.substring(1, r.length - 1);
- } else if (r[0] === '"') {
- r = r.substring(1, r.length);
+ line = line.trimLeft();
}
- if (!opt.lazyQuotes) {
- if (r[0] !== '"' && r.indexOf('"') !== -1) {
- quoteError = true;
+ if (line.length === 0 || !line.startsWith(quote)) {
+ // Non-quoted string field
+ const i = line.indexOf(opt.comma);
+ let field = line;
+ if (i >= 0) {
+ field = field.substring(0, i);
+ }
+ // Check to make sure a quote does not appear in field.
+ if (!opt.lazyQuotes) {
+ const j = field.indexOf(quote);
+ if (j >= 0) {
+ quoteError = ERR_BARE_QUOTE;
+ break parseField;
+ }
+ }
+ recordBuffer += field;
+ fieldIndexes.push(recordBuffer.length);
+ if (i >= 0) {
+ line = line.substring(i + commaLen);
+ continue parseField;
+ }
+ break parseField;
+ } else {
+ // Quoted string field
+ line = line.substring(quoteLen);
+ for (;;) {
+ const i = line.indexOf(quote);
+ if (i >= 0) {
+ // Hit next quote.
+ recordBuffer += line.substring(0, i);
+ line = line.substring(i + quoteLen);
+ if (line.startsWith(quote)) {
+ // `""` sequence (append quote).
+ recordBuffer += quote;
+ line = line.substring(quoteLen);
+ } else if (line.startsWith(opt.comma)) {
+ // `","` sequence (end of field).
+ line = line.substring(commaLen);
+ fieldIndexes.push(recordBuffer.length);
+ continue parseField;
+ } else if (0 === line.length) {
+ // `"\n` sequence (end of line).
+ fieldIndexes.push(recordBuffer.length);
+ break parseField;
+ } else if (opt.lazyQuotes) {
+ // `"` sequence (bare quote).
+ recordBuffer += quote;
+ } else {
+ // `"*` sequence (invalid non-escaped quote).
+ quoteError = ERR_QUOTE;
+ break parseField;
+ }
+ } else if (line.length > 0 || !(await isEOF(tp))) {
+ // Hit end of line (copy all data so far).
+ recordBuffer += line;
+ const r = await readLine(tp);
+ if (r === Deno.EOF) {
+ if (!opt.lazyQuotes) {
+ quoteError = ERR_QUOTE;
+ break parseField;
+ }
+ fieldIndexes.push(recordBuffer.length);
+ break parseField;
+ }
+ recordBuffer += "\n"; // preserve line feed (This is because TextProtoReader removes it.)
+ line = r;
+ } else {
+ // Abrupt end of file (EOF on error).
+ if (!opt.lazyQuotes) {
+ quoteError = ERR_QUOTE;
+ break parseField;
+ }
+ fieldIndexes.push(recordBuffer.length);
+ break parseField;
+ }
}
}
- return r;
- });
+ }
if (quoteError) {
- throw new ParseError(Startline, lineIndex, 'bare " in non-quoted-field');
+ throw new ParseError(Startline, lineIndex, quoteError);
+ }
+ const result = [] as string[];
+ let preIdx = 0;
+ for (const i of fieldIndexes) {
+ result.push(recordBuffer.slice(preIdx, i));
+ preIdx = i;
}
return result;
}
+async function isEOF(tp: TextProtoReader): Promise<boolean> {
+ return (await tp.r.peek(0)) === Deno.EOF;
+}
+
+async function readLine(tp: TextProtoReader): Promise<string | Deno.EOF> {
+ let line: string;
+ const r = await tp.readLine();
+ if (r === Deno.EOF) return Deno.EOF;
+ line = r;
+
+ // For backwards compatibility, drop trailing \r before EOF.
+ if ((await isEOF(tp)) && line.length > 0 && line[line.length - 1] === "\r") {
+ line = line.substring(0, line.length - 1);
+ }
+
+ // Normalize \r\n to \n on all input lines.
+ if (
+ line.length >= 2 &&
+ line[line.length - 2] === "\r" &&
+ line[line.length - 1] === "\n"
+ ) {
+ line = line.substring(0, line.length - 2);
+ line = line + "\n";
+ }
+
+ return line;
+}
+
export async function readMatrix(
reader: BufReader,
opt: ReadOptions = {
@@ -129,7 +225,7 @@ export async function readMatrix(
chkOptions(opt);
for (;;) {
- const r = await read(lineIndex, reader, opt);
+ const r = await readRecord(lineIndex, reader, opt);
if (r === Deno.EOF) break;
lineResult = r;
lineIndex++;
@@ -148,7 +244,7 @@ export async function readMatrix(
if (lineResult.length > 0) {
if (_nbFields && _nbFields !== lineResult.length) {
- throw new ParseError(lineIndex, lineIndex, "wrong number of fields");
+ throw new ParseError(lineIndex, lineIndex, ERR_FIELD_COUNT);
}
result.push(lineResult);
}