summaryrefslogtreecommitdiff
path: root/std/encoding
diff options
context:
space:
mode:
authoruki00a <uki00a@gmail.com>2020-08-18 06:47:01 +0900
committerGitHub <noreply@github.com>2020-08-17 17:47:01 -0400
commit95a6812e82fbac0826e5e230eef714ce85953125 (patch)
treebe020027bf91338bc1dccc979782f6b4cd902b29 /std/encoding
parentb44b7a9a603477d3d0f9fa6cdafe18d387a36d25 (diff)
fix(std/encoding/csv): improve error message on ParseError (#7057)
Diffstat (limited to 'std/encoding')
-rw-r--r--std/encoding/csv.ts94
-rw-r--r--std/encoding/csv_test.ts84
2 files changed, 125 insertions, 53 deletions
diff --git a/std/encoding/csv.ts b/std/encoding/csv.ts
index 27b40c566..bab856b51 100644
--- a/std/encoding/csv.ts
+++ b/std/encoding/csv.ts
@@ -16,13 +16,38 @@ export const ERR_QUOTE = 'extraneous or missing " in quoted-field';
export const ERR_INVALID_DELIM = "Invalid Delimiter";
export const ERR_FIELD_COUNT = "wrong number of fields";
+/**
+ * A ParseError is returned for parsing errors.
+ * Line numbers are 1-indexed and columns are 0-indexed.
+ */
export class ParseError extends Error {
- StartLine: number;
- Line: number;
- constructor(start: number, line: number, message: string) {
- super(message);
- this.StartLine = start;
- this.Line = line;
+ /** Line where the record starts*/
+ startLine: number;
+ /** Line where the error occurred */
+ line: number;
+ /** Column (rune index) where the error occurred */
+ column: number | null;
+
+ constructor(
+ start: number,
+ line: number,
+ column: number | null,
+ message: string,
+ ) {
+ super();
+ this.startLine = start;
+ this.column = column;
+ this.line = line;
+
+ if (message === ERR_FIELD_COUNT) {
+ this.message = `record on line ${line}: ${message}`;
+ } else if (start !== line) {
+ this.message =
+ `record on line ${start}; parse error on line ${line}, column ${column}: ${message}`;
+ } else {
+ this.message =
+ `parse error on line ${line}, column ${column}: ${message}`;
+ }
}
}
@@ -61,13 +86,13 @@ function chkOptions(opt: ReadOptions): void {
}
async function readRecord(
- Startline: number,
+ startLine: number,
reader: BufReader,
opt: ReadOptions = { comma: ",", trimLeadingSpace: false },
): Promise<string[] | null> {
const tp = new TextProtoReader(reader);
- const lineIndex = Startline;
let line = await readLine(tp);
+ let lineIndex = startLine + 1;
if (line === null) return null;
if (line.length === 0) {
@@ -80,7 +105,8 @@ async function readRecord(
assert(opt.comma != null);
- let quoteError: string | null = null;
+ let fullLine = line;
+ let quoteError: ParseError | null = null;
const quote = '"';
const quoteLen = quote.length;
const commaLen = opt.comma.length;
@@ -103,7 +129,15 @@ async function readRecord(
if (!opt.lazyQuotes) {
const j = field.indexOf(quote);
if (j >= 0) {
- quoteError = ERR_BARE_QUOTE;
+ const col = runeCount(
+ fullLine.slice(0, fullLine.length - line.slice(j).length),
+ );
+ quoteError = new ParseError(
+ startLine + 1,
+ lineIndex,
+ col,
+ ERR_BARE_QUOTE,
+ );
break parseField;
}
}
@@ -141,27 +175,50 @@ async function readRecord(
recordBuffer += quote;
} else {
// `"*` sequence (invalid non-escaped quote).
- quoteError = ERR_QUOTE;
+ const col = runeCount(
+ fullLine.slice(0, fullLine.length - line.length - quoteLen),
+ );
+ quoteError = new ParseError(
+ startLine + 1,
+ lineIndex,
+ col,
+ ERR_QUOTE,
+ );
break parseField;
}
} else if (line.length > 0 || !(await isEOF(tp))) {
// Hit end of line (copy all data so far).
recordBuffer += line;
const r = await readLine(tp);
+ lineIndex++;
+ line = r ?? ""; // This is a workaround for making this module behave similarly to the encoding/csv/reader.go.
+ fullLine = line;
if (r === null) {
+ // Abrupt end of file (EOF or error).
if (!opt.lazyQuotes) {
- quoteError = ERR_QUOTE;
+ const col = runeCount(fullLine);
+ quoteError = new ParseError(
+ startLine + 1,
+ lineIndex,
+ col,
+ ERR_QUOTE,
+ );
break parseField;
}
fieldIndexes.push(recordBuffer.length);
break parseField;
}
recordBuffer += "\n"; // preserve line feed (This is because TextProtoReader removes it.)
- line = r;
} else {
// Abrupt end of file (EOF on error).
if (!opt.lazyQuotes) {
- quoteError = ERR_QUOTE;
+ const col = runeCount(fullLine);
+ quoteError = new ParseError(
+ startLine + 1,
+ lineIndex,
+ col,
+ ERR_QUOTE,
+ );
break parseField;
}
fieldIndexes.push(recordBuffer.length);
@@ -171,7 +228,7 @@ async function readRecord(
}
}
if (quoteError) {
- throw new ParseError(Startline, lineIndex, quoteError);
+ throw quoteError;
}
const result = [] as string[];
let preIdx = 0;
@@ -186,6 +243,11 @@ async function isEOF(tp: TextProtoReader): Promise<boolean> {
return (await tp.r.peek(0)) === null;
}
+function runeCount(s: string): number {
+ // Array.from considers the surrogate pair.
+ return Array.from(s).length;
+}
+
async function readLine(tp: TextProtoReader): Promise<string | null> {
let line: string;
const r = await tp.readLine();
@@ -251,7 +313,7 @@ export async function readMatrix(
if (lineResult.length > 0) {
if (_nbFields && _nbFields !== lineResult.length) {
- throw new ParseError(lineIndex, lineIndex, ERR_FIELD_COUNT);
+ throw new ParseError(lineIndex, lineIndex, null, ERR_FIELD_COUNT);
}
result.push(lineResult);
}
diff --git a/std/encoding/csv_test.ts b/std/encoding/csv_test.ts
index 07a8e0965..aab9e86b1 100644
--- a/std/encoding/csv_test.ts
+++ b/std/encoding/csv_test.ts
@@ -4,7 +4,7 @@
// https://github.com/golang/go/blob/master/LICENSE
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
-import { assertEquals, assert } from "../testing/asserts.ts";
+import { assertEquals, assertThrowsAsync } from "../testing/asserts.ts";
import {
readMatrix,
parse,
@@ -12,6 +12,7 @@ import {
ERR_QUOTE,
ERR_INVALID_DELIM,
ERR_FIELD_COUNT,
+ ParseError,
} from "./csv.ts";
import { StringReader } from "../io/readers.ts";
import { BufReader } from "../io/bufio.ts";
@@ -133,8 +134,7 @@ field"`,
{
Name: "BadDoubleQuotes",
Input: `a""b,c`,
- Error: ERR_BARE_QUOTE,
- // Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote},
+ Error: new ParseError(1, 1, 1, ERR_BARE_QUOTE),
},
{
Name: "TrimQuote",
@@ -145,33 +145,31 @@ field"`,
{
Name: "BadBareQuote",
Input: `a "word","b"`,
- Error: ERR_BARE_QUOTE,
- // &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote}
+ Error: new ParseError(1, 1, 2, ERR_BARE_QUOTE),
},
{
Name: "BadTrailingQuote",
Input: `"a word",b"`,
- Error: ERR_BARE_QUOTE,
+ Error: new ParseError(1, 1, 10, ERR_BARE_QUOTE),
},
{
Name: "ExtraneousQuote",
Input: `"a "word","b"`,
- Error: ERR_QUOTE,
+ Error: new ParseError(1, 1, 3, ERR_QUOTE),
},
{
Name: "BadFieldCount",
Input: "a,b,c\nd,e",
- Error: ERR_FIELD_COUNT,
+ Error: new ParseError(2, 2, null, ERR_FIELD_COUNT),
UseFieldsPerRecord: true,
FieldsPerRecord: 0,
},
{
Name: "BadFieldCount1",
Input: `a,b,c`,
- // Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount},
UseFieldsPerRecord: true,
FieldsPerRecord: 2,
- Error: ERR_FIELD_COUNT,
+ Error: new ParseError(1, 1, null, ERR_FIELD_COUNT),
},
{
Name: "FieldCount",
@@ -265,14 +263,12 @@ x,,,
{
Name: "StartLine1", // Issue 19019
Input: 'a,"b\nc"d,e',
- Error: ERR_QUOTE,
- // Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote},
+ Error: new ParseError(1, 2, 1, ERR_QUOTE),
},
{
Name: "StartLine2",
- Input: 'a,b\n"d\n\n,e',
- Error: ERR_QUOTE,
- // Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote},
+ Input: 'a,b\n\"d\n\n,e',
+ Error: new ParseError(2, 5, 0, ERR_QUOTE),
},
{
Name: "CRLFInQuotedField", // Issue 21201
@@ -297,8 +293,7 @@ x,,,
{
Name: "QuotedTrailingCRCR",
Input: '"field"\r\r',
- Error: ERR_QUOTE,
- // Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote},
+ Error: new ParseError(1, 1, 6, ERR_QUOTE),
},
{
Name: "FieldCR",
@@ -389,8 +384,7 @@ x,,,
{
Name: "QuoteWithTrailingCRLF",
Input: '"foo"bar"\r\n',
- Error: ERR_QUOTE,
- // Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote},
+ Error: new ParseError(1, 1, 4, ERR_QUOTE),
},
{
Name: "LazyQuoteWithTrailingCRLF",
@@ -411,8 +405,7 @@ x,,,
{
Name: "OddQuotes",
Input: `"""""""`,
- Error: ERR_QUOTE,
- // Error:" &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote}",
+ Error: new ParseError(1, 1, 7, ERR_QUOTE),
},
{
Name: "LazyOddQuotes",
@@ -423,33 +416,33 @@ x,,,
{
Name: "BadComma1",
Comma: "\n",
- Error: ERR_INVALID_DELIM,
+ Error: new Error(ERR_INVALID_DELIM),
},
{
Name: "BadComma2",
Comma: "\r",
- Error: ERR_INVALID_DELIM,
+ Error: new Error(ERR_INVALID_DELIM),
},
{
Name: "BadComma3",
Comma: '"',
- Error: ERR_INVALID_DELIM,
+ Error: new Error(ERR_INVALID_DELIM),
},
{
Name: "BadComment1",
Comment: "\n",
- Error: ERR_INVALID_DELIM,
+ Error: new Error(ERR_INVALID_DELIM),
},
{
Name: "BadComment2",
Comment: "\r",
- Error: ERR_INVALID_DELIM,
+ Error: new Error(ERR_INVALID_DELIM),
},
{
Name: "BadCommaComment",
Comma: "X",
Comment: "X",
- Error: ERR_INVALID_DELIM,
+ Error: new Error(ERR_INVALID_DELIM),
},
];
for (const t of testCases) {
@@ -457,8 +450,8 @@ for (const t of testCases) {
name: `[CSV] ${t.Name}`,
async fn(): Promise<void> {
let comma = ",";
- let comment;
- let fieldsPerRec;
+ let comment: string | undefined;
+ let fieldsPerRec: number | undefined;
let trim = false;
let lazyquote = false;
if (t.Comma) {
@@ -478,9 +471,8 @@ for (const t of testCases) {
}
let actual;
if (t.Error) {
- let err;
- try {
- actual = await readMatrix(
+ const err = await assertThrowsAsync(async () => {
+ await readMatrix(
new BufReader(new StringReader(t.Input ?? "")),
{
comma: comma,
@@ -490,11 +482,9 @@ for (const t of testCases) {
lazyQuotes: lazyquote,
},
);
- } catch (e) {
- err = e;
- }
- assert(err);
- assertEquals(err.message, t.Error);
+ });
+
+ assertEquals(err, t.Error);
} else {
actual = await readMatrix(
new BufReader(new StringReader(t.Input ?? "")),
@@ -625,3 +615,23 @@ for (const testCase of parseTestCases) {
},
});
}
+
+Deno.test({
+ name: "[CSV] ParseError.message",
+ fn(): void {
+ assertEquals(
+ new ParseError(2, 2, null, ERR_FIELD_COUNT).message,
+ `record on line 2: ${ERR_FIELD_COUNT}`,
+ );
+
+ assertEquals(
+ new ParseError(1, 2, 1, ERR_QUOTE).message,
+ `record on line 1; parse error on line 2, column 1: ${ERR_QUOTE}`,
+ );
+
+ assertEquals(
+ new ParseError(1, 1, 7, ERR_QUOTE).message,
+ `parse error on line 1, column 7: ${ERR_QUOTE}`,
+ );
+ },
+});