diff options
Diffstat (limited to 'std')
-rw-r--r-- | std/encoding/README.md | 20 | ||||
-rw-r--r-- | std/encoding/csv.ts | 91 | ||||
-rw-r--r-- | std/encoding/csv_test.ts | 62 |
3 files changed, 101 insertions, 72 deletions
diff --git a/std/encoding/README.md b/std/encoding/README.md index 21797a451..2f0ac91f1 100644 --- a/std/encoding/README.md +++ b/std/encoding/README.md @@ -37,25 +37,29 @@ writeVarbig(w: Deno.Writer, x: bigint, o: VarbigOptions = {}): Promise<number> Parse the CSV from the `reader` with the options provided and return `string[][]`. -#### `parse(input: string | BufReader, opt: ParseOptions = { header: false }): Promise<unknown[]>`: +#### `parse(input: string | BufReader, opt: ParseOptions = { skipFirstRow: false }): Promise<unknown[]>`: Parse the CSV string/buffer with the options provided. The result of this function is as follows: -- If you don't provide both `opt.header` and `opt.parse`, it returns - `string[][]`. -- If you provide `opt.header` but not `opt.parse`, it returns `object[]`. +- If you don't provide `opt.skipFirstRow`, `opt.parse`, and `opt.columns`, it + returns `string[][]`. +- If you provide `opt.skipFirstRow` or `opt.columns` but not `opt.parse`, it + returns `object[]`. - If you provide `opt.parse`, it returns an array where each element is the value returned from `opt.parse`. ##### `ParseOptions` -- **`header: boolean | string[] | HeaderOptions[];`**: If a boolean is provided, - the first line will be used as Header definitions. If `string[]` or - `HeaderOptions[]` those names will be used for header definition. +- **`skipFirstRow: boolean;`**: If you provide `skipFirstRow: true` and + `columns`, the first line will be skipped. If you provide `skipFirstRow: true` + but not `columns`, the first line will be skipped and used as header + definitions. +- **`columns: string[] | ColumnOptions[];`**: If you provide `string[]` or + `ColumnOptions[]`, those names will be used for header definition. - **`parse?: (input: unknown) => unknown;`**: Parse function for the row, which will be executed after parsing of all columns. Therefore if you don't provide - header and parse function with headers, input will be `string[]`. 
+ `skipFirstRow`, `columns`, and `parse` function, input will be `string[]`. ##### `HeaderOptions` diff --git a/std/encoding/csv.ts b/std/encoding/csv.ts index bab856b51..78dd602e7 100644 --- a/std/encoding/csv.ts +++ b/std/encoding/csv.ts @@ -52,7 +52,7 @@ export class ParseError extends Error { } /** - * @property comma - Character which separates values. Default: ',' + * @property separator - Character which separates values. Default: ',' * @property comment - Character to start a comment. Default: '#' * @property trimLeadingSpace - Flag to trim the leading space of the value. * Default: 'false' @@ -62,7 +62,7 @@ export class ParseError extends Error { * If == 0, first row is used as referral for the number of fields. */ export interface ReadOptions { - comma?: string; + separator?: string; comment?: string; trimLeadingSpace?: boolean; lazyQuotes?: boolean; @@ -70,16 +70,16 @@ export interface ReadOptions { } function chkOptions(opt: ReadOptions): void { - if (!opt.comma) { - opt.comma = ","; + if (!opt.separator) { + opt.separator = ","; } if (!opt.trimLeadingSpace) { opt.trimLeadingSpace = false; } if ( - INVALID_RUNE.includes(opt.comma) || + INVALID_RUNE.includes(opt.separator) || (typeof opt.comment === "string" && INVALID_RUNE.includes(opt.comment)) || - opt.comma === opt.comment + opt.separator === opt.comment ) { throw new Error(ERR_INVALID_DELIM); } @@ -88,7 +88,7 @@ function chkOptions(opt: ReadOptions): void { async function readRecord( startLine: number, reader: BufReader, - opt: ReadOptions = { comma: ",", trimLeadingSpace: false }, + opt: ReadOptions = { separator: ",", trimLeadingSpace: false }, ): Promise<string[] | null> { const tp = new TextProtoReader(reader); let line = await readLine(tp); @@ -103,13 +103,13 @@ async function readRecord( return []; } - assert(opt.comma != null); + assert(opt.separator != null); let fullLine = line; let quoteError: ParseError | null = null; const quote = '"'; const quoteLen = quote.length; - const commaLen = 
opt.comma.length; + const separatorLen = opt.separator.length; let recordBuffer = ""; const fieldIndexes = [] as number[]; parseField: @@ -120,7 +120,7 @@ async function readRecord( if (line.length === 0 || !line.startsWith(quote)) { // Non-quoted string field - const i = line.indexOf(opt.comma); + const i = line.indexOf(opt.separator); let field = line; if (i >= 0) { field = field.substring(0, i); @@ -144,7 +144,7 @@ async function readRecord( recordBuffer += field; fieldIndexes.push(recordBuffer.length); if (i >= 0) { - line = line.substring(i + commaLen); + line = line.substring(i + separatorLen); continue parseField; } break parseField; @@ -161,9 +161,9 @@ async function readRecord( // `""` sequence (append quote). recordBuffer += quote; line = line.substring(quoteLen); - } else if (line.startsWith(opt.comma)) { + } else if (line.startsWith(opt.separator)) { // `","` sequence (end of field). - line = line.substring(commaLen); + line = line.substring(separatorLen); fieldIndexes.push(recordBuffer.length); continue parseField; } else if (0 === line.length) { @@ -281,7 +281,7 @@ async function readLine(tp: TextProtoReader): Promise<string | null> { export async function readMatrix( reader: BufReader, opt: ReadOptions = { - comma: ",", + separator: ",", trimLeadingSpace: false, lazyQuotes: false, }, @@ -324,13 +324,13 @@ export async function readMatrix( /** * Parse the CSV string/buffer with the options provided. * - * HeaderOptions provides the column definition + * ColumnOptions provides the column definition * and the parse function for each entry of the * column. */ -export interface HeaderOptions { +export interface ColumnOptions { /** - * Name of the header to be used as property + * Name of the column to be used as property */ name: string; /** @@ -343,14 +343,20 @@ export interface HeaderOptions { export interface ParseOptions extends ReadOptions { /** - * If a boolean is provided, the first line will be used as Header definitions. 
- * If `string[]` or `HeaderOptions[]` those names will be used for header definition. + * If you provide `skipFirstRow: true` and `columns`, the first line will be skipped. + * If you provide `skipFirstRow: true` but not `columns`, the first line will be skipped and used as header definitions. */ - header: boolean | string[] | HeaderOptions[]; + skipFirstRow?: boolean; + + /** + * If you provide `string[]` or `ColumnOptions[]`, those names will be used for header definition. + */ + columns?: string[] | ColumnOptions[]; + /** Parse function for rows. * Example: * const r = await parseFile('a,b,c\ne,f,g\n', { - * header: ["this", "is", "sparta"], + * columns: ["this", "is", "sparta"], * parse: (e: Record<string, unknown>) => { * return { super: e.this, street: e.is, fighter: e.sparta }; * } @@ -370,14 +376,14 @@ export interface ParseOptions extends ReadOptions { * for columns and rows. * @param input Input to parse. Can be a string or BufReader. * @param opt options of the parser. - * @returns If you don't provide both `opt.header` and `opt.parse`, it returns `string[][]`. - * If you provide `opt.header` but not `opt.parse`, it returns `object[]`. + * @returns If you don't provide `opt.skipFirstRow`, `opt.parse`, and `opt.columns`, it returns `string[][]`. + * If you provide `opt.skipFirstRow` or `opt.columns` but not `opt.parse`, it returns `object[]`. * If you provide `opt.parse`, it returns an array where each element is the value returned from `opt.parse`. 
*/ export async function parse( input: string | BufReader, opt: ParseOptions = { - header: false, + skipFirstRow: false, }, ): Promise<unknown[]> { let r: string[][]; @@ -386,27 +392,15 @@ export async function parse( } else { r = await readMatrix(new BufReader(new StringReader(input)), opt); } - if (opt.header) { - let headers: HeaderOptions[] = []; + if (opt.skipFirstRow || opt.columns) { + let headers: ColumnOptions[] = []; let i = 0; - if (Array.isArray(opt.header)) { - if (typeof opt.header[0] !== "string") { - headers = opt.header as HeaderOptions[]; - } else { - const h = opt.header as string[]; - headers = h.map( - (e): HeaderOptions => { - return { - name: e, - }; - }, - ); - } - } else { + + if (opt.skipFirstRow) { const head = r.shift(); assert(head != null); headers = head.map( - (e): HeaderOptions => { + (e): ColumnOptions => { return { name: e, }; @@ -414,6 +408,21 @@ export async function parse( ); i++; } + + if (opt.columns) { + if (typeof opt.columns[0] !== "string") { + headers = opt.columns as ColumnOptions[]; + } else { + const h = opt.columns as string[]; + headers = h.map( + (e): ColumnOptions => { + return { + name: e, + }; + }, + ); + } + } return r.map((e): unknown => { if (e.length !== headers.length) { throw `Error number of fields line:${i}`; diff --git a/std/encoding/csv_test.ts b/std/encoding/csv_test.ts index 1a2d892cc..e81c1401f 100644 --- a/std/encoding/csv_test.ts +++ b/std/encoding/csv_test.ts @@ -17,6 +17,7 @@ import { import { StringReader } from "../io/readers.ts"; import { BufReader } from "../io/bufio.ts"; +// Test cases for `readMatrix()` const testCases = [ { Name: "Simple", @@ -60,7 +61,7 @@ zzz,yyy,xxx`, Name: "Semicolon", Input: "a;b;c\n", Output: [["a", "b", "c"]], - Comma: ";", + Separator: ";", }, { Name: "MultiLine", @@ -334,14 +335,14 @@ x,,, Input: "a£b,c£ \td,e\n€ comment\n", Output: [["a", "b,c", "d,e"]], TrimLeadingSpace: true, - Comma: "£", + Separator: "£", Comment: "€", }, { Name: 
"NonASCIICommaAndCommentWithQuotes", Input: 'a€" b,"€ c\nλ comment\n', Output: [["a", " b,", " c"]], - Comma: "€", + Separator: "€", Comment: "λ", }, { @@ -350,7 +351,7 @@ x,,, Name: "NonASCIICommaConfusion", Input: '"abθcd"λefθgh', Output: [["abθcd", "efθgh"]], - Comma: "λ", + Separator: "λ", Comment: "€", }, { @@ -415,17 +416,17 @@ x,,, }, { Name: "BadComma1", - Comma: "\n", + Separator: "\n", Error: new Error(ERR_INVALID_DELIM), }, { Name: "BadComma2", - Comma: "\r", + Separator: "\r", Error: new Error(ERR_INVALID_DELIM), }, { Name: "BadComma3", - Comma: '"', + Separator: '"', Error: new Error(ERR_INVALID_DELIM), }, { @@ -440,7 +441,7 @@ x,,, }, { Name: "BadCommaComment", - Comma: "X", + Separator: "X", Comment: "X", Error: new Error(ERR_INVALID_DELIM), }, @@ -449,13 +450,13 @@ for (const t of testCases) { Deno.test({ name: `[CSV] ${t.Name}`, async fn(): Promise<void> { - let comma = ","; + let separator = ","; let comment: string | undefined; let fieldsPerRec: number | undefined; let trim = false; let lazyquote = false; - if (t.Comma) { - comma = t.Comma; + if (t.Separator) { + separator = t.Separator; } if (t.Comment) { comment = t.Comment; @@ -475,7 +476,7 @@ for (const t of testCases) { await readMatrix( new BufReader(new StringReader(t.Input ?? "")), { - comma: comma, + separator, comment: comment, trimLeadingSpace: trim, fieldsPerRecord: fieldsPerRec, @@ -489,7 +490,7 @@ for (const t of testCases) { actual = await readMatrix( new BufReader(new StringReader(t.Input ?? 
"")), { - comma: comma, + separator, comment: comment, trimLeadingSpace: trim, fieldsPerRecord: fieldsPerRec, @@ -507,19 +508,19 @@ const parseTestCases = [ { name: "simple", in: "a,b,c", - header: false, + skipFirstRow: false, result: [["a", "b", "c"]], }, { name: "simple Bufreader", in: new BufReader(new StringReader("a,b,c")), - header: false, + skipFirstRow: false, result: [["a", "b", "c"]], }, { name: "multiline", in: "a,b,c\ne,f,g\n", - header: false, + skipFirstRow: false, result: [ ["a", "b", "c"], ["e", "f", "g"], @@ -528,13 +529,13 @@ const parseTestCases = [ { name: "header mapping boolean", in: "a,b,c\ne,f,g\n", - header: true, + skipFirstRow: true, result: [{ a: "e", b: "f", c: "g" }], }, { name: "header mapping array", in: "a,b,c\ne,f,g\n", - header: ["this", "is", "sparta"], + columns: ["this", "is", "sparta"], result: [ { this: "a", is: "b", sparta: "c" }, { this: "e", is: "f", sparta: "g" }, @@ -543,7 +544,7 @@ const parseTestCases = [ { name: "header mapping object", in: "a,b,c\ne,f,g\n", - header: [{ name: "this" }, { name: "is" }, { name: "sparta" }], + columns: [{ name: "this" }, { name: "is" }, { name: "sparta" }], result: [ { this: "a", is: "b", sparta: "c" }, { this: "e", is: "f", sparta: "g" }, @@ -552,7 +553,7 @@ const parseTestCases = [ { name: "header mapping parse entry", in: "a,b,c\ne,f,g\n", - header: [ + columns: [ { name: "this", parse: (e: string): string => { @@ -583,7 +584,7 @@ const parseTestCases = [ parse: (e: string[]): unknown => { return { super: e[0], street: e[1], fighter: e[2] }; }, - header: false, + skipFirstRow: false, result: [ { super: "a", street: "b", fighter: "c" }, { super: "e", street: "f", fighter: "g" }, @@ -592,7 +593,7 @@ const parseTestCases = [ { name: "header mapping object parseline", in: "a,b,c\ne,f,g\n", - header: [{ name: "this" }, { name: "is" }, { name: "sparta" }], + columns: [{ name: "this" }, { name: "is" }, { name: "sparta" }], parse: (e: Record<string, unknown>): unknown => { return { super: 
e.this, street: e.is, fighter: e.sparta }; }, @@ -601,6 +602,20 @@ const parseTestCases = [ { super: "e", street: "f", fighter: "g" }, ], }, + { + name: "provides both opts.skipFirstRow and opts.columns", + in: "a,b,1\nc,d,2\ne,f,3", + skipFirstRow: true, + columns: [ + { name: "foo" }, + { name: "bar" }, + { name: "baz", parse: (e: string) => Number(e) }, + ], + result: [ + { foo: "c", bar: "d", baz: 2 }, + { foo: "e", bar: "f", baz: 3 }, + ], + }, ]; for (const testCase of parseTestCases) { @@ -608,7 +623,8 @@ for (const testCase of parseTestCases) { name: `[CSV] Parse ${testCase.name}`, async fn(): Promise<void> { const r = await parse(testCase.in, { - header: testCase.header, + skipFirstRow: testCase.skipFirstRow, + columns: testCase.columns, parse: testCase.parse as (input: unknown) => unknown, }); assertEquals(r, testCase.result); |