diff options
author | Jesse Jackson <jsejcksn@users.noreply.github.com> | 2020-11-25 08:50:00 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-11-25 09:50:00 -0500 |
commit | ed11eb66871c0cdca96881bd1daffb3e8af72d1a (patch) | |
tree | a10544bf989ff65ab5fd7446b050759ba4f8f978 | |
parent | d40b0711a786d04b5e6321ae9bbf73c11220e865 (diff) |
feat(std/encoding/csv): Add stringify functionality (#8408)
-rw-r--r-- | std/encoding/README.md | 189 | ||||
-rw-r--r-- | std/encoding/csv.ts | 9 | ||||
-rw-r--r-- | std/encoding/csv_stringify.ts | 172 | ||||
-rw-r--r-- | std/encoding/csv_stringify_test.ts | 373 |
4 files changed, 737 insertions, 6 deletions
diff --git a/std/encoding/README.md b/std/encoding/README.md index a165af072..cf889877f 100644 --- a/std/encoding/README.md +++ b/std/encoding/README.md @@ -32,12 +32,24 @@ writeVarbig(w: Deno.Writer, x: bigint, o: VarbigOptions = {}): Promise<number> ### API -#### `readMatrix(reader: BufReader, opt: ReadOptions = { comma: ",", trimLeadingSpace: false, lazyQuotes: false }): Promise<string[][]>` +#### `readMatrix` + +```ts +(reader: BufReader, opt: ReadOptions = { + comma: ",", + trimLeadingSpace: false, + lazyQuotes: false, +}): Promise<string[][]> +``` Parse the CSV from the `reader` with the options provided and return `string[][]`. -#### `parse(input: string | BufReader, opt: ParseOptions = { skipFirstRow: false }): Promise<unknown[]>`: +#### `parse` + +```ts +(input: string | BufReader, opt: ParseOptions = { skipFirstRow: false }): Promise<unknown[]> +``` Parse the CSV string/buffer with the options provided. The result of this function is as follows: @@ -70,8 +82,8 @@ function is as follows: ##### `ReadOptions` -- **`comma?: string;`**: Character which separates values. Default: `','`. -- **`comment?: string;`**: Character to start a comment. Default: `'#'`. +- **`comma?: string;`**: Character which separates values. Default: `","`. +- **`comment?: string;`**: Character to start a comment. Default: `"#"`. - **`trimLeadingSpace?: boolean;`**: Flag to trim the leading space of the value. Default: `false`. - **`lazyQuotes?: boolean;`**: Allow unquoted quote in a quoted field or non @@ -79,7 +91,111 @@ function is as follows: - **`fieldsPerRecord?`**: Enabling the check of fields for each row. If == 0, first row is used as referral for the number of fields. -### Usage +#### `stringify` + +```ts +(data: DataItem[], columns: Column[], options?: StringifyOptions): Promise<string> +``` + +- **`data`** is the source data to stringify. It's an array of items which are + plain objects or arrays. 
+ + `DataItem: Record<string, unknown> | unknown[]` + + ```ts + const data = [ + { + name: "Deno", + repo: { org: "denoland", name: "deno" }, + runsOn: ["Rust", "TypeScript"], + }, + ]; + ``` + +- **`columns`** is a list of instructions for how to target and transform the + data for each column of output. This is also where you can provide an explicit + header name for the column. + + `Column`: + - The most essential aspect of a column is accessing the property holding the + data for that column on each object in the data array. If that member is at + the top level, `Column` can simply be a property accessor, which is either a + `string` (if it's a plain object) or a `number` (if it's an array). + + ```ts + const columns = [ + "name", + ]; + ``` + + Each property accessor will be used as the header for the column: + + | name | + | :--: | + | Deno | + + - If the required data is not at the top level (it's nested in other + objects/arrays), then a simple property accessor won't work, so an array of + them will be required. 
+ + ```ts + const columns = [ + ["repo", "name"], + ["repo", "org"], + ]; + ``` + + When using arrays of property accessors, the header names inherit the value + of the last accessor in each array: + + | name | org | + | :--: | :------: | + | deno | denoland | + + - If the data is not already in the required output format, or a different + column header is desired, then a `ColumnDetails` object type can be used for + each column: + + - **`fn?: (value: any) => string | Promise<string>`** is an optional + function to transform the targeted data into the desired format + - **`header?: string`** is the optional value to use for the column header + name + - **`prop: PropertyAccessor | PropertyAccessor[]`** is the property accessor + (`string` or `number`) or array of property accessors used to access the + data on each object + + ```ts + const columns = [ + "name", + { + prop: ["runsOn", 0], + header: "language 1", + fn: (str: string) => str.toLowerCase(), + }, + { + prop: ["runsOn", 1], + header: "language 2", + fn: (str: string) => str.toLowerCase(), + }, + ]; + ``` + + | name | language 1 | language 2 | + | :--: | :--------: | :--------: | + | Deno | rust | typescript | + +- **`options`** are options for the delimiter-separated output. + + - **`headers?: boolean`**: Whether or not to include the row of headers. + Default: `true` + + - **`separator?: string`**: Delimiter used to separate values. Examples: + - `","` _comma_ (Default) + - `"\t"` _tab_ + - `"|"` _pipe_ + - etc. 
+ +### Basic Usage ```ts import { parse } from "https://deno.land/std@$STD_VERSION/encoding/csv.ts"; @@ -94,6 +210,67 @@ console.log( // [["a", "b", "c"], ["d", "e", "f"]] ``` +```ts +import { + Column, + stringify, +} from "https://deno.land/std@$STD_VERSION/encoding/csv.ts"; + +type Character = { + age: number; + name: { + first: string; + last: string; + }; +}; + +const data: Character[] = [ + { + age: 70, + name: { + first: "Rick", + last: "Sanchez", + }, + }, + { + age: 14, + name: { + first: "Morty", + last: "Smith", + }, + }, +]; + +let columns: Column[] = [ + ["name", "first"], + "age", +]; + +console.log(await stringify(data, columns)); +// first,age +// Rick,70 +// Morty,14 +// + +columns = [ + { + prop: "name", + fn: (name: Character["name"]) => `${name.first} ${name.last}`, + }, + { + prop: "age", + header: "is_adult", + fn: (age: Character["age"]) => String(age >= 18), + }, +]; + +console.log(await stringify(data, columns, { separator: "\t" })); +// name is_adult +// Rick Sanchez true +// Morty Smith false +// +``` + ## TOML This module parse TOML files. It follows as much as possible the @@ -231,7 +408,7 @@ console.log(tomlObject); YAML parser / dumper for Deno. -Heavily inspired from [js-yaml]. +Heavily inspired from [`js-yaml`](https://github.com/nodeca/js-yaml). 
### Basic usage diff --git a/std/encoding/csv.ts b/std/encoding/csv.ts index 78dd602e7..5dd92fb0f 100644 --- a/std/encoding/csv.ts +++ b/std/encoding/csv.ts @@ -9,6 +9,15 @@ import { TextProtoReader } from "../textproto/mod.ts"; import { StringReader } from "../io/readers.ts"; import { assert } from "../_util/assert.ts"; +export { NEWLINE, stringify, StringifyError } from "./csv_stringify.ts"; + +export type { + Column, + ColumnDetails, + DataItem, + StringifyOptions, +} from "./csv_stringify.ts"; + const INVALID_RUNE = ["\r", "\n", '"']; export const ERR_BARE_QUOTE = 'bare " in non-quoted-field'; diff --git a/std/encoding/csv_stringify.ts b/std/encoding/csv_stringify.ts new file mode 100644 index 000000000..4c5f8c816 --- /dev/null +++ b/std/encoding/csv_stringify.ts @@ -0,0 +1,172 @@ +// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license. +// Implements the CSV spec at https://tools.ietf.org/html/rfc4180 + +/** This module is browser compatible. */ + +const QUOTE = '"'; +export const NEWLINE = "\r\n"; + +export class StringifyError extends Error { + readonly name = "StringifyError"; +} + +function getEscapedString(value: unknown, sep: string): string { + if (value === undefined || value === null) return ""; + let str = ""; + + if (typeof value === "object") str = JSON.stringify(value); + else str = String(value); + + // Is regex.test more performant here? If so, how to dynamically create? + // https://stackoverflow.com/questions/3561493/ + if (str.includes(sep) || str.includes(NEWLINE) || str.includes(QUOTE)) { + return `${QUOTE}${str.replaceAll(QUOTE, `${QUOTE}${QUOTE}`)}${QUOTE}`; + } + + return str; +} + +type PropertyAccessor = number | string; + +/** + * @param fn Optional callback for transforming the value + * + * @param header Explicit column header name. If omitted, + * the (final) property accessor is used for this value. 
+ * + * @param prop Property accessor(s) used to access the value on the object + */ +export type ColumnDetails = { + // "unknown" is more type-safe, but inconvenient for user. How to resolve? + // deno-lint-ignore no-explicit-any + fn?: (value: any) => string | Promise<string>; + header?: string; + prop: PropertyAccessor | PropertyAccessor[]; +}; + +export type Column = ColumnDetails | PropertyAccessor | PropertyAccessor[]; + +type NormalizedColumn = Omit<ColumnDetails, "header" | "prop"> & { + header: string; + prop: PropertyAccessor[]; +}; + +function normalizeColumn(column: Column): NormalizedColumn { + let fn: NormalizedColumn["fn"], + header: NormalizedColumn["header"], + prop: NormalizedColumn["prop"]; + + if (typeof column === "object") { + if (Array.isArray(column)) { + header = String(column[column.length - 1]); + prop = column; + } else { + ({ fn } = column); + prop = Array.isArray(column.prop) ? column.prop : [column.prop]; + header = typeof column.header === "string" + ? column.header + : String(prop[prop.length - 1]); + } + } else { + header = String(column); + prop = [column]; + } + + return { fn, header, prop }; +} + +type ObjectWithStringPropertyKeys = Record<string, unknown>; + +/** An object (plain or array) */ +export type DataItem = ObjectWithStringPropertyKeys | unknown[]; + +/** + * Returns an array of values from an object using the property accessors + * (and optional transform function) in each column + */ +async function getValuesFromItem( + item: DataItem, + normalizedColumns: NormalizedColumn[], +): Promise<unknown[]> { + const values: unknown[] = []; + + for (const column of normalizedColumns) { + let value: unknown = item; + + for (const prop of column.prop) { + if (typeof value !== "object" || value === null) continue; + if (Array.isArray(value)) { + if (typeof prop === "number") value = value[prop]; + else { + throw new StringifyError('Property accessor is not of type "number"'); + } + } // I think this assertion is safe. Confirm? 
+ else value = (value as ObjectWithStringPropertyKeys)[prop]; + } + + if (typeof column.fn === "function") value = await column.fn(value); + values.push(value); + } + + return values; +} + +/** + * @param headers Whether or not to include the row of headers. + * Default: `true` + * + * @param separator Delimiter used to separate values. Examples: + * - `","` _comma_ (Default) + * - `"\t"` _tab_ + * - `"|"` _pipe_ + * - etc. + */ +export type StringifyOptions = { + headers?: boolean; + separator?: string; +}; + +/** + * @param data The array of objects to encode + * @param columns Array of values specifying which data to include in the output + * @param options Output formatting options + */ +export async function stringify( + data: DataItem[], + columns: Column[], + options: StringifyOptions = {}, +): Promise<string> { + const { headers, separator: sep } = { + headers: true, + separator: ",", + ...options, + }; + if (sep.includes(QUOTE) || sep.includes(NEWLINE)) { + const message = [ + "Separator cannot include the following strings:", + ' - U+0022: Quotation mark (")', + " - U+000D U+000A: Carriage Return + Line Feed (\\r\\n)", + ].join("\n"); + throw new StringifyError(message); + } + + const normalizedColumns = columns.map(normalizeColumn); + let output = ""; + + if (headers) { + output += normalizedColumns + .map((column) => getEscapedString(column.header, sep)) + .join(sep); + output += NEWLINE; + } + + for (const item of data) { + const values = await getValuesFromItem(item, normalizedColumns); + output += values + .map((value) => getEscapedString(value, sep)) + .join(sep); + output += NEWLINE; + } + + return output; +} diff --git a/std/encoding/csv_stringify_test.ts b/std/encoding/csv_stringify_test.ts new file mode 100644 index 000000000..7cad190fc --- /dev/null +++ b/std/encoding/csv_stringify_test.ts @@ -0,0 +1,373 @@ +// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license. 
+ +import { assertEquals, assertThrowsAsync } from "../testing/asserts.ts"; + +import { + Column, + DataItem, + NEWLINE, + stringify, + StringifyError, + StringifyOptions, +} from "./csv_stringify.ts"; + +type StringifyTestCaseBase = { + columns: Column[]; + data: DataItem[]; + name: string; + options?: StringifyOptions; +}; + +type StringifyTestCaseError = StringifyTestCaseBase & { + errorMessage?: string; + // deno-lint-ignore no-explicit-any + throwsError: new (...args: any[]) => Error; +}; + +type StringifyTestCase = StringifyTestCaseBase & { expected: string }; + +const stringifyTestCases: (StringifyTestCase | StringifyTestCaseError)[] = [ + { + columns: ["a"], + data: [["foo"], ["bar"]], + errorMessage: 'Property accessor is not of type "number"', + name: "[CSV_stringify] Access array index using string", + throwsError: StringifyError, + }, + { + columns: [0], + data: [["foo"], ["bar"]], + errorMessage: [ + "Separator cannot include the following strings:", + ' - U+0022: Quotation mark (")', + " - U+000D U+000A: Carriage Return + Line Feed (\\r\\n)", + ].join("\n"), + name: "[CSV_stringify] Double quote in separator", + options: { separator: '"' }, + throwsError: StringifyError, + }, + { + columns: [0], + data: [["foo"], ["bar"]], + errorMessage: [ + "Separator cannot include the following strings:", + ' - U+0022: Quotation mark (")', + " - U+000D U+000A: Carriage Return + Line Feed (\\r\\n)", + ].join("\n"), + name: "[CSV_stringify] CRLF in separator", + options: { separator: "\r\n" }, + throwsError: StringifyError, + }, + { + columns: [ + { + fn: (obj) => obj.toUpperCase(), + prop: "msg", + }, + ], + data: [{ msg: { value: "foo" } }, { msg: { value: "bar" } }], + name: "[CSV_stringify] Transform function", + throwsError: TypeError, + }, + { + columns: [], + data: [], + expected: NEWLINE, + name: "[CSV_stringify] No data, no columns", + }, + { + columns: [], + data: [], + expected: ``, + name: "[CSV_stringify] No data, no columns, no headers", + options: { 
headers: false }, + }, + { + columns: ["a"], + data: [], + expected: `a${NEWLINE}`, + name: "[CSV_stringify] No data, columns", + }, + { + columns: ["a"], + data: [], + expected: ``, + name: "[CSV_stringify] No data, columns, no headers", + options: { headers: false }, + }, + { + columns: [], + data: [{ a: 1 }, { a: 2 }], + expected: `${NEWLINE}${NEWLINE}${NEWLINE}`, + name: "[CSV_stringify] Data, no columns", + }, + { + columns: [0, 1], + data: [["foo", "bar"], ["baz", "qux"]], + expected: `0\r1${NEWLINE}foo\rbar${NEWLINE}baz\rqux${NEWLINE}`, + name: "[CSV_stringify] Separator: CR", + options: { separator: "\r" }, + }, + { + columns: [0, 1], + data: [["foo", "bar"], ["baz", "qux"]], + expected: `0\n1${NEWLINE}foo\nbar${NEWLINE}baz\nqux${NEWLINE}`, + name: "[CSV_stringify] Separator: LF", + options: { separator: "\n" }, + }, + { + columns: [1], + data: [{ 1: 1 }, { 1: 2 }], + expected: `1${NEWLINE}1${NEWLINE}2${NEWLINE}`, + name: "[CSV_stringify] Column: number accessor, Data: object", + }, + { + columns: [{ header: "Value", prop: "value" }], + data: [{ value: "foo" }, { value: "bar" }], + expected: `foo${NEWLINE}bar${NEWLINE}`, + name: "[CSV_stringify] Explicit header value, no headers", + options: { headers: false }, + }, + { + columns: [1], + data: [["key", "foo"], ["key", "bar"]], + expected: `1${NEWLINE}foo${NEWLINE}bar${NEWLINE}`, + name: "[CSV_stringify] Column: number accessor, Data: array", + }, + { + columns: [[1]], + data: [{ 1: 1 }, { 1: 2 }], + expected: `1${NEWLINE}1${NEWLINE}2${NEWLINE}`, + name: "[CSV_stringify] Column: array number accessor, Data: object", + }, + { + columns: [[1]], + data: [["key", "foo"], ["key", "bar"]], + expected: `1${NEWLINE}foo${NEWLINE}bar${NEWLINE}`, + name: "[CSV_stringify] Column: array number accessor, Data: array", + }, + { + columns: [[1, 1]], + data: [["key", ["key", "foo"]], ["key", ["key", "bar"]]], + expected: `1${NEWLINE}foo${NEWLINE}bar${NEWLINE}`, + name: "[CSV_stringify] Column: array number accessor, Data: 
array", + }, + { + columns: ["value"], + data: [{ value: "foo" }, { value: "bar" }], + expected: `value${NEWLINE}foo${NEWLINE}bar${NEWLINE}`, + name: "[CSV_stringify] Column: string accessor, Data: object", + }, + { + columns: [["value"]], + data: [{ value: "foo" }, { value: "bar" }], + expected: `value${NEWLINE}foo${NEWLINE}bar${NEWLINE}`, + name: "[CSV_stringify] Column: array string accessor, Data: object", + }, + { + columns: [["msg", "value"]], + data: [{ msg: { value: "foo" } }, { msg: { value: "bar" } }], + expected: `value${NEWLINE}foo${NEWLINE}bar${NEWLINE}`, + name: "[CSV_stringify] Column: array string accessor, Data: object", + }, + { + columns: [ + { + header: "Value", + prop: ["msg", "value"], + }, + ], + data: [{ msg: { value: "foo" } }, { msg: { value: "bar" } }], + expected: `Value${NEWLINE}foo${NEWLINE}bar${NEWLINE}`, + name: "[CSV_stringify] Explicit header", + }, + { + columns: [ + { + fn: (str: string) => str.toUpperCase(), + prop: ["msg", "value"], + }, + ], + data: [{ msg: { value: "foo" } }, { msg: { value: "bar" } }], + expected: `value${NEWLINE}FOO${NEWLINE}BAR${NEWLINE}`, + name: "[CSV_stringify] Transform function 1", + }, + { + columns: [ + { + fn: (str: string) => Promise.resolve(str.toUpperCase()), + prop: ["msg", "value"], + }, + ], + data: [{ msg: { value: "foo" } }, { msg: { value: "bar" } }], + expected: `value${NEWLINE}FOO${NEWLINE}BAR${NEWLINE}`, + name: "[CSV_stringify] Transform function 1 async", + }, + { + columns: [ + { + fn: (obj: { value: string }) => obj.value, + prop: "msg", + }, + ], + data: [{ msg: { value: "foo" } }, { msg: { value: "bar" } }], + expected: `msg${NEWLINE}foo${NEWLINE}bar${NEWLINE}`, + name: "[CSV_stringify] Transform function 2", + }, + { + columns: [ + { + fn: (obj: { value: string }) => obj.value, + header: "Value", + prop: "msg", + }, + ], + data: [{ msg: { value: "foo" } }, { msg: { value: "bar" } }], + expected: `Value${NEWLINE}foo${NEWLINE}bar${NEWLINE}`, + name: "[CSV_stringify] Transform 
function 2, explicit header", + }, + { + columns: [0], + data: [[{ value: "foo" }], [{ value: "bar" }]], + expected: + `0${NEWLINE}"{""value"":""foo""}"${NEWLINE}"{""value"":""bar""}"${NEWLINE}`, + name: "[CSV_stringify] Targeted value: object", + }, + { + columns: [0], + data: [ + [[{ value: "foo" }, { value: "bar" }]], + [[{ value: "baz" }, { value: "qux" }]], + ], + expected: + `0${NEWLINE}"[{""value"":""foo""},{""value"":""bar""}]"${NEWLINE}"[{""value"":""baz""},{""value"":""qux""}]"${NEWLINE}`, + name: "[CSV_stringify] Targeted value: arary of objects", + }, + { + columns: [0], + data: [[["foo", "bar"]], [["baz", "qux"]]], + expected: + `0${NEWLINE}"[""foo"",""bar""]"${NEWLINE}"[""baz"",""qux""]"${NEWLINE}`, + name: "[CSV_stringify] Targeted value: array", + }, + { + columns: [0], + data: [[["foo", "bar"]], [["baz", "qux"]]], + expected: + `0${NEWLINE}"[""foo"",""bar""]"${NEWLINE}"[""baz"",""qux""]"${NEWLINE}`, + name: "[CSV_stringify] Targeted value: array, separator: tab", + options: { separator: "\t" }, + }, + { + columns: [0], + data: [[], []], + expected: `0${NEWLINE}${NEWLINE}${NEWLINE}`, + name: "[CSV_stringify] Targeted value: undefined", + }, + { + columns: [0], + data: [[null], [null]], + expected: `0${NEWLINE}${NEWLINE}${NEWLINE}`, + name: "[CSV_stringify] Targeted value: null", + }, + { + columns: [0], + data: [[0xa], [0xb]], + expected: `0${NEWLINE}10${NEWLINE}11${NEWLINE}`, + name: "[CSV_stringify] Targeted value: hex number", + }, + { + columns: [0], + data: [[BigInt("1")], [BigInt("2")]], + expected: `0${NEWLINE}1${NEWLINE}2${NEWLINE}`, + name: "[CSV_stringify] Targeted value: BigInt", + }, + { + columns: [0], + data: [[true], [false]], + expected: `0${NEWLINE}true${NEWLINE}false${NEWLINE}`, + name: "[CSV_stringify] Targeted value: boolean", + }, + { + columns: [0], + data: [["foo"], ["bar"]], + expected: `0${NEWLINE}foo${NEWLINE}bar${NEWLINE}`, + name: "[CSV_stringify] Targeted value: string", + }, + { + columns: [0], + data: [[Symbol("foo")], 
[Symbol("bar")]], + expected: `0${NEWLINE}Symbol(foo)${NEWLINE}Symbol(bar)${NEWLINE}`, + name: "[CSV_stringify] Targeted value: symbol", + }, + { + columns: [0], + data: [[(n: number) => n]], + expected: `0${NEWLINE}(n) => n${NEWLINE}`, + name: "[CSV_stringify] Targeted value: function", + }, + { + columns: [0], + data: [['foo"']], + expected: `0${NEWLINE}"foo"""${NEWLINE}`, + name: "[CSV_stringify] Value with double quote", + }, + { + columns: [0], + data: [["foo\r\n"]], + expected: `0${NEWLINE}"foo\r\n"${NEWLINE}`, + name: "[CSV_stringify] Value with CRLF", + }, + { + columns: [0], + data: [["foo\r"]], + expected: `0${NEWLINE}foo\r${NEWLINE}`, + name: "[CSV_stringify] Value with CR", + }, + { + columns: [0], + data: [["foo\n"]], + expected: `0${NEWLINE}foo\n${NEWLINE}`, + name: "[CSV_stringify] Value with LF", + }, + { + columns: [0], + data: [["foo,"]], + expected: `0${NEWLINE}"foo,"${NEWLINE}`, + name: "[CSV_stringify] Value with comma", + }, + { + columns: [0], + data: [["foo,"]], + expected: `0${NEWLINE}foo,${NEWLINE}`, + name: "[CSV_stringify] Value with comma, tab separator", + options: { separator: "\t" }, + }, +]; + +for (const tc of stringifyTestCases) { + if ((tc as StringifyTestCaseError).throwsError) { + const t = tc as StringifyTestCaseError; + Deno.test({ + async fn() { + await assertThrowsAsync( + async () => { + await stringify(t.data, t.columns, t.options); + }, + t.throwsError, + t.errorMessage, + ); + }, + name: t.name, + }); + } else { + const t = tc as StringifyTestCase; + Deno.test({ + async fn() { + const actual = await stringify(t.data, t.columns, t.options); + assertEquals(actual, t.expected); + }, + name: t.name, + }); + } +} |