summaryrefslogtreecommitdiff
path: root/std/encoding
diff options
context:
space:
mode:
Diffstat (limited to 'std/encoding')
-rw-r--r-- std/encoding/README.md | 20
-rw-r--r-- std/encoding/csv.ts | 91
-rw-r--r-- std/encoding/csv_test.ts | 62
3 files changed, 101 insertions, 72 deletions
diff --git a/std/encoding/README.md b/std/encoding/README.md
index 21797a451..2f0ac91f1 100644
--- a/std/encoding/README.md
+++ b/std/encoding/README.md
@@ -37,25 +37,29 @@ writeVarbig(w: Deno.Writer, x: bigint, o: VarbigOptions = {}): Promise<number>
Parse the CSV from the `reader` with the options provided and return
`string[][]`.
-#### `parse(input: string | BufReader, opt: ParseOptions = { header: false }): Promise<unknown[]>`:
+#### `parse(input: string | BufReader, opt: ParseOptions = { skipFirstRow: false }): Promise<unknown[]>`:
Parse the CSV string/buffer with the options provided. The result of this
function is as follows:
-- If you don't provide both `opt.header` and `opt.parse`, it returns
- `string[][]`.
-- If you provide `opt.header` but not `opt.parse`, it returns `object[]`.
+- If you provide none of `opt.skipFirstRow`, `opt.columns`, and `opt.parse`, it
+ returns `string[][]`.
+- If you provide `opt.skipFirstRow` or `opt.columns` but not `opt.parse`, it
+ returns `object[]`.
- If you provide `opt.parse`, it returns an array where each element is the
value returned from `opt.parse`.
##### `ParseOptions`
-- **`header: boolean | string[] | HeaderOptions[];`**: If a boolean is provided,
- the first line will be used as Header definitions. If `string[]` or
- `HeaderOptions[]` those names will be used for header definition.
+- **`skipFirstRow: boolean;`**: If you provide `skipFirstRow: true` and
+ `columns`, the first line will be skipped. If you provide `skipFirstRow: true`
+ but not `columns`, the first line will be skipped and used as header
+ definitions.
+- **`columns: string[] | ColumnOptions[];`**: If you provide `string[]` or
+ `ColumnOptions[]`, those names will be used for header definition.
- **`parse?: (input: unknown) => unknown;`**: Parse function for the row, which
will be executed after parsing of all columns. Therefore if you don't provide
- header and parse function with headers, input will be `string[]`.
+ `skipFirstRow` or `columns`, the input passed to `parse` will be `string[]`.
##### `HeaderOptions`
diff --git a/std/encoding/csv.ts b/std/encoding/csv.ts
index bab856b51..78dd602e7 100644
--- a/std/encoding/csv.ts
+++ b/std/encoding/csv.ts
@@ -52,7 +52,7 @@ export class ParseError extends Error {
}
/**
- * @property comma - Character which separates values. Default: ','
+ * @property separator - Character which separates values. Default: ','
* @property comment - Character to start a comment. Default: '#'
* @property trimLeadingSpace - Flag to trim the leading space of the value.
* Default: 'false'
@@ -62,7 +62,7 @@ export class ParseError extends Error {
* If == 0, first row is used as referral for the number of fields.
*/
export interface ReadOptions {
- comma?: string;
+ separator?: string;
comment?: string;
trimLeadingSpace?: boolean;
lazyQuotes?: boolean;
@@ -70,16 +70,16 @@ export interface ReadOptions {
}
function chkOptions(opt: ReadOptions): void {
- if (!opt.comma) {
- opt.comma = ",";
+ if (!opt.separator) {
+ opt.separator = ",";
}
if (!opt.trimLeadingSpace) {
opt.trimLeadingSpace = false;
}
if (
- INVALID_RUNE.includes(opt.comma) ||
+ INVALID_RUNE.includes(opt.separator) ||
(typeof opt.comment === "string" && INVALID_RUNE.includes(opt.comment)) ||
- opt.comma === opt.comment
+ opt.separator === opt.comment
) {
throw new Error(ERR_INVALID_DELIM);
}
@@ -88,7 +88,7 @@ function chkOptions(opt: ReadOptions): void {
async function readRecord(
startLine: number,
reader: BufReader,
- opt: ReadOptions = { comma: ",", trimLeadingSpace: false },
+ opt: ReadOptions = { separator: ",", trimLeadingSpace: false },
): Promise<string[] | null> {
const tp = new TextProtoReader(reader);
let line = await readLine(tp);
@@ -103,13 +103,13 @@ async function readRecord(
return [];
}
- assert(opt.comma != null);
+ assert(opt.separator != null);
let fullLine = line;
let quoteError: ParseError | null = null;
const quote = '"';
const quoteLen = quote.length;
- const commaLen = opt.comma.length;
+ const separatorLen = opt.separator.length;
let recordBuffer = "";
const fieldIndexes = [] as number[];
parseField:
@@ -120,7 +120,7 @@ async function readRecord(
if (line.length === 0 || !line.startsWith(quote)) {
// Non-quoted string field
- const i = line.indexOf(opt.comma);
+ const i = line.indexOf(opt.separator);
let field = line;
if (i >= 0) {
field = field.substring(0, i);
@@ -144,7 +144,7 @@ async function readRecord(
recordBuffer += field;
fieldIndexes.push(recordBuffer.length);
if (i >= 0) {
- line = line.substring(i + commaLen);
+ line = line.substring(i + separatorLen);
continue parseField;
}
break parseField;
@@ -161,9 +161,9 @@ async function readRecord(
// `""` sequence (append quote).
recordBuffer += quote;
line = line.substring(quoteLen);
- } else if (line.startsWith(opt.comma)) {
+ } else if (line.startsWith(opt.separator)) {
// `","` sequence (end of field).
- line = line.substring(commaLen);
+ line = line.substring(separatorLen);
fieldIndexes.push(recordBuffer.length);
continue parseField;
} else if (0 === line.length) {
@@ -281,7 +281,7 @@ async function readLine(tp: TextProtoReader): Promise<string | null> {
export async function readMatrix(
reader: BufReader,
opt: ReadOptions = {
- comma: ",",
+ separator: ",",
trimLeadingSpace: false,
lazyQuotes: false,
},
@@ -324,13 +324,13 @@ export async function readMatrix(
/**
* Parse the CSV string/buffer with the options provided.
*
- * HeaderOptions provides the column definition
+ * ColumnOptions provides the column definition
* and the parse function for each entry of the
* column.
*/
-export interface HeaderOptions {
+export interface ColumnOptions {
/**
- * Name of the header to be used as property
+ * Name of the column to be used as property
*/
name: string;
/**
@@ -343,14 +343,20 @@ export interface HeaderOptions {
export interface ParseOptions extends ReadOptions {
/**
- * If a boolean is provided, the first line will be used as Header definitions.
- * If `string[]` or `HeaderOptions[]` those names will be used for header definition.
+ * If you provide `skipFirstRow: true` and `columns`, the first line will be skipped.
+ * If you provide `skipFirstRow: true` but not `columns`, the first line will be skipped and used as header definitions.
*/
- header: boolean | string[] | HeaderOptions[];
+ skipFirstRow?: boolean;
+
+ /**
+ * If you provide `string[]` or `ColumnOptions[]`, those names will be used for header definition.
+ */
+ columns?: string[] | ColumnOptions[];
+
/** Parse function for rows.
* Example:
* const r = await parseFile('a,b,c\ne,f,g\n', {
- * header: ["this", "is", "sparta"],
+ * columns: ["this", "is", "sparta"],
* parse: (e: Record<string, unknown>) => {
* return { super: e.this, street: e.is, fighter: e.sparta };
* }
@@ -370,14 +376,14 @@ export interface ParseOptions extends ReadOptions {
* for columns and rows.
* @param input Input to parse. Can be a string or BufReader.
* @param opt options of the parser.
- * @returns If you don't provide both `opt.header` and `opt.parse`, it returns `string[][]`.
- * If you provide `opt.header` but not `opt.parse`, it returns `object[]`.
+ * @returns If you provide none of `opt.skipFirstRow`, `opt.columns`, and `opt.parse`, it returns `string[][]`.
+ * If you provide `opt.skipFirstRow` or `opt.columns` but not `opt.parse`, it returns `object[]`.
* If you provide `opt.parse`, it returns an array where each element is the value returned from `opt.parse`.
*/
export async function parse(
input: string | BufReader,
opt: ParseOptions = {
- header: false,
+ skipFirstRow: false,
},
): Promise<unknown[]> {
let r: string[][];
@@ -386,27 +392,15 @@ export async function parse(
} else {
r = await readMatrix(new BufReader(new StringReader(input)), opt);
}
- if (opt.header) {
- let headers: HeaderOptions[] = [];
+ if (opt.skipFirstRow || opt.columns) {
+ let headers: ColumnOptions[] = [];
let i = 0;
- if (Array.isArray(opt.header)) {
- if (typeof opt.header[0] !== "string") {
- headers = opt.header as HeaderOptions[];
- } else {
- const h = opt.header as string[];
- headers = h.map(
- (e): HeaderOptions => {
- return {
- name: e,
- };
- },
- );
- }
- } else {
+
+ if (opt.skipFirstRow) {
const head = r.shift();
assert(head != null);
headers = head.map(
- (e): HeaderOptions => {
+ (e): ColumnOptions => {
return {
name: e,
};
@@ -414,6 +408,21 @@ export async function parse(
);
i++;
}
+
+ if (opt.columns) {
+ if (typeof opt.columns[0] !== "string") {
+ headers = opt.columns as ColumnOptions[];
+ } else {
+ const h = opt.columns as string[];
+ headers = h.map(
+ (e): ColumnOptions => {
+ return {
+ name: e,
+ };
+ },
+ );
+ }
+ }
return r.map((e): unknown => {
if (e.length !== headers.length) {
throw `Error number of fields line:${i}`;
diff --git a/std/encoding/csv_test.ts b/std/encoding/csv_test.ts
index 1a2d892cc..e81c1401f 100644
--- a/std/encoding/csv_test.ts
+++ b/std/encoding/csv_test.ts
@@ -17,6 +17,7 @@ import {
import { StringReader } from "../io/readers.ts";
import { BufReader } from "../io/bufio.ts";
+// Test cases for `readMatrix()`
const testCases = [
{
Name: "Simple",
@@ -60,7 +61,7 @@ zzz,yyy,xxx`,
Name: "Semicolon",
Input: "a;b;c\n",
Output: [["a", "b", "c"]],
- Comma: ";",
+ Separator: ";",
},
{
Name: "MultiLine",
@@ -334,14 +335,14 @@ x,,,
Input: "a£b,c£ \td,e\n€ comment\n",
Output: [["a", "b,c", "d,e"]],
TrimLeadingSpace: true,
- Comma: "£",
+ Separator: "£",
Comment: "€",
},
{
Name: "NonASCIICommaAndCommentWithQuotes",
Input: 'a€" b,"€ c\nλ comment\n',
Output: [["a", " b,", " c"]],
- Comma: "€",
+ Separator: "€",
Comment: "λ",
},
{
@@ -350,7 +351,7 @@ x,,,
Name: "NonASCIICommaConfusion",
Input: '"abθcd"λefθgh',
Output: [["abθcd", "efθgh"]],
- Comma: "λ",
+ Separator: "λ",
Comment: "€",
},
{
@@ -415,17 +416,17 @@ x,,,
},
{
Name: "BadComma1",
- Comma: "\n",
+ Separator: "\n",
Error: new Error(ERR_INVALID_DELIM),
},
{
Name: "BadComma2",
- Comma: "\r",
+ Separator: "\r",
Error: new Error(ERR_INVALID_DELIM),
},
{
Name: "BadComma3",
- Comma: '"',
+ Separator: '"',
Error: new Error(ERR_INVALID_DELIM),
},
{
@@ -440,7 +441,7 @@ x,,,
},
{
Name: "BadCommaComment",
- Comma: "X",
+ Separator: "X",
Comment: "X",
Error: new Error(ERR_INVALID_DELIM),
},
@@ -449,13 +450,13 @@ for (const t of testCases) {
Deno.test({
name: `[CSV] ${t.Name}`,
async fn(): Promise<void> {
- let comma = ",";
+ let separator = ",";
let comment: string | undefined;
let fieldsPerRec: number | undefined;
let trim = false;
let lazyquote = false;
- if (t.Comma) {
- comma = t.Comma;
+ if (t.Separator) {
+ separator = t.Separator;
}
if (t.Comment) {
comment = t.Comment;
@@ -475,7 +476,7 @@ for (const t of testCases) {
await readMatrix(
new BufReader(new StringReader(t.Input ?? "")),
{
- comma: comma,
+ separator,
comment: comment,
trimLeadingSpace: trim,
fieldsPerRecord: fieldsPerRec,
@@ -489,7 +490,7 @@ for (const t of testCases) {
actual = await readMatrix(
new BufReader(new StringReader(t.Input ?? "")),
{
- comma: comma,
+ separator,
comment: comment,
trimLeadingSpace: trim,
fieldsPerRecord: fieldsPerRec,
@@ -507,19 +508,19 @@ const parseTestCases = [
{
name: "simple",
in: "a,b,c",
- header: false,
+ skipFirstRow: false,
result: [["a", "b", "c"]],
},
{
name: "simple Bufreader",
in: new BufReader(new StringReader("a,b,c")),
- header: false,
+ skipFirstRow: false,
result: [["a", "b", "c"]],
},
{
name: "multiline",
in: "a,b,c\ne,f,g\n",
- header: false,
+ skipFirstRow: false,
result: [
["a", "b", "c"],
["e", "f", "g"],
@@ -528,13 +529,13 @@ const parseTestCases = [
{
name: "header mapping boolean",
in: "a,b,c\ne,f,g\n",
- header: true,
+ skipFirstRow: true,
result: [{ a: "e", b: "f", c: "g" }],
},
{
name: "header mapping array",
in: "a,b,c\ne,f,g\n",
- header: ["this", "is", "sparta"],
+ columns: ["this", "is", "sparta"],
result: [
{ this: "a", is: "b", sparta: "c" },
{ this: "e", is: "f", sparta: "g" },
@@ -543,7 +544,7 @@ const parseTestCases = [
{
name: "header mapping object",
in: "a,b,c\ne,f,g\n",
- header: [{ name: "this" }, { name: "is" }, { name: "sparta" }],
+ columns: [{ name: "this" }, { name: "is" }, { name: "sparta" }],
result: [
{ this: "a", is: "b", sparta: "c" },
{ this: "e", is: "f", sparta: "g" },
@@ -552,7 +553,7 @@ const parseTestCases = [
{
name: "header mapping parse entry",
in: "a,b,c\ne,f,g\n",
- header: [
+ columns: [
{
name: "this",
parse: (e: string): string => {
@@ -583,7 +584,7 @@ const parseTestCases = [
parse: (e: string[]): unknown => {
return { super: e[0], street: e[1], fighter: e[2] };
},
- header: false,
+ skipFirstRow: false,
result: [
{ super: "a", street: "b", fighter: "c" },
{ super: "e", street: "f", fighter: "g" },
@@ -592,7 +593,7 @@ const parseTestCases = [
{
name: "header mapping object parseline",
in: "a,b,c\ne,f,g\n",
- header: [{ name: "this" }, { name: "is" }, { name: "sparta" }],
+ columns: [{ name: "this" }, { name: "is" }, { name: "sparta" }],
parse: (e: Record<string, unknown>): unknown => {
return { super: e.this, street: e.is, fighter: e.sparta };
},
@@ -601,6 +602,20 @@ const parseTestCases = [
{ super: "e", street: "f", fighter: "g" },
],
},
+ {
+ name: "provides both opts.skipFirstRow and opts.columns",
+ in: "a,b,1\nc,d,2\ne,f,3",
+ skipFirstRow: true,
+ columns: [
+ { name: "foo" },
+ { name: "bar" },
+ { name: "baz", parse: (e: string) => Number(e) },
+ ],
+ result: [
+ { foo: "c", bar: "d", baz: 2 },
+ { foo: "e", bar: "f", baz: 3 },
+ ],
+ },
];
for (const testCase of parseTestCases) {
@@ -608,7 +623,8 @@ for (const testCase of parseTestCases) {
name: `[CSV] Parse ${testCase.name}`,
async fn(): Promise<void> {
const r = await parse(testCase.in, {
- header: testCase.header,
+ skipFirstRow: testCase.skipFirstRow,
+ columns: testCase.columns,
parse: testCase.parse as (input: unknown) => unknown,
});
assertEquals(r, testCase.result);