diff options
-rw-r--r-- | std/examples/tests/xeval_test.ts | 4 | ||||
-rw-r--r-- | std/examples/xeval.ts | 104 | ||||
-rw-r--r-- | std/io/bufio.ts | 99 | ||||
-rw-r--r-- | std/io/bufio_test.ts | 29 |
4 files changed, 138 insertions, 98 deletions
diff --git a/std/examples/tests/xeval_test.ts b/std/examples/tests/xeval_test.ts index bfd66c097..3df11c8af 100644 --- a/std/examples/tests/xeval_test.ts +++ b/std/examples/tests/xeval_test.ts @@ -27,7 +27,7 @@ const xevalPath = "examples/xeval.ts"; Deno.test(async function xevalCliReplvar(): Promise<void> { const p = run({ - args: [execPath(), xevalPath, "--", "--replvar=abc", "console.log(abc)"], + args: [execPath(), xevalPath, "--replvar=abc", "console.log(abc)"], stdin: "piped", stdout: "piped", stderr: "null" @@ -41,7 +41,7 @@ Deno.test(async function xevalCliReplvar(): Promise<void> { Deno.test(async function xevalCliSyntaxError(): Promise<void> { const p = run({ - args: [execPath(), xevalPath, "--", "("], + args: [execPath(), xevalPath, "("], stdin: "null", stdout: "piped", stderr: "piped" diff --git a/std/examples/xeval.ts b/std/examples/xeval.ts index 75c864745..16ce37fb4 100644 --- a/std/examples/xeval.ts +++ b/std/examples/xeval.ts @@ -1,5 +1,6 @@ import { parse } from "../flags/mod.ts"; -const { Buffer, EOF, args, exit, stdin, writeAll } = Deno; +import { readStringDelim } from "../io/bufio.ts"; +const { args, exit, stdin } = Deno; type Reader = Deno.Reader; /* eslint-disable-next-line max-len */ @@ -10,18 +11,16 @@ const AsyncFunction = Object.getPrototypeOf(async function(): Promise<void> {}) /* eslint-disable max-len */ const HELP_MSG = `xeval -Eval a script on lines from stdin. -Read from standard input and eval code on each whitespace-delimited -string chunks. +Run a script for each new-line or otherwise delimited chunk of standard input. Print all the usernames in /etc/passwd: - cat /etc/passwd | deno -A https://deno.land/std/examples/xeval.ts -- "a = $.split(':'); if (a) console.log(a[0])" + cat /etc/passwd | deno -A https://deno.land/std/examples/xeval.ts "a = $.split(':'); if (a) console.log(a[0])" A complicated way to print the current git branch: - git branch | deno -A https://deno.land/std/examples/xeval.ts -- -I 'line' "if (line.startsWith('*')) console.log(line.slice(2))" + git branch | deno -A https://deno.land/std/examples/xeval.ts -I 'line' "if (line.startsWith('*')) console.log(line.slice(2))" Demonstrates breaking the input up by space delimiter instead of by lines: - cat LICENSE | deno -A https://deno.land/std/examples/xeval.ts -- -d " " "if ($ === 'MIT') console.log('MIT licensed')", + cat LICENSE | deno -A https://deno.land/std/examples/xeval.ts -d " " "if ($ === 'MIT') console.log('MIT licensed')", USAGE: deno -A https://deno.land/std/examples/xeval.ts [OPTIONS] <code> @@ -40,98 +39,12 @@ export interface XevalOptions { const DEFAULT_DELIMITER = "\n"; -// Generate longest proper prefix which is also suffix array. -function createLPS(pat: Uint8Array): Uint8Array { - const lps = new Uint8Array(pat.length); - lps[0] = 0; - let prefixEnd = 0; - let i = 1; - while (i < lps.length) { - if (pat[i] == pat[prefixEnd]) { - prefixEnd++; - lps[i] = prefixEnd; - i++; - } else if (prefixEnd === 0) { - lps[i] = 0; - i++; - } else { - prefixEnd = pat[prefixEnd - 1]; - } - } - return lps; -} - -// Read from reader until EOF and emit string chunks separated -// by the given delimiter. -async function* chunks( - reader: Reader, - delim: string -): AsyncIterableIterator<string> { - const encoder = new TextEncoder(); - const decoder = new TextDecoder(); - // Avoid unicode problems - const delimArr = encoder.encode(delim); - const delimLen = delimArr.length; - const delimLPS = createLPS(delimArr); - - let inputBuffer = new Buffer(); - const inspectArr = new Uint8Array(Math.max(1024, delimLen + 1)); - - // Modified KMP - let inspectIndex = 0; - let matchIndex = 0; - while (true) { - const result = await reader.read(inspectArr); - if (result === EOF) { - // Yield last chunk. - const lastChunk = inputBuffer.toString(); - yield lastChunk; - return; - } - if ((result as number) < 0) { - // Discard all remaining and silently fail. - return; - } - const sliceRead = inspectArr.subarray(0, result as number); - await writeAll(inputBuffer, sliceRead); - - let sliceToProcess = inputBuffer.bytes(); - while (inspectIndex < sliceToProcess.length) { - if (sliceToProcess[inspectIndex] === delimArr[matchIndex]) { - inspectIndex++; - matchIndex++; - if (matchIndex === delimLen) { - // Full match - const matchEnd = inspectIndex - delimLen; - const readyBytes = sliceToProcess.subarray(0, matchEnd); - // Copy - const pendingBytes = sliceToProcess.slice(inspectIndex); - const readyChunk = decoder.decode(readyBytes); - yield readyChunk; - // Reset match, different from KMP. - sliceToProcess = pendingBytes; - inspectIndex = 0; - matchIndex = 0; - } - } else { - if (matchIndex === 0) { - inspectIndex++; - } else { - matchIndex = delimLPS[matchIndex - 1]; - } - } - } - // Keep inspectIndex and matchIndex. - inputBuffer = new Buffer(sliceToProcess); - } -} - export async function xeval( reader: Reader, xevalFunc: XevalFunc, { delimiter = DEFAULT_DELIMITER }: XevalOptions = {} ): Promise<void> { - for await (const chunk of chunks(reader, delimiter)) { + for await (const chunk of readStringDelim(reader, delimiter)) { // Ignore empty chunks. if (chunk.length > 0) { await xevalFunc(chunk); @@ -140,7 +53,7 @@ export async function xeval( } async function main(): Promise<void> { - const parsedArgs = parse(args.slice(1), { + const parsedArgs = parse(args, { boolean: ["help"], string: ["delim", "replvar"], alias: { @@ -155,6 +68,7 @@ async function main(): Promise<void> { }); if (parsedArgs._.length != 1) { console.error(HELP_MSG); + console.log(parsedArgs._); exit(1); } if (parsedArgs.help) { diff --git a/std/io/bufio.ts b/std/io/bufio.ts index f721022f3..c0a52ac6a 100644 --- a/std/io/bufio.ts +++ b/std/io/bufio.ts @@ -508,3 +508,102 @@ export class BufWriter implements Writer { return nn; } } + +/** Generate longest proper prefix which is also suffix array. */ +function createLPS(pat: Uint8Array): Uint8Array { + const lps = new Uint8Array(pat.length); + lps[0] = 0; + let prefixEnd = 0; + let i = 1; + while (i < lps.length) { + if (pat[i] == pat[prefixEnd]) { + prefixEnd++; + lps[i] = prefixEnd; + i++; + } else if (prefixEnd === 0) { + lps[i] = 0; + i++; + } else { + prefixEnd = pat[prefixEnd - 1]; + } + } + return lps; +} + +/** Read delimited bytes from a Reader. */ +export async function* readDelim( + reader: Reader, + delim: Uint8Array +): AsyncIterableIterator<Uint8Array> { + // Avoid unicode problems + const delimLen = delim.length; + const delimLPS = createLPS(delim); + + let inputBuffer = new Deno.Buffer(); + const inspectArr = new Uint8Array(Math.max(1024, delimLen + 1)); + + // Modified KMP + let inspectIndex = 0; + let matchIndex = 0; + while (true) { + const result = await reader.read(inspectArr); + if (result === Deno.EOF) { + // Yield last chunk. + yield inputBuffer.bytes(); + return; + } + if ((result as number) < 0) { + // Discard all remaining and silently fail. + return; + } + const sliceRead = inspectArr.subarray(0, result as number); + await Deno.writeAll(inputBuffer, sliceRead); + + let sliceToProcess = inputBuffer.bytes(); + while (inspectIndex < sliceToProcess.length) { + if (sliceToProcess[inspectIndex] === delim[matchIndex]) { + inspectIndex++; + matchIndex++; + if (matchIndex === delimLen) { + // Full match + const matchEnd = inspectIndex - delimLen; + const readyBytes = sliceToProcess.subarray(0, matchEnd); + // Copy + const pendingBytes = sliceToProcess.slice(inspectIndex); + yield readyBytes; + // Reset match, different from KMP. + sliceToProcess = pendingBytes; + inspectIndex = 0; + matchIndex = 0; + } + } else { + if (matchIndex === 0) { + inspectIndex++; + } else { + matchIndex = delimLPS[matchIndex - 1]; + } + } + } + // Keep inspectIndex and matchIndex. + inputBuffer = new Deno.Buffer(sliceToProcess); + } +} + +/** Read delimited strings from a Reader. */ +export async function* readStringDelim( + reader: Reader, + delim: string +): AsyncIterableIterator<string> { + const encoder = new TextEncoder(); + const decoder = new TextDecoder(); + for await (const chunk of readDelim(reader, encoder.encode(delim))) { + yield decoder.decode(chunk); + } +} + +/** Read strings line-by-line from a Reader. */ +export async function* readLines( + reader: Reader +): AsyncIterableIterator<string> { + yield* readStringDelim(reader, "\n"); +} diff --git a/std/io/bufio_test.ts b/std/io/bufio_test.ts index 665c25361..4f4bd48b1 100644 --- a/std/io/bufio_test.ts +++ b/std/io/bufio_test.ts @@ -15,7 +15,9 @@ import { BufReader, BufWriter, BufferFullError, - UnexpectedEOFError + UnexpectedEOFError, + readStringDelim, + readLines } from "./bufio.ts"; import * as iotest from "./iotest.ts"; import { charCode, copyBytes, stringsReader } from "./util.ts"; @@ -381,3 +383,28 @@ Deno.test(async function bufReaderReadFull(): Promise<void> { } } }); + +Deno.test(async function readStringDelimAndLines(): Promise<void> { + const enc = new TextEncoder(); + const data = new Buffer( + enc.encode("Hello World\tHello World 2\tHello World 3") + ); + const chunks_ = []; + + for await (const c of readStringDelim(data, "\t")) { + chunks_.push(c); + } + + assertEquals(chunks_.length, 3); + assertEquals(chunks_, ["Hello World", "Hello World 2", "Hello World 3"]); + + const linesData = new Buffer(enc.encode("0\n1\n2\n3\n4\n5\n6\n7\n8\n9")); + const lines_ = []; + + for await (const l of readLines(linesData)) { + lines_.push(l); + } + + assertEquals(lines_.length, 10); + assertEquals(lines_, ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]); +}); |