summary | refs | log | tree | commit | diff
path: root/std/io
diff options
context:
space:
mode:
Diffstat (limited to 'std/io')
-rw-r--r-- std/io/bufio.ts 99
-rw-r--r-- std/io/bufio_test.ts 29
2 files changed, 127 insertions, 1 deletions
diff --git a/std/io/bufio.ts b/std/io/bufio.ts
index f721022f3..c0a52ac6a 100644
--- a/std/io/bufio.ts
+++ b/std/io/bufio.ts
@@ -508,3 +508,102 @@ export class BufWriter implements Writer {
return nn;
}
}
+
/**
 * Build the Knuth-Morris-Pratt failure table for `pat`.
 *
 * Entry `i` of the result is the length of the longest proper prefix of
 * `pat[0..i]` that is also a suffix of `pat[0..i]`.
 *
 * NOTE: the table is a `Uint8Array`, so an entry can hold at most 255; a
 * pattern whose borders grow beyond that length would have its entries
 * wrapped modulo 256.
 *
 * @param pat Pattern bytes to analyse. An empty pattern yields an empty table.
 * @returns A table with the same length as `pat`.
 */
function createLPS(pat: Uint8Array): Uint8Array {
  // Typed arrays are zero-initialised, so lps[0] is already 0.
  const lps = new Uint8Array(pat.length);
  // Length of the border currently being extended.
  let prefixEnd = 0;
  let i = 1;
  while (i < pat.length) {
    if (pat[i] === pat[prefixEnd]) {
      // The current border grows by one byte.
      prefixEnd++;
      lps[i] = prefixEnd;
      i++;
    } else if (prefixEnd === 0) {
      // No border ends at position i.
      lps[i] = 0;
      i++;
    } else {
      // Retry with the next shorter border. Its length is recorded in the
      // table itself (lps), not in the pattern bytes; `i` is left unchanged.
      prefixEnd = lps[prefixEnd - 1];
    }
  }
  return lps;
}
+
/**
 * Read delimited bytes from a Reader.
 *
 * Bytes are pulled from `reader` and scanned for `delim` with a KMP-style
 * matcher. Each time the delimiter is found, the bytes preceding it are
 * yielded without the delimiter itself. When the reader reports EOF, whatever
 * is still buffered is yielded as the final chunk.
 *
 * @param reader Source of bytes.
 * @param delim Byte sequence that separates chunks.
 */
export async function* readDelim(
  reader: Reader,
  delim: Uint8Array
): AsyncIterableIterator<Uint8Array> {
  const patternLength = delim.length;
  const fallback = createLPS(delim);

  // Scratch space handed to reader.read() on every iteration.
  const scratch = new Uint8Array(Math.max(1024, patternLength + 1));
  // Bytes received so far that have not been yielded yet.
  let pending = new Deno.Buffer();

  // Scan position within the pending bytes and number of delimiter bytes
  // matched so far. Both survive across reads so that a delimiter split over
  // two reads is still recognised.
  let cursor = 0;
  let matched = 0;

  for (;;) {
    const result = await reader.read(scratch);
    if (result === Deno.EOF) {
      // Flush the tail that was never terminated by a delimiter.
      yield pending.bytes();
      return;
    }
    if ((result as number) < 0) {
      // A negative count stops iteration; buffered bytes are dropped.
      return;
    }
    await Deno.writeAll(pending, scratch.subarray(0, result as number));

    let view = pending.bytes();
    while (cursor < view.length) {
      if (view[cursor] !== delim[matched]) {
        if (matched === 0) {
          cursor++;
        } else {
          // Standard KMP fallback: keep the cursor, shorten the match.
          matched = fallback[matched - 1];
        }
        continue;
      }

      cursor++;
      matched++;
      if (matched !== patternLength) {
        continue;
      }

      // Whole delimiter seen: everything before it is a finished chunk.
      const chunk = view.subarray(0, cursor - patternLength);
      // The remainder is copied so it no longer aliases the old buffer.
      const rest = view.slice(cursor);
      yield chunk;
      // Unlike plain KMP, matching restarts from scratch on the remainder.
      view = rest;
      cursor = 0;
      matched = 0;
    }
    // Carry the unconsumed bytes (and cursor/matched) into the next read.
    pending = new Deno.Buffer(view);
  }
}
+
/**
 * Read delimited strings from a Reader.
 *
 * The delimiter is encoded with `TextEncoder`, the byte stream is split with
 * `readDelim`, and each resulting chunk is decoded with `TextDecoder`.
 *
 * @param reader Source of bytes.
 * @param delim Separator string.
 */
export async function* readStringDelim(
  reader: Reader,
  delim: string
): AsyncIterableIterator<string> {
  const delimBytes = new TextEncoder().encode(delim);
  const utf8 = new TextDecoder();
  for await (const bytes of readDelim(reader, delimBytes)) {
    const text = utf8.decode(bytes);
    yield text;
  }
}
+
/**
 * Read strings line-by-line from a Reader.
 *
 * Delegates to `readStringDelim` with `"\n"` as the separator.
 *
 * @param reader Source of bytes.
 */
export async function* readLines(
  reader: Reader
): AsyncIterableIterator<string> {
  const lineSeparator = "\n";
  yield* readStringDelim(reader, lineSeparator);
}
diff --git a/std/io/bufio_test.ts b/std/io/bufio_test.ts
index 665c25361..4f4bd48b1 100644
--- a/std/io/bufio_test.ts
+++ b/std/io/bufio_test.ts
@@ -15,7 +15,9 @@ import {
BufReader,
BufWriter,
BufferFullError,
- UnexpectedEOFError
+ UnexpectedEOFError,
+ readStringDelim,
+ readLines
} from "./bufio.ts";
import * as iotest from "./iotest.ts";
import { charCode, copyBytes, stringsReader } from "./util.ts";
@@ -381,3 +383,28 @@ Deno.test(async function bufReaderReadFull(): Promise<void> {
}
}
});
+
// Exercises readStringDelim with a tab separator and readLines with
// newline-separated digits, checking both the count and the exact contents.
Deno.test(async function readStringDelimAndLines(): Promise<void> {
  const encoder = new TextEncoder();

  // Three tab-separated fields, no trailing separator.
  const tabbed = new Buffer(
    encoder.encode("Hello World\tHello World 2\tHello World 3")
  );
  const fields = [];
  for await (const field of readStringDelim(tabbed, "\t")) {
    fields.push(field);
  }
  assertEquals(fields.length, 3);
  assertEquals(fields, ["Hello World", "Hello World 2", "Hello World 3"]);

  // Ten single-digit lines, the last one without a trailing newline.
  const digits = new Buffer(encoder.encode("0\n1\n2\n3\n4\n5\n6\n7\n8\n9"));
  const collected = [];
  for await (const line of readLines(digits)) {
    collected.push(line);
  }
  assertEquals(collected.length, 10);
  assertEquals(collected, ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]);
});