summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarcos Casagrande <marcoscvp90@gmail.com>2020-06-10 23:10:47 +0200
committerGitHub <noreply@github.com>2020-06-10 17:10:47 -0400
commitfa6f10219e096e0c2765628ae3d5bc733e489adb (patch)
tree680a758535b64e2bd1815e6209069700b01f980f
parent408edbb065cb0a7b375d83df5c7f61f62b4d47e6 (diff)
fix(std/archive): untar (#6217)
- Fix `Untar` for tarballs with multiple files (Currently throwing `"checksum error"`) - Fix parsing, now all `ustar` versions should be supported. Linux `tar -cvf deno.tar dir/` is not working on master. - Added `asyncIterator` - Added README.md
-rw-r--r--std/archive/README.md60
-rw-r--r--std/archive/tar.ts263
-rw-r--r--std/archive/tar_test.ts321
-rw-r--r--std/archive/testdata/deno.tarbin0 -> 10240 bytes
4 files changed, 583 insertions, 61 deletions
diff --git a/std/archive/README.md b/std/archive/README.md
new file mode 100644
index 000000000..efa258ca4
--- /dev/null
+++ b/std/archive/README.md
@@ -0,0 +1,60 @@
+# Usage
+
+## Tar
+
+```ts
+import { Tar } from "https://deno.land/std/archive/tar.ts";
+
+const tar = new Tar();
+const content = new TextEncoder().encode("Deno.land");
+await tar.append("deno.txt", {
+ reader: new Deno.Buffer(content),
+ contentSize: content.byteLength,
+});
+
+// Or specifying a filePath
+await tar.append("land.txt", {
+ filePath: "./land.txt",
+});
+
+// use tar.getReader() to read the contents
+
+const writer = await Deno.open("./out.tar", { write: true, create: true });
+await Deno.copy(tar.getReader(), writer);
+writer.close();
+```
+
+## Untar
+
+```ts
+import { Untar } from "https://deno.land/std/archive/tar.ts";
+import { ensureFile } from "https://deno.land/std/fs/ensure_file.ts";
+import { ensureDir } from "https://deno.land/std/fs/ensure_dir.ts";
+
+const reader = await Deno.open("./out.tar", { read: true });
+const untar = new Untar(reader);
+
+for await (const entry of untar) {
+  console.log(entry); // metadata
+  /*
+    fileName: "archive/deno.txt",
+    fileMode: 33204,
+    mtime: 1591657305,
+    uid: 0,
+    gid: 0,
+    fileSize: 24400,
+    type: "file"
+  */
+
+  if (entry.type === "directory") {
+    await ensureDir(entry.fileName);
+    continue;
+  }
+
+  await ensureFile(entry.fileName);
+  const file = await Deno.open(entry.fileName, { write: true });
+  try {
+    // <entry> is a reader
+    await Deno.copy(entry, file);
+  } finally {
+    // release the output file before moving on to the next entry
+    file.close();
+  }
+}
+reader.close();
+```
diff --git a/std/archive/tar.ts b/std/archive/tar.ts
index d549a4623..8ec240764 100644
--- a/std/archive/tar.ts
+++ b/std/archive/tar.ts
@@ -27,16 +27,42 @@
* THE SOFTWARE.
*/
import { MultiReader } from "../io/readers.ts";
-import { BufReader } from "../io/bufio.ts";
+import { PartialReadError } from "../io/bufio.ts";
import { assert } from "../_util/assert.ts";
+type Reader = Deno.Reader;
+type Seeker = Deno.Seeker;
+
const recordSize = 512;
const ustar = "ustar\u000000";
+// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
+// eight checksum bytes taken to be ascii spaces (decimal value 32)
+const initialChecksum = 8 * 32;
+
+/**
+ * Fills `p` completely from `reader`, issuing as many reads as it takes.
+ *
+ * Resolves to `null` when the reader is already at EOF before a single byte
+ * was read, and to the number of bytes read (`p.length`) otherwise.
+ * Throws `PartialReadError` when EOF is hit after `p` was partially filled.
+ */
+async function readBlock(
+  reader: Deno.Reader,
+  p: Uint8Array
+): Promise<number | null> {
+  let filled = 0;
+  for (;;) {
+    if (filled >= p.length) {
+      return filled;
+    }
+    const chunk = await reader.read(p.subarray(filled));
+    if (chunk !== null) {
+      filled += chunk;
+      continue;
+    }
+    // EOF: clean only if nothing was read yet
+    if (filled === 0) {
+      return null;
+    }
+    throw new PartialReadError();
+  }
+}
+
/**
* Simple file reader
*/
-class FileReader implements Deno.Reader {
+class FileReader implements Reader {
private file?: Deno.File;
constructor(private filePath: string) {}
@@ -79,24 +105,34 @@ function pad(num: number, bytes: number, base?: number): string {
return "000000000000".substr(numString.length + 12 - bytes) + numString;
}
+/**
+ * Maps the single-character ustar `typeflag` to a readable entry type.
+ * Covers the flags "0" to "7" defined for the ustar interchange format;
+ * the empty string is treated like "0" (regular file).
+ */
+const types: { [key: string]: string } = {
+  "": "file",
+  "0": "file",
+  "1": "link",
+  "2": "symlink",
+  "3": "character-device",
+  "4": "block-device",
+  "5": "directory",
+  "6": "fifo",
+  "7": "contiguous-file",
+};
+
/*
struct posix_header { // byte offset
- char name[100]; // 0
- char mode[8]; // 100
- char uid[8]; // 108
- char gid[8]; // 116
- char size[12]; // 124
- char mtime[12]; // 136
- char chksum[8]; // 148
- char typeflag; // 156
- char linkname[100]; // 157
- char magic[6]; // 257
- char version[2]; // 263
- char uname[32]; // 265
- char gname[32]; // 297
- char devmajor[8]; // 329
- char devminor[8]; // 337
- char prefix[155]; // 345
+ char name[100]; // 0
+ char mode[8]; // 100
+ char uid[8]; // 108
+ char gid[8]; // 116
+ char size[12]; // 124
+ char mtime[12]; // 136
+ char chksum[8]; // 148
+ char typeflag; // 156
+ char linkname[100]; // 157
+ char magic[6]; // 257
+ char version[2]; // 263
+ char uname[32]; // 265
+ char gname[32]; // 297
+ char devmajor[8]; // 329
+ char devminor[8]; // 337
+ char prefix[155]; // 345
// 500
};
*/
@@ -198,6 +234,10 @@ function parseHeader(buffer: Uint8Array): { [key: string]: Uint8Array } {
return data;
}
+/** Raw header fields of one record, keyed by field name. */
+type TarHeader = Record<string, Uint8Array>;
+
export interface TarData {
fileName?: string;
fileNamePrefix?: string;
@@ -221,7 +261,7 @@ export interface TarDataWithSource extends TarData {
/**
* buffer to read
*/
- reader?: Deno.Reader;
+ reader?: Reader;
}
export interface TarInfo {
@@ -231,6 +271,7 @@ export interface TarInfo {
gid?: number;
owner?: string;
group?: string;
+ type?: string;
}
export interface TarOptions extends TarInfo {
@@ -242,7 +283,7 @@ export interface TarOptions extends TarInfo {
/**
* append any arbitrary content
*/
- reader?: Deno.Reader;
+ reader?: Reader;
/**
* size of the content to be appended
@@ -250,10 +291,14 @@ export interface TarOptions extends TarInfo {
contentSize?: number;
}
-export interface UntarOptions extends TarInfo {
+export interface TarMeta extends TarInfo {
fileName: string;
+ fileSize?: number;
}
+// eslint-disable-next-line @typescript-eslint/no-empty-interface
+interface TarEntry extends TarMeta {}
+
/**
* A class to create a tar archive
*/
@@ -364,8 +409,8 @@ export class Tar {
/**
* Get a Reader instance for this tar data
*/
- getReader(): Deno.Reader {
- const readers: Deno.Reader[] = [];
+ getReader(): Reader {
+ const readers: Reader[] = [];
this.data.forEach((tarData): void => {
let { reader } = tarData;
const { filePath } = tarData;
@@ -395,44 +440,132 @@ export class Tar {
}
}
+/**
+ * One member of a tar archive. The metadata passed to the constructor is
+ * copied onto the instance, and the instance itself is a `Reader` over the
+ * member's body.
+ */
+class TarEntry implements Reader {
+  #header: TarHeader;
+  #reader: Reader | (Reader & Deno.Seeker);
+  // Size of the body in bytes, taken from `fileSize`
+  #size: number;
+  // Bytes pulled from the underlying reader so far (body and padding)
+  #read = 0;
+  #consumed = false;
+  // Body size rounded up to a whole number of records
+  #entrySize: number;
+  constructor(
+    meta: TarMeta,
+    header: TarHeader,
+    reader: Reader | (Reader & Deno.Seeker)
+  ) {
+    Object.assign(this, meta);
+    this.#header = header;
+    this.#reader = reader;
+
+    // File Size
+    this.#size = this.fileSize || 0;
+    // Entry Size
+    const blocks = Math.ceil(this.#size / recordSize);
+    this.#entrySize = blocks * recordSize;
+  }
+
+  /**
+   * `true` once the entry was read to its end, the underlying reader ran
+   * dry, or `discard()` was called.
+   */
+  get consumed(): boolean {
+    return this.#consumed;
+  }
+
+  /**
+   * Copies up to `p.length` bytes of the body into `p`.
+   * Resolves to the number of bytes copied, or `null` when the body is
+   * exhausted. Padding after the body is read but never handed out.
+   */
+  async read(p: Uint8Array): Promise<number | null> {
+    // Bytes of this entry (body and padding) still in the archive
+    const entryBytesLeft = this.#entrySize - this.#read;
+    if (entryBytesLeft <= 0) {
+      // Body and padding are fully read, nothing is left to discard
+      this.#consumed = true;
+      return null;
+    }
+
+    // Never read more than p.length nor past the end of the entry,
+    // otherwise the next header would be swallowed
+    const block = new Uint8Array(Math.min(p.length, entryBytesLeft));
+    const n = await readBlock(this.#reader, block);
+    const bytesLeft = this.#size - this.#read;
+
+    this.#read += n || 0;
+    if (n === null || bytesLeft <= 0) {
+      // The underlying reader is at EOF, so there is nothing to discard
+      if (n === null) this.#consumed = true;
+      return null;
+    }
+
+    // Hand out body bytes only; whatever follows is zero padding
+    const offset = Math.min(bytesLeft, n);
+    p.set(block.subarray(0, offset), 0);
+
+    return offset;
+  }
+
+  /**
+   * Skips whatever is left of this entry so the underlying reader is
+   * positioned for the next header. Does nothing if already consumed.
+   */
+  async discard(): Promise<void> {
+    // Discard current entry
+    if (this.#consumed) return;
+    this.#consumed = true;
+
+    if (typeof (this.#reader as Seeker).seek === "function") {
+      // Seekable source: jump over the remaining bytes without reading them
+      await (this.#reader as Seeker).seek(
+        this.#entrySize - this.#read,
+        Deno.SeekMode.Current
+      );
+      this.#read = this.#entrySize;
+    } else {
+      // Plain reader: the remaining bytes have to be read to get past them
+      await Deno.readAll(this);
+    }
+  }
+}
+
/**
- * A class to create a tar archive
+ * A class to extract a tar archive
*/
export class Untar {
- reader: BufReader;
+ reader: Reader;
block: Uint8Array;
+ #entry: TarEntry | undefined;
- constructor(reader: Deno.Reader) {
- this.reader = new BufReader(reader);
+ constructor(reader: Reader) {
+ this.reader = reader;
this.block = new Uint8Array(recordSize);
}
- async extract(writer: Deno.Writer): Promise<UntarOptions> {
- await this.reader.readFull(this.block);
+  /**
+   * Sums the 512 bytes of a header record, with the 8 bytes of the checksum
+   * field (offsets 148 to 155) counted as `initialChecksum` instead of their
+   * stored value.
+   */
+  #checksum = (header: Uint8Array): number => {
+    let total = initialChecksum;
+    // bytes before the checksum field
+    for (let i = 0; i < 148; i++) {
+      total += header[i];
+    }
+    // bytes after the checksum field
+    for (let i = 156; i < 512; i++) {
+      total += header[i];
+    }
+    return total;
+  };
+
+ /**
+  * Reads and validates the next header record.
+  * Resolves to `null` at the end of the archive: either the underlying
+  * reader is exhausted, or the record sums to nothing outside its checksum
+  * field. Throws on a checksum mismatch or when the magic does not start
+  * with "ustar".
+  */
+ #getHeader = async (): Promise<TarHeader | null> => {
+ const rr = await readBlock(this.reader, this.block);
+ // Reader exhausted without end-of-archive records: `this.block` still
+ // holds the previous header, so stop instead of parsing stale data again.
+ if (rr === null) return null;
const header = parseHeader(this.block);
// calculate the checksum
- let checksum = 0;
- const encoder = new TextEncoder(),
- decoder = new TextDecoder("ascii");
- Object.keys(header)
- .filter((key): boolean => key !== "checksum")
- .forEach(function (key): void {
- checksum += header[key].reduce((p, c): number => p + c, 0);
- });
- checksum += encoder.encode(" ").reduce((p, c): number => p + c, 0);
+ const decoder = new TextDecoder();
+ const checksum = this.#checksum(this.block);
if (parseInt(decoder.decode(header.checksum), 8) !== checksum) {
+ if (checksum === initialChecksum) {
+ // EOF
+ return null;
+ }
throw new Error("checksum error");
}
const magic = decoder.decode(header.ustar);
- if (magic !== ustar) {
+
+ // Only the "ustar" prefix is checked; the bytes after it are not compared
+ if (!magic.startsWith("ustar")) {
throw new Error(`unsupported archive format: ${magic}`);
}
+ return header;
+ };
+
+ #getMetadata = (header: TarHeader): TarMeta => {
+ const decoder = new TextDecoder();
// get meta data
- const meta: UntarOptions = {
+ const meta: TarMeta = {
fileName: decoder.decode(trim(header.fileName)),
};
const fileNamePrefix = trim(header.fileNamePrefix);
@@ -450,23 +583,45 @@ export class Untar {
meta[key] = parseInt(decoder.decode(arr), 8);
}
});
- (["owner", "group"] as ["owner", "group"]).forEach((key): void => {
- const arr = trim(header[key]);
- if (arr.byteLength > 0) {
- meta[key] = decoder.decode(arr);
+ (["owner", "group", "type"] as ["owner", "group", "type"]).forEach(
+ (key): void => {
+ const arr = trim(header[key]);
+ if (arr.byteLength > 0) {
+ meta[key] = decoder.decode(arr);
+ }
}
- });
+ );
- // read the file content
- const len = parseInt(decoder.decode(header.fileSize), 8);
- let rest = len;
- while (rest > 0) {
- await this.reader.readFull(this.block);
- const arr = rest < recordSize ? this.block.subarray(0, rest) : this.block;
- await Deno.copy(new Deno.Buffer(arr), writer);
- rest -= recordSize;
- }
+ meta.fileSize = parseInt(decoder.decode(header.fileSize), 8);
+ meta.type = types[meta.type as string] || meta.type;
return meta;
+ };
+
+  /**
+   * Advances to the next entry of the archive.
+   * Resolves to the entry, or to `null` when there are no more entries.
+   */
+  async extract(): Promise<TarEntry | null> {
+    const previous = this.#entry;
+    if (previous !== undefined && !previous.consumed) {
+      // The previous body was not read to its end: skip what is left of it
+      // so the reader is positioned at the next header.
+      await previous.discard();
+    }
+
+    const header = await this.#getHeader();
+    if (header === null) {
+      return null;
+    }
+
+    const meta = this.#getMetadata(header);
+    const entry = new TarEntry(meta, header, this.reader);
+    this.#entry = entry;
+    return entry;
+  }
+
+  /** Yields the entries of the archive until `extract()` resolves to `null`. */
+  async *[Symbol.asyncIterator](): AsyncIterableIterator<TarEntry> {
+    for (
+      let entry = await this.extract();
+      entry !== null;
+      entry = await this.extract()
+    ) {
+      yield entry;
+    }
}
}
diff --git a/std/archive/tar_test.ts b/std/archive/tar_test.ts
index 0df9956f2..69f1ec3f3 100644
--- a/std/archive/tar_test.ts
+++ b/std/archive/tar_test.ts
@@ -8,13 +8,40 @@
* **to run this test**
* deno run --allow-read archive/tar_test.ts
*/
-import { assertEquals } from "../testing/asserts.ts";
+import { assertEquals, assert } from "../testing/asserts.ts";
import { resolve } from "../path/mod.ts";
import { Tar, Untar } from "./tar.ts";
const filePath = resolve("archive", "testdata", "example.txt");
+/** Describes one file to be placed into a test archive. */
+interface TestEntry {
+ // name of the entry inside the archive
+ name: string;
+ // in-memory content of the entry, if any
+ content?: Uint8Array;
+ // path of a file on disk to take the content from, if any
+ filePath?: string;
+}
+
+/**
+ * Builds a `Tar` holding `entries`, appended in the given order.
+ * An entry with `content` is appended from memory, any other entry from
+ * its `filePath`.
+ */
+async function createTar(entries: TestEntry[]): Promise<Tar> {
+  const archive = new Tar();
+  for (const { name, content, filePath: path } of entries) {
+    const source = content
+      ? { reader: new Deno.Buffer(content), contentSize: content.byteLength }
+      : { filePath: path };
+    await archive.append(name, source);
+  }
+  return archive;
+}
+
Deno.test("createTarArchive", async function (): Promise<void> {
// initialize
const tar = new Tar();
@@ -54,10 +81,11 @@ Deno.test("deflateTarArchive", async function (): Promise<void> {
// read data from a tar archive
const untar = new Untar(tar.getReader());
- const buf = new Deno.Buffer();
- const result = await untar.extract(buf);
- const untarText = new TextDecoder("utf-8").decode(buf.bytes());
+ const result = await untar.extract();
+ assert(result !== null);
+ const untarText = new TextDecoder("utf-8").decode(await Deno.readAll(result));
+ assertEquals(await untar.extract(), null); // EOF
// tests
assertEquals(result.fileName, fileName);
assertEquals(untarText, text);
@@ -80,11 +108,290 @@ Deno.test("appendFileWithLongNameToTarArchive", async function (): Promise<
// read data from a tar archive
const untar = new Untar(tar.getReader());
- const buf = new Deno.Buffer();
- const result = await untar.extract(buf);
- const untarText = new TextDecoder("utf-8").decode(buf.bytes());
+ const result = await untar.extract();
+ assert(result !== null);
+ const untarText = new TextDecoder("utf-8").decode(await Deno.readAll(result));
// tests
assertEquals(result.fileName, fileName);
assertEquals(untarText, text);
});
+
+// Iterating an archive yields every entry, in order, with its full body.
+Deno.test("untarAsyncIterator", async (): Promise<void> => {
+  const pending: TestEntry[] = [
+    {
+      name: "output.txt",
+      content: new TextEncoder().encode("hello tar world!"),
+    },
+    {
+      name: "dir/tar.ts",
+      filePath,
+    },
+  ];
+
+  const archive = await createTar(pending);
+  const untar = new Untar(archive.getReader());
+
+  for await (const entry of untar) {
+    const want = pending.shift();
+    assert(want);
+
+    // entries backed by a file are compared against the file on disk
+    const wantBytes = want.filePath
+      ? await Deno.readFile(want.filePath)
+      : want.content;
+
+    assertEquals(wantBytes, await Deno.readAll(entry));
+    assertEquals(want.name, entry.fileName);
+  }
+
+  // every expected entry must have been seen
+  assertEquals(pending.length, 0);
+});
+
+// Entries whose body is never read must not break the iteration.
+Deno.test("untarAsyncIteratorWithoutReadingBody", async (): Promise<void> => {
+  const pending: TestEntry[] = [
+    {
+      name: "output.txt",
+      content: new TextEncoder().encode("hello tar world!"),
+    },
+    {
+      name: "dir/tar.ts",
+      filePath,
+    },
+  ];
+
+  const archive = await createTar(pending);
+  const untar = new Untar(archive.getReader());
+
+  for await (const entry of untar) {
+    const want = pending.shift();
+    assert(want);
+    assertEquals(want.name, entry.fileName);
+  }
+
+  // every expected entry must have been seen
+  assertEquals(pending.length, 0);
+});
+
+// Same as the test without body reads, but the archive is read back from a
+// file on disk instead of an in-memory reader.
+Deno.test(
+  "untarAsyncIteratorWithoutReadingBodyFromFileReader",
+  async function (): Promise<void> {
+    const entries: TestEntry[] = [
+      {
+        name: "output.txt",
+        content: new TextEncoder().encode("hello tar world!"),
+      },
+      {
+        name: "dir/tar.ts",
+        filePath,
+      },
+    ];
+
+    const outputFile = resolve("archive", "testdata", "test.tar");
+
+    const tar = await createTar(entries);
+    const file = await Deno.open(outputFile, { create: true, write: true });
+    try {
+      await Deno.copy(tar.getReader(), file);
+    } finally {
+      file.close();
+    }
+
+    const reader = await Deno.open(outputFile, { read: true });
+    try {
+      // read data from a tar archive
+      const untar = new Untar(reader);
+
+      for await (const entry of untar) {
+        const expected = entries.shift();
+        assert(expected);
+        assertEquals(expected.name, entry.fileName);
+      }
+    } finally {
+      // release the handle and remove the temporary archive even when an
+      // assertion above fails
+      reader.close();
+      await Deno.remove(outputFile);
+    }
+    assertEquals(entries.length, 0);
+  }
+);
+
+// Writes an archive to disk, reads it back from the file and checks every
+// entry name and body.
+Deno.test("untarAsyncIteratorFromFileReader", async function (): Promise<void> {
+  const entries: TestEntry[] = [
+    {
+      name: "output.txt",
+      content: new TextEncoder().encode("hello tar world!"),
+    },
+    {
+      name: "dir/tar.ts",
+      filePath,
+    },
+  ];
+
+  const outputFile = resolve("archive", "testdata", "test.tar");
+
+  const tar = await createTar(entries);
+  const file = await Deno.open(outputFile, { create: true, write: true });
+  try {
+    await Deno.copy(tar.getReader(), file);
+  } finally {
+    file.close();
+  }
+
+  const reader = await Deno.open(outputFile, { read: true });
+  try {
+    // read data from a tar archive
+    const untar = new Untar(reader);
+
+    for await (const entry of untar) {
+      const expected = entries.shift();
+      assert(expected);
+
+      let content = expected.content;
+      if (expected.filePath) {
+        content = await Deno.readFile(expected.filePath);
+      }
+
+      assertEquals(content, await Deno.readAll(entry));
+      assertEquals(expected.name, entry.fileName);
+    }
+  } finally {
+    // release the handle and remove the temporary archive even when an
+    // assertion above fails
+    reader.close();
+    await Deno.remove(outputFile);
+  }
+  assertEquals(entries.length, 0);
+});
+
+// Reads every body through buffers smaller than one record.
+Deno.test(
+  "untarAsyncIteratorReadingLessThanRecordSize",
+  async (): Promise<void> => {
+    // all sizes are below the record size (512)
+    const chunkSizes = [1, 53, 256, 511];
+
+    for (const chunkSize of chunkSizes) {
+      const pending: TestEntry[] = [
+        {
+          name: "output.txt",
+          content: new TextEncoder().encode("hello tar world!".repeat(100)),
+        },
+        // At least two files are needed to make sure the first entry does
+        // not over-read, which would make the next one fail with:
+        // checksum error
+        {
+          name: "deni.txt",
+          content: new TextEncoder().encode("deno!".repeat(250)),
+        },
+      ];
+
+      const archive = await createTar(pending);
+      const untar = new Untar(archive.getReader());
+
+      for await (const entry of untar) {
+        const want = pending.shift();
+        assert(want);
+        assertEquals(want.name, entry.fileName);
+
+        // drain the body chunk by chunk into a growing buffer
+        const sink = new Deno.Buffer();
+        const chunk = new Uint8Array(chunkSize);
+        let n: number | null;
+        while ((n = await entry.read(chunk)) !== null) {
+          await sink.write(chunk.subarray(0, n));
+        }
+        assertEquals(sink.bytes(), want.content);
+      }
+
+      assertEquals(pending.length, 0);
+    }
+  }
+);
+
+// Reads the `deno.tar` fixture from testdata and checks the metadata and
+// body of every entry.
+Deno.test("untarLinuxGeneratedTar", async function (): Promise<void> {
+  const filePath = resolve("archive", "testdata", "deno.tar");
+  const file = await Deno.open(filePath, { read: true });
+
+  const expectedEntries = [
+    {
+      fileName: "archive/",
+      fileSize: 0,
+      fileMode: 509,
+      mtime: 1591800767,
+      uid: 1001,
+      gid: 1001,
+      owner: "deno",
+      group: "deno",
+      type: "directory",
+    },
+    {
+      fileName: "archive/deno/",
+      fileSize: 0,
+      fileMode: 509,
+      mtime: 1591799635,
+      uid: 1001,
+      gid: 1001,
+      owner: "deno",
+      group: "deno",
+      type: "directory",
+    },
+    {
+      fileName: "archive/deno/land/",
+      fileSize: 0,
+      fileMode: 509,
+      mtime: 1591799660,
+      uid: 1001,
+      gid: 1001,
+      owner: "deno",
+      group: "deno",
+      type: "directory",
+    },
+    {
+      fileName: "archive/deno/land/land.txt",
+      fileMode: 436,
+      fileSize: 5,
+      mtime: 1591799660,
+      uid: 1001,
+      gid: 1001,
+      owner: "deno",
+      group: "deno",
+      type: "file",
+      content: new TextEncoder().encode("land\n"),
+    },
+    {
+      fileName: "archive/file.txt",
+      fileMode: 436,
+      fileSize: 5,
+      mtime: 1591799626,
+      uid: 1001,
+      gid: 1001,
+      owner: "deno",
+      group: "deno",
+      type: "file",
+      content: new TextEncoder().encode("file\n"),
+    },
+    {
+      fileName: "archive/deno.txt",
+      fileMode: 436,
+      fileSize: 5,
+      mtime: 1591799642,
+      uid: 1001,
+      gid: 1001,
+      owner: "deno",
+      group: "deno",
+      type: "file",
+      content: new TextEncoder().encode("deno\n"),
+    },
+  ];
+
+  try {
+    const untar = new Untar(file);
+
+    for await (const entry of untar) {
+      const expected = expectedEntries.shift();
+      assert(expected);
+      // `content` is not part of the entry metadata: compare it separately
+      const { content, ...meta } = expected;
+
+      assertEquals(entry, meta);
+
+      if (content) {
+        assertEquals(content, await Deno.readAll(entry));
+      }
+    }
+  } finally {
+    // close the fixture even when an assertion above fails
+    file.close();
+  }
+
+  // the archive must contain every expected entry, not just a prefix
+  assertEquals(expectedEntries.length, 0);
+});
diff --git a/std/archive/testdata/deno.tar b/std/archive/testdata/deno.tar
new file mode 100644
index 000000000..300ce003b
--- /dev/null
+++ b/std/archive/testdata/deno.tar
Binary files differ