Version
24.6.0
Platform
Linux robokoteg-StarBook 6.14.0-27-generic #27~24.04.1-Ubuntu SMP PREEMPT_DYNAMIC Tue Jul 22 17:38:49 UTC 2 x86_64 x86_64 x86_64 GNU/Linux
Subsystem
zlib
What steps will reproduce the bug?
Have gzipped content that expands to more than 4GB as a Buffer.
Decompress it with Node's zlib module.
Find that the decompressed content is corrupted (does not match the original).
The behavior was originally observed when using the DecompressionStream API, but we managed to trace it to the core zlib module.
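For context, the kind of DecompressionStream usage where this was first noticed looks roughly like the sketch below (a minimal sketch, not the exact code we ran): it streams a gzipped file through DecompressionStream("gzip") and reports the md5 and byte count of the output, for comparison with md5sum of the original file.

const { createHash } = require("node:crypto");
const { createReadStream } = require("node:fs");
const { Readable } = require("node:stream");

async function md5ViaDecompressionStream(filename) {
  // Pipe the gzipped file through the Web Streams decompressor and hash the output.
  const decompressed = Readable.toWeb(createReadStream(filename))
    .pipeThrough(new DecompressionStream("gzip"));
  const hash = createHash("md5");
  let total = 0;
  for await (const chunk of decompressed) {
    hash.update(chunk);
    total += chunk.length;
  }
  return { md5: hash.digest("hex"), bytes: total };
}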
Complete repro script:
const { execSync } = require("child_process");
const { Buffer, constants } = require("node:buffer");
const { statSync } = require("node:fs");
const { open } = require("node:fs/promises");
const { gunzipSync } = require("node:zlib");

const chunkSize = 1_000_000;

// Needed to handle files >4GB
async function writeInChunks(filename, buf) {
  const outputFile = await open(filename, "w");
  let pos = 0;
  do {
    const length = Math.min(chunkSize, buf.length - pos);
    const { bytesWritten } = await outputFile.write(buf, { offset: pos, length });
    pos += bytesWritten;
  } while (pos < buf.length);
  await outputFile.close();
}

// Needed to handle files >4GB
async function readInChunks(filename) {
  const buf = Buffer.alloc(statSync(filename).size);
  const inputFile = await open(filename);
  let pos = 0;
  do {
    const length = Math.min(chunkSize, buf.length - pos);
    const { bytesRead } = await inputFile.read(buf, pos, length);
    pos += bytesRead;
  } while (pos < buf.length);
  await inputFile.close();
  return buf;
}

async function main() {
  const fnOriginal = "/tmp/random_5bn_bytes.bin";
  const fnEncoded = "/tmp/random_5bn_bytes.gz";
  const fnDecoded = "/tmp/random_5bn_bytes_decoded.bin";

  console.log("create a dummy file");
  // Note: decompression works correctly with 3GB files but fails with 5GB files
  const originalBuf = Buffer.alloc(5_000_000_000, "foo");
  await writeInChunks(fnOriginal, originalBuf);

  console.log("gzip via system tool");
  execSync(`cat ${fnOriginal} | gzip > ${fnEncoded}`);

  console.log("read gzipped");
  const encodedBuf = await readInChunks(fnEncoded);

  console.log("gunzip via node");
  const out = gunzipSync(encodedBuf);
  await writeInChunks(fnDecoded, out);

  console.log("verify");
  console.log(execSync(`stat ${fnOriginal}`).toString());
  console.log(execSync(`stat ${fnDecoded}`).toString());
  console.log(execSync(`cat ${fnOriginal} | md5sum`).toString());
  console.log(execSync(`cat ${fnDecoded} | md5sum`).toString());
}

main()
  .then(() => console.log("done"))
  .catch((e) => console.error(e));
How often does it reproduce? Is there a required condition?
Only for content >4GB.
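Presumably the relevant boundary is 2 ** 32 = 4,294,967,296 bytes; we have not pinned down the exact threshold. A rough probe to narrow it down, reusing writeInChunks, readInChunks, chunkSize and the imports from the repro script above, could look like this:

// Hypothetical threshold probe; relies on the helpers defined in the repro script.
async function probe(size) {
  const fnOriginal = `/tmp/probe_${size}.bin`;
  const fnEncoded = `/tmp/probe_${size}.gz`;
  await writeInChunks(fnOriginal, Buffer.alloc(size, "foo"));
  execSync(`cat ${fnOriginal} | gzip > ${fnEncoded}`);
  const decoded = gunzipSync(await readInChunks(fnEncoded));
  const ok = decoded.equals(await readInChunks(fnOriginal));
  console.log(size, ok ? "round-trips correctly" : "CORRUPTED");
}

// e.g. for (const size of [2 ** 32 - chunkSize, 2 ** 32, 2 ** 32 + chunkSize]) await probe(size);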
What is the expected behavior? Why is that the expected behavior?
The original content decompresses correctly.
What do you see instead?
The size of the decompressed buffer is correct, but the content is corrupted (does not match the original).
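If it helps with triage, a quick way to locate where the corruption begins (with originalBuf and out from the repro script still in memory) would be something like:

// Hypothetical helper: return the offset of the first byte where two buffers differ,
// or -1 if they are identical.
function firstMismatch(a, b) {
  const len = Math.min(a.length, b.length);
  for (let i = 0; i < len; i++) {
    if (a[i] !== b[i]) return i;
  }
  return a.length === b.length ? -1 : len;
}

// e.g. console.log("first mismatch at byte", firstMismatch(originalBuf, out));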
Additional information
No response