Skip to content

Commit 5012c66

Browse files
authored
perf: use memmap to speed up file reading (#696)
1 parent 49cc7d0 commit 5012c66

File tree

3 files changed

+67
-6
lines changed

3 files changed

+67
-6
lines changed

Cargo.lock

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ pnp = { version = "0.12.2", optional = true }
9494

9595
document-features = { version = "0.2.11", optional = true }
9696

97+
[target.'cfg(not(any(target_family = "wasm", target_os = "wasi")))'.dependencies]
98+
memmap2 = "0.9"
99+
97100
[target.'cfg(target_os = "windows")'.dependencies]
98101
windows = { version = "0.62.0", features = ["Win32_Storage_FileSystem"] }
99102

src/file_system.rs

Lines changed: 54 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ use std::{
44
};
55

66
use cfg_if::cfg_if;
7+
#[cfg(not(any(target_family = "wasm", target_os = "wasi")))]
8+
use memmap2::Mmap;
79
#[cfg(feature = "yarn_pnp")]
810
use pnp::fs::{LruZipCache, VPath, VPathInfo, ZipCache};
911

@@ -124,21 +126,67 @@ pub struct FileSystemOs {
124126
}
125127

126128
impl FileSystemOs {
129+
/// Minimum file size, in bytes, at which `read_to_string` memory-maps the file
/// instead of reading it with `std::fs::read`.
130+
#[cfg(not(any(target_family = "wasm", target_os = "wasi")))]
131+
const MMAP_THRESHOLD: u64 = 4096;
132+
133+
/// Validates UTF-8 encoding and converts bytes to String
134+
///
127135
/// # Errors
128136
///
129-
/// See [std::fs::read_to_string]
130-
pub fn read_to_string(path: &Path) -> io::Result<String> {
131-
// `simdutf8` is faster than `std::str::from_utf8` which `fs::read_to_string` uses internally
132-
let bytes = std::fs::read(path)?;
133-
if simdutf8::basic::from_utf8(&bytes).is_err() {
137+
/// Returns an error if the bytes are not valid UTF-8
138+
fn validate_and_convert_utf8(bytes: &[u8]) -> io::Result<String> {
139+
if simdutf8::basic::from_utf8(bytes).is_err() {
134140
// Same error as `fs::read_to_string` produces (`io::Error::INVALID_UTF8`)
135141
return Err(io::Error::new(
136142
io::ErrorKind::InvalidData,
137143
"stream did not contain valid UTF-8",
138144
));
139145
}
140146
// SAFETY: `simdutf8` has ensured it's a valid UTF-8 string
141-
Ok(unsafe { String::from_utf8_unchecked(bytes) })
147+
Ok(unsafe { std::str::from_utf8_unchecked(bytes) }.to_string())
148+
}
149+
150+
/// # Errors
151+
///
152+
/// See [std::fs::read_to_string]
153+
pub fn read_to_string(path: &Path) -> io::Result<String> {
154+
#[cfg(not(any(target_family = "wasm", target_os = "wasi")))]
155+
{
156+
let file = std::fs::File::open(path)?;
157+
let metadata = file.metadata()?;
158+
159+
// Use memory mapping for files >= 4KB, standard read for smaller files
160+
if metadata.len() >= Self::MMAP_THRESHOLD {
161+
return Self::read_to_string_mmap(&file);
162+
}
163+
}
164+
Self::read_to_string_standard(path)
165+
}
166+
167+
/// Standard file reading implementation using std::fs::read
168+
///
169+
/// # Errors
170+
///
171+
/// See [std::fs::read_to_string]
172+
pub fn read_to_string_standard(path: &Path) -> io::Result<String> {
173+
// `simdutf8` is faster than `std::str::from_utf8` which `fs::read_to_string` uses internally
174+
let bytes = std::fs::read(path)?;
175+
Self::validate_and_convert_utf8(&bytes)
176+
}
177+
178+
/// Memory-mapped file reading implementation
179+
///
180+
/// # Errors
181+
///
182+
/// See [std::fs::read_to_string] and [memmap2::Mmap::map]
183+
#[cfg(not(any(target_family = "wasm", target_os = "wasi")))]
184+
fn read_to_string_mmap(file: &std::fs::File) -> io::Result<String> {
185+
// SAFETY: memmap2::Mmap::map requires that the file remains valid and unmutated
186+
// for the lifetime of the mmap. Since we're doing read-only access and the file
187+
// won't be modified during this function's execution, this is safe.
188+
let mmap = unsafe { Mmap::map(file)? };
189+
Self::validate_and_convert_utf8(&mmap[..])
142190
}
143191

144192
/// # Errors

0 commit comments

Comments
 (0)