@@ -4,6 +4,8 @@ use std::{
4
4
} ;
5
5
6
6
use cfg_if:: cfg_if;
7
+ #[ cfg( not( any( target_family = "wasm" , target_os = "wasi" ) ) ) ]
8
+ use memmap2:: Mmap ;
7
9
#[ cfg( feature = "yarn_pnp" ) ]
8
10
use pnp:: fs:: { LruZipCache , VPath , VPathInfo , ZipCache } ;
9
11
@@ -124,21 +126,67 @@ pub struct FileSystemOs {
124
126
}
125
127
126
128
impl FileSystemOs {
129
/// Minimum file size, in bytes, at which `read_to_string` switches from a plain
/// read to a memory-mapped read (4 KiB).
#[cfg(not(any(target_family = "wasm", target_os = "wasi")))]
const MMAP_THRESHOLD: u64 = 4 * 1024;
132
+
133
+ /// Validates UTF-8 encoding and converts bytes to String
134
+ ///
127
135
/// # Errors
128
136
///
129
- /// See [std::fs::read_to_string]
130
- pub fn read_to_string ( path : & Path ) -> io:: Result < String > {
131
- // `simdutf8` is faster than `std::str::from_utf8` which `fs::read_to_string` uses internally
132
- let bytes = std:: fs:: read ( path) ?;
133
- if simdutf8:: basic:: from_utf8 ( & bytes) . is_err ( ) {
137
+ /// Returns an error if the bytes are not valid UTF-8
138
+ fn validate_and_convert_utf8 ( bytes : & [ u8 ] ) -> io:: Result < String > {
139
+ if simdutf8:: basic:: from_utf8 ( bytes) . is_err ( ) {
134
140
// Same error as `fs::read_to_string` produces (`io::Error::INVALID_UTF8`)
135
141
return Err ( io:: Error :: new (
136
142
io:: ErrorKind :: InvalidData ,
137
143
"stream did not contain valid UTF-8" ,
138
144
) ) ;
139
145
}
140
146
// SAFETY: `simdutf8` has ensured it's a valid UTF-8 string
141
- Ok ( unsafe { String :: from_utf8_unchecked ( bytes) } )
147
+ Ok ( unsafe { std:: str:: from_utf8_unchecked ( bytes) } . to_string ( ) )
148
+ }
149
+
150
+ /// # Errors
151
+ ///
152
+ /// See [std::fs::read_to_string]
153
+ pub fn read_to_string ( path : & Path ) -> io:: Result < String > {
154
+ #[ cfg( not( any( target_family = "wasm" , target_os = "wasi" ) ) ) ]
155
+ {
156
+ let file = std:: fs:: File :: open ( path) ?;
157
+ let metadata = file. metadata ( ) ?;
158
+
159
+ // Use memory mapping for files >= 4KB, standard read for smaller files
160
+ if metadata. len ( ) >= Self :: MMAP_THRESHOLD {
161
+ return Self :: read_to_string_mmap ( & file) ;
162
+ }
163
+ }
164
+ Self :: read_to_string_standard ( path)
165
+ }
166
+
167
+ /// Standard file reading implementation using std::fs::read
168
+ ///
169
+ /// # Errors
170
+ ///
171
+ /// See [std::fs::read_to_string]
172
+ pub fn read_to_string_standard ( path : & Path ) -> io:: Result < String > {
173
+ // `simdutf8` is faster than `std::str::from_utf8` which `fs::read_to_string` uses internally
174
+ let bytes = std:: fs:: read ( path) ?;
175
+ Self :: validate_and_convert_utf8 ( & bytes)
176
+ }
177
+
178
+ /// Memory-mapped file reading implementation
179
+ ///
180
+ /// # Errors
181
+ ///
182
+ /// See [std::fs::read_to_string] and [memmap2::Mmap::map]
183
+ #[ cfg( not( any( target_family = "wasm" , target_os = "wasi" ) ) ) ]
184
+ fn read_to_string_mmap ( file : & std:: fs:: File ) -> io:: Result < String > {
185
+ // SAFETY: memmap2::Mmap::map requires that the file remains valid and unmutated
186
+ // for the lifetime of the mmap. Since we're doing read-only access and the file
187
+ // won't be modified during this function's execution, this is safe.
188
+ let mmap = unsafe { Mmap :: map ( file) ? } ;
189
+ Self :: validate_and_convert_utf8 ( & mmap[ ..] )
142
190
}
143
191
144
192
/// # Errors
0 commit comments