4
4
Helper to view slabinfo data
5
5
"""
6
6
import argparse
7
+ from typing import List
7
8
from typing import NamedTuple
8
9
from typing import Set
9
10
from typing import Tuple
10
11
11
12
from drgn import cast
13
+ from drgn import FaultError
12
14
from drgn import Object
13
15
from drgn import Program
16
+ from drgn import ProgramFlags
14
17
from drgn import Type
15
18
from drgn .helpers .linux .cpumask import for_each_present_cpu
16
19
from drgn .helpers .linux .list import list_for_each_entry
@@ -40,6 +43,8 @@ class SlabCacheInfo(NamedTuple):
40
43
"""Slab size"""
41
44
name : str
42
45
"""Name of the slab cache"""
46
+ freelist_corrupt_cpus : List [int ]
47
+ """A list of CPUs for which the freelist was found to be corrupt"""
43
48
44
49
45
50
def _slab_type (prog : Program ) -> Type :
@@ -204,19 +209,41 @@ def slub_per_cpu_partial_free(cpu_partial: Object) -> int:
204
209
return partial_free
205
210
206
211
207
- def kmem_cache_slub_info (cache : Object ) -> Tuple [int , int ]:
212
class _CpuSlubWrapper:
    """
    Proxy for an object that hides its ``cpu_slab`` attribute

    Every attribute except ``cpu_slab`` is forwarded to the wrapped object.
    Looking up ``cpu_slab`` raises :class:`AttributeError`, so code that
    probes for that attribute behaves as if it did not exist.

    :param obj: the object to wrap
    """

    def __init__(self, obj):
        self._obj = obj

    def __getattr__(self, key):
        # __getattr__ only runs after normal lookup has failed. Reaching this
        # point with "_obj" means __init__ never ran (for example the instance
        # was created through __new__), and reading self._obj below would
        # recurse without bound.
        if key == "_obj":
            raise AttributeError(key)
        if key == "cpu_slab":
            raise AttributeError("CpuSlubWrapper!")
        # getattr() runs the wrapped object's complete lookup protocol,
        # including a __getattr__ fallback that a direct __getattribute__
        # call would skip.
        return getattr(self._obj, key)
220
+
221
+
222
+ def kmem_cache_slub_info (cache : Object ) -> Tuple [int , int , List [int ]]:
208
223
"""
209
224
For given kmem_cache object, parse through each cpu
210
225
and get number of total slabs and free objects
211
226
227
+ If the CPU freelist was corrupt, then we do our best effort to count free
228
+ objects, but we may undercount them. We set the corruption flag when this
229
+ happens.
230
+
212
231
:param: ``struct kmem_cache`` drgn object
213
- :returns: total slabs, free objects
232
+ :returns: total slabs, free objects, corruption instances
214
233
"""
215
234
prog = cache .prog_
216
235
use_slab = _has_struct_slab (prog )
217
236
218
237
total_slabs = objects = free_objects = 0
219
- slub_helper = _get_slab_cache_helper (cache )
238
+
239
+ # The "cpu_slab" variable is used by the slab helper to preload the percpu
240
+ # freelists. Not only does this duplicate work we're about to do, but also
241
+ # corrupt slab caches will crash this function before we can detect which
242
+ # CPU is corrupt. Pretend we have no "cpu_slab" variable when getting the
243
+ # helper. This depends on implementation details: we will improve the helper
244
+ # upstream to avoid this for the future.
245
+ slub_helper = _get_slab_cache_helper (_CpuSlubWrapper (cache ))
246
+ corrupt = []
220
247
221
248
for cpuid in for_each_present_cpu (prog ):
222
249
per_cpu_slab = per_cpu_ptr (cache .cpu_slab , cpuid )
@@ -237,15 +264,25 @@ def kmem_cache_slub_info(cache: Object) -> Tuple[int, int]:
237
264
objects = 0
238
265
239
266
free_objects += objects - page_inuse
240
- cpu_free_objects = slub_get_cpu_freelist_cnt (cpu_freelist , slub_helper )
241
- free_objects += cpu_free_objects
267
+
268
+ # Easily the most common form of corruption in the slab allocator comes
269
+ # from use after free, which overwrites the freelist pointer and causes
270
+ # a fault error. Catch this and report it for later.
271
+ try :
272
+ cpu_free_objects = slub_get_cpu_freelist_cnt (
273
+ cpu_freelist , slub_helper
274
+ )
275
+ except FaultError :
276
+ corrupt .append (cpuid )
277
+ else :
278
+ free_objects += cpu_free_objects
242
279
243
280
partial_frees = slub_per_cpu_partial_free (cpu_partial )
244
281
free_objects += partial_frees
245
282
246
283
total_slabs += 1
247
284
248
- return total_slabs , free_objects
285
+ return total_slabs , free_objects , corrupt
249
286
250
287
251
288
def get_kmem_cache_slub_info (cache : Object ) -> SlabCacheInfo :
@@ -255,7 +292,7 @@ def get_kmem_cache_slub_info(cache: Object) -> SlabCacheInfo:
255
292
:param cache: ``struct kmem_cache`` drgn object
256
293
:returns: a :class:`SlabCacheInfo` with statistics about the cache
257
294
"""
258
- total_slabs , free_objects = kmem_cache_slub_info (cache )
295
+ total_slabs , free_objects , corrupt = kmem_cache_slub_info (cache )
259
296
(
260
297
nr_slabs ,
261
298
nr_total_objs ,
@@ -280,6 +317,7 @@ def get_kmem_cache_slub_info(cache: Object) -> SlabCacheInfo:
280
317
total_slabs ,
281
318
ssize ,
282
319
cache .name .string_ ().decode ("utf-8" ),
320
+ corrupt ,
283
321
)
284
322
285
323
@@ -296,19 +334,42 @@ def print_slab_info(prog: Program) -> None:
296
334
"NAME" ,
297
335
]
298
336
)
337
+ corruption = []
299
338
for cache in for_each_slab_cache (prog ):
300
339
slabinfo = get_kmem_cache_slub_info (cache )
340
+ maybe_asterisk = ""
341
+ if slabinfo .freelist_corrupt_cpus :
342
+ maybe_asterisk = "*"
343
+ corruption .append (slabinfo )
301
344
table .row (
302
345
slabinfo .cache .value_ (),
303
346
slabinfo .objsize ,
304
- slabinfo .allocated ,
347
+ f" { slabinfo .allocated } { maybe_asterisk } " ,
305
348
slabinfo .total ,
306
349
slabinfo .nr_slabs ,
307
350
f"{ int (slabinfo .ssize / 1024 )} k" ,
308
351
slabinfo .name ,
309
352
)
310
353
table .write ()
311
354
355
+ if corruption :
356
+ if prog .flags & ProgramFlags .IS_LIVE :
357
+ print (
358
+ "NOTE: freelist corruption was detected. This is not "
359
+ "necessarily an error, as live systems may encounter race "
360
+ "conditions."
361
+ )
362
+ else :
363
+ print (
364
+ "WARNING: freelist corruption was detected. It is likely that "
365
+ "a use-after-free bug occurred."
366
+ )
367
+ table = FixedTable (["CACHE:<24s" , "CORRUPT CPUS" ])
368
+ for slabinfo in corruption :
369
+ cpus = ", " .join (map (str , slabinfo .freelist_corrupt_cpus ))
370
+ table .row (slabinfo .name , cpus )
371
+ table .write ()
372
+
312
373
313
374
class SlabInfo (CorelensModule ):
314
375
"""Print info about each slab cache"""
0 commit comments