|
| 1 | +from typing import Optional |
| 2 | + |
| 3 | +import ctypes |
| 4 | +import logging |
| 5 | +import os |
| 6 | +import pathlib |
| 7 | +import sys |
| 8 | + |
| 9 | +import capstone |
| 10 | +import kdmp_parser |
| 11 | + |
| 12 | +import bochscpu |
| 13 | +import bochscpu.cpu |
| 14 | +import bochscpu.memory |
| 15 | +import bochscpu.utils |
| 16 | + |
| 17 | + |
# Win32 entry points used by resolve_function(). The prototypes are declared
# explicitly so module handles and function addresses are marshalled as
# pointer-sized values rather than ctypes' default C int.
kernel32 = ctypes.windll.kernel32
kernel32.GetModuleHandleW.restype = ctypes.c_void_p
kernel32.GetModuleHandleW.argtypes = [ctypes.c_wchar_p]
kernel32.GetProcAddress.restype = ctypes.c_void_p
kernel32.GetProcAddress.argtypes = [ctypes.c_void_p, ctypes.c_char_p]


# Shared 64-bit x86 disassembler, used by before_execution_cb() for tracing.
cs = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64)

# Address at which after_execution_cb() stops the session; 0 disables the check.
emulation_end_address = 0

# Host pages handed out by missing_page_cb(); released by the __main__ block.
hvas: list[int] = []

# Populated by emulate().
dmp: Optional[kdmp_parser.KernelDumpParser] = None
session: Optional[bochscpu.Session] = None
| 32 | + |
| 33 | + |
def hexdump(
    source: bytes, length: int = 0x10, separator: str = ".", base: int = 0x00
) -> str:
    """Format `source` as a multi-line hexadecimal dump.

    Each row shows the address of its first byte, the bytes as upper-case hex
    pairs, and a printable-ASCII rendering of the same bytes.

    Args:
        source: The bytes to dump.
        length: Number of bytes per row; must be positive.
        separator: Placeholder used for bytes outside printable ASCII.
        base: Address displayed for the first byte of `source`.

    Returns:
        The rows joined with "\\n", or an empty string when `source` is empty.

    Raises:
        ValueError: If `length` is not a positive integer.
    """
    if length <= 0:
        raise ValueError(f"length must be a positive integer, got {length}")

    # "0x" prefix plus 16 hex digits.
    align = 0x8 * 2 + 2
    # Pad the hex column so the ASCII column stays aligned on a short last row.
    hex_width = 3 * length

    rows = []
    for offset in range(0, len(source), length):
        chunk = source[offset : offset + length]
        hexa = " ".join(f"{byte:02X}" for byte in chunk)
        text = "".join(
            chr(byte) if 0x20 <= byte < 0x7F else separator for byte in chunk
        )
        rows.append(f"{base + offset:#0{align}x} {hexa:<{hex_width}} {text}")

    # Plain "\n" rather than os.linesep: text-mode streams translate newlines
    # themselves, so os.linesep would come out as "\r\r\n" on Windows.
    return "\n".join(rows)
| 52 | + |
| 53 | + |
def missing_page_cb(pa):
    """Lazily back a missing guest physical page with data from the dump.

    Installed as the session's missing-page handler. The page containing `pa`
    is looked up in the parsed dump; when present, a host page is allocated,
    mapped at the page-aligned guest physical address and filled with the
    dump's content.

    Args:
        pa: The guest physical address whose page is not mapped.

    Raises:
        RuntimeError: If no dump/session is set up, or if the page cannot be
            provided (the session is stopped before raising).
    """
    if dmp is None or session is None:
        raise RuntimeError("missing_page_cb invoked before emulate() set up the session")

    gpa = kdmp_parser.page.align(pa)
    logging.debug(f"Missing GPA={gpa:#x}")

    if gpa in dmp.pages:
        # Read the dump first so a failed read cannot leave behind a host page
        # that was allocated but never mapped nor tracked.
        page = dmp.read_physical_page(gpa)
        if page:
            hva = bochscpu.memory.allocate_host_page()
            if hva:
                # Track the page right away so the final cleanup releases it
                # even if mapping or writing fails below.
                hvas.append(hva)
                bochscpu.memory.page_insert(gpa, hva)
                bochscpu.memory.phy_write(gpa, page)
                logging.debug(f"{gpa=:#x} -> {hva=:#x}")
                # we've successfully mapped it
                return

    # otherwise the page is really missing, bail
    session.stop()
    raise RuntimeError(f"Physical page {gpa:#x} is missing and could not be mapped")
| 76 | + |
| 77 | + |
def phy_access_cb(
    sess: bochscpu.Session, cpu_id: int, lin: int, phy: int, len: int, rw: int
):
    """Log one physical memory access at DEBUG level.

    Args:
        sess: The running session (unused).
        cpu_id: Index of the CPU performing the access (unused).
        lin: Linear address of the access.
        phy: Physical address of the access.
        len: Size of the access; the name shadows the builtin but is kept
            because it is part of the signature and of the logged label.
        rw: Access-kind flag; only its truthiness is logged.
    """
    trace = f"{lin=:#x} -> {phy=:#x}, {len=:#x}, {bool(rw)=}"
    logging.debug(trace)
| 82 | + |
| 83 | + |
def exception_cb(
    sess: bochscpu.Session,
    cpu_id: int,
    vector: int,
    error_code: int,
):
    """Report a CPU exception and stop the session.

    Args:
        sess: The session whose CPU raised the exception.
        cpu_id: Index of the CPU that raised it.
        vector: Exception vector, converted to `bochscpu.cpu.ExceptionType`.
        error_code: Error code delivered with the exception.
    """
    kinds = bochscpu.cpu.ExceptionType
    excpt = kinds(vector)

    if excpt == kinds.BreakPoint:
        logging.info("breakpoint hit")
    elif excpt == kinds.PageFault:
        logging.warning(
            f"pagefault on VA={sess.cpu.cr2:#016x} at IP={sess.cpu.rip:#016x}"
        )
    else:
        logging.error(
            f"cpu#{cpu_id} received exception({excpt=}, {error_code=:d}) "
        )

    # NOTE(review): emulation is stopped for every exception kind, not only
    # the unexpected ones - confirm this is the intended scope of stop().
    sess.stop()
| 105 | + |
| 106 | + |
def before_execution_cb(sess: bochscpu.Session, cpu_id: int, _: int):
    """Trace the instruction at the current RIP at DEBUG level.

    Reads 16 bytes of guest virtual memory at RIP (through the current CR3),
    disassembles the first instruction and logs its address, encoding and
    text.

    Args:
        sess: The running session.
        cpu_id: Index of the CPU about to execute.
        _: Unused hook argument.
    """
    state = sess.cpu.state
    raw = bytes(bochscpu.memory.virt_read(state.cr3, state.rip, 16))

    # The disassembler yields nothing for bytes it cannot decode; a bare
    # next() would then leak StopIteration out of this hook.
    insn = next(cs.disasm(raw, state.rip), None)
    if insn is None:
        logging.debug(
            f"[CPU#{cpu_id}] PC={state.rip:#x} {raw.hex()} - (undecodable)"
        )
        return

    logging.debug(
        f"[CPU#{cpu_id}] PC={state.rip:#x} {insn.bytes.hex()} - {insn.mnemonic} {insn.op_str}"
    )
| 114 | + |
| 115 | + |
def after_execution_cb(sess: bochscpu.Session, cpu_id: int, _: int):
    """Stop the session once RIP equals the configured end address.

    Does nothing while the module-level `emulation_end_address` is 0.

    Args:
        sess: The running session.
        cpu_id: Index of the CPU that just executed (unused).
        _: Unused hook argument.
    """
    if not emulation_end_address:
        return

    if sess.cpu.state.rip == emulation_end_address:
        # Hexadecimal, like every other address logged by this script.
        logging.info(
            f"Reaching end address @ {emulation_end_address:#x}, ending emulation"
        )
        sess.stop()
| 126 | + |
| 127 | + |
def resolve_function(symbol: str) -> int:
    """Resolve a `module!function` symbol to an address in this process.

    The module must already be loaded in the current process; ".dll" is
    appended to the module name when it is missing.

    Args:
        symbol: Symbol formatted as `<module>!<function>`.

    Returns:
        The address of the exported function.

    Raises:
        ValueError: If `symbol` contains no "!" separator.
        RuntimeError: If the module is not loaded or the export is not found.
    """
    dll, bang, func = symbol.partition("!")
    if not bang:
        raise ValueError(f"Expected '<module>!<function>', got {symbol!r}")

    if not dll.lower().endswith(".dll"):
        dll += ".dll"
    logging.info(f"Looking up {func} in {dll}")

    handle = kernel32.GetModuleHandleW(dll)
    if not handle:
        # Never hand a NULL module handle to GetProcAddress: report the real
        # cause instead of a misleading lookup result.
        raise RuntimeError(
            f"Failed to resolve {symbol}: module {dll} is not loaded in this process"
        )

    address: int = kernel32.GetProcAddress(handle, func.encode())
    if not address:
        raise RuntimeError(f"Failed to resolve {symbol}")
    logging.info(f"Resolved '{symbol:s}' -> {address:#x}")
    return address
| 139 | + |
| 140 | + |
def _flat_data_segment(selector: int, attr: int, present: bool = False):
    """Build a zero-based segment with a 0xFFFF_FFFF limit.

    `present` is only assigned when requested, so segments created without it
    keep whatever default `bochscpu.Segment()` provides.
    """
    seg = bochscpu.Segment()
    seg.base = 0
    seg.limit = 0xFFFF_FFFF
    seg.selector = selector
    if present:
        seg.present = True
    seg.attr = attr
    return seg


def emulate(dmp_path: pathlib.Path):
    """Emulate execution starting from the CPU context stored in a kernel dump.

    Parses the dump, creates a session whose missing pages are served from the
    dump, builds a long-mode CPU state from the dump's context (control
    registers, flags, general purpose registers, segments), installs the
    exception/before/after execution hooks and runs the session. Register
    state is dumped before and after the run.

    Sets the module-level `dmp` and `session`.

    Args:
        dmp_path: Path to the kernel dump file.
    """
    global session, dmp

    assert session is None

    logging.info(f"Parsing {dmp_path}")
    dmp = kdmp_parser.KernelDumpParser(dmp_path)
    assert dmp

    logging.info(f"Successfully parsed {dmp_path}")

    session = bochscpu.Session()
    session.missing_page_handler = missing_page_cb

    logging.debug("Preparing CPU state")
    state = bochscpu.State()
    bochscpu.cpu.set_long_mode(state)

    logging.debug("Enabling MMX (SSE/AVX) instructions")
    cr0 = bochscpu.utils.cpu.CR0(state.cr0)
    cr4 = bochscpu.utils.cpu.CR4(state.cr4)
    xcr0 = bochscpu.utils.cpu.XCR0(state.xcr0)
    # See AMD Vol2 - 11.3
    cr0.MP = True
    cr0.EM = False
    cr4.OSFXSR = True
    cr4.OSXSAVE = True
    # See AMD Vol2 - 11.5.2
    xcr0.x87 = True
    xcr0.SSE = True
    xcr0.YMM = True

    # TODO use bdump.js::regs.json instead
    logging.debug(f"Setting {cr0=:}")
    logging.debug(f"Setting {cr4=:}")
    logging.debug(f"Setting {xcr0=:}")
    state.cr0 = int(cr0)
    state.cr4 = int(cr4)
    state.xcr0 = int(xcr0)

    cr3 = dmp._KernelDumpParser__dump.GetDirectoryTableBase()  # type: ignore # HACK
    logging.debug(f"Setting CR3={cr3:#x}")
    state.cr3 = cr3

    logging.debug("Setting the flag register")
    # CONTEXT.ContextFlags only describes which parts of the context record
    # are valid; the saved flags register lives in CONTEXT.EFlags.
    state.rflags = dmp.context.EFlags

    logging.debug("Setting the other GPRs")
    for regname in (
        "rax",
        "rbx",
        "rcx",
        "rdx",
        "rsi",
        "rdi",
        "rip",
        "rsp",
        "rbp",
        "r8",
        "r9",
        "r10",
        "r11",
        "r12",
        "r13",
        "r14",
        "r15",
    ):
        # The dump context names its fields "Rax", "R8", ...
        value = int(getattr(dmp.context, regname.capitalize()))
        setattr(state, regname, value)

    logging.debug("Setting the segment selectors")
    _cs = bochscpu.Segment()
    _cs.base = 0
    _cs.limit = 0xFFFF_FFFF
    _cs.selector = dmp.context.SegCs
    _cs_attr = bochscpu.cpu.SegmentFlags()
    _cs_attr.A = True
    _cs_attr.R = True
    _cs_attr.E = True
    _cs_attr.S = True
    _cs_attr.P = True
    _cs_attr.L = True
    _cs.attr = int(_cs_attr)

    _ds = _flat_data_segment(dmp.context.SegDs, 0xCF3)
    _es = _flat_data_segment(dmp.context.SegEs, 0xCF3)
    _ss = _flat_data_segment(dmp.context.SegSs, 0xCF3)
    _fs = _flat_data_segment(dmp.context.SegFs, 0x4F3, present=True)
    # TODO: missing curprocess TEB (GS base is left at 0)
    _gs = _flat_data_segment(dmp.context.SegGs, 0xCF3, present=True)

    state.ss = _ss
    state.cs = _cs
    state.ds = _ds
    state.es = _es
    state.fs = _fs
    state.gs = _gs

    logging.debug("Apply the created state to the session CPU")
    session.cpu.state = state

    logging.debug("Preparing hooks")
    hook = bochscpu.Hook()
    hook.exception = exception_cb
    hook.before_execution = before_execution_cb
    hook.after_execution = after_execution_cb

    logging.debug("Initial register state")
    bochscpu.utils.dump_registers(session.cpu.state)

    logging.debug("Let's go baby!")
    session.run(
        [
            hook,
        ]
    )

    session.stop()

    logging.debug("Final register state")
    bochscpu.utils.dump_registers(session.cpu.state)
| 282 | + |
| 283 | + |
if __name__ == "__main__":
    logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG)

    # Validate the command line explicitly: asserts vanish under `python -O`
    # and a missing argument would otherwise surface as a bare IndexError.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <kernel-dump>")
    arg = pathlib.Path(sys.argv[1]).resolve()
    if not arg.exists():
        sys.exit(f"{arg}: no such file")

    try:
        emulate(arg)
    finally:
        # Release the host pages even when emulation fails part-way through.
        logging.debug("Cleanup")
        for hva in hvas:
            bochscpu.memory.release_host_page(hva)
        hvas.clear()
0 commit comments