Skip to content

Commit 8e32138

Browse files
authored
[Example] Windows KDMP Emulation (#12)
Adds an example of emulation using `kdmp-parser`. Also fixes an issue with building bochscpu against recent Bochs versions by pinning, for now, to bochs-emu/Bochs@c48a501, which is known to work.
1 parent f56c7c4 commit 8e32138

File tree

3 files changed

+295
-2
lines changed

3 files changed

+295
-2
lines changed

.github/build-bochscpu.bat

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ mkdir bxbuild
1212
cd bxbuild
1313

1414
REM Use WSL to configure / clone the repositories.
15-
bash -c "git clone https://github.com/yrp604/bochscpu-build.git && git clone https://github.com/yrp604/bochscpu && git clone https://github.com/yrp604/bochscpu-ffi && cd bochscpu-build && bash prep.sh && cd Bochs/bochs && bash .conf.cpu-msvc"
15+
bash -c "git clone https://github.com/yrp604/bochscpu-build.git && git clone https://github.com/yrp604/bochscpu && git clone https://github.com/yrp604/bochscpu-ffi && cd bochscpu-build && BOCHS_REV=c48a50141b6ade6c6b0744280a598b55d906bb9e bash prep.sh && cd Bochs/bochs && bash .conf.cpu-msvc"
1616

1717
REM Build bochs; libinstrument.a is expected to fail to build so don't freak out.
1818
REM You can run nmake all-clean to clean up the build.

.github/build-bochscpu.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@ git clone https://github.com/yrp604/bochscpu
1616
git clone https://github.com/yrp604/bochscpu-ffi
1717

1818
cd bochscpu-build
19+
export BOCHS_REV=c48a50141b6ade6c6b0744280a598b55d906bb9e
1920
bash prep.sh && cd Bochs/bochs && sh .conf.cpu && make || true
2021

2122
# Remove old files in bochscpu.
2223
rm -rf ../../../bochscpu/bochs
23-
rm -rf ../../../bochscpu/libs
24+
rm -rf ../../../bochscpu/lib
2425

2526
# Create the lib directory where we stuff all the libs.
2627
mkdir ../../../bochscpu/lib
Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
from typing import Optional
2+
3+
import ctypes
4+
import logging
5+
import os
6+
import pathlib
7+
import sys
8+
9+
import capstone
10+
import kdmp_parser
11+
12+
import bochscpu
13+
import bochscpu.cpu
14+
import bochscpu.memory
15+
import bochscpu.utils
16+
17+
18+
# Private kernel32 handle. `ctypes.windll.kernel32` is cached on the shared
# `ctypes.windll` loader, so prototypes set on it would be visible to every
# other user of that object in the process; `ctypes.WinDLL` returns a library
# object owned by this module only.
kernel32 = ctypes.WinDLL("kernel32")
kernel32.GetModuleHandleW.argtypes = [ctypes.c_wchar_p]
kernel32.GetModuleHandleW.restype = ctypes.c_void_p
kernel32.GetProcAddress.argtypes = [ctypes.c_void_p, ctypes.c_char_p]
kernel32.GetProcAddress.restype = ctypes.c_void_p

# Capstone handle configured for x86 64-bit disassembly.
cs = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64)

# Address at which `after_execution_cb` stops the session; 0 disables the check.
emulation_end_address = 0

# Host pages allocated by `missing_page_cb`, kept so they can be released later.
hvas: list[int] = []
# Parsed kernel dump and emulation session, both populated by `emulate`.
dmp: Optional[kdmp_parser.KernelDumpParser] = None
session: Optional[bochscpu.Session] = None
32+
33+
34+
def hexdump(
    source: bytes, length: int = 0x10, separator: str = ".", base: int = 0x00
) -> str:
    """Render *source* as a classic hexadecimal dump.

    Each output row holds the row address, the bytes in upper-case hex and
    their printable-ASCII rendering.

    Args:
        source: The bytes to dump.
        length: Number of bytes shown per row.
        separator: Character substituted for non-printable bytes.
        base: Address displayed for the first byte of *source*.

    Returns:
        The rows joined with ``os.linesep``; an empty string for empty input.
    """
    # "0x" prefix plus 16 hex digits.
    address_width = 0x8 * 2 + 2
    # Two hex digits and one space per byte.
    hex_width = 3 * length

    rows = []
    for offset in range(0, len(source), length):
        row = bytearray(source[offset : offset + length])
        hex_part = " ".join(f"{byte:02X}" for byte in row)
        ascii_part = "".join(
            chr(byte) if 0x20 <= byte < 0x7F else separator for byte in row
        )
        rows.append(
            f"{base + offset:#0{address_width}x} {hex_part:<{hex_width}} {ascii_part}"
        )
    return os.linesep.join(rows)
52+
53+
54+
def missing_page_cb(pa):
    """Lazily back a missing guest physical page with data from the dump.

    The physical address is aligned to its page. If the dump contains that
    page, a host page is allocated, mapped at the guest physical address and
    filled with the dump content. Otherwise the session is stopped and an
    error is raised.

    Args:
        pa: Guest physical address whose backing page is missing.

    Raises:
        RuntimeError: If the page cannot be provided from the dump.
    """
    assert dmp and session

    gpa = kdmp_parser.page.align(pa)
    logging.debug(f"Missing GPA={gpa:#x}")

    if gpa in dmp.pages:
        # Read the dump first so that no host page is allocated (and then
        # forgotten) when the dump has nothing to put in it.
        page = dmp.read_physical_page(gpa)
        if page:
            hva = bochscpu.memory.allocate_host_page()
            if hva:
                # Track the allocation immediately so it is always released.
                hvas.append(hva)
                bochscpu.memory.page_insert(gpa, hva)
                bochscpu.memory.phy_write(gpa, page)
                logging.debug(f"{gpa=:#x} -> {hva=:#x}")
                # we've successfully mapped it
                return

    # otherwise the page is really missing, bail
    session.stop()
    raise RuntimeError(f"Unable to map missing physical page at GPA={gpa:#x}")
76+
77+
78+
def phy_access_cb(
    sess: bochscpu.Session, cpu_id: int, lin: int, phy: int, len: int, rw: int
):
    """Log a physical memory access at debug level.

    Args:
        sess: The session the hook is attached to (unused).
        cpu_id: Identifier of the CPU (unused).
        lin: Linear address of the access.
        phy: Physical address of the access.
        len: Length of the access.
        rw: Access-kind indicator; only its truthiness is logged.
    """
    # The `=` specifier embeds the variable names in the output, hence the
    # parameter names must stay exactly as they are.
    message = f"{lin=:#x} -> {phy=:#x}, {len=:#x}, {bool(rw)=}"
    logging.debug(message)
82+
83+
84+
def exception_cb(
    sess: bochscpu.Session,
    cpu_id: int,
    vector: int,
    error_code: int,
):
    """Report a CPU exception and stop the session on unexpected ones.

    Breakpoints and page faults are only logged; any other exception is
    logged as an error and stops the session.

    Args:
        sess: The session the exception occurred in.
        cpu_id: Identifier of the CPU that received the exception.
        vector: Exception vector, converted to ``bochscpu.cpu.ExceptionType``.
        error_code: Error code delivered with the exception.
    """
    kinds = bochscpu.cpu.ExceptionType
    excpt = kinds(vector)

    if excpt == kinds.BreakPoint:
        logging.info("breakpoint hit")
        return

    if excpt == kinds.PageFault:
        logging.warning(
            f"pagefault on VA={sess.cpu.cr2:#016x} at IP={sess.cpu.rip:#016x}"
        )
        return

    # Anything else is unexpected: report it and halt the emulation.
    logging.error(f"cpu#{cpu_id} received exception({excpt=}, {error_code=:d}) ")
    sess.stop()
105+
106+
107+
def before_execution_cb(sess: bochscpu.Session, cpu_id: int, _: int):
    """Log the instruction about to be executed.

    Reads 16 bytes at the current RIP through the current CR3, disassembles
    the first instruction and logs it at debug level. If the bytes cannot be
    decoded, the raw bytes are logged instead of letting ``StopIteration``
    escape from the hook.

    Args:
        sess: The session being emulated.
        cpu_id: Identifier of the CPU about to execute.
        _: Unused hook argument.
    """
    state = sess.cpu.state
    raw = bytes(bochscpu.memory.virt_read(state.cr3, state.rip, 16))

    # The disassembler yields nothing when the bytes are not a valid
    # instruction; `next` with a default avoids raising StopIteration.
    insn = next(cs.disasm(raw, state.rip), None)
    if insn is None:
        logging.debug(
            f"[CPU#{cpu_id}] PC={state.rip:#x} {raw.hex()} - (undecodable)"
        )
        return

    logging.debug(
        f"[CPU#{cpu_id}] PC={state.rip:#x} {insn.bytes.hex()} - {insn.mnemonic} {insn.op_str}"
    )
114+
115+
116+
def after_execution_cb(sess: bochscpu.Session, cpu_id: int, _: int):
    """Stop the session once RIP reaches ``emulation_end_address``.

    Does nothing while ``emulation_end_address`` is zero.

    Args:
        sess: The session being emulated.
        cpu_id: Identifier of the CPU that just executed (unused).
        _: Unused hook argument.
    """
    # Short-circuit keeps the CPU state untouched when no end address is set.
    if emulation_end_address and emulation_end_address == sess.cpu.state.rip:
        # Addresses are logged in hexadecimal, like everywhere else in the file.
        logging.info(
            f"Reaching end address @ {emulation_end_address:#x}, ending emulation"
        )
        sess.stop()
126+
127+
128+
def resolve_function(symbol: str) -> int:
    """Resolve a ``module!function`` symbol to its address in this process.

    The module must already be loaded in the current process; ``.dll`` is
    appended to the module name when missing.

    Args:
        symbol: Symbol of the form ``module!function``.

    Returns:
        The address of the exported function.

    Raises:
        ValueError: If *symbol* does not contain a ``!`` separator.
        RuntimeError: If the module is not loaded or the export is not found.
    """
    dll, separator, func = symbol.partition("!")
    if not separator:
        raise ValueError(
            f"Expected a symbol of the form 'module!function', got {symbol!r}"
        )

    if not dll.lower().endswith(".dll"):
        dll += ".dll"
    logging.info(f"Looking up {func} in {dll}")

    handle = kernel32.GetModuleHandleW(dll)
    if not handle:
        # Never hand a NULL module handle to GetProcAddress.
        raise RuntimeError(f"Failed to resolve {symbol}: {dll} is not loaded")

    address: int = kernel32.GetProcAddress(handle, func.encode())
    if not address:
        raise RuntimeError(f"Failed to resolve {symbol}")

    logging.info(f"Resolved '{symbol:s}' -> {address:#x}")
    return address
139+
140+
141+
def _flat_segment(selector: int, attr: int, present: Optional[bool] = None):
    """Build a segment with base 0 and limit 0xFFFF_FFFF for *selector*."""
    segment = bochscpu.Segment()
    segment.base = 0
    segment.limit = 0xFFFF_FFFF
    segment.selector = selector
    # Only touch `present` when the caller asks for it explicitly.
    if present is not None:
        segment.present = present
    segment.attr = attr
    return segment


def emulate(dmp_path: pathlib.Path):
    """Emulate execution starting from the CPU context stored in a kernel dump.

    Parses the dump, creates a session whose missing pages are served by
    `missing_page_cb`, builds a long-mode CPU state from the dump context
    (control registers, flags, general purpose registers, segments), installs
    the exception and before/after-execution hooks, runs the session and
    dumps the registers before and after the run.

    Args:
        dmp_path: Path of the kernel dump to load.

    Raises:
        AssertionError: If a session already exists or the dump is not parsed.
    """
    global session, dmp

    assert session is None

    logging.info(f"Parsing {dmp_path}")
    dmp = kdmp_parser.KernelDumpParser(dmp_path)
    assert dmp

    logging.info(f"Successfully parsed {dmp_path}")

    session = bochscpu.Session()
    session.missing_page_handler = missing_page_cb

    logging.debug("Preparing CPU state")
    state = bochscpu.State()
    bochscpu.cpu.set_long_mode(state)

    logging.debug("Enabling MMX (SSE/AVX) instructions")
    cr0 = bochscpu.utils.cpu.CR0(state.cr0)
    cr4 = bochscpu.utils.cpu.CR4(state.cr4)
    xcr0 = bochscpu.utils.cpu.XCR0(state.xcr0)
    # See AMD Vol2 - 11.3
    cr0.MP = True
    cr0.EM = False
    cr4.OSFXSR = True
    cr4.OSXSAVE = True
    # See AMD Vol2 - 11.5.2
    xcr0.x87 = True
    xcr0.SSE = True
    xcr0.YMM = True

    # TODO use bdump.js::regs.json instead
    logging.debug(f"Setting {cr0=:}")
    logging.debug(f"Setting {cr4=:}")
    logging.debug(f"Setting {xcr0=:}")
    state.cr0 = int(cr0)
    state.cr4 = int(cr4)
    state.xcr0 = int(xcr0)

    cr3 = dmp._KernelDumpParser__dump.GetDirectoryTableBase()  # type: ignore # HACK
    logging.debug(f"Setting CR3={cr3:#x}")
    state.cr3 = cr3

    logging.debug("Setting the flag register")
    # RFLAGS comes from CONTEXT.EFlags. CONTEXT.ContextFlags is the mask that
    # says which parts of the CONTEXT record are valid, not the CPU flags.
    state.rflags = dmp.context.EFlags

    logging.debug("Setting the other GPRs")
    for regname in (
        "rax",
        "rbx",
        "rcx",
        "rdx",
        "rsi",
        "rdi",
        "rip",
        "rsp",
        "rbp",
        "r8",
        "r9",
        "r10",
        "r11",
        "r12",
        "r13",
        "r14",
        "r15",
    ):
        # The dump context uses capitalized field names ("Rax", "R8", ...).
        value = int(getattr(dmp.context, regname.capitalize()))
        setattr(state, regname, value)

    logging.debug("Setting the segment selectors")
    _cs_attr = bochscpu.cpu.SegmentFlags()
    _cs_attr.A = True
    _cs_attr.R = True
    _cs_attr.E = True
    _cs_attr.S = True
    _cs_attr.P = True
    _cs_attr.L = True

    state.ss = _flat_segment(dmp.context.SegSs, 0xCF3)
    state.cs = _flat_segment(dmp.context.SegCs, int(_cs_attr))
    state.ds = _flat_segment(dmp.context.SegDs, 0xCF3)
    state.es = _flat_segment(dmp.context.SegEs, 0xCF3)
    state.fs = _flat_segment(dmp.context.SegFs, 0x4F3, present=True)
    # TODO: missing curprocess TEB (GS base is left at 0)
    state.gs = _flat_segment(dmp.context.SegGs, 0xCF3, present=True)

    logging.debug("Apply the created state to the session CPU")
    session.cpu.state = state

    logging.debug("Preparing hooks")
    hook = bochscpu.Hook()
    hook.exception = exception_cb
    hook.before_execution = before_execution_cb
    hook.after_execution = after_execution_cb

    logging.debug("Initial register state")
    bochscpu.utils.dump_registers(session.cpu.state)

    logging.debug("Let's go baby!")
    session.run([hook])

    session.stop()

    logging.debug("Final register state")
    bochscpu.utils.dump_registers(session.cpu.state)
282+
283+
284+
if __name__ == "__main__":
    # Script entry point: emulate the kernel dump given as first argument.
    logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG)

    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <path-to-kernel-dump>")

    arg = pathlib.Path(sys.argv[1]).resolve()
    # Explicit check rather than `assert`, which is stripped under `-O`.
    if not arg.exists():
        sys.exit(f"error: {arg} does not exist")

    try:
        emulate(arg)
    finally:
        # Release the host pages even when the emulation raised.
        logging.debug("Cleanup")
        for hva in hvas:
            bochscpu.memory.release_host_page(hva)

0 commit comments

Comments
 (0)