#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <stddef.h>
#include <unistd.h>
#include <pthread.h>

#include <mach/mach.h>

#include "kdbg.h"
#include "kutils.h"
#include "kmem.h"
#include "symbols.h"
#include "kcall.h"
#include "find_port.h"
#include "early_kalloc.h"
#include "arm64_state.h"
/*
A thread-local iOS kernel debugger for all ARM64 devices

This code uses a kernel memory read/write primitive to enable a hardware breakpoint in EL1 on a particular thread.
When that bp triggers it will eventually end up stuck in a loop:

  case ESR_EC_BKPT_REG_MATCH_EL1:
    if (FSC_DEBUG_FAULT == ISS_SSDE_FSC(esr)) {
      kprintf("Hardware Breakpoint Debug exception from kernel. Hanging here (by design).\n");
      for (;;);

That thread will eventually get preempted; when that happens we'll find its state (from userspace) and modify it
such that it breaks out of that loop and continues with the desired state.

Doing this requires careful attention to how AArch64 exceptions work, how XNU handles nested exceptions
and how context switching works. A description of this is given below:

AArch64 exceptions:

There are four classes of AArch64 exceptions: Synchronous, IRQ, FIQ and SError. These exceptions are the only
way in which the CPU will transition between Exception Levels (ELs.) There are four Exception Levels: EL0, EL1,
EL2 and EL3. On iOS userspace runs in EL0 and the kernel runs in EL1; these are similar to Ring 3 and Ring 0
on x86. All 64-bit iOS devices below iPhone 7 also contain a secure monitor which runs in EL3.

Exception types:
  Synchronous: things like SVC instructions (used for syscalls), breakpoints, data aborts etc.
  IRQ: external interrupts from devices
  FIQ: also external interrupts
  SError: system errors, things like ECC errors

For our purposes we're interested in Synchronous exceptions and FIQs. Hardware breakpoints are synchronous
exceptions. The timer which drives the scheduler is attached as an FIQ source.

AArch64 further subdivides those four exception classes into another four categories depending on where the
exception came from:
  a) the current exception level, which was running on SP_EL0
  b) the current exception level, which was running on SP_EL1
  c) a lower exception level which was executing in AArch64 mode
  d) a lower exception level which was executing in AArch32 mode
Each of these 16 cases has its own vector (handling routine.)

sp registers:

sp isn't a general purpose register; it's better to view it as an alias for one of four separate hardware
registers: SP_EL0, SP_EL1, SP_EL2 and SP_EL3. When an exception is taken, sp will be set to name the SP_ELx
register for the exception level which the exception is taken to. For example, when userspace (EL0) makes a
syscall (a synchronous exception to EL1 from a lower exception level) sp will name SP_EL1 in the handler.

To enable nested exceptions, code generally switches back to using SP_EL0 regardless of which exception level
it's actually running at (obviously after first saving the original value of SP_EL0 so it can be restored.)

Nested exceptions and masking:

The four PSTATE.{A,D,F,I} bits control exception masking. Whenever any exception is taken, all four bits
will be set:
  PSTATE.A: SError interrupts will be pended until this bit is cleared
  PSTATE.F: FIQ interrupts will be pended until this bit is cleared
  PSTATE.I: IRQ interrupts will be pended until this bit is cleared
  PSTATE.D: Debug exceptions will be suppressed until this bit is cleared
These bits can be manually set/cleared by writing to the DAIFSet/DAIFClr system registers. The bits will also
be restored to their saved values during an ERET (return from exception) from the SPSR_ELx register (where x
is the EL the exception was taken to.)

Synchronous exceptions which are not Debug exceptions cannot be masked. Debug exceptions, however, will be
suppressed, and XNU doesn't re-enable them. This presents the first major hurdle to implementing this debugger:
the exceptions generated by hardware breakpoints fall into the Debug category and will therefore never fire,
even if we set and enable them for EL1. Note that Debug exceptions really are suppressed, that is, they will
never fire, unlike the other maskable interrupts, which are merely pended and fire as soon as they are unmasked.

Re-enabling Debug exceptions during syscall execution:

The trick to clearing PSTATE.D is to fake a return from an exception by calling ERET using an arbitrary-call
primitive. See below in the code for exactly the right gadget which lets us restore a complete register state
(including CPSR.) With PSTATE.D cleared we point pc back to near the start of the syscall handling path so we
can fake the execution of an arbitrary syscall.

There are a couple of other things preventing HW breakpoints from firing:

The Kernel Debug Enable bit has to be set in MDSCR_EL1. This can be set with some simple ROP. It's per-core,
and it won't be cleared if we get scheduled off, so it's sufficient to just set it once.

We can use the thread_set_state API to set a breakpoint on a kernel address, but it sanitizes the BCRx control
flags so it's also necessary to set ARM_DBG_CR_MODE_CONTROL_ANY using the kernel memory r/w.

Finding and modifying the stuck thread state:

This is explained below. We pin a monitor thread to the same core as the debuggee then search the debuggee's
kernel stack looking for the set of stack frames which indicate it's got stuck in the kernel hw bp hit
infinite loop. We then expose the state at the bp to a callback which can modify it before unblocking the
stuck kernel thread.

Limitations:

I only wrote code to support one breakpoint at the moment; expect a fuller-featured, interactive version soon!
Don't set breakpoints when things like spinlocks are held; it will go very badly.
Single-step won't work. In the breakpoint handler you have to emulate the instruction and manually move pc on.
It's slow! This is unlikely to change given how it works, but hey, you're modifying kernel thread state from
userspace on the same machine!
*/
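
/*
Illustrative sketch only (nothing in this file executes it): how EL1 assembly
would toggle these mask bits. DAIFSet/DAIFClr take a 4-bit immediate with
D = 8, A = 4, I = 2, F = 1:

    msr DAIFSet, #0x8    // set PSTATE.D: debug exceptions suppressed
    msr DAIFClr, #0x8    // clear PSTATE.D: debug exceptions can fire again

XNU never executes a DAIFClr for the D bit on the syscall path, which is why
the fake-ERET trick described above is needed to clear it.
*/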
// scheduling mach traps to yield the cpu / hand it to a specific thread:
extern boolean_t swtch_pri(int pri);
extern kern_return_t thread_switch(mach_port_name_t thread_name, int option, mach_msg_timeout_t option_time);

// pin the current thread to a processor; returns a pointer to the processor we're pinned to
uint64_t pin_current_thread() {
  // get the current thread_t:
  uint64_t th = current_thread();
#if 0
  // get the processor_t this thread last ran on
  uint64_t processor = rk64_electra(th + koffset(KSTRUCT_OFFSET_THREAD_LAST_PROCESSOR));
  printf("thread %llx last ran on %llx, pinning it to that core\n", th, processor);
  // this is probably fine...
  wk64_electra(th + koffset(KSTRUCT_OFFSET_THREAD_BOUND_PROCESSOR), processor);
#endif
  // we need the struct cpu_data for that processor, which is stored in the CpuDataEntries array, declared in data.s
  // it's 6*4k in to the data segment
  uint64_t cpu_data_entries = ksym(KSYMBOL_CPU_DATA_ENTRIES);
  int cpu_id = 0;
  // it's an array of cpu_data_entry_t, which contains just the 64-bit physical and virtual addresses of struct cpu_data
  uint64_t cpu_data = rk64_electra(cpu_data_entries + ((cpu_id * 0x10) + 8));
  uint64_t processor = rk64_electra(cpu_data + koffset(KSTRUCT_OFFSET_CPU_DATA_CPU_PROCESSOR));
  printf("trying to pin to cpu0: %llx\n", processor);

  // pin to that cpu
  // this is probably fine...
  wk64_electra(th + koffset(KSTRUCT_OFFSET_THREAD_BOUND_PROCESSOR), processor);

  // that binding will only take effect once we get scheduled off and back on again, so yield the cpu:
  printf("pin_current_thread yielding cpu\n");
  swtch_pri(0);
  printf("pin_current_thread back on cpu\n");
  uint64_t chosen = rk64_electra(th + koffset(KSTRUCT_OFFSET_THREAD_CHOSEN_PROCESSOR));
  printf("running on %llx\n", chosen);
#if 0
  // should now be running on the chosen processor, and should only get scheduled on there:
  printf("we're running again!\n");
  int got_switched = 0;
  for (int i = 0; i < 1000; i++) {
    swtch_pri(0);
    uint64_t p = rk64_electra(th + koffset(KSTRUCT_OFFSET_THREAD_CHOSEN_PROCESSOR));
    if (p != processor) {
      printf("got moved off target processor\n");
      got_switched = 1;
      break;
    }
    usleep(15000);
    p = rk64_electra(th + koffset(KSTRUCT_OFFSET_THREAD_CHOSEN_PROCESSOR));
    if (p != processor) {
      printf("got moved off target processor\n");
      got_switched = 1;
      break;
    }
  }
  if (!got_switched) {
    printf("looks like pinning works!\n");
  }
#endif
  return processor;
}
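
#if 0
// sketch, not used by the code: pinning to an arbitrary core would just index
// further into CpuDataEntries (each cpu_data_entry_t is 0x10 bytes: the
// physical address followed by the virtual address of the struct cpu_data):
uint64_t processor_for_core(int cpu_id) {
  uint64_t entries = ksym(KSYMBOL_CPU_DATA_ENTRIES);
  uint64_t cpu_data = rk64_electra(entries + (cpu_id * 0x10) + 8);
  return rk64_electra(cpu_data + koffset(KSTRUCT_OFFSET_CPU_DATA_CPU_PROCESSOR));
}
#endif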
#if 0
use the two-argument arbitrary call to call this:

  __TEXT_EXEC:__text:FFFFFFF0070CC1AC   MOV  X21, X0
  __TEXT_EXEC:__text:FFFFFFF0070CC1B0   MOV  X22, X1
  __TEXT_EXEC:__text:FFFFFFF0070CC1B4   BR   X22

that gives control of x21 and pc. point pc to this:

exception_return:
  msr   DAIFSet, #(DAIFSC_IRQF | DAIFSC_FIQF)   // Disable interrupts
  mrs   x3, TPIDR_EL1                           // Load thread pointer
  mov   sp, x21                                 // Reload the pcb pointer

  /* ARM64_TODO Reserve x18 until we decide what to do with it */
  ldr   x0, [x3, TH_CTH_DATA]                   // Load cthread data pointer
  str   x0, [sp, SS64_X18]                      // and use it to trash x18

Lexception_return_restore_registers:
  /* Restore special register state */
  ldr   x0, [sp, SS64_PC]                       // Get the return address
  ldr   w1, [sp, SS64_CPSR]                     // Get the return CPSR
  ldr   w2, [sp, NS64_FPSR]
  ldr   w3, [sp, NS64_FPCR]

  msr   ELR_EL1, x0                             // Load the return address into ELR
  msr   SPSR_EL1, x1                            // Load the return CPSR into SPSR
  msr   FPSR, x2
  msr   FPCR, x3                                // Synchronized by ERET

  mov   x0, sp                                  // x0 = &pcb

  /* Restore arm_neon_saved_state64 */
  ldp   q0, q1, [x0, NS64_Q0]
  ldp   q2, q3, [x0, NS64_Q2]
  ldp   q4, q5, [x0, NS64_Q4]
  ldp   q6, q7, [x0, NS64_Q6]
  ldp   q8, q9, [x0, NS64_Q8]
  ldp   q10, q11, [x0, NS64_Q10]
  ldp   q12, q13, [x0, NS64_Q12]
  ldp   q14, q15, [x0, NS64_Q14]
  ldp   q16, q17, [x0, NS64_Q16]
  ldp   q18, q19, [x0, NS64_Q18]
  ldp   q20, q21, [x0, NS64_Q20]
  ldp   q22, q23, [x0, NS64_Q22]
  ldp   q24, q25, [x0, NS64_Q24]
  ldp   q26, q27, [x0, NS64_Q26]
  ldp   q28, q29, [x0, NS64_Q28]
  ldp   q30, q31, [x0, NS64_Q30]

  /* Restore arm_saved_state64 */
  // Skip x0, x1 - we're using them
  ldp   x2, x3, [x0, SS64_X2]
  ldp   x4, x5, [x0, SS64_X4]
  ldp   x6, x7, [x0, SS64_X6]
  ldp   x8, x9, [x0, SS64_X8]
  ldp   x10, x11, [x0, SS64_X10]
  ldp   x12, x13, [x0, SS64_X12]
  ldp   x14, x15, [x0, SS64_X14]
  ldp   x16, x17, [x0, SS64_X16]
  ldp   x18, x19, [x0, SS64_X18]
  ldp   x20, x21, [x0, SS64_X20]
  ldp   x22, x23, [x0, SS64_X22]
  ldp   x24, x25, [x0, SS64_X24]
  ldp   x26, x27, [x0, SS64_X26]
  ldr   x28, [x0, SS64_X28]
  ldp   fp, lr, [x0, SS64_FP]

  // Restore stack pointer and our last two GPRs
  ldr   x1, [x0, SS64_SP]
  mov   sp, x1
  ldp   x0, x1, [x0, SS64_X0]                   // Restore the GPRs
  eret

this lets us eret with a completely controlled state :)
use that to clear PSTATE.D, and return to EL1+SP0

return to:

  .text
  .align 2
fleh_synchronous:
  mrs   x1, ESR_EL1                             // Load exception syndrome
  mrs   x2, FAR_EL1                             // Load fault address
  and   w3, w1, #(ESR_EC_MASK)
  lsr   w3, w3, #(ESR_EC_SHIFT)
  mov   w4, #(ESR_EC_IABORT_EL1)
  cmp   w3, w4
  b.eq  Lfleh_sync_load_lr
Lvalid_link_register:                           <-- ***there***
  PUSH_FRAME
  bl    EXT(sleh_synchronous)
  POP_FRAME
  b     exception_return_dispatch

on iPhone 7, iOS 11.1.2, that's:

  __TEXT_EXEC:__text:FFFFFFF0070CC1D4   STP  X29, X30, [SP,#var_10]!
  __TEXT_EXEC:__text:FFFFFFF0070CC1D8   MOV  X29, SP
  __TEXT_EXEC:__text:FFFFFFF0070CC1DC   BL   loc_FFFFFFF0071DDED4
  __TEXT_EXEC:__text:FFFFFFF0070CC1E0   MOV  SP, X29
  __TEXT_EXEC:__text:FFFFFFF0070CC1E4   LDP  X29, X30, [SP+0x10+var_10],#0x10
  __TEXT_EXEC:__text:FFFFFFF0070CC1E8   B    sub_FFFFFFF0070CC3CC

in the state which we get loaded:
  x21 should point to the actual saved ACT_CONTEXT, since x21 will be used in the return path if no ASTs are taken
  x0 should point to the saved state which we want the debugged syscall to see (not ACT_CONTEXT!)
  x1 should be the SVC exception syndrome (ESR_EC(esr) == ESR_EC_SVC_64)
  x2 should be the pc of the svc instruction
  sp should be the right place on the thread's kernel stack
#endif
struct syscall_args {
  uint32_t number;
  uint64_t arg[8];
};

void do_syscall_with_pstate_d_unmasked(struct syscall_args* args) {
  // get the target thread_t
  //uint64_t thread_port_addr = find_port_address_electra(target_thread_port, MACH_MSG_TYPE_COPY_SEND);
  //uint64_t thread_t_addr = rk64_electra(thread_port_addr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT));
  uint64_t thread_t_addr = current_thread();

  /* this state should be set up as if it were calling the target syscall */
  arm_context_t fake_syscall_args = {0};

  /* this state will be restored by an eret */
  arm_context_t eret_return_state = {0};

  // there's no need to initialize too much of this since it won't actually be the state which is restored;
  // it just needs to be enough to get the target syscall called
  fake_syscall_args.ss.ss_64.x[16] = args->number;
  fake_syscall_args.ss.ss_64.x[0] = args->arg[0];
  fake_syscall_args.ss.ss_64.x[1] = args->arg[1];
  fake_syscall_args.ss.ss_64.x[2] = args->arg[2];
  fake_syscall_args.ss.ss_64.x[3] = args->arg[3];
  fake_syscall_args.ss.ss_64.x[4] = args->arg[4];
  fake_syscall_args.ss.ss_64.x[5] = args->arg[5];
  fake_syscall_args.ss.ss_64.x[6] = args->arg[6];
  fake_syscall_args.ss.ss_64.x[7] = args->arg[7];
  fake_syscall_args.ss.ash.flavor = ARM_SAVED_STATE64;
  fake_syscall_args.ss.ss_64.cpsr = 0;

  // allocate a copy of that in wired kernel memory:
  //uint64_t fake_syscall_args_kern = kmem_alloc_wired(sizeof(arm_context_t));
  uint64_t fake_syscall_args_kern = early_kalloc(sizeof(arm_context_t));
  kmemcpy(fake_syscall_args_kern, (uint64_t)&fake_syscall_args, sizeof(arm_context_t));

  // this state needs to be a bit more complete...
  // x0 of the eret-restored state will be the arm_context_t which the syscall dispatch code sees
  eret_return_state.ss.ss_64.x[0] = fake_syscall_args_kern;

  // x1 will be the exception syndrome
#define ESR_EC_SVC_64 0x15
#define ESR_EC_SHIFT 26
  eret_return_state.ss.ss_64.x[1] = ESR_EC_SVC_64 << ESR_EC_SHIFT;

  // x2 will be the address of the exception, not relevant for a syscall
  eret_return_state.ss.ss_64.x[2] = 0x454545454540;

  // x21 will be the real saved state to be used to return back to EL0;
  // this is the state which was spilled during the actual EL0 -> EL1 transition.
  // if a continuation is run x21 won't be used; instead the return will go via the thread's ACT_CONTEXT,
  // so this makes both paths safe
  uint64_t act_context = rk64_electra(thread_t_addr + koffset(KSTRUCT_OFFSET_THREAD_CONTEXT_DATA));
  eret_return_state.ss.ss_64.x[21] = act_context;

  // let's stay on the thread's actual kernel stack
  uint64_t thread_kernel_stack_top = rk64_electra(thread_t_addr + koffset(KSTRUCT_OFFSET_THREAD_KSTACKPTR));
  eret_return_state.ss.ss_64.sp = thread_kernel_stack_top;

  // the target place to eret to (see code snippet above)
  eret_return_state.ss.ss_64.pc = ksym(KSYMBOL_VALID_LINK_REGISTER);

  // the whole point of this, cpsr! this will be restored to SPSR_EL1 before the eret.
  // see D1.6.4 of the ARMv8 manual:
  // we want to return on to SP0 and to EL1;
  // A, I, F should still be masked, D unmasked
#define SPSR_A (1<<8)
#define SPSR_I (1<<7)
#define SPSR_F (1<<6)
#define SPSR_EL1_SP0 (0x4)
  eret_return_state.ss.ss_64.cpsr = SPSR_A | SPSR_I | SPSR_F | SPSR_EL1_SP0;

  //uint64_t eret_return_state_kern = kmem_alloc_wired(sizeof(arm_context_t));
  uint64_t eret_return_state_kern = early_kalloc(sizeof(arm_context_t));
  kmemcpy(eret_return_state_kern, (uint64_t)&eret_return_state, sizeof(arm_context_t));

  // make the arbitrary call
  kcall(ksym(KSYMBOL_X21_JOP_GADGET), 2, eret_return_state_kern, ksym(KSYMBOL_EXCEPTION_RETURN));
}
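
#if 0
// usage sketch: this is how run_syscall_with_breakpoint (below) drives this
// function, e.g. to issue write(2) on the PSTATE.D-unmasked path:
struct syscall_args a = {0};
a.number = 4;                   // SYS_write
a.arg[0] = 1;                   // stdout
a.arg[1] = (uint64_t)"hi\n";    // buffer
a.arg[2] = 3;                   // length
do_syscall_with_pstate_d_unmasked(&a);
#endif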
/*
we want to call this gadget:

  FFFFFFF0071E1998   MSR  #0, c0, c2, #2, X8     ; [>] MDSCR_EL1 (Monitor Debug System Control Register)
  FFFFFFF0071E199C   ISB                         ; this is a workaround for some errata...
  FFFFFFF0071E19A0   B    loc_FFFFFFF0071E19F8
  ...
  FFFFFFF0071E19F8   BL   _ml_set_interrupts_enabled
  FFFFFFF0071E19FC   ADD  SP, SP, #0x220
  FFFFFFF0071E1A00   LDP  X29, X30, [SP,#0x20+var_s0]
  FFFFFFF0071E1A04   LDP  X20, X19, [SP,#0x20+var_10]
  FFFFFFF0071E1A08   LDP  X28, X27, [SP+0x20+var_20],#0x30
  FFFFFFF0071E1A0C   RET

let's just use the ERET case to get full register control and run that on a little ROP stack which then
returns to thread_exception_return
*/
void set_MDSCR_EL1_KDE(mach_port_t target_thread_port) {
  /* this state will be restored by an eret */
  arm_context_t eret_return_state = {0};

  // allocate a stack for the rop:
  //uint64_t rop_stack_kern_base = kmem_alloc_wired(0x4000);
  uint64_t rop_stack_kern_base = early_kalloc(0x1000);
  uint64_t rop_stack_kern_middle = rop_stack_kern_base + 0xc00;
  eret_return_state.ss.ss_64.sp = rop_stack_kern_middle;

  uint64_t rop_stack_kern_popped_base = rop_stack_kern_middle + 0x220;
  // x28, x27, x20, x19, fp, lr
  uint64_t popped_regs[] = {0, 0, 0, 0, 0x414243444546, ksym(KSYMBOL_THREAD_EXCEPTION_RETURN)}; // directly return back to userspace after this
  kmemcpy(rop_stack_kern_popped_base, (uint64_t)popped_regs, sizeof(popped_regs));

#define MDSCR_EL1_KDE (1<<13)
  eret_return_state.ss.ss_64.x[8] = MDSCR_EL1_KDE;

  // the target place to eret to
  eret_return_state.ss.ss_64.pc = ksym(KSYMBOL_SET_MDSCR_EL1_GADGET);

  // we want to return on to SP0 and to EL1;
  // A, I, F should still be masked, D unmasked (here we could actually mask D?)
  eret_return_state.ss.ss_64.cpsr = SPSR_A | SPSR_I | SPSR_F | SPSR_EL1_SP0;

  //uint64_t eret_return_state_kern = kmem_alloc_wired(sizeof(arm_context_t));
  uint64_t eret_return_state_kern = early_kalloc(sizeof(arm_context_t));
  kmemcpy(eret_return_state_kern, (uint64_t)&eret_return_state, sizeof(arm_context_t));

  // make the arbitrary call
  kcall(ksym(KSYMBOL_X21_JOP_GADGET), 2, eret_return_state_kern, ksym(KSYMBOL_EXCEPTION_RETURN));
  printf("returned from trying to set the KDE bit\n");

  // free the stack we used:
  //kmem_free(rop_stack_kern_base, 0x4000);
}
/*
target_thread_port is the thread port for a thread which may hit, or already has hit, a kernel hw breakpoint.
detect whether that is the case, and if so find the register state from when the BP was hit.

where to find stuff:

userspace svc: EL0+SP0 -> EL1+SP1 (sync exception from lower exception level running aarch64)
  userspace state gets saved in thread->ACT_CONTEXT
  stack switched to thread's kernel stack pointer and SP0 selected
  does stuff which then hits kernel hw bp

kernel hw bp: EL1+SP0 -> EL1+SP1 (sync exception from same exception level running on SP0)
  switch back to SP0 and push a new arm_context_t on there. point x21 to this saved state area.
  control flow reaches the infinite loop

fiq timer: EL1+SP0 -> EL1+SP1 (fiq interrupt from same exception level running on SP0)
  switch back to SP0 and push a new arm_context_t on there. point x21 to there.
  then set sp to the interrupt stack.

schedule off:
  this will happen just before the fiq timer interrupt returns in return_to_kernel.
  it will set sp back to x21 (as if to eret back to the previous exception level) then call ast_taken_kernel.
  if the thread is to be scheduled off, just a small amount of state will be saved to the reserved area
  above the top of the thread's kernel stack, sufficient to get the thread back on the core and
  resume execution.

     +-----------------------------+
     |                             |
     | struct thread_kernel_state  | <-- *above* the top of thread kernel stack
     |                             |
  +> +=============================+ <-- top of thread kernel stack
  |  |                             |
  |  | syscall stack frames of     |
  |  | varying depth               |
  |  | (not user state)            |
  |  |                             |
  |  +-----------------------------+ <-- kernel hw bp: EL1+SP0 -> EL1+SP1 (sync exception from same exception level running on SP0)
  |  |                             | <-- saved state from when the bp was hit
  |  | struct arm_context_t        |
  |  | .pc = address of hit bp     |
  |  +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
  |  |                             |
  |  |                             |
  |  | stack frames from sync excp |
  |  | to the infinite loop...     |
  |  |                             |
  |  +-----------------------------+ <-- fiq timer: EL1+SP0 -> EL1+SP1 (fiq interrupt from same exception level running on SP0)
  |  | struct arm_context_t        | <-- saved state from the infinite loop before it was scheduled off
  |  | .pc = addr of the infinite  |
  |  |       loop instr            |
  |  +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
  |  |                             |
  |  |                             |
  |  |                             |
  |  |                             |
  +- +-----------------------------+
*/
typedef void (*breakpoint_callback)(arm_context_t* context);

volatile int syscall_complete = 0;
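
// helper sketch: the scans below identify a saved arm_context_t on the stack
// by its first 64-bit word: the flavor (ARM_SAVED_STATE64) in the low 32 bits
// and the count (ARM_SAVED_STATE64_COUNT) in the high 32 bits. the inline
// checks in handle_kernel_bp_hits are exactly this predicate:
static inline int looks_like_arm_saved_state64_header(uint64_t word) {
  return word == (ARM_SAVED_STATE64 | (((uint64_t)ARM_SAVED_STATE64_COUNT) << 32));
}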
void handle_kernel_bp_hits(mach_port_t target_thread_port, uint64_t looper_pc, uint64_t breakpoint, breakpoint_callback callback) {
  // get the target thread's thread_t
  uint64_t thread_port_addr = find_port_address_electra(target_thread_port, MACH_MSG_TYPE_COPY_SEND);
  uint64_t thread_t_addr = rk64_electra(thread_port_addr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT));

  while (1) {
    uint64_t looper_saved_state = 0;
    int found_it = 0;
    while (!found_it) {
      if (syscall_complete) {
        return;
      }
      // we've pinned ourselves to the same core, so if we're running, it isn't...
      // in some ways this code is very racy, but once we've actually detected that the target
      // thread has hit the breakpoint it should be safe until we restart it,
      // and up until then we don't do anything too dangerous...

      // get the kstack pointer
      uint64_t kstackptr = rk64_electra(thread_t_addr + koffset(KSTRUCT_OFFSET_THREAD_KSTACKPTR));
      printf("kstackptr: %llx\n", kstackptr);

      // get the thread_kernel_state:
      // the stack lives below kstackptr, and kstackptr itself points to a struct thread_kernel_state,
      // the first part of which is just an arm_context_t:
      // this is the scheduled-off state
      arm_context_t saved_ksched_state = {0};
      kmemcpy((uint64_t)&saved_ksched_state, kstackptr, sizeof(arm_context_t));

      // get the saved stack pointer
      uint64_t sp = saved_ksched_state.ss.ss_64.sp;
      printf("sp: %llx\n", sp);
      if (sp == 0) {
        continue;
      }

      uint64_t stack[128] = {0};

      // walk up from there and look for the saved state dumped by the fiq:
      // note that it won't be right at the bottom of the stack;
      // instead there are the frames for:
      //   ast_taken_kernel        <-- above this is the saved state which will get restored when the hw bp spinner gets rescheduled
      //   thread_block_reason
      //   thread_invoke
      //   machine_switch_context
      //   Switch_context          <-- the frame actually at the bottom of the stack
      // should probably walk those stack frames properly, but this will do...

      // grab the stack
      kmemcpy((uint64_t)&stack[0], sp, sizeof(stack));
      //for (int i = 0; i < 128; i++) {
      //  printf("%016llx\n", stack[i]);
      //}
      for (int i = 0; i < 128; i++) {
        uint64_t flavor_and_count = stack[i];
        if (flavor_and_count != (ARM_SAVED_STATE64 | (((uint64_t)ARM_SAVED_STATE64_COUNT) << 32))) {
          continue;
        }
        arm_context_t* saved_state = (arm_context_t*)&stack[i];
        if (saved_state->ss.ss_64.pc != looper_pc) {
          continue;
        }
        found_it = 1;
        looper_saved_state = sp + (i*sizeof(uint64_t));
        printf("found the saved state, probably at %llx\n", looper_saved_state); // should walk the stack properly...
        break;
      }
      if (!found_it) {
        printf("unable to find the saved scheduler tick state on the stack, waiting a bit then trying again...\n");
        sleep(1);
        continue;
      }
    }

    // now keep walking up and find the saved state for the code which hit the BP:
    uint64_t bp_hitting_state = looper_saved_state + sizeof(arm_context_t);
    found_it = 0;
    for (int i = 0; i < 1000; i++) {
      uint64_t flavor_and_count = rk64_electra(bp_hitting_state);
      if (flavor_and_count != (ARM_SAVED_STATE64 | (((uint64_t)ARM_SAVED_STATE64_COUNT) << 32))) {
        bp_hitting_state += 8;
        continue;
      }
      arm_context_t bp_context;
      kmemcpy((uint64_t)&bp_context, bp_hitting_state, sizeof(arm_context_t));
      for (int j = 0; j < 40; j++) {
        uint64_t* buf = (uint64_t*)&bp_context;
        printf("%016llx\n", buf[j]);
      }
      if (bp_context.ss.ss_64.pc != breakpoint) {
        printf("hummm, found an unexpected breakpoint: %llx\n", bp_context.ss.ss_64.pc);
      }
      found_it = 1;
      break;
    }
    if (!found_it) {
      printf("unable to find bp hitting state\n");
    }

    // fix up the bp hitting state so it will continue (with whatever modifications we want:)
    // get a copy of the state:
    arm_context_t bp_context;
    kmemcpy((uint64_t)&bp_context, bp_hitting_state, sizeof(arm_context_t));
    callback(&bp_context);

    // write that new state back:
    kmemcpy(bp_hitting_state, (uint64_t)&bp_context, sizeof(arm_context_t));

    // unblock the looper:
    wk64_electra(looper_saved_state + offsetof(arm_context_t, ss.ss_64.pc), ksym(KSYMBOL_SLEH_SYNC_EPILOG));
    // when it runs again it should break out of the loop and continue the syscall

    // force us off the core and hopefully it on:
    thread_switch(target_thread_port, 0, 0);
    swtch_pri(0);
  }
}
struct monitor_args {
  mach_port_t target_thread_port;
  uint64_t breakpoint;
  breakpoint_callback callback;
};

void* monitor_thread(void* arg) {
  struct monitor_args* args = (struct monitor_args*)arg;
  printf("monitor thread running, pinning to core\n");
  pin_current_thread();
  printf("monitor thread pinned\n");
  handle_kernel_bp_hits(args->target_thread_port, ksym(KSYMBOL_EL1_HW_BP_INFINITE_LOOP), args->breakpoint, args->callback);
  return NULL;
}
// this runs on the thread which will execute the target syscall to debug
void run_syscall_with_breakpoint(uint64_t bp_address, breakpoint_callback callback, uint32_t syscall_number, uint32_t n_args, ...) {
  // pin this thread to the target cpu:
  pin_current_thread();

  // set the Kernel Debug Enable bit of MDSCR_EL1:
  set_MDSCR_EL1_KDE(mach_thread_self());
  // MDE will be set by the regular API for us

  // enable a hw debug breakpoint at bp_address;
  // it won't fire because PSTATE.D will be set, but we'll deal with that in a bit!
  // set a hardware bp on the thread using the proper API so that all the structures are already set up:
  struct arm64_debug_state state = {0};
  state.bvr[0] = bp_address;
#define BCR_BAS_ALL (0xf << 5)
#define BCR_E (1 << 0)
  state.bcr[0] = BCR_BAS_ALL | BCR_E; // enabled
  kern_return_t err = thread_set_state(mach_thread_self(),
                                       ARM_DEBUG_STATE64,
                                       (thread_state_t)&state,
                                       sizeof(state)/4);

  // verify that it got set:
  memset(&state, 0, sizeof(state));
  mach_msg_type_number_t count = sizeof(state)/4;
  err = thread_get_state(mach_thread_self(),
                         ARM_DEBUG_STATE64,
                         (thread_state_t)&state,
                         &count);
  if (state.bvr[0] != bp_address) {
    printf("setting the bp address failed\n");
  }

  // now go and find that thread's DebugData where those values are stored.
  uint64_t thread_port_addr = find_port_address_electra(mach_thread_self(), MACH_MSG_TYPE_COPY_SEND);
  uint64_t thread_t_addr = rk64_electra(thread_port_addr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT));
  printf("thread_t_addr: %llx\n", thread_t_addr);

  // read bvr[0] in that thread_t's DebugData:
  uint64_t DebugData = rk64_electra(thread_t_addr + ACT_DEBUGDATA_OFFSET);
  //printf("DebugData: %llx\n", DebugData);
  uint64_t bvr0 = rk64_electra(DebugData + offsetof(struct arm_debug_aggregate_state, ds64.bvr[0]));
  printf("bvr0 read from the DebugData: 0x%llx\n", bvr0);
  uint32_t bcr0 = rk32_electra(DebugData + offsetof(struct arm_debug_aggregate_state, ds64.bcr[0]));
  printf("bcr0 read from the DebugData: 0x%08x\n", bcr0);

  // need to manually set this too in the bcr:
#define ARM_DBG_CR_MODE_CONTROL_ANY (3 << 1)
  bcr0 |= ARM_DBG_CR_MODE_CONTROL_ANY;
  wk32_electra(DebugData + offsetof(struct arm_debug_aggregate_state, ds64.bcr[0]), bcr0);
  printf("set ARM_DBG_CR_MODE_CONTROL_ANY\n");
  // returning from the syscall should be enough to set it.

  struct monitor_args* margs = malloc(sizeof(struct monitor_args));
  margs->target_thread_port = mach_thread_self();
  margs->breakpoint = bp_address;
  margs->callback = callback;

  // spin up a thread to monitor when the bp is hit:
  pthread_t th;
  pthread_create(&th, NULL, monitor_thread, (void*)margs);
  printf("started monitor thread\n");

  struct syscall_args sargs = {0};
  sargs.number = syscall_number;
  va_list ap;
  va_start(ap, n_args);
  for (uint32_t i = 0; i < n_args; i++) {
    sargs.arg[i] = va_arg(ap, uint64_t);
  }
  va_end(ap);

  // now execute a syscall with PSTATE.D cleared:
  syscall_complete = 0;
  do_syscall_with_pstate_d_unmasked(&sargs);
  syscall_complete = 1;
  printf("syscall returned\n");

  pthread_join(th, NULL);
  printf("monitor exited\n");
}
void sys_write_breakpoint_handler(arm_context_t* state) {
  // we have to manually move pc one instruction ahead because single-step won't work...
  state->ss.ss_64.pc += 4;
  // ...which means emulating what the instruction at the breakpoint did:
  //   LDR X8, [X8,#0x388]
  uint64_t val = rk64_electra(state->ss.ss_64.x[8] + 0x388);
  state->ss.ss_64.x[8] = val;

  // x1 holds the kernel pointer to write(2)'s uap argument struct; rewrite the
  // user buffer pointer (+8) and length (+0x10) so the syscall writes a
  // different string:
  uint64_t uap = state->ss.ss_64.x[1];
  char* replacer_string = strdup("a different string!\n");
  wk64_electra(uap + 8, (uint64_t)replacer_string);
  wk64_electra(uap + 0x10, strlen(replacer_string));
}
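
#if 0
// handler-writing sketch: any breakpoint callback has to advance pc past the
// breakpointed instruction and emulate its effect, since single-step isn't
// supported. e.g. for a hypothetical breakpoint on an "LDR X9, [X10]":
void example_bp_handler(arm_context_t* state) {
  state->ss.ss_64.x[9] = rk64_electra(state->ss.ss_64.x[10]); // emulate the load
  state->ss.ss_64.pc += 4;                                    // step over it
}
#endif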
char* hello_wrld_str = "hellowrld!\n";

void test_kdbg() {
  run_syscall_with_breakpoint(ksym(KSYMBOL_WRITE_SYSCALL_ENTRYPOINT), // breakpoint address
                              sys_write_breakpoint_handler,           // breakpoint hit handler
                              4,                                      // SYS_write
                              3,                                      // 3 arguments
                              (uint64_t)1,                            // stdout (variadic args are read as uint64_t)
                              (uint64_t)hello_wrld_str,               // "hellowrld!\n"
                              (uint64_t)strlen(hello_wrld_str));      // 11
}
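
#if 0
// hypothetical driver, not part of this file: assuming the electra kernel r/w
// primitives, offsets and symbol table have already been initialized by the
// exploit, the whole demo reduces to:
int main(void) {
  test_kdbg(); // write() should print "a different string!\n" instead
  return 0;
}
#endif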