12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806 |
- #include <stdio.h>
- #include <stdlib.h>
- #include <unistd.h>
- #include <pthread.h>
- #include "kdbg.h"
- #include "kutils.h"
- #include "kmem.h"
- #include "symbols.h"
- #include "kcall.h"
- #include "find_port.h"
- #include "early_kalloc.h"
- #include "arm64_state.h"
- /*
- A thread-local iOS kernel debugger for all ARM64 devices
- This code uses a kernel memory read-write primitive to enable a hardware breakpoint in EL1 on a particular thread.
- When that bp triggers it will eventually end up stuck in a loop:
-
- case ESR_EC_BKPT_REG_MATCH_EL1:
- if (FSC_DEBUG_FAULT == ISS_SSDE_FSC(esr)) {
- kprintf("Hardware Breakpoint Debug exception from kernel. Hanging here (by design).\n");
- for (;;);
-
- That thread will eventually get preempted; when that happens we'll find its state (from userspace) and modify it
- such that it breaks out of that loop and continues with the desired state.
-
- Doing this requires careful attention to how aarch64 exceptions work, how XNU handles nested exceptions
- and how context switching works. A description of this is given below:
-
- AArch64 Exceptions:
- There are four classes of AArch64 exceptions: Synchronous, IRQ, FIQ, SError. These exceptions are the only
- way which the CPU will transition between Exception Levels (EL.) There are four Exception Levels: EL0, EL1,
- EL2, EL3. In iOS userspace runs in EL0 and the kernel runs in EL1. These are similar to the Ring 0 & Ring 3
- in x86. All 64-bit iOS devices below iPhone 7 also contain a secure monitor which runs in EL3.
-
- Exception types:
-
- Synchronous: These are things like SVC instructions (used for syscalls), breakpoints, data aborts etc
- IRQ: These are external interrupts from devices
- FIQ: These are also external interrupts
- SError: These are system errors, things like ECC errors
-
- For our purposes we're interested in Synchronous and FIQ interrupts. Hardware breakpoints are synchronous exceptions.
- The timer which drives the scheduler is attached as an FIQ source.
-
- Aarch64 further subdivides those four exception classes into another four categories depending on where the
- exception came from:
- a) Exception came from the current exception level which was running on SP_EL0
- b) Exception came from the current exception level which was running on SP_EL1
- c) Exception came from a lower exception level which was executing in AArch64 mode
- d) Exception came from a lower exception level which was executing in AArch32 mode
-
- Each of these 16 cases has their own vector (handling routine.)
-
- sp registers:
- sp isn't a general purpose register; it's better to view it as an alias for one of four separate hardware registers:
- SP_EL0, SP_EL1, SP_EL2, SP_EL3.
-
- When an exception is taken sp will be set to name the SP_ELX register for the exception level which the exception is taken to.
- For example, when userspace (EL0) makes a syscall (Synchronous exception to EL1 from lower exception level) sp will name SP_EL1 in the handler.
-
- To enable nested exceptions code generally switches back to using SP_EL0 regardless of which exception level it's actually
- running at (obviously after first saving the original value of SP_EL0 so it can be restored.)
-
- Nested exceptions and masking:
- The four PSTATE.{A,D,F,I} bits control exception masking. Whenever any exception is taken these four bits will be set.
-
- PSTATE.A: SError interrupts will be pended until this bit is cleared
- PSTATE.F: FIQ interrupts will be pended until this bit is cleared
- PSTATE.I: IRQ interrupts will be pended until this bit is cleared
- PSTATE.D: Debug interrupts will be suppressed until this bit is cleared
-
- These bits can be manually set/cleared by writing to the DAIFSet/DAIFClr msrs. The bits will also be restored to their saved value
- during an ERET (return from exception) from the SPSR_ELX register (where X is the EL the exception was taken to.)
-
- Synchronous exceptions which are not Debug exceptions cannot be masked. However Debug exceptions will be suppressed, and XNU doesn't re-enable
- them. This presents the first major hurdle to implementing this debugger as the exceptions generated by hardware breakpoints fall in to
- the Debug category and will therefore never generate exceptions even if we set them and enable them for EL1.
-
- Note that the Debug exceptions will be suppressed, that is, they will never fire, unlike the other maskable interrupts which will just be pended
- and will fire as soon as they are un-masked.
-
- Re-enabling Debug exceptions during syscall execution:
- The trick to clearing PSTATE.D is to fake a return from an exception by calling ERET using an arbitrary-call primitive.
-
- See below in the code for exactly the right gadget which will let us restore a complete register state (including CPSR.)
-
- With PSTATE.D cleared we point pc back to near the start of the syscall handling path so we can fake the execution of an arbitrary
- syscall.
-
- There are a couple of other things preventing HW breakpoints firing:
-
- The Kernel Debug Enable bit has to be set in MDSCR_EL1. This can be set with some simple ROP. It's per-core, and it won't be cleared if we get
- scheduled off so it's sufficient to just set it once.
- We can use the thread_set_state API to set a breakpoint on a kernel address, but it sanitizes the BCRX control flags so it's also
- necessary to set ARM_DBG_CR_MODE_CONTROL_ANY using the kernel memory r/w.
-
- Finding and modifying the stuck thread state:
- This is explained below. We pin a monitor thread to the same core as the debuggee then search the debuggee's kernel stack looking for the
- set of stack frames which indicate it's got stuck in the kernel hw bp hit infinite loop.
-
- We then expose the state at the bp to a callback which can modify it before unblocking the stuck kernel thread.
-
- Limitations:
- I only wrote code to support one breakpoint at the moment, expect a fuller-featured, interactive version soon!
-
- Don't set breakpoints when things like spinlocks are held, it will go very badly.
-
- Single-step won't work. In the breakpoint handler you have to emulate the instruction and manually move pc on.
-
- It's slow! This is unlikely to change given how it works, but hey, you're modifying kernel thread state from userspace on the same machine!
-
- */
- // scheduling mach trap to yield the cpu
- extern boolean_t swtch_pri(int pri);
- // pin the current thread to a processor, returns a pointer to the processor we're pinned to
- uint64_t pin_current_thread() {
- // get the current thread_t:
- uint64_t th = current_thread();
- #if 0
- // get the processor_t this thread last ran on
- uint64_t processor = rk64_electra(th + koffset(KSTRUCT_OFFSET_THREAD_LAST_PROCESSOR));
- printf("thread %llx last ran on %llx, pinning it to that core\n", th, processor);
-
- // this is probably fine...
- wk64_electra(th + koffset(KSTRUCT_OFFSET_THREAD_BOUND_PROCESSOR), processor);
- #endif
-
- // need the struct cpu_data for that processor which is stored in the CpuDataEntries array, declared in data.s
- // it's 6*4k in to the data segment
- uint64_t cpu_data_entries = ksym(KSYMBOL_CPU_DATA_ENTRIES);
-
- int cpu_id = 0;
-
- // it's an array of cpu_data_entry_t which contains just the 64-bit physical and virtual addresses of struct cpu_data
- uint64_t cpu_data = rk64_electra(cpu_data_entries + ((cpu_id * 0x10) + 8));
-
- uint64_t processor = rk64_electra(cpu_data + koffset(KSTRUCT_OFFSET_CPU_DATA_CPU_PROCESSOR));
- printf("trying to pin to cpu0: %llx\n", processor);
- // pin to that cpu
- // this is probably fine...
- wk64_electra(th + koffset(KSTRUCT_OFFSET_THREAD_BOUND_PROCESSOR), processor);
-
- // that binding will only take account once we get scheduled off and back on again so yield the cpu:
- printf("pin_current_thread yielding cpu\n");
- swtch_pri(0);
- printf("pin_current_thread back on cpu\n");
- uint64_t chosen = rk64_electra(th + koffset(KSTRUCT_OFFSET_THREAD_CHOSEN_PROCESSOR));
- printf("running on %llx\n", chosen);
- #if 0
- // should now be running on the chosen processor, and should only get scheduled on there:
- printf("we're running again!\n");
-
- int got_switched = 0;
- for (int i = 0; i < 1000; i++) {
- swtch_pri(0);
- uint64_t p = rk64_electra(th + koffset(KSTRUCT_OFFSET_THREAD_CHOSEN_PROCESSOR));
- if (p != processor) {
- printf("got moved off target processor\n");
- got_switched = 1;
- break;
- }
- usleep(15000);
- p = rk64_electra(th + koffset(KSTRUCT_OFFSET_THREAD_CHOSEN_PROCESSOR));
- if (p != processor) {
- printf("got moved off target processor\n");
- got_switched = 1;
- break;
- }
- }
- if (!got_switched) {
- printf("looks like pinning works!\n");
- }
- #endif
- return processor;
- }
- #if 0
- use the two argument arbitrary call to call this:
- __TEXT_EXEC:__text:FFFFFFF0070CC1AC MOV X21, X0
- __TEXT_EXEC:__text:FFFFFFF0070CC1B0 MOV X22, X1
- __TEXT_EXEC:__text:FFFFFFF0070CC1B4 BR X22
- that gives control of x21 and pc
- point pc to this:
- exception_return:
- msr DAIFSet, #(DAIFSC_IRQF | DAIFSC_FIQF) // Disable interrupts
- mrs x3, TPIDR_EL1 // Load thread pointer
- mov sp, x21 // Reload the pcb pointer
- /* ARM64_TODO Reserve x18 until we decide what to do with it */
- ldr x0, [x3, TH_CTH_DATA] // Load cthread data pointer
- str x0, [sp, SS64_X18] // and use it to trash x18
- Lexception_return_restore_registers:
- /* Restore special register state */
- ldr x0, [sp, SS64_PC] // Get the return address
- ldr w1, [sp, SS64_CPSR] // Get the return CPSR
- ldr w2, [sp, NS64_FPSR]
- ldr w3, [sp, NS64_FPCR]
- msr ELR_EL1, x0 // Load the return address into ELR
- msr SPSR_EL1, x1 // Load the return CPSR into SPSR
- msr FPSR, x2
- msr FPCR, x3 // Synchronized by ERET
- mov x0, sp // x0 = &pcb
- /* Restore arm_neon_saved_state64 */
- ldp q0, q1, [x0, NS64_Q0]
- ldp q2, q3, [x0, NS64_Q2]
- ldp q4, q5, [x0, NS64_Q4]
- ldp q6, q7, [x0, NS64_Q6]
- ldp q8, q9, [x0, NS64_Q8]
- ldp q10, q11, [x0, NS64_Q10]
- ldp q12, q13, [x0, NS64_Q12]
- ldp q14, q15, [x0, NS64_Q14]
- ldp q16, q17, [x0, NS64_Q16]
- ldp q18, q19, [x0, NS64_Q18]
- ldp q20, q21, [x0, NS64_Q20]
- ldp q22, q23, [x0, NS64_Q22]
- ldp q24, q25, [x0, NS64_Q24]
- ldp q26, q27, [x0, NS64_Q26]
- ldp q28, q29, [x0, NS64_Q28]
- ldp q30, q31, [x0, NS64_Q30]
- /* Restore arm_saved_state64 */
- // Skip x0, x1 - we're using them
- ldp x2, x3, [x0, SS64_X2]
- ldp x4, x5, [x0, SS64_X4]
- ldp x6, x7, [x0, SS64_X6]
- ldp x8, x9, [x0, SS64_X8]
- ldp x10, x11, [x0, SS64_X10]
- ldp x12, x13, [x0, SS64_X12]
- ldp x14, x15, [x0, SS64_X14]
- ldp x16, x17, [x0, SS64_X16]
- ldp x18, x19, [x0, SS64_X18]
- ldp x20, x21, [x0, SS64_X20]
- ldp x22, x23, [x0, SS64_X22]
- ldp x24, x25, [x0, SS64_X24]
- ldp x26, x27, [x0, SS64_X26]
- ldr x28, [x0, SS64_X28]
- ldp fp, lr, [x0, SS64_FP]
- // Restore stack pointer and our last two GPRs
- ldr x1, [x0, SS64_SP]
- mov sp, x1
- ldp x0, x1, [x0, SS64_X0] // Restore the GPRs
- eret
- this lets us eret with a completely controlled state :)
- use that to clear PSTATE.D, and return to EL1+SP0
- return to:
- .text
- .align 2
- fleh_synchronous:
- mrs x1, ESR_EL1 // Load exception syndrome
- mrs x2, FAR_EL1 // Load fault address
- and w3, w1, #(ESR_EC_MASK)
- lsr w3, w3, #(ESR_EC_SHIFT)
- mov w4, #(ESR_EC_IABORT_EL1)
- cmp w3, w4
- b.eq Lfleh_sync_load_lr
- Lvalid_link_register: <-- ***there***
- PUSH_FRAME
- bl EXT(sleh_synchronous)
- POP_FRAME
- b exception_return_dispatch
- in ip7 11.1.2 that's:
- __TEXT_EXEC:__text:FFFFFFF0070CC1D4 STP X29, X30, [SP,#var_10]!
- __TEXT_EXEC:__text:FFFFFFF0070CC1D8 MOV X29, SP
- __TEXT_EXEC:__text:FFFFFFF0070CC1DC BL loc_FFFFFFF0071DDED4
- __TEXT_EXEC:__text:FFFFFFF0070CC1E0 MOV SP, X29
- __TEXT_EXEC:__text:FFFFFFF0070CC1E4 LDP X29, X30, [SP+0x10+var_10],#0x10
- __TEXT_EXEC:__text:FFFFFFF0070CC1E8 B sub_FFFFFFF0070CC3CC
- in the state which we get loaded:
- x21 should point to the actual saved ACT_CONTEXT since x21 will be used in the return path if no ASTs are taken
- x0 should point to the saved state which we want the debugged syscall to see (not ACT_CONTEXT!)
- x1 should be the svc syndrome number (ESR_EC(esr) == ESR_EC_SVC_64)
- x2 should be the pc of the svc instruction
- sp should be the right place on the thread's kernel stack
- #endif
/* Describes one syscall to issue: the syscall number plus up to eight 64-bit arguments. */
struct syscall_args {
    uint32_t number;  /* syscall number */
    uint64_t arg[8];  /* positional arguments; unused slots stay zero */
};
- void do_syscall_with_pstate_d_unmasked(struct syscall_args* args) {
- // get the target thread_t
- //uint64_t thread_port_addr = find_port_address_electra(target_thread_port, MACH_MSG_TYPE_COPY_SEND);
- //uint64_t thread_t_addr = rk64_electra(thread_port_addr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT));
- uint64_t thread_t_addr = current_thread();
-
- /* this state should set up as if it were calling the target syscall */
- arm_context_t fake_syscall_args = {0};
-
- /* this state will be restored by an eret */
- arm_context_t eret_return_state = {0};
- // there's no need to initialize too much of this since it won't actually be the state which is restored
- // it just needs to be enough to get the target syscall called
- fake_syscall_args.ss.ss_64.x[16] = args->number;
- fake_syscall_args.ss.ss_64.x[0] = args->arg[0];
- fake_syscall_args.ss.ss_64.x[1] = args->arg[1];
- fake_syscall_args.ss.ss_64.x[2] = args->arg[2];
- fake_syscall_args.ss.ss_64.x[3] = args->arg[3];
- fake_syscall_args.ss.ss_64.x[4] = args->arg[4];
- fake_syscall_args.ss.ss_64.x[5] = args->arg[5];
- fake_syscall_args.ss.ss_64.x[6] = args->arg[6];
- fake_syscall_args.ss.ss_64.x[7] = args->arg[7];
-
- fake_syscall_args.ss.ash.flavor = ARM_SAVED_STATE64;
-
- fake_syscall_args.ss.ss_64.cpsr = 0;
-
- // allocate a copy of that in wired kernel memory:
- //uint64_t fake_syscall_args_kern = kmem_alloc_wired(sizeof(arm_context_t));
- uint64_t fake_syscall_args_kern = early_kalloc(sizeof(arm_context_t));
- kmemcpy(fake_syscall_args_kern, (uint64_t)&fake_syscall_args, sizeof(arm_context_t));
-
- // this state needs to be a bit more complete...
- // x0 of the eret restored state will be the arm_context_t which the syscall dispatch code sees
- eret_return_state.ss.ss_64.x[0] = fake_syscall_args_kern;
-
- // x1 will be the exception syndrome
- #define ESR_EC_SVC_64 0x15
- #define ESR_EC_SHIFT 26
- eret_return_state.ss.ss_64.x[1] = ESR_EC_SVC_64 << ESR_EC_SHIFT;
-
- // x2 will be the address of the exception, not relevant for a syscall
- eret_return_state.ss.ss_64.x[2] = 0x454545454540;
-
- // x21 will be the real saved state to be used to return back to EL0
- // this is the state which was spilled during the actual EL0 -> EL1 transition.
- // if a continuation is run x21 won't be used, instead the return will go via the thread's ACT_CONTEXT
- // so this makes both paths safe
- uint64_t act_context = rk64_electra(thread_t_addr + koffset(KSTRUCT_OFFSET_THREAD_CONTEXT_DATA));
- eret_return_state.ss.ss_64.x[21] = act_context;
-
- // let's stay on the thread's actual kernel stack
- uint64_t thread_kernel_stack_top = rk64_electra(thread_t_addr + koffset(KSTRUCT_OFFSET_THREAD_KSTACKPTR));
- eret_return_state.ss.ss_64.sp = thread_kernel_stack_top;
-
- // the target place to eret to (see code snippet above)
- eret_return_state.ss.ss_64.pc = ksym(KSYMBOL_VALID_LINK_REGISTER);
-
- // the whole point of this, cpsr! this will be restored to SPSR_EL1 before the eret
- // see D1.6.4 of the armv8 manual
- // we want to return on to SP0 and to EL1
- // A,I,F should still be masked, D unmasked
- #define SPSR_A (1<<8)
- #define SPSR_I (1<<7)
- #define SPSR_F (1<<6)
- #define SPSR_EL1_SP0 (0x4)
- eret_return_state.ss.ss_64.cpsr = SPSR_A | SPSR_I | SPSR_F | SPSR_EL1_SP0;
-
- //uint64_t eret_return_state_kern = kmem_alloc_wired(sizeof(arm_context_t));
- uint64_t eret_return_state_kern = early_kalloc(sizeof(arm_context_t));
- kmemcpy(eret_return_state_kern, (uint64_t)&eret_return_state, sizeof(arm_context_t));
-
- // make the arbitrary call
- kcall(ksym(KSYMBOL_X21_JOP_GADGET), 2, eret_return_state_kern, ksym(KSYMBOL_EXCEPTION_RETURN));
- }
- /*
- we want to call this gadget:
- FFFFFFF0071E1998 MSR #0, c0, c2, #2, X8 ; [>] MDSCR_EL1 (Monitor Debug System Control Register)
- FFFFFFF0071E199C ISB // this a workaround for some errata...
- FFFFFFF0071E19A0 B loc_FFFFFFF0071E19F8
- ...
- FFFFFFF0071E19F8 BL _ml_set_interrupts_enabled
- FFFFFFF0071E19FC ADD SP, SP, #0x220
- FFFFFFF0071E1A00 LDP X29, X30, [SP,#0x20+var_s0]
- FFFFFFF0071E1A04 LDP X20, X19, [SP,#0x20+var_10]
- FFFFFFF0071E1A08 LDP X28, X27, [SP+0x20+var_20],#0x30
- FFFFFFF0071E1A0C RET
- let's just use the ERET case to get full register control and run that on a little ROP stack which then
- returns to thread_exception_return
-
- */
- void set_MDSCR_EL1_KDE(mach_port_t target_thread_port) {
- /* this state will be restored by an eret */
- arm_context_t eret_return_state = {0};
-
- // allocate a stack for the rop:
- //uint64_t rop_stack_kern_base = kmem_alloc_wired(0x4000);
- uint64_t rop_stack_kern_base = early_kalloc(0x1000);
-
- uint64_t rop_stack_kern_middle = rop_stack_kern_base + 0xc00;
-
- eret_return_state.ss.ss_64.sp = rop_stack_kern_middle;
- uint64_t rop_stack_kern_popped_base = rop_stack_kern_middle + 0x220;
- // x28, x27, x20, x19, fp, lr
- uint64_t popped_regs[] = {0, 0, 0, 0, 0x414243444546, ksym(KSYMBOL_THREAD_EXCEPTION_RETURN)}; // directly return back to userspace after this
- kmemcpy(rop_stack_kern_popped_base, (uint64_t)popped_regs, sizeof(popped_regs));
- #define MDSCR_EL1_KDE (1<<13)
- eret_return_state.ss.ss_64.x[8] = MDSCR_EL1_KDE;
-
- // the target place to eret to
- eret_return_state.ss.ss_64.pc = ksym(KSYMBOL_SET_MDSCR_EL1_GADGET);
-
- // we want to return on to SP0 and to EL1
- // A,I,F should still be masked, D unmasked (here we could actually mask D?)
- #define SPSR_A (1<<8)
- #define SPSR_I (1<<7)
- #define SPSR_F (1<<6)
- #define SPSR_EL1_SP0 (0x4)
- eret_return_state.ss.ss_64.cpsr = SPSR_A | SPSR_I | SPSR_F | SPSR_EL1_SP0;
-
- //uint64_t eret_return_state_kern = kmem_alloc_wired(sizeof(arm_context_t));
- uint64_t eret_return_state_kern = early_kalloc(sizeof(arm_context_t));
- kmemcpy(eret_return_state_kern, (uint64_t)&eret_return_state, sizeof(arm_context_t));
-
- // make the arbitrary call
- kcall(ksym(KSYMBOL_X21_JOP_GADGET), 2, eret_return_state_kern, ksym(KSYMBOL_EXCEPTION_RETURN));
-
- printf("returned from trying to set the KDE bit\n");
-
- // free the stack we used:
- //kmem_free(rop_stack_kern_base, 0x4000);
- }
- /*
- target_thread_port is the thread port for a thread which may or already has hit a kernel hw breakpoint.
- detect whether that is the case, and if so find the register state when the BP was hit.
-
- where to find stuff:
-
- userspace svc: EL0+SP0 -> EL1+SP1 (sync exception from lower exception level running aarch64)
- userspace state gets saved in thread->ACT_CONTEXT
- stack switched to thread's kernel stack pointer and SP0 selected
- does stuff which then hits kernel hw bp
-
- kernel hw bp: EL1+SP0 -> EL1+SP1 (sync exception from same exception level running on SP0)
- switch back to SP0 and push new arm_context_t on there. point x21 to this saved state area.
- control flow reaches infinite loop
-
- fiq timer: EL1+SP0 -> EL1+SP1 (fiq interrupt from same exception level running on SP0)
- switch back to SP0 and push new arm_context_t on there. point x21 to there.
- then set sp to the interrupt stack.
-
- schedule off:
- this will happen just before the fiq timer interrupt returns in return_to_kernel
- it will set sp back to x21 (as if to eret back to the previous exception level) then call ast_taken_kernel
-
- if the thread will be scheduled off just a small amount of state will be saved to the reserved area
- above the top of the thread's kernel stack, sufficient to get the thread back on the core and
- resume execution.
-
-
- +-----------------------------+
- | |
- | struct thread_kernel_state | <-- *above* the top of thread kernel stack
- | |
- +> +=============================+ <-- top of thread kernel stack
- | | |
- | | syscall stack frames of |
- | | varying depth |
- | | (not user state) |
- | | |
- | +-----------------------------+ <-- kernel hw bp: EL1+SP0 -> EL1+SP1 (sync exception from same exception level running on SP0)
- | | | <-- saved state from when the bp was hit
- | | struct arm_context_t |
- | | .pc = address of hit bp |
- | +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
- | | |
- | | |
- | | stack frames from sync excp |
- | | to the infinite loop... |
- | | |
- | +-----------------------------+ <-- fiq timer: EL1+SP0 -> EL1+SP1 (fiq interrupt from same exception level running on SP0)
- | | struct arm_context_t | <-- saved state from the infinite loop before it was scheduled off
- | | .pc = addr of the infinite |
- | | loop instr |
- | |~~~~~~~~~~~~~~~~~~~~~~~~~~~~~+
- | | |
- | | |
- | | |
- | | |
- +- +-----------------------------+
- */
- typedef void (*breakpoint_callback)(arm_context_t* context);
- volatile int syscall_complete = 0;
- void handle_kernel_bp_hits(mach_port_t target_thread_port, uint64_t looper_pc, uint64_t breakpoint, breakpoint_callback callback) {
- // get the target thread's thread_t
- uint64_t thread_port_addr = find_port_address_electra(target_thread_port, MACH_MSG_TYPE_COPY_SEND);
- uint64_t thread_t_addr = rk64_electra(thread_port_addr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT));
-
- while (1) {
- uint64_t looper_saved_state = 0;
- int found_it = 0;
- while (!found_it) {
- if (syscall_complete) {
- return;
- }
- // we've pinned ourself to the same core, so if we're running, it isn't...
- // in some ways this code is very racy, but when we actually have detected that the target
- // thread has hit the breakpoint it should be safe until we restart it
- // and up until then we don't do anything too dangerous...
-
-
- // get the kstack pointer
- uint64_t kstackptr = rk64_electra(thread_t_addr + koffset(KSTRUCT_OFFSET_THREAD_KSTACKPTR));
-
- printf("kstackptr: %llx\n", kstackptr);
-
- // get the thread_kernel_state
- // the stack lives below kstackptr, and kstackptr itself points to a struct thread_kernel_state:
- // the first bit of that is just an arm_context_t:
- // this is the scheduled-off state
- arm_context_t saved_ksched_state = {0};
- kmemcpy((uint64_t)&saved_ksched_state, kstackptr, sizeof(arm_context_t));
-
- // get the saved stack pointer
- uint64_t sp = saved_ksched_state.ss.ss_64.sp;
- printf("sp: %llx\n", sp);
-
- if (sp == 0) {
- continue;
- }
-
- uint64_t stack[128] = {0};
-
- // walk up from there and look for the saved state dumped by the fiq:
- // note that it won't be right at the bottom of the stack
- // instead there are the frames for:
- // ast_taken_kernel <-- above this is the saved state which will get restored when the hw bp spinner gets rescheduled
- // thread_block_reason
- // thread_invoke
- // machine_switch_context
- // Switch_context <-- the frame actually at the bottom of the stack
-
- // should probably walk those stack frame properly, but this will do...
-
- // grab the stack
- kmemcpy((uint64_t)&stack[0], sp, sizeof(stack));
- //for (int i = 0; i < 128; i++) {
- // printf("%016llx\n", stack[i]);
- //}
-
- for (int i = 0; i < 128; i++) {
- uint64_t flavor_and_count = stack[i];
- if (flavor_and_count != (ARM_SAVED_STATE64 | (((uint64_t)ARM_SAVED_STATE64_COUNT) << 32))) {
- continue;
- }
-
- arm_context_t* saved_state = (arm_context_t*)&stack[i];
-
- if (saved_state->ss.ss_64.pc != looper_pc) {
- continue;
- }
-
- found_it = 1;
- looper_saved_state = sp + (i*sizeof(uint64_t));
- printf("found the saved state probably at %llx\n", looper_saved_state); // should walk the stack properly..
- break;
- }
-
- if (!found_it) {
- printf("unable to find the saved scheduler tick state on the stack, waiting a bit then trying again...\n");
- sleep(1);
- return;
- }
-
- }
-
-
-
- // now keep walking up and find the saved state for the code which hit the BP:
- uint64_t bp_hitting_state = looper_saved_state + sizeof(arm_context_t);
- found_it = 0;
- for (int i = 0; i < 1000; i++) {
- uint64_t flavor_and_count = rk64_electra(bp_hitting_state);
- if (flavor_and_count != (ARM_SAVED_STATE64 | (((uint64_t)ARM_SAVED_STATE64_COUNT) << 32))) {
- bp_hitting_state += 8;
- continue;
- }
-
- arm_context_t bp_context;
- kmemcpy((uint64_t)&bp_context, bp_hitting_state, sizeof(arm_context_t));
-
- for (int i = 0; i < 40; i++) {
- uint64_t* buf = (uint64_t*)&bp_context;
- printf("%016llx\n", buf[i]);
- }
-
- if (bp_context.ss.ss_64.pc != breakpoint) {
- printf("hummm, found an unexpected breakpoint: %llx\n", bp_context.ss.ss_64.pc);
- }
-
- found_it = 1;
- break;
- }
-
- if (!found_it) {
- printf("unable to find bp hitting state\n");
- }
-
- // fix up the bp hitting state so it will continue (with whatever modifications we want:)
- // get a copy of the state:
- arm_context_t bp_context;
- kmemcpy((uint64_t)&bp_context, bp_hitting_state, sizeof(arm_context_t));
-
- callback(&bp_context);
-
- // write that new state back:
- kmemcpy(bp_hitting_state, (uint64_t)&bp_context, sizeof(arm_context_t));
-
- // unblock the looper:
- wk64_electra(looper_saved_state + offsetof(arm_context_t, ss.ss_64.pc), ksym(KSYMBOL_SLEH_SYNC_EPILOG));
-
- // when it runs again it should break out of the loop and continue the syscall
- // forces us off the core and hopefully it on:
- thread_switch(target_thread_port, 0, 0);
- swtch_pri(0);
-
- }
- }
- struct monitor_args {
- mach_port_t target_thread_port;
- uint64_t breakpoint;
- breakpoint_callback callback;
- };
- void* monitor_thread(void* arg) {
- struct monitor_args* args = (struct monitor_args*)arg;
-
- printf("monitor thread running, pinning to core\n");
- pin_current_thread();
- printf("monitor thread pinned\n");
- handle_kernel_bp_hits(args->target_thread_port, ksym(KSYMBOL_EL1_HW_BP_INFINITE_LOOP), args->breakpoint, args->callback);
- return NULL;
- }
- // this runs on the thread which will execute the target syscall to debug
- void run_syscall_with_breakpoint(uint64_t bp_address, breakpoint_callback callback, uint32_t syscall_number, uint32_t n_args, ...) {
- // pin this thread to the target cpu:
- pin_current_thread();
-
- // set the Kernel Debug Enable bit of MDSCR_EL1:
- set_MDSCR_EL1_KDE(mach_thread_self());
-
- // MDE will be set by the regular API for us
-
- // enable a hw debug breakpoint at bp_address
- // it won't fire because PSTATE.D will be set, but we'll deal with that in a bit!
-
- // set a hardware bp on the thread using the proper API so that all the structures are already set up:
- struct arm64_debug_state state = {0};
- state.bvr[0] = bp_address;
- #define BCR_BAS_ALL (0xf << 5)
- #define BCR_E (1 << 0)
- state.bcr[0] = BCR_BAS_ALL | BCR_E; // enabled
- kern_return_t err = thread_set_state(mach_thread_self(),
- ARM_DEBUG_STATE64,
- (thread_state_t)&state,
- sizeof(state)/4);
-
- // verify that it got set:
- memset(&state, 0, sizeof(state));
- mach_msg_type_number_t count = sizeof(state)/4;
- err = thread_get_state(mach_thread_self(),
- ARM_DEBUG_STATE64,
- (thread_state_t)&state,
- &count);
-
- if (state.bvr[0] != bp_address) {
- printf("setting the bp address failed\n");
- }
-
-
- // now go and find that thread's DebugData where those values are stored.
-
- uint64_t thread_port_addr = find_port_address_electra(mach_thread_self(), MACH_MSG_TYPE_COPY_SEND);
- uint64_t thread_t_addr = rk64_electra(thread_port_addr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT));
-
- printf("thread_t_addr: %llx\n", thread_t_addr);
-
- // read bvr[0] in that thread_t's DebugData:
- uint64_t DebugData = rk64_electra(thread_t_addr + ACT_DEBUGDATA_OFFSET);
- //printf("DebugData: %llx\n", DebugData);
-
- uint64_t bvr0 = rk64_electra(DebugData + offsetof(struct arm_debug_aggregate_state, ds64.bvr[0]));
- printf("bvr0 read from the DebugData: 0x%llx\n", bvr0);
-
- uint32_t bcr0 = rk32_electra(DebugData + offsetof(struct arm_debug_aggregate_state, ds64.bcr[0]));
- printf("bcr0 read from the DebugData: 0x%08x\n", bcr0);
-
- // need to manually set this too in the bcr:
- #define ARM_DBG_CR_MODE_CONTROL_ANY (3 << 1)
- bcr0 |= ARM_DBG_CR_MODE_CONTROL_ANY;
- wk32_electra(DebugData + offsetof(struct arm_debug_aggregate_state, ds64.bcr[0]), bcr0);
-
- printf("set ARM_DBG_CR_MODE_CONTROL_ANY\n");
- // returning from the syscall should be enough to set it.
-
- struct monitor_args* margs = malloc(sizeof(struct monitor_args));
- margs->target_thread_port = mach_thread_self();
- margs->breakpoint = bp_address;
- margs->callback = callback;
-
- // spin up a thread to monitor when the bp is hit:
- pthread_t th;
- pthread_create(&th, NULL, monitor_thread, (void*)margs);
- printf("started monitor thread\n");
-
- struct syscall_args sargs = {0};
- sargs.number = syscall_number;
- va_list ap;
- va_start(ap, n_args);
-
- for (int i = 0; i < n_args; i++){
- sargs.arg[i] = va_arg(ap, uint64_t);
- }
-
- va_end(ap);
-
- // now execute a syscall with PSTATE.D disabled:
- syscall_complete = 0;
- do_syscall_with_pstate_d_unmasked(&sargs);
- syscall_complete = 1;
- printf("syscall returned\n");
-
- pthread_join(th, NULL);
- printf("monitor exited\n");
- }
- void sys_write_breakpoint_handler(arm_context_t* state) {
- // we will have to skip it one instruction ahead because single step won't work...
- state->ss.ss_64.pc += 4;
-
- // this means emulating what that instruction did:
- // LDR X8, [X8,#0x388]
- uint64_t val = rk64_electra(state->ss.ss_64.x[8] + 0x388);
- state->ss.ss_64.x[8] = val;
-
- uint64_t uap = state->ss.ss_64.x[1];
- char* replacer_string = strdup("a different string!\n");
- wk64_electra(uap+8, (uint64_t)replacer_string);
- wk64_electra(uap+0x10, strlen(replacer_string));
- }
- char* hello_wrld_str = "hellowrld!\n";
- void test_kdbg() {
- run_syscall_with_breakpoint(ksym(KSYMBOL_WRITE_SYSCALL_ENTRYPOINT), // breakpoint address
- sys_write_breakpoint_handler, // breakpoint hit handler
- 4, // SYS_write
- 3, // 3 arguments
- 1, // stdout
- (uint64_t)hello_wrld_str, // "hellowrld!\n"
- strlen(hello_wrld_str)); // 11
- }
|