Unwinding support #52

Closed
wants to merge 4 commits
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -15,7 +15,7 @@ libc = "0.2.14"
optional = true
version = "1.0.0"

[dev-dependencies]
[target.'cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))'.dev-dependencies]
simd = "0.1"

[features]
14 changes: 8 additions & 6 deletions README.md
@@ -118,7 +118,7 @@ there should be at least 8 KiB of free stack space, or panicking will result in

## Limitations

The architectures currently supported are: x86, x86_64, aarch64, or1k.
The architectures currently supported are: x86, x86_64, aarch64, arm, or1k.

The platforms currently supported are: bare metal, Linux (any libc),
FreeBSD, DragonFly BSD, macOS.
@@ -176,13 +176,15 @@ of callee-saved registers.

### Call stack splicing

Non-Windows platforms use [DWARF][] for both stack unwinding and debugging. DWARF call frame
information is very generic to be ABI-agnostic—it defines a bytecode that describes the actions
that need to be performed to simulate returning from a function. libfringe uses this bytecode
to specify that, after the generator function has returned, execution continues at the point
where the generator function was resumed the last time.
Non-Windows platforms use [DWARF][] (or the highly similar [ARM EHABI][ehabi]) for both stack
unwinding and debugging. DWARF call frame information is deliberately generic so that it can
remain ABI-agnostic: it defines a bytecode describing the actions that need to be performed to
simulate returning from a function. libfringe uses this bytecode to specify that, after the
generator function has returned, execution continues at the point where the generator function
was last resumed.

[dwarf]: http://dwarfstd.org
[ehabi]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0038b/IHI0038B_ehabi.pdf
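
As a rough illustration (simplified; the real trampolines live in `src/arch/*.rs`), each stack
adjustment in the AArch64 trampolines is paired with CFI directives so the unwinder can always
recover the caller's frame:

```
# Save the caller's frame pointer and return address.
stp x29, x30, [sp, #-16]!
.cfi_adjust_cfa_offset 16    # the CFA is now 16 bytes further from sp

# ... switch stacks and run the other context ...

# Restore them once control comes back, and undo the CFI adjustment.
ldp x29, x30, [sp], #16
.cfi_adjust_cfa_offset -16
.cfi_restore x29
.cfi_restore x30
```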

## Windows compatibility

160 changes: 121 additions & 39 deletions src/arch/aarch64.rs
@@ -47,14 +47,12 @@
// from the stack frame at x29 (in the parent stack), thus continuing
// unwinding at the swap call site instead of falling off the end of context stack.
use core::mem;
use stack::Stack;
use arch::StackPointer;
use unwind;

pub const STACK_ALIGNMENT: usize = 16;

#[derive(Debug, Clone, Copy)]
pub struct StackPointer(*mut usize);

pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -> !) -> StackPointer {
pub unsafe fn init(stack_base: *mut u8, f: unsafe fn(usize, StackPointer)) -> StackPointer {
#[cfg(not(target_vendor = "apple"))]
#[naked]
unsafe extern "C" fn trampoline_1() {
@@ -126,16 +124,38 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -
# trampoline_2.
nop

# Call the provided function.
ldr x2, [sp, #16]
blr x2
"#
: : : : "volatile")
}
# Call unwind_wrapper with the provided function and the stack base address.
add x2, sp, #32
ldr x3, [sp, #16]
bl ${0}

# Restore the stack pointer of the parent context. No CFI adjustments
# are needed since we have the same stack frame as trampoline_1.
ldr x2, [sp]
mov sp, x2

# Load frame and instruction pointers of the parent context.
ldp x29, x30, [sp], #16
.cfi_adjust_cfa_offset -16
.cfi_restore x29
.cfi_restore x30

unsafe fn push(sp: &mut StackPointer, val: usize) {
sp.0 = sp.0.offset(-1);
*sp.0 = val
# If the returned value is nonzero, trigger an unwind in the parent
# context with the given exception object.
cbnz x0, ${1}

# Clear the stack pointer. We can't call into this context any more once
# the function has returned.
mov x1, #0

# Return into the parent context. Use `br` instead of a `ret` to avoid
# return address mispredictions.
br x30
"#
:
: "s" (unwind::unwind_wrapper as usize)
"s" (unwind::start_unwind as usize)
: : "volatile")
}

// We set up the stack in a somewhat special way so that to the unwinder it
@@ -146,43 +166,34 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -
// followed by the x29 value for that frame. This setup supports unwinding
// using DWARF CFI as well as the frame pointer-based unwinding used by tools
// such as perf or dtrace.
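// For illustration, the layout built below (8-byte slots, growing down from
// stack_base; the CFA slot offset matches the `str x1, [x3, #-32]` in swap_link):
//
//   stack_base -  8  padding to keep the stack 16-byte aligned
//   stack_base - 16  f, the function trampoline_2 will call
//   stack_base - 24  trampoline_1 + 4, return address of the trampoline_2 frame
//   stack_base - 32  CFA slot, rewritten on every call stack switch
//   stack_base - 40  trampoline_2 + 4, entry point skipping the initial nop
//   stack_base - 48  x29 value pointing at the frame above (stack_base - 32);
//                    the StackPointer returned from init points here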
let mut sp = StackPointer(stack.base() as *mut usize);
let mut sp = StackPointer::new(stack_base);

push(&mut sp, 0 as usize); // Padding to ensure the stack is properly aligned
push(&mut sp, f as usize); // Function that trampoline_2 should call
sp.push(0 as usize); // Padding to ensure the stack is properly aligned
sp.push(f as usize); // Function that trampoline_2 should call

// Call frame for trampoline_2. The CFA slot is updated by swap::trampoline
// each time a context switch is performed.
push(&mut sp, trampoline_1 as usize + 4); // Return after the nop
push(&mut sp, 0xdeaddeaddead0cfa); // CFA slot
sp.push(trampoline_1 as usize + 4); // Return after the nop
sp.push(0xdeaddeaddead0cfa); // CFA slot

// Call frame for swap::trampoline. We set up the x29 value to point to the
// parent call frame.
let frame = sp;
push(&mut sp, trampoline_2 as usize + 4); // Entry point, skip initial nop
push(&mut sp, frame.0 as usize); // Pointer to parent call frame
let frame = sp.offset(0);
sp.push(trampoline_2 as usize + 4); // Entry point, skip initial nop
sp.push(frame as usize); // Pointer to parent call frame

sp
}

#[inline(always)]
pub unsafe fn swap(arg: usize, new_sp: StackPointer,
new_stack: Option<&Stack>) -> (usize, StackPointer) {
// Address of the topmost CFA stack slot.
let mut dummy: usize = mem::uninitialized();
let new_cfa = if let Some(new_stack) = new_stack {
(new_stack.base() as *mut usize).offset(-4)
} else {
// Just pass a dummy pointer if we aren't linking the stack
&mut dummy
};

pub unsafe fn swap_link(arg: usize, new_sp: StackPointer,
new_stack_base: *mut u8) -> (usize, Option<StackPointer>) {
let ret: usize;
let ret_sp: *mut usize;
let ret_sp: usize;
asm!(
r#"
# Set up the link register
adr lr, 0f
adr x30, 0f

# Save the frame pointer and link register; the unwinder uses them to find
# the CFA of the caller, and so they have to have the correct value immediately
@@ -194,7 +205,7 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,

# Link the call stacks together by writing the current stack bottom
# address to the CFA slot in the new stack.
str x1, [x3]
str x1, [x3, #-32]

# Load stack pointer of the new context.
mov sp, x2
@@ -212,8 +223,8 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
"={x1}" (ret_sp)
: "{x0}" (arg)
"{x2}" (new_sp.0)
"{x3}" (new_cfa)
:/*x0, "x1",*/"x2", "x3", "x4", "x5", "x6", "x7",
"{x3}" (new_stack_base)
:/*"x0", "x1",*/"x2", "x3", "x4", "x5", "x6", "x7",
"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
"x24", "x25", "x26", "x27", "x28",/*fp,*/ "lr", /*sp,*/
@@ -228,5 +239,76 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
// the "alignstack" LLVM inline assembly option does exactly the same
// thing on AArch64.
: "volatile", "alignstack");
(ret, StackPointer(ret_sp))
(ret, mem::transmute(ret_sp))
}

#[inline(always)]
pub unsafe fn swap(arg: usize, new_sp: StackPointer) -> (usize, StackPointer) {
// This is identical to swap_link, but without the write to the CFA slot.
let ret: usize;
let ret_sp: usize;
asm!(
r#"
adr x30, 0f
stp x29, x30, [sp, #-16]!
mov x1, sp
mov sp, x2
ldp x29, x30, [sp], #16
br x30
0:
"#
: "={x0}" (ret)
"={x1}" (ret_sp)
: "{x0}" (arg)
"{x2}" (new_sp.0)
:/*"x0", "x1",*/"x2", "x3", "x4", "x5", "x6", "x7",
"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
"x24", "x25", "x26", "x27", "x28",/*fp,*/ "lr", /*sp,*/
"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
"cc", "memory"
: "volatile", "alignstack");
(ret, mem::transmute(ret_sp))
}

#[inline(always)]
pub unsafe fn unwind(new_sp: StackPointer, new_stack_base: *mut u8) {
// Argument to pass to start_unwind, based on the stack base address.
let arg = unwind::unwind_arg(new_stack_base);

// This is identical to swap_link, except that it performs a tail call to
// start_unwind instead of returning into the target context.
asm!(
r#"
adr x30, 0f
stp x29, x30, [sp, #-16]!
mov x1, sp
str x1, [x3, #-32]
mov sp, x2
ldp x29, x30, [sp], #16

# Jump to the start_unwind function, which will force a stack unwind in
# the target context. This will eventually return to us through the
# stack link.
b ${0}
0:
"#
:
: "s" (unwind::start_unwind as usize)
"{x0}" (arg)
"{x2}" (new_sp.0)
"{x3}" (new_stack_base)
: "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
"x24", "x25", "x26", "x27", "x28",/*fp,*/ "lr", /*sp,*/
"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
"cc", "memory"
: "volatile", "alignstack");
}