Merge pull request #445 from andreas-schwab/master

RISC-V go closures
diff --git a/src/riscv/ffi.c b/src/riscv/ffi.c
index b744fdd..c910858 100644
--- a/src/riscv/ffi.c
+++ b/src/riscv/ffi.c
@@ -324,9 +324,12 @@
 }
 
 /* Low level routine for calling functions */
-extern void ffi_call_asm(void *stack, struct call_context *regs, void (*fn)(void)) FFI_HIDDEN;
+extern void ffi_call_asm (void *stack, struct call_context *regs,
+			  void (*fn) (void), void *closure) FFI_HIDDEN;
 
-void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+static void
+ffi_call_int (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue,
+	      void *closure)
 {
     /* this is a conservative estimate, assuming a complex return value and
        that all remaining arguments are long long / __int128 */
@@ -366,13 +369,26 @@
     for (i = 0; i < cif->nargs; i++)
         marshal(&cb, cif->arg_types[i], i >= cif->riscv_nfixedargs, avalue[i]);
 
-    ffi_call_asm((void*)alloc_base, cb.aregs, fn);
+    ffi_call_asm ((void *) alloc_base, cb.aregs, fn, closure);
 
     cb.used_float = cb.used_integer = 0;
     if (!return_by_ref && rvalue)
         unmarshal(&cb, cif->rtype, 0, rvalue);
 }
 
+void
+ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue)
+{
+  ffi_call_int(cif, fn, rvalue, avalue, NULL);
+}
+
+void
+ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
+	     void **avalue, void *closure)
+{
+  ffi_call_int(cif, fn, rvalue, avalue, closure);
+}
+
 extern void ffi_closure_asm(void) FFI_HIDDEN;
 
 ffi_status ffi_prep_closure_loc(ffi_closure *closure, ffi_cif *cif, void (*fun)(ffi_cif*,void*,void**,void*), void *user_data, void *codeloc)
@@ -406,11 +422,31 @@
     return FFI_OK;
 }
 
+extern void ffi_go_closure_asm (void) FFI_HIDDEN;
+
+ffi_status
+ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif *cif,
+		     void (*fun) (ffi_cif *, void *, void **, void *))
+{
+  if (cif->abi <= FFI_FIRST_ABI || cif->abi >= FFI_LAST_ABI)
+    return FFI_BAD_ABI;
+
+  closure->tramp = (void *) ffi_go_closure_asm;
+  closure->cif = cif;
+  closure->fun = fun;
+
+  return FFI_OK;
+}
+
 /* Called by the assembly code with aregs pointing to saved argument registers
    and stack pointing to the stacked arguments.  Return values passed in
    registers will be reloaded from aregs. */
-void FFI_HIDDEN ffi_closure_inner(size_t *stack, call_context *aregs, ffi_closure *closure) {
-    ffi_cif *cif = closure->cif;
+void FFI_HIDDEN
+ffi_closure_inner (ffi_cif *cif,
+		   void (*fun) (ffi_cif *, void *, void **, void *),
+		   void *user_data,
+		   size_t *stack, call_context *aregs)
+{
     void **avalue = alloca(cif->nargs * sizeof(void*));
     /* storage for arguments which will be copied by unmarshal().  We could
        theoretically avoid the copies in many cases and use at most 128 bytes
@@ -436,7 +472,7 @@
         avalue[i] = unmarshal(&cb, cif->arg_types[i],
             i >= cif->riscv_nfixedargs, astorage + i*MAXCOPYARG);
 
-    (closure->fun)(cif, rvalue, avalue, closure->user_data);
+    fun (cif, rvalue, avalue, user_data);
 
     if (!return_by_ref && cif->rtype->type != FFI_TYPE_VOID) {
         cb.used_integer = cb.used_float = 0;
diff --git a/src/riscv/ffitarget.h b/src/riscv/ffitarget.h
index fcaa899..75e6462 100644
--- a/src/riscv/ffitarget.h
+++ b/src/riscv/ffitarget.h
@@ -59,6 +59,7 @@
 /* ---- Definitions for closures ----------------------------------------- */
 
 #define FFI_CLOSURES 1
+#define FFI_GO_CLOSURES 1
 #define FFI_TRAMPOLINE_SIZE 24
 #define FFI_NATIVE_RAW_API 0
 #define FFI_EXTRA_CIF_FIELDS unsigned riscv_nfixedargs; unsigned riscv_unused;
diff --git a/src/riscv/sysv.S b/src/riscv/sysv.S
index 2d09865..522d0b0 100644
--- a/src/riscv/sysv.S
+++ b/src/riscv/sysv.S
@@ -67,8 +67,8 @@
       intreg pad[rv32 ? 2 : 0];
       intreg save_fp, save_ra;
   }
-  void ffi_call_asm(size_t *stackargs, struct call_context *regargs,
-      void (*fn)(void));
+  void ffi_call_asm (size_t *stackargs, struct call_context *regargs,
+                     void (*fn) (void), void *closure);
 */
 
 #define FRAME_LEN (8 * FLTS + 8 * PTRS + 16)
@@ -98,6 +98,7 @@
 
     # Load arguments
     mv      t1, a2
+    mv      t2, a3
 
 #if FLTS
     FLARG   fa0, -FRAME_LEN+0*FLTS(fp)
@@ -145,8 +146,10 @@
 
 /*
   ffi_closure_asm. Expects address of the passed-in ffi_closure in t1.
-  void ffi_closure_inner(size_t *stackargs, struct call_context *regargs,
-      ffi_closure *closure);
+  void ffi_closure_inner (ffi_cif *cif,
+		          void (*fun) (ffi_cif *, void *, void **, void *),
+		          void *user_data,
+		          size_t *stackargs, struct call_context *regargs)
 */
 
     .globl ffi_closure_asm
@@ -187,9 +190,11 @@
     SARG    a7, 8*FLTS+7*PTRS(sp)
 
     /* enter C */
-    addi    a0, sp, FRAME_LEN
-    mv      a1, sp
-    mv      a2, t1
+    LARG    a0, FFI_TRAMPOLINE_SIZE+0*PTRS(t1)
+    LARG    a1, FFI_TRAMPOLINE_SIZE+1*PTRS(t1)
+    LARG    a2, FFI_TRAMPOLINE_SIZE+2*PTRS(t1)
+    addi    a3, sp, FRAME_LEN
+    mv      a4, sp
 
     call    ffi_closure_inner
 
@@ -212,3 +217,77 @@
     ret
     .cfi_endproc
     .size ffi_closure_asm, .-ffi_closure_asm
+
+/*
+  ffi_go_closure_asm.  Expects address of the passed-in ffi_go_closure in t2.
+  void ffi_closure_inner (ffi_cif *cif,
+		          void (*fun) (ffi_cif *, void *, void **, void *),
+		          void *user_data,
+		          size_t *stackargs, struct call_context *regargs)
+*/
+
+    .globl ffi_go_closure_asm
+    .hidden ffi_go_closure_asm
+    .type ffi_go_closure_asm, @function
+ffi_go_closure_asm:
+    .cfi_startproc
+
+    addi    sp,  sp, -FRAME_LEN
+    .cfi_def_cfa_offset FRAME_LEN
+
+    /* make a frame */
+    SARG    fp, FRAME_LEN - 2*PTRS(sp)
+    .cfi_offset 8, -2*PTRS
+    SARG    ra, FRAME_LEN - 1*PTRS(sp)
+    .cfi_offset 1, -1*PTRS
+    addi    fp, sp, FRAME_LEN
+
+    /* save arguments */
+#if FLTS
+    FSARG   fa0, 0*FLTS(sp)
+    FSARG   fa1, 1*FLTS(sp)
+    FSARG   fa2, 2*FLTS(sp)
+    FSARG   fa3, 3*FLTS(sp)
+    FSARG   fa4, 4*FLTS(sp)
+    FSARG   fa5, 5*FLTS(sp)
+    FSARG   fa6, 6*FLTS(sp)
+    FSARG   fa7, 7*FLTS(sp)
+#endif
+
+    SARG    a0, 8*FLTS+0*PTRS(sp)
+    SARG    a1, 8*FLTS+1*PTRS(sp)
+    SARG    a2, 8*FLTS+2*PTRS(sp)
+    SARG    a3, 8*FLTS+3*PTRS(sp)
+    SARG    a4, 8*FLTS+4*PTRS(sp)
+    SARG    a5, 8*FLTS+5*PTRS(sp)
+    SARG    a6, 8*FLTS+6*PTRS(sp)
+    SARG    a7, 8*FLTS+7*PTRS(sp)
+
+    /* enter C */
+    LARG    a0, 1*PTRS(t2)
+    LARG    a1, 2*PTRS(t2)
+    mv      a2, t2
+    addi    a3, sp, FRAME_LEN
+    mv      a4, sp
+
+    call    ffi_closure_inner
+
+    /* return values */
+#if FLTS
+    FLARG   fa0, 0*FLTS(sp)
+    FLARG   fa1, 1*FLTS(sp)
+#endif
+
+    LARG    a0, 8*FLTS+0*PTRS(sp)
+    LARG    a1, 8*FLTS+1*PTRS(sp)
+
+    /* restore and return */
+    LARG    ra, FRAME_LEN-1*PTRS(sp)
+    .cfi_restore 1
+    LARG    fp, FRAME_LEN-2*PTRS(sp)
+    .cfi_restore 8
+    addi    sp, sp, FRAME_LEN
+    .cfi_def_cfa_offset 0
+    ret
+    .cfi_endproc
+    .size ffi_go_closure_asm, .-ffi_go_closure_asm