Merge pull request #406 from trofi/master

ia64: fix variadic function closures with FP arguments
diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c
index b64a6b9..4400b61 100644
--- a/src/aarch64/ffi.c
+++ b/src/aarch64/ffi.c
@@ -238,18 +238,13 @@
    state.
 
    The terse state variable names match the names used in the AARCH64
-   PCS.
-
-   The struct area is allocated downwards from the top of the argument
-   area.  It is used to hold copies of structures passed by value that are
-   bigger than 16 bytes.  */
+   PCS. */
 
 struct arg_state
 {
   unsigned ngrn;                /* Next general-purpose register number. */
   unsigned nsrn;                /* Next vector register number. */
   size_t nsaa;                  /* Next stack offset. */
-  size_t next_struct_area;	/* Place to allocate big structs. */
 
 #if defined (__APPLE__)
   unsigned allocating_variadic;
@@ -258,12 +253,11 @@
 
 /* Initialize a procedure call argument marshalling state.  */
 static void
-arg_init (struct arg_state *state, size_t size)
+arg_init (struct arg_state *state)
 {
   state->ngrn = 0;
   state->nsrn = 0;
   state->nsaa = 0;
-  state->next_struct_area = size;
 #if defined (__APPLE__)
   state->allocating_variadic = 0;
 #endif
@@ -292,21 +286,6 @@
   return (char *)stack + nsaa;
 }
 
-/* Allocate and copy a structure that is passed by value on the stack and
-   return a pointer to it.  */
-static void *
-allocate_and_copy_struct_to_stack (struct arg_state *state, void *stack,
-				   size_t alignment, size_t size, void *value)
-{
-  size_t dest = state->next_struct_area - size;
-
-  /* Round down to the natural alignment of the value.  */
-  dest = ALIGN_DOWN (dest, alignment);
-  state->next_struct_area = dest;
-
-  return memcpy ((char *) stack + dest, value, size);
-}
-
 static ffi_arg
 extend_integer_type (void *source, int type)
 {
@@ -612,14 +591,13 @@
   frame = stack + stack_bytes;
   rvalue = (rsize ? frame + 32 : orig_rvalue);
 
-  arg_init (&state, stack_bytes);
+  arg_init (&state);
   for (i = 0, nargs = cif->nargs; i < nargs; i++)
     {
       ffi_type *ty = cif->arg_types[i];
       size_t s = ty->size;
       void *a = avalue[i];
       int h, t;
-      void *dest;
 
       t = ty->type;
       switch (t)
@@ -667,6 +645,8 @@
 	case FFI_TYPE_STRUCT:
 	case FFI_TYPE_COMPLEX:
 	  {
+	    void *dest;
+
 	    h = is_vfp_type (ty);
 	    if (h)
 	      {
@@ -684,12 +664,9 @@
 	    else if (s > 16)
 	      {
 		/* If the argument is a composite type that is larger than 16
-		   bytes, then the argument is copied to memory, and
+		   bytes, then the argument has been copied to memory, and
 		   the argument is replaced by a pointer to the copy.  */
-		dest = allocate_and_copy_struct_to_stack (&state, stack,
-							  ty->alignment, s,
-							  avalue[i]);
-		a = &dest;
+		a = &avalue[i];
 		t = FFI_TYPE_POINTER;
 		s = sizeof (void *);
 		goto do_pointer;
@@ -858,7 +835,7 @@
   int i, h, nargs, flags;
   struct arg_state state;
 
-  arg_init (&state, cif->bytes);
+  arg_init (&state);
 
   for (i = 0, nargs = cif->nargs; i < nargs; i++)
     {
diff --git a/src/ia64/unix.S b/src/ia64/unix.S
index 4733377..e2547e0 100644
--- a/src/ia64/unix.S
+++ b/src/ia64/unix.S
@@ -175,7 +175,6 @@
 	;;
 
 .Lst_small_struct:
-	add	sp = -16, sp
 	cmp.lt	p6, p0 = 8, in3
 	cmp.lt	p7, p0 = 16, in3
 	cmp.lt	p8, p0 = 24, in3
@@ -191,6 +190,12 @@
 (p8)	st8	[r18] = r11
 	mov	out1 = sp
 	mov	out2 = in3
+	;;
+	// The ia64 software calling convention requires the
+	// top 16 bytes of the stack to be scratch space.
+	// The PLT resolver uses that scratch space at
+	// 'memcpy' symbol resolution time.
+	add	sp = -16, sp
 	br.call.sptk.many b0 = memcpy#
 	;;
 	mov	ar.pfs = loc0
diff --git a/testsuite/Makefile.am b/testsuite/Makefile.am
index 5eecc57..b367b46 100644
--- a/testsuite/Makefile.am
+++ b/testsuite/Makefile.am
@@ -68,6 +68,7 @@
 libffi.call/return_ldl.c \
 libffi.call/closure_fn5.c \
 libffi.call/struct6.c libffi.call/return_ll.c libffi.call/struct9.c	\
+libffi.call/struct10.c \
 libffi.call/return_sc.c libffi.call/struct7.c				\
 libffi.call/cls_align_uint64.c libffi.call/cls_4byte.c			\
 libffi.call/cls_6_1_byte.c			\
diff --git a/testsuite/libffi.call/struct10.c b/testsuite/libffi.call/struct10.c
new file mode 100644
index 0000000..17b1377
--- /dev/null
+++ b/testsuite/libffi.call/struct10.c
@@ -0,0 +1,57 @@
+/* Area:	ffi_call
+   Purpose:	Check structures.
+   Limitations:	none.
+   PR:		none.
+   Originator:	Sergei Trofimovich <slyfox@gentoo.org>
+
+   The test case was originally discovered in Ruby's bindings
+   for ffi; see https://bugs.gentoo.org/634190  */
+
+/* { dg-do run } */
+#include "ffitest.h"
+
+struct s {  /* Mixed int/float/char struct that make_s returns by value.  */
+  int s32;
+  float f32;
+  signed char s8;
+};
+
+/* Build the reference value that main expects ffi_call to hand back.
+   Initializers are positional: s32, f32, s8.  */
+struct s make_s(void) {
+  struct s result = { 0x1234, 7.0, 0x78 };
+
+  return result;
+}
+
+int main(void) {
+  ffi_cif cif;
+  struct s r;
+  ffi_type rtype;
+  ffi_type* s_fields[] = {
+    &ffi_type_sint,
+    &ffi_type_float,
+    &ffi_type_schar,
+    NULL,
+  };
+
+  /* Per the libffi manual, size and alignment start at 0; libffi sets them.  */
+  rtype.size      = 0;
+  rtype.alignment = 0;
+  rtype.type      = FFI_TYPE_STRUCT;
+  rtype.elements  = s_fields;
+
+  /* Poison r so the checks below can only pass if ffi_call overwrote it.  */
+  r.s32 = 0xbad;
+  r.f32 = 999.999;
+  r.s8  = 0x51;
+
+  /* Emulate the following call through libffi: r = make_s();  */
+  CHECK(ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 0, &rtype, NULL) == FFI_OK);
+  ffi_call(&cif, FFI_FN(make_s), &r, NULL);
+
+  CHECK(r.s32 == 0x1234);
+  CHECK(r.f32 == 7.0);
+  CHECK(r.s8  == 0x78);
+  exit(0);
+}