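aarch64: fix base register of the S3 load

The base address is passed in `x3`, not in `x2`.  Load `s2` relative to
`x3`, as the neighbouring S2 and S4 cases already do, so that the right
value is used.  Add the cls_3float test, which passes and returns a
structure of three floats through both ffi_call and a closure.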
diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S
index 086f7f6..d12bf27 100644
--- a/src/aarch64/sysv.S
+++ b/src/aarch64/sysv.S
@@ -296,7 +296,7 @@
 	nop
 8:	ldr	s3, [x3, #12]		/* S4 */
 	nop
-9:	ldr	s2, [x2, #8]		/* S3 */
+9:	ldr	s2, [x3, #8]		/* S3 */
 	nop
 10:	ldp	s0, s1, [x3]		/* S2 */
 	b	99f
diff --git a/testsuite/Makefile.am b/testsuite/Makefile.am
index da10465..5eecc57 100644
--- a/testsuite/Makefile.am
+++ b/testsuite/Makefile.am
@@ -82,4 +82,5 @@
 libffi.call/va_struct3.c \
 libffi.call/strlen2.c \
 libffi.call/strlen3.c \
-libffi.call/strlen4.c
+libffi.call/strlen4.c \
+libffi.call/cls_3float.c
diff --git a/testsuite/libffi.call/cls_3float.c b/testsuite/libffi.call/cls_3float.c
new file mode 100644
index 0000000..48888f8
--- /dev/null
+++ b/testsuite/libffi.call/cls_3float.c
@@ -0,0 +1,95 @@
+/* Area:	ffi_call, closure_call
+   Purpose:	Check structure passing with different structure size.
+		Depending on the ABI. Check overlapping.
+   Limitations:	none.
+   PR:		none.
+   Originator:	<compnerd@compnerd.org> 20171026	 */
+
+/* { dg-do run } */
+
+#include "ffitest.h"
+
+typedef struct cls_struct_3float {	/* three float members: f, g, h, in that order */
+  float f;
+  float g;
+  float h;
+} cls_struct_3float;
+
+cls_struct_3float cls_struct_3float_fn(struct cls_struct_3float a1,
+				       struct cls_struct_3float a2)
+{	/* Returns the member-wise sum of a1 and a2; also prints operands and result. */
+  struct cls_struct_3float result;
+
+  result.f = a1.f + a2.f;
+  result.g = a1.g + a2.g;
+  result.h = a1.h + a2.h;
+
+  printf("%g %g %g %g %g %g: %g %g %g\n", a1.f, a1.g, a1.h,	/* this line is what the dg-output directives in main match */
+	 a2.f, a2.g, a2.h, result.f, result.g, result.h);
+
+  return result;
+}
+
+static void
+cls_struct_3float_gn(ffi_cif *cif __UNUSED__, void* resp, void **args,
+		     void* userdata __UNUSED__)
+{	/* Closure handler: forwards args[0] and args[1] to cls_struct_3float_fn. */
+  struct cls_struct_3float a1, a2;
+
+  a1 = *(struct cls_struct_3float*)(args[0]);	/* each args[] entry is read as a pointer to one struct argument */
+  a2 = *(struct cls_struct_3float*)(args[1]);
+
+  *(cls_struct_3float*)resp = cls_struct_3float_fn(a1, a2);	/* the result is stored through resp */
+}
+
+int main (void)
+{
+  ffi_cif cif;
+  void *code;
+  ffi_closure *pcl = ffi_closure_alloc(sizeof(ffi_closure), &code);	/* NOTE(review): result is not checked for NULL */
+  void *args_dbl[3];
+  ffi_type* cls_struct_fields[4];	/* three float members plus a NULL terminator */
+  ffi_type cls_struct_type;
+  ffi_type* dbl_arg_types[3];
+
+  struct cls_struct_3float g_dbl = { 1.0f, 2.0f, 3.0f };	/* despite the "_dbl" names, all values in this test are floats */
+  struct cls_struct_3float f_dbl = { 1.0f, 2.0f, 3.0f };
+  struct cls_struct_3float res_dbl;
+
+  cls_struct_fields[0] = &ffi_type_float;
+  cls_struct_fields[1] = &ffi_type_float;
+  cls_struct_fields[2] = &ffi_type_float;
+  cls_struct_fields[3] = NULL;
+
+  cls_struct_type.size = 0;	/* size and alignment start at 0; presumably computed by ffi_prep_cif -- TODO confirm */
+  cls_struct_type.alignment = 0;
+  cls_struct_type.type = FFI_TYPE_STRUCT;
+  cls_struct_type.elements = cls_struct_fields;
+
+  dbl_arg_types[0] = &cls_struct_type;
+  dbl_arg_types[1] = &cls_struct_type;
+  dbl_arg_types[2] = NULL;
+
+  CHECK(ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2, &cls_struct_type,	/* two struct arguments, struct return */
+		     dbl_arg_types) == FFI_OK);
+
+  args_dbl[0] = &g_dbl;
+  args_dbl[1] = &f_dbl;
+  args_dbl[2] = NULL;
+
+  ffi_call(&cif, FFI_FN(cls_struct_3float_fn), &res_dbl, args_dbl);	/* first pass: call the function through ffi_call */
+  /* { dg-output "1 2 3 1 2 3: 2 4 6" } */
+  printf("res: %g %g %g\n", res_dbl.f, res_dbl.g, res_dbl.h);
+  /* { dg-output "\nres: 2 4 6" } */
+
+  CHECK(ffi_prep_closure_loc(pcl, &cif, cls_struct_3float_gn, NULL, code) ==	/* NULL is the userdata argument, unused by the handler */
+	FFI_OK);
+
+  res_dbl = ((cls_struct_3float(*)(cls_struct_3float,	/* second pass: call the closure entry point as a plain C function */
+				   cls_struct_3float))(code))(g_dbl, f_dbl);
+  /* { dg-output "\n1 2 3 1 2 3: 2 4 6" } */
+  printf("res: %g %g %g\n", res_dbl.f, res_dbl.g, res_dbl.h);
+  /* { dg-output "\nres: 2 4 6" } */
+
+  exit(0);
+}