Fix small struct passing on ppc
diff --git a/ChangeLog b/ChangeLog
index 92e0b8b..9f7a1a4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2013-02-08  Andreas Tobler  <andreast@fgznet.ch>
+
+	* src/powerpc/ffi.c (ffi_prep_cif_machdep): Fix small struct
+	support.
+	* src/powerpc/sysv.S: Ditto.
+
 2013-02-08  Anthony Green <green@moxielogic.com>
 
 	* testsuite/libffi.call/cls_longdouble.c: Remove xfail for
diff --git a/src/powerpc/ffi.c b/src/powerpc/ffi.c
index add43e0..f3a96a1 100644
--- a/src/powerpc/ffi.c
+++ b/src/powerpc/ffi.c
@@ -48,6 +48,11 @@
 
   FLAG_RETURNS_128BITS  = 1 << (31-27), /* cr6  */
 
+  FLAG_SYSV_SMST_R4     = 1 << (31-26), /* use r4 for FFI_SYSV 8 byte
+					   structs.  */
+  FLAG_SYSV_SMST_R3     = 1 << (31-25), /* use r3 for FFI_SYSV 4 byte
+					   structs.  */
+
   FLAG_ARG_NEEDS_COPY   = 1 << (31- 7),
 #ifndef __NO_FPRS__
   FLAG_FP_ARGUMENTS     = 1 << (31- 6), /* cr1.eq; specified by ABI */
@@ -692,18 +697,35 @@
       break;
 
     case FFI_TYPE_STRUCT:
-      /*
-       * The final SYSV ABI says that structures smaller or equal 8 bytes
-       * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
-       * in memory.
-       *
-       * NOTE: The assembly code can safely assume that it just needs to
-       *       store both r3 and r4 into a 8-byte word-aligned buffer, as
-       *       we allocate a temporary buffer in ffi_call() if this flag is
-       *       set.
-       */
-      if (cif->abi == FFI_SYSV && size <= 8)
-	flags |= FLAG_RETURNS_SMST;
+      if (cif->abi == FFI_SYSV)
+	{
+	  /* The final SYSV ABI says that structures smaller than or equal to
+	     8 bytes are returned in r3/r4.  The FFI_GCC_SYSV ABI instead
+	     returns them in memory.  */
+
+	  /* Treat structs with size <= 8 bytes.  */
+	  if (size <= 8)
+	    {
+	      flags |= FLAG_RETURNS_SMST;
+	      /* Structs of up to 4 bytes are returned in r3.  We pack the type
+		 and the precalculated shift value (needed in sysv.S) into
+		 flags.  The same applies to the structs returned in r3/r4.  */
+	      if (size <= 4)
+		{
+		  flags |= FLAG_SYSV_SMST_R3;
+		  flags |= 8 * (4 - size) << 8;
+		  break;
+		}
+	      /* Structs of 5 to 8 bytes are returned in r3 and r4.  See above.  */
+	      if  (size <= 8)
+		{
+		  flags |= FLAG_SYSV_SMST_R3 | FLAG_SYSV_SMST_R4;
+		  flags |= 8 * (8 - size) << 8;
+		  break;
+		}
+	    }
+	}
+
       intarg_count++;
       flags |= FLAG_RETVAL_REFERENCE;
       /* Fall through.  */
diff --git a/src/powerpc/sysv.S b/src/powerpc/sysv.S
index 675ed03..5ee3a19 100644
--- a/src/powerpc/sysv.S
+++ b/src/powerpc/sysv.S
@@ -142,14 +142,19 @@
 #endif
 
 L(small_struct_return_value):
-	/*
-	 * The C code always allocates a properly-aligned 8-byte bounce
-	 * buffer to make this assembly code very simple.  Just write out
-	 * r3 and r4 to the buffer to allow the C code to handle the rest.
-	 */
-	stw %r3, 0(%r30)
-	stw %r4, 4(%r30)
-	b L(done_return_value)
+	extrwi	%r6,%r31,2,19         /* number of bytes padding = shift/8 */
+	mtcrf	0x02,%r31	      /* copy flags to cr[24:27] (cr6) */
+	extrwi	%r5,%r31,5,19         /* r5 <- number of bits of padding */
+	subfic  %r6,%r6,4             /* r6 <- number of useful bytes in r3 */
+	bf-	25,L(done_return_value) /* struct in r3 ? if not, done. */
+/* smst_one_register: */
+	slw	%r3,%r3,%r5           /* Left-justify value in r3 */
+	mtxer	%r6                   /* move byte count to XER ... */
+	stswx	%r3,0,%r30            /* ... and store that many bytes */
+	bf+	26,L(done_return_value)  /* struct in r3:r4 ? */
+	add	%r6,%r6,%r30          /* adjust pointer */
+	stswi	%r4,%r6,4             /* store last four bytes */
+	b	L(done_return_value)
 
 .LFE1:
 END(ffi_call_SYSV)
diff --git a/src/sparc/ffi.c b/src/sparc/ffi.c
index 564be13..9f0fded 100644
--- a/src/sparc/ffi.c
+++ b/src/sparc/ffi.c
@@ -529,6 +529,7 @@
   /* SPARC v8 requires 5 instructions for flush to be visible */
   asm volatile ("nop; nop; nop; nop; nop");
 #endif
+#else
   ffi_flush_icache (closure, 16);
 #endif