libffcall
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Libffcall] Return small structs in registers for powerpc on openbsd


From: Josh Elsasser
Subject: Re: [Libffcall] Return small structs in registers for powerpc on openbsd
Date: Thu, 17 Oct 2019 14:08:47 -0700
User-agent: Mutt/1.5.22 (2013-10-16)

On Thu, Oct 17, 2019 at 11:16:23AM -0700, Josh Elsasser wrote:
> Return small structs in registers for powerpc on OpenBSD. This should
> also be needed for NetBSD, but I have not tested that yet.
> 
> Replace the unused powerpc small-struct-copying code with hppa's,
> which copies structs of all sizes.
> 
> OpenBSD additionally requires that the secure-PLT ABI be used for PIC
> function references on powerpc. Add gcc 4.1.2 to the cross tools and
> use it to build a secure-PLT PIC copy of the vacall_r asm. This isn't
> needed for avcall or vacall.

And once again, with the syntax error fixed...

diff --git .gitignore .gitignore
index 447b983..ed25615 100644
--- .gitignore
+++ .gitignore
@@ -211,6 +211,7 @@
 /callback/vacall_r/vacall-powerpc-linux.s
 /callback/vacall_r/vacall-powerpc-linux-macro.S
 /callback/vacall_r/vacall-powerpc-macos.s
+/callback/vacall_r/vacall-powerpc-secplt-macro.S
 /callback/vacall_r/vacall-powerpc-sysv4-macro.S
 /callback/vacall_r/vacall-powerpc64-aix.s
 /callback/vacall_r/vacall-powerpc64-linux.S
diff --git avcall/avcall-internal.h avcall/avcall-internal.h
index 0357a77..35ea8b6 100644
--- avcall/avcall-internal.h
+++ avcall/avcall-internal.h
@@ -191,7 +191,7 @@ typedef int __av_alist_verify[2*(__AV_ALIST_SIZE_BOUND - 
(int)sizeof(__av_alist)
 #define __av_start_struct3(LIST)  \
   ((LIST).flags |= __AV_REGISTER_STRUCT_RETURN, 0)
 #endif
-#if (defined(__i386__) && !defined(_WIN32)) || defined(__m68k__) || 
(defined(__powerpc__) && !defined(__powerpc64__)) || (defined(__s390__) && 
!defined(__s390x__))
+#if (defined(__i386__) && !defined(_WIN32)) || defined(__m68k__) || 
(defined(__powerpc__) && !defined(__powerpc64__) && !defined(__OpenBSD__)) || 
(defined(__s390__) && !defined(__s390x__))
 #define __av_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE)  \
   ((TYPE_SIZE) == 1 || (TYPE_SIZE) == 2 || (TYPE_SIZE) == 4            \
    || ((TYPE_SIZE) == 8 && (TYPE_SPLITTABLE)                           \
@@ -247,6 +247,15 @@ typedef int __av_alist_verify[2*(__AV_ALIST_SIZE_BOUND - 
(int)sizeof(__av_alist)
 #define __av_start_struct3(LIST)  \
   ((LIST).flags |= __AV_REGISTER_STRUCT_RETURN, 0)
 #endif
+#if defined(__powerpc__) && !defined(__powerpc64__) && defined(__OpenBSD__)
+#define __av_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE)  \
+  ((TYPE_SIZE) <= 8)
+/* Turn on __AV_REGISTER_STRUCT_RETURN if __AV_SMALL_STRUCT_RETURN was set
+ * and the struct will actually be returned in registers.
+ */
+#define __av_start_struct3(LIST)  \
+  ((LIST).flags |= __AV_REGISTER_STRUCT_RETURN, 0)
+#endif
 #if (defined(__powerpc64__) && !defined(__powerpc64_elfv2__)) || 
defined(__s390x__)
 #define __av_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE)  \
   0
diff --git avcall/avcall-powerpc.c avcall/avcall-powerpc.c
index 5d1b6f8..17cbde8 100644
--- avcall/avcall-powerpc.c
+++ avcall/avcall-powerpc.c
@@ -196,19 +196,96 @@ avcall_call(av_alist* list)
   } else
   if (l->rtype == __AVstruct) {
     if (l->flags & __AV_REGISTER_STRUCT_RETURN) {
-      if (l->rsize == sizeof(char)) {
-        RETURN(char, i);
-      } else
-      if (l->rsize == sizeof(short)) {
-        RETURN(short, i);
-      } else
-      if (l->rsize == sizeof(int)) {
-        RETURN(int, i);
-      } else
-      if (l->rsize == 2*sizeof(__avword)) {
+      if (l->rsize > 0 && l->rsize <= 8) {
         void* raddr = l->raddr;
-        ((__avword*)raddr)[0] = i;
-        ((__avword*)raddr)[1] = iret2;
+        #if 0 /* Unoptimized */
+        if (l->rsize == 1) {
+          ((unsigned char *)raddr)[0] = (unsigned char)(i);
+        } else
+        if (l->rsize == 2) {
+          ((unsigned char *)raddr)[0] = (unsigned char)(i>>8);
+          ((unsigned char *)raddr)[1] = (unsigned char)(i);
+        } else
+        if (l->rsize == 3) {
+          ((unsigned char *)raddr)[0] = (unsigned char)(i>>16);
+          ((unsigned char *)raddr)[1] = (unsigned char)(i>>8);
+          ((unsigned char *)raddr)[2] = (unsigned char)(i);
+        } else
+        if (l->rsize == 4) {
+          ((unsigned char *)raddr)[0] = (unsigned char)(i>>24);
+          ((unsigned char *)raddr)[1] = (unsigned char)(i>>16);
+          ((unsigned char *)raddr)[2] = (unsigned char)(i>>8);
+          ((unsigned char *)raddr)[3] = (unsigned char)(i);
+        } else
+        if (l->rsize == 5) {
+          ((unsigned char *)raddr)[0] = (unsigned char)(i);
+          ((unsigned char *)raddr)[1] = (unsigned char)(iret2>>24);
+          ((unsigned char *)raddr)[2] = (unsigned char)(iret2>>16);
+          ((unsigned char *)raddr)[3] = (unsigned char)(iret2>>8);
+          ((unsigned char *)raddr)[4] = (unsigned char)(iret2);
+        } else
+        if (l->rsize == 6) {
+          ((unsigned char *)raddr)[0] = (unsigned char)(i>>8);
+          ((unsigned char *)raddr)[1] = (unsigned char)(i);
+          ((unsigned char *)raddr)[2] = (unsigned char)(iret2>>24);
+          ((unsigned char *)raddr)[3] = (unsigned char)(iret2>>16);
+          ((unsigned char *)raddr)[4] = (unsigned char)(iret2>>8);
+          ((unsigned char *)raddr)[5] = (unsigned char)(iret2);
+        } else
+        if (l->rsize == 7) {
+          ((unsigned char *)raddr)[0] = (unsigned char)(i>>16);
+          ((unsigned char *)raddr)[1] = (unsigned char)(i>>8);
+          ((unsigned char *)raddr)[2] = (unsigned char)(i);
+          ((unsigned char *)raddr)[3] = (unsigned char)(iret2>>24);
+          ((unsigned char *)raddr)[4] = (unsigned char)(iret2>>16);
+          ((unsigned char *)raddr)[5] = (unsigned char)(iret2>>8);
+          ((unsigned char *)raddr)[6] = (unsigned char)(iret2);
+        } else
+        if (l->rsize == 8) {
+          ((unsigned char *)raddr)[0] = (unsigned char)(i>>24);
+          ((unsigned char *)raddr)[1] = (unsigned char)(i>>16);
+          ((unsigned char *)raddr)[2] = (unsigned char)(i>>8);
+          ((unsigned char *)raddr)[3] = (unsigned char)(i);
+          ((unsigned char *)raddr)[4] = (unsigned char)(iret2>>24);
+          ((unsigned char *)raddr)[5] = (unsigned char)(iret2>>16);
+          ((unsigned char *)raddr)[6] = (unsigned char)(iret2>>8);
+          ((unsigned char *)raddr)[7] = (unsigned char)(iret2);
+        }
+       #else /* Optimized: fewer conditional jumps, fewer memory accesses */
+        uintptr_t count = l->rsize; /* > 0, ≤ 2*sizeof(__avword) */
+        __avword* wordaddr = (__avword*)((uintptr_t)raddr & 
~(uintptr_t)(sizeof(__avword)-1));
+        uintptr_t start_offset = (uintptr_t)raddr & 
(uintptr_t)(sizeof(__avword)-1); /* ≥ 0, < sizeof(__avword) */
+        uintptr_t end_offset = start_offset + count; /* > 0, < 
3*sizeof(__avword) */
+        if (count <= sizeof(__avword)) {
+          /* Use iret. */
+          if (end_offset <= sizeof(__avword)) {
+            /* 0 < end_offset ≤ sizeof(__avword) */
+            __avword mask0 = ((__avword)2 << 
(sizeof(__avword)*8-start_offset*8-1)) - ((__avword)1 << 
(sizeof(__avword)*8-end_offset*8));
+            wordaddr[0] ^= (wordaddr[0] ^ (i << 
(sizeof(__avword)*8-end_offset*8))) & mask0;
+          } else {
+            /* sizeof(__avword) < end_offset < 2*sizeof(__avword), 
start_offset > 0 */
+            __avword mask0 = ((__avword)2 << 
(sizeof(__avword)*8-start_offset*8-1)) - 1;
+            __avword mask1 = - ((__avword)1 << 
(2*sizeof(__avword)*8-end_offset*8));
+            wordaddr[0] ^= (wordaddr[0] ^ (i >> 
(end_offset*8-sizeof(__avword)*8))) & mask0;
+            wordaddr[1] ^= (wordaddr[1] ^ (i << 
(2*sizeof(__avword)*8-end_offset*8))) & mask1;
+          }
+        } else {
+          /* Use iret, iret2. */
+          __avword mask0 = ((__avword)2 << 
(sizeof(__avword)*8-start_offset*8-1)) - 1;
+          if (end_offset <= 2*sizeof(__avword)) {
+            /* sizeof(__avword) < end_offset ≤ 2*sizeof(__avword) */
+            __avword mask1 = - ((__avword)1 << 
(2*sizeof(__avword)*8-end_offset*8));
+            wordaddr[0] ^= (wordaddr[0] ^ ((i << 
(2*sizeof(__avword)*8-end_offset*8)) | (iret2 >> 
(end_offset*4-sizeof(__avword)*4) >> (end_offset*4-sizeof(__avword)*4)))) & 
mask0;
+            wordaddr[1] ^= (wordaddr[1] ^ (iret2 << 
(2*sizeof(__avword)*8-end_offset*8))) & mask1;
+          } else {
+            /* 2*sizeof(__avword) < end_offset < 3*sizeof(__avword), 
start_offset > 0 */
+            __avword mask2 = - ((__avword)1 << 
(3*sizeof(__avword)*8-end_offset*8));
+            wordaddr[0] ^= (wordaddr[0] ^ (i >> 
(end_offset*8-2*sizeof(__avword)*8))) & mask0;
+            wordaddr[1] = (i << (3*sizeof(__avword)*8-end_offset*8)) | (iret2 
>> (end_offset*8-2*sizeof(__avword)*8));
+            wordaddr[2] ^= (wordaddr[2] ^ (iret2 << 
(3*sizeof(__avword)*8-end_offset*8))) & mask2;
+          }
+        }
+      #endif
       }
     }
   }
diff --git avcall/avcall.h avcall/avcall.h
index f429dc8..525a25e 100644
--- avcall/avcall.h
+++ avcall/avcall.h
@@ -140,7 +140,7 @@ enum __AV_alist_flags
 #if defined(__sparc__) && !defined(__sparc64__) && defined(__sun) && 
(defined(__SUNPRO_C) || defined(__SUNPRO_CC)) /* SUNWspro cc or CC */
                                  __AV_SUNPROCC_STRUCT_RETURN,
 #else
-#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || 
(defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || 
defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || 
defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || 
defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || 
defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) || 
defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) || 
defined(__riscv64__)
+#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || 
(defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || 
defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || 
defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || 
defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || 
defined(__armhf__) || defined(__arm64__) || (defined(__powerpc__) && 
!defined(__powerpc64__) && defined(__OpenBSD__)) || 
defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) || 
defined(__riscv32__) || defined(__riscv64__)
                                  __AV_SMALL_STRUCT_RETURN |
 #endif
 #if defined(__GNUC__) && !((defined(__mipsn32__) || defined(__mips64__)) && 
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3)))
diff --git callback/vacall_r/Makefile.devel callback/vacall_r/Makefile.devel
index d596091..3899b52 100644
--- callback/vacall_r/Makefile.devel
+++ callback/vacall_r/Makefile.devel
@@ -21,7 +21,7 @@ precompiled : \
   vacall-hppa-macro.S vacall-hppa64-macro.S \
   vacall-arm-macro.S vacall-armhf-macro.S \
   vacall-arm64-macro.S \
-  vacall-powerpc-aix.s vacall-powerpc-linux-macro.S 
vacall-powerpc-sysv4-macro.S vacall-powerpc-macos.s vacall-powerpc64-aix.s 
vacall-powerpc64-linux.S vacall-powerpc64-elfv2-linux.S \
+  vacall-powerpc-aix.s vacall-powerpc-linux-macro.S 
vacall-powerpc-sysv4-macro.S vacall-powerpc-secplt-macro.S 
vacall-powerpc-macos.s vacall-powerpc64-aix.s vacall-powerpc64-linux.S 
vacall-powerpc64-elfv2-linux.S \
   vacall-ia64-macro.S \
   vacall-x86_64-macro.S vacall-x86_64-x32-linux.s 
vacall-x86_64-windows-macro.S \
   vacall-s390-macro.S vacall-s390x-macro.S \
@@ -157,6 +157,11 @@ vacall-powerpc-sysv4-macro.S : 
../../vacall/vacall-powerpc.c ../../vacall/vacall
        (../../common/asm-powerpc.sh < vacall-powerpc-sysv4.s ; cat 
../../common/noexecstack.h) > vacall-powerpc-sysv4-macro.S
        $(RM) vacall-powerpc-sysv4.s
 
+vacall-powerpc-secplt-macro.S : ../../vacall/vacall-powerpc.c 
../../vacall/vacall-internal.h vacall_r.h ../../common/asm-powerpc.sh 
../../common/noexecstack.h $(THISFILE)
+       $(CROSS_TOOL) powerpc-linux gcc -V 4.1.2 -mno-power -mno-power2 
-mno-powerpc -msecure-plt $(GCCFLAGS) -D__powerpc__ -S 
../../vacall/vacall-powerpc.c -I../../vacall -I. -o vacall-powerpc-secplt.s
+       (../../common/asm-powerpc.sh < vacall-powerpc-secplt.s ; cat 
../../common/noexecstack.h) > vacall-powerpc-secplt-macro.S
+       $(RM) vacall-powerpc-secplt.s
+
 vacall-powerpc-macos.s : ../../vacall/vacall-powerpc.c 
../../vacall/vacall-internal.h vacall_r.h $(THISFILE)
        $(CROSS_TOOL) powerpc-darwin gcc -V 3.3.6 $(GCCFLAGS) -D__powerpc__ -S 
../../vacall/vacall-powerpc.c -I../../vacall -I. -o vacall-powerpc-macos.s
 
diff --git callback/vacall_r/Makefile.in callback/vacall_r/Makefile.in
index 9d52db5..0120192 100644
--- callback/vacall_r/Makefile.in
+++ callback/vacall_r/Makefile.in
@@ -147,15 +147,16 @@ vacall-arm64.s : $(srcdir)/vacall-arm64-macro.S
 vacall-powerpc.lo : vacall-powerpc.s
        $(LIBTOOL_COMPILE) $(CC) @GCC_X_NONE@ -c vacall-powerpc.s
 
-vacall-powerpc.s : $(srcdir)/vacall-powerpc-aix.s 
$(srcdir)/vacall-powerpc-linux-macro.S $(srcdir)/vacall-powerpc-macos.s 
$(srcdir)/vacall-powerpc-sysv4-macro.S
+vacall-powerpc.s : $(srcdir)/vacall-powerpc-aix.s 
$(srcdir)/vacall-powerpc-linux-macro.S $(srcdir)/vacall-powerpc-macos.s 
$(srcdir)/vacall-powerpc-sysv4-macro.S $(srcdir)/vacall-powerpc-secplt-macro.S
        case "$(OS)" in \
          aix*) syntax=aix;; \
-         linux* | netbsd* | openbsd*) syntax=linux;; \
+         linux* | netbsd*) syntax=linux;; \
+         openbsd*) syntax=secplt;; \
          macos* | darwin*) syntax=macos;; \
          *) syntax=sysv4;; \
        esac; \
        case $${syntax} in \
-         linux | netbsd | sysv4) \
+         linux | sysv4 | secplt) \
            $(CPP) $(ASPFLAGS) -I$(srcdir) 
$(srcdir)/vacall-powerpc-$${syntax}-macro.S | grep -v '^ *#line' | grep -v '^#' 
| sed -e 's,% ,%,g' -e 's,//,@,g' -e 's,\$$,#,g' > vacall-powerpc.s || exit 1 
;; \
          macos) \
            grep -v '\.machine' $(srcdir)/vacall-powerpc-$${syntax}.s > 
vacall-powerpc.s || exit 1 ;; \
diff --git callback/vacall_r/vacall_r.h callback/vacall_r/vacall_r.h
index 0e66c2e..8ff0c6c 100644
--- callback/vacall_r/vacall_r.h
+++ callback/vacall_r/vacall_r.h
@@ -167,7 +167,7 @@ enum __VA_alist_flags
 #if defined(__sparc__) && !defined(__sparc64__) && defined(__sun) && 
(defined(__SUNPRO_C) || defined(__SUNPRO_CC)) /* SUNWspro cc or CC */
                                  __VA_SUNPROCC_STRUCT_RETURN,
 #else
-#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || 
(defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || 
defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || 
defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || 
defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || 
defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) || 
defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) || 
defined(__riscv64__)
+#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || 
(defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || 
defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || 
defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || 
defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || 
defined(__armhf__) || defined(__arm64__) || (defined(__powerpc__) && 
!defined(__powerpc64__) && defined(__OpenBSD__)) || 
defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) || 
defined(__riscv32__) || defined(__riscv64__)
                                  __VA_SMALL_STRUCT_RETURN |
 #endif
 #if defined(__GNUC__) && !((defined(__mipsn32__) || defined(__mips64__)) && 
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3)))
diff --git cross-tools/cross.conf cross-tools/cross.conf
index 23d3faa..e20abfe 100644
--- cross-tools/cross.conf
+++ cross-tools/cross.conf
@@ -16,7 +16,7 @@ arm64        aarch64-linux           5.4.0       2.24         
aarch64-linux
 rs6000       rs6000-aix              3.3.6       2.17         rs6000-aix
 rs6000       rs6000-aix6.1           5.4.0       2.17         rs6000-aix6.1
 powerpc      powerpc-darwin          3.3.6       2.17         ppc-macos
-powerpc      powerpc-linux           3.3.6       2.17         ppc-linux
+powerpc      powerpc-linux           3.3.6,4.1.2 2.17         ppc-linux
 powerpc64    powerpc64le-linux       5.4.0       2.24         powerpc64le-linux
 ia64         ia64-linux              4.0.1       2.16.1       ia64-linux
 x86_64       x86_64-linux            4.0.2,5.4.0 2.16.1       x86_64-linux
diff --git cross-tools/patches/gcc-4.1.2.patch 
cross-tools/patches/gcc-4.1.2.patch
new file mode 100644
index 0000000..297c241
--- /dev/null
+++ cross-tools/patches/gcc-4.1.2.patch
@@ -0,0 +1,21 @@
+diff -ru gcc-4.1.2/gcc/toplev.h gcc-4.1.2/gcc/toplev.h
+--- gcc-4.1.2/gcc/toplev.h     2005-09-08 17:47:05.000000000 -0700
++++ gcc-4.1.2/gcc/toplev.h     2019-10-16 10:05:49.377343593 -0700
+@@ -171,17 +171,6 @@
+ #  define CTZ_HWI __builtin_ctz
+ # endif
+ 
+-extern inline int
+-floor_log2 (unsigned HOST_WIDE_INT x)
+-{
+-  return x ? HOST_BITS_PER_WIDE_INT - 1 - (int) CLZ_HWI (x) : -1;
+-}
+-
+-extern inline int
+-exact_log2 (unsigned HOST_WIDE_INT x)
+-{
+-  return x == (x & -x) && x ? (int) CTZ_HWI (x) : -1;
+-}
+ #endif /* GCC_VERSION >= 3004 */
+ 
+ /* Functions used to get and set GCC's notion of in what directory
diff --git vacall/vacall-internal.h vacall/vacall-internal.h
index e29a6d4..c120405 100644
--- vacall/vacall-internal.h
+++ vacall/vacall-internal.h
@@ -279,7 +279,7 @@ typedef struct vacall_alist
 #define __va_start_struct1(LIST,TYPE_SIZE,TYPE_ALIGN,TYPE_SPLITTABLE)  \
   ((LIST)->flags |= __VA_REGISTER_STRUCT_RETURN, 0)
 #endif
-#if (defined(__i386__) && !defined(_WIN32)) || defined(__m68k__) || 
(defined(__powerpc__) && !defined(__powerpc64__)) || (defined(__s390__) && 
!defined(__s390x__))
+#if (defined(__i386__) && !defined(_WIN32)) || defined(__m68k__) || 
(defined(__powerpc__) && !defined(__powerpc64__) && !defined(__OpenBSD__)) || 
(defined(__s390__) && !defined(__s390x__))
 #define __va_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE)  \
   ((TYPE_SIZE) == 1 || (TYPE_SIZE) == 2 || (TYPE_SIZE) == 4            \
    || ((TYPE_SIZE) == 8 && (TYPE_SPLITTABLE)                           \
@@ -355,6 +355,16 @@ typedef struct vacall_alist
     && ((LIST)->flags |= __VA_REGISTER_DOUBLESTRUCT_RETURN),                   
\
    0)
 #endif
+#if defined(__powerpc__) && !defined(__powerpc64__) && defined(__OpenBSD__)
+#define __va_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE)  \
+  ((TYPE_SIZE) <= 8)
+/* Turn on __VA_REGISTER_STRUCT_RETURN if __VA_SMALL_STRUCT_RETURN was set
+ * and the struct will actually be returned in registers.
+ */
+#define __va_start_struct1(LIST,TYPE_SIZE,TYPE_ALIGN,TYPE_SPLITTABLE)  \
+  ((LIST)->flags |= __VA_REGISTER_STRUCT_RETURN,   \
+   0)
+#endif
 #if (defined(__powerpc64__) && !defined(__powerpc64_elfv2__)) || 
defined(__s390x__)
 #define __va_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE)  \
   0
diff --git vacall/vacall-powerpc.c vacall/vacall-powerpc.c
index ea3e208..04a790e 100644
--- vacall/vacall-powerpc.c
+++ vacall/vacall-powerpc.c
@@ -155,18 +155,93 @@ vacall_receiver (__vaword word1, __vaword word2, __vaword 
word3, __vaword word4,
   } else
   if (list.rtype == __VAstruct) {
     if (list.flags & __VA_REGISTER_STRUCT_RETURN) {
-      if (list.rsize == sizeof(char)) {
-        iret = *(unsigned char *) list.raddr;
-      } else
-      if (list.rsize == sizeof(short)) {
-        iret = *(unsigned short *) list.raddr;
-      } else
-      if (list.rsize == sizeof(int)) {
-        iret = *(unsigned int *) list.raddr;
-      } else
-      if (list.rsize == 2*sizeof(__vaword)) {
-        iret  = ((__vaword *) list.raddr)[0];
-        iret2 = ((__vaword *) list.raddr)[1];
+      if (list.rsize > 0 && list.rsize <= 8) {
+        #if 0 /* Unoptimized */
+        if (list.rsize == 1) {
+          iret =   ((unsigned char *) list.raddr)[0];
+        } else
+        if (list.rsize == 2) {
+          iret =  (((unsigned char *) list.raddr)[0] << 8)
+                |  ((unsigned char *) list.raddr)[1];
+        } else
+        if (list.rsize == 3) {
+          iret =  (((unsigned char *) list.raddr)[0] << 16)
+                | (((unsigned char *) list.raddr)[1] << 8)
+                |  ((unsigned char *) list.raddr)[2];
+        } else
+        if (list.rsize == 4) {
+          iret =  (((unsigned char *) list.raddr)[0] << 24)
+                | (((unsigned char *) list.raddr)[1] << 16)
+                | (((unsigned char *) list.raddr)[2] << 8)
+                |  ((unsigned char *) list.raddr)[3];
+        } else
+        if (list.rsize == 5) {
+          iret  =   ((unsigned char *) list.raddr)[0];
+          iret2 =  (((unsigned char *) list.raddr)[1] << 24)
+                 | (((unsigned char *) list.raddr)[2] << 16)
+                 | (((unsigned char *) list.raddr)[3] << 8)
+                 |  ((unsigned char *) list.raddr)[4];
+        } else
+        if (list.rsize == 6) {
+          iret  =  (((unsigned char *) list.raddr)[0] << 8)
+                 |  ((unsigned char *) list.raddr)[1];
+          iret2 =  (((unsigned char *) list.raddr)[2] << 24)
+                 | (((unsigned char *) list.raddr)[3] << 16)
+                 | (((unsigned char *) list.raddr)[4] << 8)
+                 |  ((unsigned char *) list.raddr)[5];
+        } else
+        if (list.rsize == 7) {
+          iret  =  (((unsigned char *) list.raddr)[0] << 16)
+                 | (((unsigned char *) list.raddr)[1] << 8)
+                 |  ((unsigned char *) list.raddr)[2];
+          iret2 =  (((unsigned char *) list.raddr)[3] << 24)
+                 | (((unsigned char *) list.raddr)[4] << 16)
+                 | (((unsigned char *) list.raddr)[5] << 8)
+                 |  ((unsigned char *) list.raddr)[6];
+        } else
+        if (list.rsize == 8) {
+          iret  =  (((unsigned char *) list.raddr)[0] << 24)
+                 | (((unsigned char *) list.raddr)[1] << 16)
+                 | (((unsigned char *) list.raddr)[2] << 8)
+                 |  ((unsigned char *) list.raddr)[3];
+          iret2 =  (((unsigned char *) list.raddr)[4] << 24)
+                 | (((unsigned char *) list.raddr)[5] << 16)
+                 | (((unsigned char *) list.raddr)[6] << 8)
+                 |  ((unsigned char *) list.raddr)[7];
+        }
+        #else /* Optimized: fewer conditional jumps, fewer memory accesses */
+        uintptr_t count = list.rsize; /* > 0, ≤ 2*sizeof(__vaword) */
+        __vaword* wordaddr = (__vaword*)((uintptr_t)list.raddr & 
~(uintptr_t)(sizeof(__vaword)-1));
+        uintptr_t start_offset = (uintptr_t)list.raddr & 
(uintptr_t)(sizeof(__vaword)-1); /* ≥ 0, < sizeof(__vaword) */
+        uintptr_t end_offset = start_offset + count; /* > 0, < 
3*sizeof(__vaword) */
+        if (count <= sizeof(__vaword)) {
+          /* Assign iret. */
+          __vaword mask0 = ((__vaword)2 << 
(sizeof(__vaword)*8-start_offset*8-1)) - 1;
+          if (end_offset <= sizeof(__vaword)) {
+            /* 0 < end_offset ≤ sizeof(__vaword) */
+            iret = (wordaddr[0] & mask0) >> (sizeof(__vaword)*8-end_offset*8);
+          } else {
+            /* sizeof(__vaword) < end_offset < 2*sizeof(__vaword), 
start_offset > 0 */
+            iret = ((wordaddr[0] & mask0) << (end_offset*8-sizeof(__vaword)*8))
+                   | (wordaddr[1] >> (2*sizeof(__vaword)*8-end_offset*8));
+          }
+        } else {
+          /* Assign iret, iret2. */
+          __vaword mask0 = ((__vaword)2 << 
(sizeof(__vaword)*8-start_offset*8-1)) - 1;
+          if (end_offset <= 2*sizeof(__vaword)) {
+            /* sizeof(__vaword) < end_offset ≤ 2*sizeof(__vaword) */
+            iret = (wordaddr[0] & mask0) >> 
(2*sizeof(__vaword)*8-end_offset*8);
+            iret2 = ((wordaddr[0] & mask0) << 
(end_offset*4-sizeof(__vaword)*4) << (end_offset*4-sizeof(__vaword)*4))
+                    | (wordaddr[1] >> (2*sizeof(__vaword)*8-end_offset*8));
+          } else {
+            /* 2*sizeof(__vaword) < end_offset < 3*sizeof(__vaword), 
start_offset > 0 */
+            iret = ((wordaddr[0] & mask0) << 
(end_offset*8-2*sizeof(__vaword)*8))
+                   | (wordaddr[1] >> (3*sizeof(__vaword)*8-end_offset*8));
+            iret2 = (wordaddr[1] << (end_offset*8-2*sizeof(__vaword)*8))
+                    | (wordaddr[2] >> (3*sizeof(__vaword)*8-end_offset*8));
+          }
+        }
+        #endif
       }
     }
   }
diff --git vacall/vacall.h vacall/vacall.h
index 9485356..041588f 100644
--- vacall/vacall.h
+++ vacall/vacall.h
@@ -128,7 +128,7 @@ enum __VA_alist_flags
 #if defined(__sparc__) && !defined(__sparc64__) && defined(__sun) && 
(defined(__SUNPRO_C) || defined(__SUNPRO_CC)) /* SUNWspro cc or CC */
                                  __VA_SUNPROCC_STRUCT_RETURN,
 #else
-#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || 
(defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || 
defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || 
defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || 
defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || 
defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) || 
defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) || 
defined(__riscv64__)
+#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) || 
(defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) || 
defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) || 
defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) || 
defined(__hppa__) || defined(__hppa64__) || defined(__arm__) || 
defined(__armhf__) || defined(__arm64__) || (defined(__powerpc__) && 
!defined(__powerpc64__) && defined(__OpenBSD__)) || 
defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) || 
defined(__riscv32__) || defined(__riscv64__)
                                  __VA_SMALL_STRUCT_RETURN |
 #endif
 #if defined(__GNUC__) && !((defined(__mipsn32__) || defined(__mips64__)) && 
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3)))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]