[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Libffcall] Return small structs in registers for powerpc on openbsd
From: Josh Elsasser
Subject: Re: [Libffcall] Return small structs in registers for powerpc on openbsd
Date: Thu, 17 Oct 2019 14:08:47 -0700
User-agent: Mutt/1.5.22 (2013-10-16)
On Thu, Oct 17, 2019 at 11:16:23AM -0700, Josh Elsasser wrote:
> Return small structs in registers for PowerPC on OpenBSD. This should
> also be needed for NetBSD, but I have not tested that yet.
>
> Replace the unused powerpc small-struct-copying code with hppa's,
> which copies structs of all sizes.
>
> OpenBSD additionally requires that the secure-PLT ABI be used for PIC
> function references on PowerPC. Add GCC 4.1.2 and use it to build a
> secure-PLT PIC copy of the vacall_r assembly. This isn't needed for avcall
> or vacall.
And once again, with the syntax error fixed...
diff --git .gitignore .gitignore
index 447b983..ed25615 100644
--- .gitignore
+++ .gitignore
@@ -211,6 +211,7 @@
/callback/vacall_r/vacall-powerpc-linux.s
/callback/vacall_r/vacall-powerpc-linux-macro.S
/callback/vacall_r/vacall-powerpc-macos.s
+/callback/vacall_r/vacall-powerpc-secplt-macro.S
/callback/vacall_r/vacall-powerpc-sysv4-macro.S
/callback/vacall_r/vacall-powerpc64-aix.s
/callback/vacall_r/vacall-powerpc64-linux.S
diff --git avcall/avcall-internal.h avcall/avcall-internal.h
index 0357a77..35ea8b6 100644
--- avcall/avcall-internal.h
+++ avcall/avcall-internal.h
@@ -191,7 +191,7 @@ typedef int __av_alist_verify[2*(__AV_ALIST_SIZE_BOUND -
(int)sizeof(__av_alist)
#define __av_start_struct3(LIST) \
((LIST).flags |= __AV_REGISTER_STRUCT_RETURN, 0)
#endif
-#if (defined(__i386__) && !defined(_WIN32)) || defined(__m68k__) ||
(defined(__powerpc__) && !defined(__powerpc64__)) || (defined(__s390__) &&
!defined(__s390x__))
+#if (defined(__i386__) && !defined(_WIN32)) || defined(__m68k__) ||
(defined(__powerpc__) && !defined(__powerpc64__) && !defined(__OpenBSD__)) ||
(defined(__s390__) && !defined(__s390x__))
#define __av_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE) \
((TYPE_SIZE) == 1 || (TYPE_SIZE) == 2 || (TYPE_SIZE) == 4 \
|| ((TYPE_SIZE) == 8 && (TYPE_SPLITTABLE) \
@@ -247,6 +247,15 @@ typedef int __av_alist_verify[2*(__AV_ALIST_SIZE_BOUND -
(int)sizeof(__av_alist)
#define __av_start_struct3(LIST) \
((LIST).flags |= __AV_REGISTER_STRUCT_RETURN, 0)
#endif
+#if defined(__powerpc__) && !defined(__powerpc64__) && defined(__OpenBSD__)
+#define __av_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE) \
+ ((TYPE_SIZE) <= 8)
+/* Turn on __AV_REGISTER_STRUCT_RETURN if __AV_SMALL_STRUCT_RETURN was set
+ * and the struct will actually be returned in registers.
+ */
+#define __av_start_struct3(LIST) \
+ ((LIST).flags |= __AV_REGISTER_STRUCT_RETURN, 0)
+#endif
#if (defined(__powerpc64__) && !defined(__powerpc64_elfv2__)) ||
defined(__s390x__)
#define __av_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE) \
0
diff --git avcall/avcall-powerpc.c avcall/avcall-powerpc.c
index 5d1b6f8..17cbde8 100644
--- avcall/avcall-powerpc.c
+++ avcall/avcall-powerpc.c
@@ -196,19 +196,96 @@ avcall_call(av_alist* list)
} else
if (l->rtype == __AVstruct) {
if (l->flags & __AV_REGISTER_STRUCT_RETURN) {
- if (l->rsize == sizeof(char)) {
- RETURN(char, i);
- } else
- if (l->rsize == sizeof(short)) {
- RETURN(short, i);
- } else
- if (l->rsize == sizeof(int)) {
- RETURN(int, i);
- } else
- if (l->rsize == 2*sizeof(__avword)) {
+ if (l->rsize > 0 && l->rsize <= 8) {
void* raddr = l->raddr;
- ((__avword*)raddr)[0] = i;
- ((__avword*)raddr)[1] = iret2;
+ #if 0 /* Unoptimized */
+ if (l->rsize == 1) {
+ ((unsigned char *)raddr)[0] = (unsigned char)(i);
+ } else
+ if (l->rsize == 2) {
+ ((unsigned char *)raddr)[0] = (unsigned char)(i>>8);
+ ((unsigned char *)raddr)[1] = (unsigned char)(i);
+ } else
+ if (l->rsize == 3) {
+ ((unsigned char *)raddr)[0] = (unsigned char)(i>>16);
+ ((unsigned char *)raddr)[1] = (unsigned char)(i>>8);
+ ((unsigned char *)raddr)[2] = (unsigned char)(i);
+ } else
+ if (l->rsize == 4) {
+ ((unsigned char *)raddr)[0] = (unsigned char)(i>>24);
+ ((unsigned char *)raddr)[1] = (unsigned char)(i>>16);
+ ((unsigned char *)raddr)[2] = (unsigned char)(i>>8);
+ ((unsigned char *)raddr)[3] = (unsigned char)(i);
+ } else
+ if (l->rsize == 5) {
+ ((unsigned char *)raddr)[0] = (unsigned char)(i);
+ ((unsigned char *)raddr)[1] = (unsigned char)(iret2>>24);
+ ((unsigned char *)raddr)[2] = (unsigned char)(iret2>>16);
+ ((unsigned char *)raddr)[3] = (unsigned char)(iret2>>8);
+ ((unsigned char *)raddr)[4] = (unsigned char)(iret2);
+ } else
+ if (l->rsize == 6) {
+ ((unsigned char *)raddr)[0] = (unsigned char)(i>>8);
+ ((unsigned char *)raddr)[1] = (unsigned char)(i);
+ ((unsigned char *)raddr)[2] = (unsigned char)(iret2>>24);
+ ((unsigned char *)raddr)[3] = (unsigned char)(iret2>>16);
+ ((unsigned char *)raddr)[4] = (unsigned char)(iret2>>8);
+ ((unsigned char *)raddr)[5] = (unsigned char)(iret2);
+ } else
+ if (l->rsize == 7) {
+ ((unsigned char *)raddr)[0] = (unsigned char)(i>>16);
+ ((unsigned char *)raddr)[1] = (unsigned char)(i>>8);
+ ((unsigned char *)raddr)[2] = (unsigned char)(i);
+ ((unsigned char *)raddr)[3] = (unsigned char)(iret2>>24);
+ ((unsigned char *)raddr)[4] = (unsigned char)(iret2>>16);
+ ((unsigned char *)raddr)[5] = (unsigned char)(iret2>>8);
+ ((unsigned char *)raddr)[6] = (unsigned char)(iret2);
+ } else
+ if (l->rsize == 8) {
+ ((unsigned char *)raddr)[0] = (unsigned char)(i>>24);
+ ((unsigned char *)raddr)[1] = (unsigned char)(i>>16);
+ ((unsigned char *)raddr)[2] = (unsigned char)(i>>8);
+ ((unsigned char *)raddr)[3] = (unsigned char)(i);
+ ((unsigned char *)raddr)[4] = (unsigned char)(iret2>>24);
+ ((unsigned char *)raddr)[5] = (unsigned char)(iret2>>16);
+ ((unsigned char *)raddr)[6] = (unsigned char)(iret2>>8);
+ ((unsigned char *)raddr)[7] = (unsigned char)(iret2);
+ }
+ #else /* Optimized: fewer conditional jumps, fewer memory accesses */
+ uintptr_t count = l->rsize; /* > 0, ≤ 2*sizeof(__avword) */
+ __avword* wordaddr = (__avword*)((uintptr_t)raddr &
~(uintptr_t)(sizeof(__avword)-1));
+ uintptr_t start_offset = (uintptr_t)raddr &
(uintptr_t)(sizeof(__avword)-1); /* ≥ 0, < sizeof(__avword) */
+ uintptr_t end_offset = start_offset + count; /* > 0, <
3*sizeof(__avword) */
+ if (count <= sizeof(__avword)) {
+ /* Use iret. */
+ if (end_offset <= sizeof(__avword)) {
+ /* 0 < end_offset ≤ sizeof(__avword) */
+ __avword mask0 = ((__avword)2 <<
(sizeof(__avword)*8-start_offset*8-1)) - ((__avword)1 <<
(sizeof(__avword)*8-end_offset*8));
+ wordaddr[0] ^= (wordaddr[0] ^ (i <<
(sizeof(__avword)*8-end_offset*8))) & mask0;
+ } else {
+ /* sizeof(__avword) < end_offset < 2*sizeof(__avword),
start_offset > 0 */
+ __avword mask0 = ((__avword)2 <<
(sizeof(__avword)*8-start_offset*8-1)) - 1;
+ __avword mask1 = - ((__avword)1 <<
(2*sizeof(__avword)*8-end_offset*8));
+ wordaddr[0] ^= (wordaddr[0] ^ (i >>
(end_offset*8-sizeof(__avword)*8))) & mask0;
+ wordaddr[1] ^= (wordaddr[1] ^ (i <<
(2*sizeof(__avword)*8-end_offset*8))) & mask1;
+ }
+ } else {
+ /* Use iret, iret2. */
+ __avword mask0 = ((__avword)2 <<
(sizeof(__avword)*8-start_offset*8-1)) - 1;
+ if (end_offset <= 2*sizeof(__avword)) {
+ /* sizeof(__avword) < end_offset ≤ 2*sizeof(__avword) */
+ __avword mask1 = - ((__avword)1 <<
(2*sizeof(__avword)*8-end_offset*8));
+ wordaddr[0] ^= (wordaddr[0] ^ ((i <<
(2*sizeof(__avword)*8-end_offset*8)) | (iret2 >>
(end_offset*4-sizeof(__avword)*4) >> (end_offset*4-sizeof(__avword)*4)))) &
mask0;
+ wordaddr[1] ^= (wordaddr[1] ^ (iret2 <<
(2*sizeof(__avword)*8-end_offset*8))) & mask1;
+ } else {
+ /* 2*sizeof(__avword) < end_offset < 3*sizeof(__avword),
start_offset > 0 */
+ __avword mask2 = - ((__avword)1 <<
(3*sizeof(__avword)*8-end_offset*8));
+ wordaddr[0] ^= (wordaddr[0] ^ (i >>
(end_offset*8-2*sizeof(__avword)*8))) & mask0;
+ wordaddr[1] = (i << (3*sizeof(__avword)*8-end_offset*8)) | (iret2
>> (end_offset*8-2*sizeof(__avword)*8));
+ wordaddr[2] ^= (wordaddr[2] ^ (iret2 <<
(3*sizeof(__avword)*8-end_offset*8))) & mask2;
+ }
+ }
+ #endif
}
}
}
diff --git avcall/avcall.h avcall/avcall.h
index f429dc8..525a25e 100644
--- avcall/avcall.h
+++ avcall/avcall.h
@@ -140,7 +140,7 @@ enum __AV_alist_flags
#if defined(__sparc__) && !defined(__sparc64__) && defined(__sun) &&
(defined(__SUNPRO_C) || defined(__SUNPRO_CC)) /* SUNWspro cc or CC */
__AV_SUNPROCC_STRUCT_RETURN,
#else
-#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) ||
(defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) ||
defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) ||
defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) ||
defined(__hppa__) || defined(__hppa64__) || defined(__arm__) ||
defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) ||
defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) ||
defined(__riscv64__)
+#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) ||
(defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) ||
defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) ||
defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) ||
defined(__hppa__) || defined(__hppa64__) || defined(__arm__) ||
defined(__armhf__) || defined(__arm64__) || (defined(__powerpc__) &&
!defined(__powerpc64__) && defined(__OpenBSD__)) ||
defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) ||
defined(__riscv32__) || defined(__riscv64__)
__AV_SMALL_STRUCT_RETURN |
#endif
#if defined(__GNUC__) && !((defined(__mipsn32__) || defined(__mips64__)) &&
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3)))
diff --git callback/vacall_r/Makefile.devel callback/vacall_r/Makefile.devel
index d596091..3899b52 100644
--- callback/vacall_r/Makefile.devel
+++ callback/vacall_r/Makefile.devel
@@ -21,7 +21,7 @@ precompiled : \
vacall-hppa-macro.S vacall-hppa64-macro.S \
vacall-arm-macro.S vacall-armhf-macro.S \
vacall-arm64-macro.S \
- vacall-powerpc-aix.s vacall-powerpc-linux-macro.S
vacall-powerpc-sysv4-macro.S vacall-powerpc-macos.s vacall-powerpc64-aix.s
vacall-powerpc64-linux.S vacall-powerpc64-elfv2-linux.S \
+ vacall-powerpc-aix.s vacall-powerpc-linux-macro.S
vacall-powerpc-sysv4-macro.S vacall-powerpc-secplt-macro.S
vacall-powerpc-macos.s vacall-powerpc64-aix.s vacall-powerpc64-linux.S
vacall-powerpc64-elfv2-linux.S \
vacall-ia64-macro.S \
vacall-x86_64-macro.S vacall-x86_64-x32-linux.s
vacall-x86_64-windows-macro.S \
vacall-s390-macro.S vacall-s390x-macro.S \
@@ -157,6 +157,11 @@ vacall-powerpc-sysv4-macro.S :
../../vacall/vacall-powerpc.c ../../vacall/vacall
(../../common/asm-powerpc.sh < vacall-powerpc-sysv4.s ; cat
../../common/noexecstack.h) > vacall-powerpc-sysv4-macro.S
$(RM) vacall-powerpc-sysv4.s
+vacall-powerpc-secplt-macro.S : ../../vacall/vacall-powerpc.c
../../vacall/vacall-internal.h vacall_r.h ../../common/asm-powerpc.sh
../../common/noexecstack.h $(THISFILE)
+ $(CROSS_TOOL) powerpc-linux gcc -V 4.1.2 -mno-power -mno-power2
-mno-powerpc -msecure-plt $(GCCFLAGS) -D__powerpc__ -S
../../vacall/vacall-powerpc.c -I../../vacall -I. -o vacall-powerpc-secplt.s
+ (../../common/asm-powerpc.sh < vacall-powerpc-secplt.s ; cat
../../common/noexecstack.h) > vacall-powerpc-secplt-macro.S
+ $(RM) vacall-powerpc-secplt.s
+
vacall-powerpc-macos.s : ../../vacall/vacall-powerpc.c
../../vacall/vacall-internal.h vacall_r.h $(THISFILE)
$(CROSS_TOOL) powerpc-darwin gcc -V 3.3.6 $(GCCFLAGS) -D__powerpc__ -S
../../vacall/vacall-powerpc.c -I../../vacall -I. -o vacall-powerpc-macos.s
diff --git callback/vacall_r/Makefile.in callback/vacall_r/Makefile.in
index 9d52db5..0120192 100644
--- callback/vacall_r/Makefile.in
+++ callback/vacall_r/Makefile.in
@@ -147,15 +147,16 @@ vacall-arm64.s : $(srcdir)/vacall-arm64-macro.S
vacall-powerpc.lo : vacall-powerpc.s
$(LIBTOOL_COMPILE) $(CC) @GCC_X_NONE@ -c vacall-powerpc.s
-vacall-powerpc.s : $(srcdir)/vacall-powerpc-aix.s
$(srcdir)/vacall-powerpc-linux-macro.S $(srcdir)/vacall-powerpc-macos.s
$(srcdir)/vacall-powerpc-sysv4-macro.S
+vacall-powerpc.s : $(srcdir)/vacall-powerpc-aix.s
$(srcdir)/vacall-powerpc-linux-macro.S $(srcdir)/vacall-powerpc-macos.s
$(srcdir)/vacall-powerpc-sysv4-macro.S $(srcdir)/vacall-powerpc-secplt-macro.S
case "$(OS)" in \
aix*) syntax=aix;; \
- linux* | netbsd* | openbsd*) syntax=linux;; \
+ linux* | netbsd*) syntax=linux;; \
+ openbsd*) syntax=secplt;; \
macos* | darwin*) syntax=macos;; \
*) syntax=sysv4;; \
esac; \
case $${syntax} in \
- linux | netbsd | sysv4) \
+ linux | sysv4 | secplt) \
$(CPP) $(ASPFLAGS) -I$(srcdir)
$(srcdir)/vacall-powerpc-$${syntax}-macro.S | grep -v '^ *#line' | grep -v '^#'
| sed -e 's,% ,%,g' -e 's,//,@,g' -e 's,\$$,#,g' > vacall-powerpc.s || exit 1
;; \
macos) \
grep -v '\.machine' $(srcdir)/vacall-powerpc-$${syntax}.s >
vacall-powerpc.s || exit 1 ;; \
diff --git callback/vacall_r/vacall_r.h callback/vacall_r/vacall_r.h
index 0e66c2e..8ff0c6c 100644
--- callback/vacall_r/vacall_r.h
+++ callback/vacall_r/vacall_r.h
@@ -167,7 +167,7 @@ enum __VA_alist_flags
#if defined(__sparc__) && !defined(__sparc64__) && defined(__sun) &&
(defined(__SUNPRO_C) || defined(__SUNPRO_CC)) /* SUNWspro cc or CC */
__VA_SUNPROCC_STRUCT_RETURN,
#else
-#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) ||
(defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) ||
defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) ||
defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) ||
defined(__hppa__) || defined(__hppa64__) || defined(__arm__) ||
defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) ||
defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) ||
defined(__riscv64__)
+#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) ||
(defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) ||
defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) ||
defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) ||
defined(__hppa__) || defined(__hppa64__) || defined(__arm__) ||
defined(__armhf__) || defined(__arm64__) || (defined(__powerpc__) &&
!defined(__powerpc64__) && defined(__OpenBSD__)) ||
defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) ||
defined(__riscv32__) || defined(__riscv64__)
__VA_SMALL_STRUCT_RETURN |
#endif
#if defined(__GNUC__) && !((defined(__mipsn32__) || defined(__mips64__)) &&
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3)))
diff --git cross-tools/cross.conf cross-tools/cross.conf
index 23d3faa..e20abfe 100644
--- cross-tools/cross.conf
+++ cross-tools/cross.conf
@@ -16,7 +16,7 @@ arm64 aarch64-linux 5.4.0 2.24
aarch64-linux
rs6000 rs6000-aix 3.3.6 2.17 rs6000-aix
rs6000 rs6000-aix6.1 5.4.0 2.17 rs6000-aix6.1
powerpc powerpc-darwin 3.3.6 2.17 ppc-macos
-powerpc powerpc-linux 3.3.6 2.17 ppc-linux
+powerpc powerpc-linux 3.3.6,4.1.2 2.17 ppc-linux
powerpc64 powerpc64le-linux 5.4.0 2.24 powerpc64le-linux
ia64 ia64-linux 4.0.1 2.16.1 ia64-linux
x86_64 x86_64-linux 4.0.2,5.4.0 2.16.1 x86_64-linux
diff --git cross-tools/patches/gcc-4.1.2.patch
cross-tools/patches/gcc-4.1.2.patch
new file mode 100644
index 0000000..297c241
--- /dev/null
+++ cross-tools/patches/gcc-4.1.2.patch
@@ -0,0 +1,21 @@
+diff -ru gcc-4.1.2/gcc/toplev.h gcc-4.1.2/gcc/toplev.h
+--- gcc-4.1.2/gcc/toplev.h 2005-09-08 17:47:05.000000000 -0700
++++ gcc-4.1.2/gcc/toplev.h 2019-10-16 10:05:49.377343593 -0700
+@@ -171,17 +171,6 @@
+ # define CTZ_HWI __builtin_ctz
+ # endif
+
+-extern inline int
+-floor_log2 (unsigned HOST_WIDE_INT x)
+-{
+- return x ? HOST_BITS_PER_WIDE_INT - 1 - (int) CLZ_HWI (x) : -1;
+-}
+-
+-extern inline int
+-exact_log2 (unsigned HOST_WIDE_INT x)
+-{
+- return x == (x & -x) && x ? (int) CTZ_HWI (x) : -1;
+-}
+ #endif /* GCC_VERSION >= 3004 */
+
+ /* Functions used to get and set GCC's notion of in what directory
diff --git vacall/vacall-internal.h vacall/vacall-internal.h
index e29a6d4..c120405 100644
--- vacall/vacall-internal.h
+++ vacall/vacall-internal.h
@@ -279,7 +279,7 @@ typedef struct vacall_alist
#define __va_start_struct1(LIST,TYPE_SIZE,TYPE_ALIGN,TYPE_SPLITTABLE) \
((LIST)->flags |= __VA_REGISTER_STRUCT_RETURN, 0)
#endif
-#if (defined(__i386__) && !defined(_WIN32)) || defined(__m68k__) ||
(defined(__powerpc__) && !defined(__powerpc64__)) || (defined(__s390__) &&
!defined(__s390x__))
+#if (defined(__i386__) && !defined(_WIN32)) || defined(__m68k__) ||
(defined(__powerpc__) && !defined(__powerpc64__) && !defined(__OpenBSD__)) ||
(defined(__s390__) && !defined(__s390x__))
#define __va_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE) \
((TYPE_SIZE) == 1 || (TYPE_SIZE) == 2 || (TYPE_SIZE) == 4 \
|| ((TYPE_SIZE) == 8 && (TYPE_SPLITTABLE) \
@@ -355,6 +355,16 @@ typedef struct vacall_alist
&& ((LIST)->flags |= __VA_REGISTER_DOUBLESTRUCT_RETURN),
\
0)
#endif
+#if defined(__powerpc__) && !defined(__powerpc64__) && defined(__OpenBSD__)
+#define __va_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE) \
+ ((TYPE_SIZE) <= 8)
+/* Turn on __VA_REGISTER_STRUCT_RETURN if __VA_SMALL_STRUCT_RETURN was set
+ * and the struct will actually be returned in registers.
+ */
+#define __va_start_struct1(LIST,TYPE_SIZE,TYPE_ALIGN,TYPE_SPLITTABLE) \
+ ((LIST)->flags |= __VA_REGISTER_STRUCT_RETURN, \
+ 0)
+#endif
#if (defined(__powerpc64__) && !defined(__powerpc64_elfv2__)) ||
defined(__s390x__)
#define __va_reg_struct_return(LIST,TYPE_SIZE,TYPE_SPLITTABLE) \
0
diff --git vacall/vacall-powerpc.c vacall/vacall-powerpc.c
index ea3e208..04a790e 100644
--- vacall/vacall-powerpc.c
+++ vacall/vacall-powerpc.c
@@ -155,18 +155,93 @@ vacall_receiver (__vaword word1, __vaword word2, __vaword
word3, __vaword word4,
} else
if (list.rtype == __VAstruct) {
if (list.flags & __VA_REGISTER_STRUCT_RETURN) {
- if (list.rsize == sizeof(char)) {
- iret = *(unsigned char *) list.raddr;
- } else
- if (list.rsize == sizeof(short)) {
- iret = *(unsigned short *) list.raddr;
- } else
- if (list.rsize == sizeof(int)) {
- iret = *(unsigned int *) list.raddr;
- } else
- if (list.rsize == 2*sizeof(__vaword)) {
- iret = ((__vaword *) list.raddr)[0];
- iret2 = ((__vaword *) list.raddr)[1];
+ if (list.rsize > 0 && list.rsize <= 8) {
+ #if 0 /* Unoptimized */
+ if (list.rsize == 1) {
+ iret = ((unsigned char *) list.raddr)[0];
+ } else
+ if (list.rsize == 2) {
+ iret = (((unsigned char *) list.raddr)[0] << 8)
+ | ((unsigned char *) list.raddr)[1];
+ } else
+ if (list.rsize == 3) {
+ iret = (((unsigned char *) list.raddr)[0] << 16)
+ | (((unsigned char *) list.raddr)[1] << 8)
+ | ((unsigned char *) list.raddr)[2];
+ } else
+ if (list.rsize == 4) {
+ iret = (((unsigned char *) list.raddr)[0] << 24)
+ | (((unsigned char *) list.raddr)[1] << 16)
+ | (((unsigned char *) list.raddr)[2] << 8)
+ | ((unsigned char *) list.raddr)[3];
+ } else
+ if (list.rsize == 5) {
+ iret = ((unsigned char *) list.raddr)[0];
+ iret2 = (((unsigned char *) list.raddr)[1] << 24)
+ | (((unsigned char *) list.raddr)[2] << 16)
+ | (((unsigned char *) list.raddr)[3] << 8)
+ | ((unsigned char *) list.raddr)[4];
+ } else
+ if (list.rsize == 6) {
+ iret = (((unsigned char *) list.raddr)[0] << 8)
+ | ((unsigned char *) list.raddr)[1];
+ iret2 = (((unsigned char *) list.raddr)[2] << 24)
+ | (((unsigned char *) list.raddr)[3] << 16)
+ | (((unsigned char *) list.raddr)[4] << 8)
+ | ((unsigned char *) list.raddr)[5];
+ } else
+ if (list.rsize == 7) {
+ iret = (((unsigned char *) list.raddr)[0] << 16)
+ | (((unsigned char *) list.raddr)[1] << 8)
+ | ((unsigned char *) list.raddr)[2];
+ iret2 = (((unsigned char *) list.raddr)[3] << 24)
+ | (((unsigned char *) list.raddr)[4] << 16)
+ | (((unsigned char *) list.raddr)[5] << 8)
+ | ((unsigned char *) list.raddr)[6];
+ } else
+ if (list.rsize == 8) {
+ iret = (((unsigned char *) list.raddr)[0] << 24)
+ | (((unsigned char *) list.raddr)[1] << 16)
+ | (((unsigned char *) list.raddr)[2] << 8)
+ | ((unsigned char *) list.raddr)[3];
+ iret2 = (((unsigned char *) list.raddr)[4] << 24)
+ | (((unsigned char *) list.raddr)[5] << 16)
+ | (((unsigned char *) list.raddr)[6] << 8)
+ | ((unsigned char *) list.raddr)[7];
+ }
+ #else /* Optimized: fewer conditional jumps, fewer memory accesses */
+ uintptr_t count = list.rsize; /* > 0, ≤ 2*sizeof(__vaword) */
+ __vaword* wordaddr = (__vaword*)((uintptr_t)list.raddr &
~(uintptr_t)(sizeof(__vaword)-1));
+ uintptr_t start_offset = (uintptr_t)list.raddr &
(uintptr_t)(sizeof(__vaword)-1); /* ≥ 0, < sizeof(__vaword) */
+ uintptr_t end_offset = start_offset + count; /* > 0, <
3*sizeof(__vaword) */
+ if (count <= sizeof(__vaword)) {
+ /* Assign iret. */
+ __vaword mask0 = ((__vaword)2 <<
(sizeof(__vaword)*8-start_offset*8-1)) - 1;
+ if (end_offset <= sizeof(__vaword)) {
+ /* 0 < end_offset ≤ sizeof(__vaword) */
+ iret = (wordaddr[0] & mask0) >> (sizeof(__vaword)*8-end_offset*8);
+ } else {
+ /* sizeof(__vaword) < end_offset < 2*sizeof(__vaword),
start_offset > 0 */
+ iret = ((wordaddr[0] & mask0) << (end_offset*8-sizeof(__vaword)*8))
+ | (wordaddr[1] >> (2*sizeof(__vaword)*8-end_offset*8));
+ }
+ } else {
+ /* Assign iret, iret2. */
+ __vaword mask0 = ((__vaword)2 <<
(sizeof(__vaword)*8-start_offset*8-1)) - 1;
+ if (end_offset <= 2*sizeof(__vaword)) {
+ /* sizeof(__vaword) < end_offset ≤ 2*sizeof(__vaword) */
+ iret = (wordaddr[0] & mask0) >>
(2*sizeof(__vaword)*8-end_offset*8);
+ iret2 = ((wordaddr[0] & mask0) <<
(end_offset*4-sizeof(__vaword)*4) << (end_offset*4-sizeof(__vaword)*4))
+ | (wordaddr[1] >> (2*sizeof(__vaword)*8-end_offset*8));
+ } else {
+ /* 2*sizeof(__vaword) < end_offset < 3*sizeof(__vaword),
start_offset > 0 */
+ iret = ((wordaddr[0] & mask0) <<
(end_offset*8-2*sizeof(__vaword)*8))
+ | (wordaddr[1] >> (3*sizeof(__vaword)*8-end_offset*8));
+ iret2 = (wordaddr[1] << (end_offset*8-2*sizeof(__vaword)*8))
+ | (wordaddr[2] >> (3*sizeof(__vaword)*8-end_offset*8));
+ }
+ }
+ #endif
}
}
}
diff --git vacall/vacall.h vacall/vacall.h
index 9485356..041588f 100644
--- vacall/vacall.h
+++ vacall/vacall.h
@@ -128,7 +128,7 @@ enum __VA_alist_flags
#if defined(__sparc__) && !defined(__sparc64__) && defined(__sun) &&
(defined(__SUNPRO_C) || defined(__SUNPRO_CC)) /* SUNWspro cc or CC */
__VA_SUNPROCC_STRUCT_RETURN,
#else
-#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) ||
(defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) ||
defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) ||
defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) ||
defined(__hppa__) || defined(__hppa64__) || defined(__arm__) ||
defined(__armhf__) || defined(__arm64__) || defined(__powerpc64_elfv2__) ||
defined(__ia64__) || defined(__x86_64__) || defined(__riscv32__) ||
defined(__riscv64__)
+#if (defined(__i386__) && (defined(_WIN32) || defined(__CYGWIN__) ||
(defined(__MACH__) && defined(__APPLE__)) || defined(__FreeBSD__) ||
defined(__DragonFly__) || defined(__OpenBSD__))) || defined(__m68k__) ||
defined(__mipsn32__) || defined(__mips64__) || defined(__sparc64__) ||
defined(__hppa__) || defined(__hppa64__) || defined(__arm__) ||
defined(__armhf__) || defined(__arm64__) || (defined(__powerpc__) &&
!defined(__powerpc64__) && defined(__OpenBSD__)) ||
defined(__powerpc64_elfv2__) || defined(__ia64__) || defined(__x86_64__) ||
defined(__riscv32__) || defined(__riscv64__)
__VA_SMALL_STRUCT_RETURN |
#endif
#if defined(__GNUC__) && !((defined(__mipsn32__) || defined(__mips64__)) &&
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ > 3)))