[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Optimize three-valued comparison between integers
From: |
Bruno Haible |
Subject: |
Optimize three-valued comparison between integers |
Date: |
Fri, 24 Jul 2020 00:15:54 +0200 |
User-agent: |
KMail/5.1.3 (Linux/4.4.0-179-generic; KDE/5.18.0; x86_64; ; ) |
A comment in Luca Saiu 'jitter' project [1] brought my attention to 3-valued
comparison and the book "Hacker's Delight".
===============================================
int sign1 (long n1, long n2)
{
return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
}
int sign2 (long n1, long n2)
{
return (n1 < n2 ? -1 : n1 > n2);
}
int sign3 (long n1, long n2)
{
return (n1 > n2) - (n1 < n2);
}
===============================================
Which of these, do you think, generates better code? The important fact,
nowadays, is that conditional jumps cause the CPU to stall when it has
mispredicted the jump. So, 1 or 2 more or less ALU instructions are
irrelevant, but conditional jumps are not.
You find conditional jumps in code (x86) produced by GCC versions
sign1: 4.1.2 4.2.4 4.3.6 4.4.7 4.5.4 4.6.4 4.7.3 4.8.5 4.9.4 5.5.0 7.5.0 8.4.0
9.3.0 10.1.0
sign2: 4.1.2 4.2.4 4.3.6 4.4.7 7.5.0 8.4.0 9.3.0
sign3:
So, the third variant comes out best.
Maybe in 10 years, GCC and clang will recognize the patterns from sign1 and
sign2, but we are not there yet.
[1] http://ageinghacker.net/projects/jitter/
[2]
https://books.google.de/books?id=VicPJYM0I5QC&printsec=frontcover&dq=%22hacker%27s+delight%22&hl=en&sa=X&ved=2ahUKEwjN7_LSouHqAhVJKewKHREQBZ8Q6AEwAXoECAMQAg#v=onepage&q&f=false
2020-07-23 Bruno Haible <bruno@clisp.org>
Optimize three-valued comparison between integers.
(a > b ? 1 : a < b ? -1 : 0) is the same as (a > b) - (a < b).
* m4/gnulib-common.m4 (gl_COMMON): Define _GL_CMP.
* lib/c-strcasecmp.c (c_strcasecmp): Use _GL_CMP.
* lib/c-strncasecmp.c (c_strncasecmp): Likewise.
* lib/dfa.c (compare): Likewise.
* lib/fts.c (fts_compare_ino): Likewise.
* lib/mbmemcasecmp.c (mbmemcasecmp): Likewise.
* lib/mbscasecmp.c (mbscasecmp): Likewise.
* lib/mbsncasecmp.c (mbsncasecmp): Likewise.
* lib/memcasecmp.c (memcasecmp): Likewise.
* lib/memcmp2.c (memcmp2): Likewise.
* lib/savedir.c (direntry_cmp_inode): Likewise.
* lib/strcasecmp.c (strcasecmp): Likewise.
* lib/strncasecmp.c (strncasecmp): Likewise.
* lib/unistr/u-cmp2.h (FUNC): Likewise.
diff --git a/m4/gnulib-common.m4 b/m4/gnulib-common.m4
index f4ba5e3..ba42391 100644
--- a/m4/gnulib-common.m4
+++ b/m4/gnulib-common.m4
@@ -1,4 +1,4 @@
-# gnulib-common.m4 serial 50
+# gnulib-common.m4 serial 51
dnl Copyright (C) 2007-2020 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
@@ -293,6 +293,20 @@ AC_DEFUN([gl_COMMON_BODY], [
errno. */
#define _GL_ASYNC_SAFE
])
+ AH_VERBATIM([micro_optimizations],
+[/* _GL_CMP (n1, n2) performs a three-valued comparison on n1 vs. n2.
+ It returns
+ 1 if n1 > n2
+ 0 if n1 == n2
+ -1 if n1 < n2
+ The naïve code (n1 > n2 ? 1 : n1 < n2 ? -1 : 0) produces a conditional
+ jump with nearly all GCC versions up to GCC 10.
+ This variant (n1 < n2 ? -1 : n1 > n2) produces a conditional with many
+ GCC versions up to GCC 9.
+ The better code (n1 > n2) - (n1 < n2) from Hacker's Delight § 2-9
+ avoids conditional jumps in all GCC versions >= 3.4. */
+#define _GL_CMP(n1, n2) ((n1) > (n2)) - ((n1) < (n2))
+])
dnl Hint which direction to take regarding cross-compilation guesses:
dnl When a user installs a program on a platform they are not intimately
dnl familiar with, --enable-cross-guesses=conservative is the appropriate
diff --git a/lib/c-strcasecmp.c b/lib/c-strcasecmp.c
index 3ac650f..1de04b7 100644
--- a/lib/c-strcasecmp.c
+++ b/lib/c-strcasecmp.c
@@ -52,5 +52,5 @@ c_strcasecmp (const char *s1, const char *s2)
/* On machines where 'char' and 'int' are types of the same size, the
difference of two 'unsigned char' values - including the sign bit -
doesn't fit in an 'int'. */
- return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
+ return _GL_CMP (c1, c2);
}
diff --git a/lib/c-strncasecmp.c b/lib/c-strncasecmp.c
index a4e67d1..1782e54 100644
--- a/lib/c-strncasecmp.c
+++ b/lib/c-strncasecmp.c
@@ -52,5 +52,5 @@ c_strncasecmp (const char *s1, const char *s2, size_t n)
/* On machines where 'char' and 'int' are types of the same size, the
difference of two 'unsigned char' values - including the sign bit -
doesn't fit in an 'int'. */
- return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
+ return _GL_CMP (c1, c2);
}
diff --git a/lib/dfa.c b/lib/dfa.c
index dee7be8..1d2d404 100644
--- a/lib/dfa.c
+++ b/lib/dfa.c
@@ -2466,7 +2466,7 @@ static int
compare (const void *a, const void *b)
{
position const *p = a, *q = b;
- return p->index < q->index ? -1 : p->index > q->index;
+ return _GL_CMP (p->index, q->index);
}
static void
diff --git a/lib/fts.c b/lib/fts.c
index 5e357be..a34092c 100644
--- a/lib/fts.c
+++ b/lib/fts.c
@@ -1190,8 +1190,7 @@ fts_children (register FTS *sp, int instr)
static int
fts_compare_ino (struct _ftsent const **a, struct _ftsent const **b)
{
- return (a[0]->fts_statp->st_ino < b[0]->fts_statp->st_ino ? -1
- : b[0]->fts_statp->st_ino < a[0]->fts_statp->st_ino ? 1 : 0);
+ return _GL_CMP (a[0]->fts_statp->st_ino, b[0]->fts_statp->st_ino);
}
/* Map the dirent.d_type value, DTYPE, to the corresponding stat.st_mode
diff --git a/lib/mbmemcasecmp.c b/lib/mbmemcasecmp.c
index 57039d2..d0eea69 100644
--- a/lib/mbmemcasecmp.c
+++ b/lib/mbmemcasecmp.c
@@ -33,7 +33,7 @@ int
mbmemcasecmp (const char *s1, size_t n1, const char *s2, size_t n2)
{
if (s1 == s2)
- return (n1 < n2 ? -1 : n1 > n2 ? 1 : 0);
+ return _GL_CMP (n1, n2);
if (MB_CUR_MAX > 1)
{
@@ -80,7 +80,7 @@ mbmemcasecmp (const char *s1, size_t n1, const char *s2,
size_t n2)
/* On machines where 'char' and 'int' are types of the same
size, the difference of two 'unsigned char' values
- including the sign bit - doesn't fit in an 'int'. */
- return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
+ return _GL_CMP (c1, c2);
}
++p1;
++p2;
diff --git a/lib/mbscasecmp.c b/lib/mbscasecmp.c
index 9a1ea4b..976e94a 100644
--- a/lib/mbscasecmp.c
+++ b/lib/mbscasecmp.c
@@ -93,6 +93,6 @@ mbscasecmp (const char *s1, const char *s2)
/* On machines where 'char' and 'int' are types of the same size, the
difference of two 'unsigned char' values - including the sign bit -
doesn't fit in an 'int'. */
- return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
+ return _GL_CMP (c1, c2);
}
}
diff --git a/lib/mbsncasecmp.c b/lib/mbsncasecmp.c
index a414a69..da43d5c 100644
--- a/lib/mbsncasecmp.c
+++ b/lib/mbsncasecmp.c
@@ -94,6 +94,6 @@ mbsncasecmp (const char *s1, const char *s2, size_t n)
/* On machines where 'char' and 'int' are types of the same size, the
difference of two 'unsigned char' values - including the sign bit -
doesn't fit in an 'int'. */
- return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
+ return _GL_CMP (c1, c2);
}
}
diff --git a/lib/memcasecmp.c b/lib/memcasecmp.c
index a443793..6720b8c 100644
--- a/lib/memcasecmp.c
+++ b/lib/memcasecmp.c
@@ -40,8 +40,7 @@ memcasecmp (const void *vs1, const void *vs2, size_t n)
unsigned char u2 = s2[i];
int U1 = toupper (u1);
int U2 = toupper (u2);
- int diff = (UCHAR_MAX <= INT_MAX ? U1 - U2
- : U1 < U2 ? -1 : U2 < U1);
+ int diff = (UCHAR_MAX <= INT_MAX ? U1 - U2 : _GL_CMP (U1, U2));
if (diff)
return diff;
}
diff --git a/lib/memcmp2.c b/lib/memcmp2.c
index c9c3316..bbfbb02 100644
--- a/lib/memcmp2.c
+++ b/lib/memcmp2.c
@@ -26,11 +26,6 @@ memcmp2 (const char *s1, size_t n1, const char *s2, size_t
n2)
{
int cmp = memcmp (s1, s2, n1 <= n2 ? n1 : n2);
if (cmp == 0)
- {
- if (n1 < n2)
- cmp = -1;
- else if (n1 > n2)
- cmp = 1;
- }
+ cmp = _GL_CMP (n1, n2);
return cmp;
}
diff --git a/lib/savedir.c b/lib/savedir.c
index 5d7151e..c93b81a 100644
--- a/lib/savedir.c
+++ b/lib/savedir.c
@@ -65,7 +65,7 @@ direntry_cmp_inode (void const *a, void const *b)
direntry_t const *dea = a;
direntry_t const *deb = b;
- return dea->ino < deb->ino ? -1 : dea->ino > deb->ino;
+ return _GL_CMP (dea->ino, deb->ino);
}
#endif
diff --git a/lib/strcasecmp.c b/lib/strcasecmp.c
index c0b610e..bcb9adb 100644
--- a/lib/strcasecmp.c
+++ b/lib/strcasecmp.c
@@ -58,5 +58,5 @@ strcasecmp (const char *s1, const char *s2)
/* On machines where 'char' and 'int' are types of the same size, the
difference of two 'unsigned char' values - including the sign bit -
doesn't fit in an 'int'. */
- return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
+ return _GL_CMP (c1, c2);
}
diff --git a/lib/strncasecmp.c b/lib/strncasecmp.c
index 4c1b5b5..396d1ca 100644
--- a/lib/strncasecmp.c
+++ b/lib/strncasecmp.c
@@ -58,5 +58,5 @@ strncasecmp (const char *s1, const char *s2, size_t n)
/* On machines where 'char' and 'int' are types of the same size, the
difference of two 'unsigned char' values - including the sign bit -
doesn't fit in an 'int'. */
- return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
+ return _GL_CMP (c1, c2);
}
diff --git a/lib/unistr/u-cmp2.h b/lib/unistr/u-cmp2.h
index 85a5db8..1e0ea72 100644
--- a/lib/unistr/u-cmp2.h
+++ b/lib/unistr/u-cmp2.h
@@ -21,12 +21,7 @@ FUNC (const UNIT *s1, size_t n1, const UNIT *s2, size_t n2)
int cmp = U_CMP (s1, s2, MIN (n1, n2));
if (cmp == 0)
- {
- if (n1 < n2)
- cmp = -1;
- else if (n1 > n2)
- cmp = 1;
- }
+ cmp = _GL_CMP (n1, n2);
return cmp;
}
- Optimize three-valued comparison between integers,
Bruno Haible <=