From a4cb5c9ab8346145f78196686595ebab0b334451 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Fri, 14 Oct 2011 22:24:16 +0200 Subject: [PATCH] ffsl: Optimize on 64-bit platforms. * lib/ffsl.h (FUNC): Omit a test from the last loop round. Do loop unrolling. --- ChangeLog | 6 ++++++ lib/ffsl.h | 41 +++++++++++++++++++---------------------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/ChangeLog b/ChangeLog index fede05e01..3f9cf6184 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2011-10-14 Bruno Haible + + ffsl: Optimize on 64-bit platforms. + * lib/ffsl.h (FUNC): Omit a test from the last loop round. Do loop + unrolling. + 2011-10-13 Bruno Haible ffsl: Optimize on 32-bit platforms. diff --git a/lib/ffsl.h b/lib/ffsl.h index 99e7d264a..53d038012 100644 --- a/lib/ffsl.h +++ b/lib/ffsl.h @@ -34,34 +34,31 @@ FUNC (TYPE i) #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) && defined GCC_BUILTIN return GCC_BUILTIN (i); #else - if (sizeof (TYPE) == sizeof (int)) - return ffs (i); - else + unsigned TYPE j = i; + /* Split j into chunks, and look at one chunk after the other. */ + enum { chunk_bits = CHAR_BIT * sizeof (unsigned int) }; + /* The number of chunks is ceil (sizeof (TYPE) / sizeof (unsigned int)) + = (sizeof (TYPE) - 1) / sizeof (unsigned int) + 1. */ + enum { chunk_count = (sizeof (TYPE) - 1) / sizeof (unsigned int) + 1 }; + + if (chunk_count > 1) { - unsigned TYPE j = i; - /* Split j into chunks, and look at one chunk after the other. */ - /* Define chunk_bits so as to avoid a GCC warning - "right shift count >= width of type" - if sizeof (TYPE) == sizeof (int). */ - enum - { - chunk_bits = (sizeof (TYPE) != sizeof (int) - ? CHAR_BIT * sizeof (unsigned int) - : 0) - }; - int result = 0; + size_t k; /* It is tempting to write if (!j) here, but if we do this, Solaris 10/x86 "cc -O" miscompiles the code. */ if (!i) return 0; - while (1) - { - if ((unsigned int) j) - return result + ffs ((unsigned int) j); - j >>= chunk_bits; - result += chunk_bits; - } + /* Unroll the first loop round. k = 0. */ + if ((unsigned int) j) + return ffs ((unsigned int) j); + /* Generic loop. */ + for (k = 1; k < chunk_count - 1; k++) + if ((unsigned int) (j >> (k * chunk_bits)) != 0) + return k * chunk_bits + ffs ((unsigned int) (j >> (k * chunk_bits))); } + /* Last loop round. k = chunk_count - 1. */ + return (chunk_count - 1) * chunk_bits + + ffs ((unsigned int) (j >> ((chunk_count - 1) * chunk_bits))); #endif } -- 2.11.0