+2011-10-14 Bruno Haible <bruno@clisp.org>
+
+ ffsl: Optimize on 64-bit platforms.
+ * lib/ffsl.h (FUNC): Omit a test from the last loop round. Do loop
+ unrolling.
+
2011-10-13 Bruno Haible <bruno@clisp.org>
ffsl: Optimize on 32-bit platforms.
#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) && defined GCC_BUILTIN
return GCC_BUILTIN (i);
#else
- if (sizeof (TYPE) == sizeof (int))
- return ffs (i);
- else
+ unsigned TYPE j = i;
+ /* Split j into chunks, and look at one chunk after the other. */
+ enum { chunk_bits = CHAR_BIT * sizeof (unsigned int) };
+ /* The number of chunks is ceil (sizeof (TYPE) / sizeof (unsigned int))
+ = (sizeof (TYPE) - 1) / sizeof (unsigned int) + 1. */
+ enum { chunk_count = (sizeof (TYPE) - 1) / sizeof (unsigned int) + 1 };
+
+ if (chunk_count > 1)
{
- unsigned TYPE j = i;
- /* Split j into chunks, and look at one chunk after the other. */
- /* Define chunk_bits so as to avoid a GCC warning
- "right shift count >= width of type"
- if sizeof (TYPE) == sizeof (int). */
- enum
- {
- chunk_bits = (sizeof (TYPE) != sizeof (int)
- ? CHAR_BIT * sizeof (unsigned int)
- : 0)
- };
- int result = 0;
+ size_t k;
/* It is tempting to write if (!j) here, but if we do this,
Solaris 10/x86 "cc -O" miscompiles the code. */
if (!i)
return 0;
- while (1)
- {
- if ((unsigned int) j)
- return result + ffs ((unsigned int) j);
- j >>= chunk_bits;
- result += chunk_bits;
- }
+ /* Unroll the first loop round. k = 0. */
+ if ((unsigned int) j)
+ return ffs ((unsigned int) j);
+ /* Generic loop. */
+ for (k = 1; k < chunk_count - 1; k++)
+ if ((unsigned int) (j >> (k * chunk_bits)) != 0)
+ return k * chunk_bits + ffs ((unsigned int) (j >> (k * chunk_bits)));
}
+ /* Last loop round. k = chunk_count - 1. */
+ return (chunk_count - 1) * chunk_bits
+ + ffs ((unsigned int) (j >> ((chunk_count - 1) * chunk_bits)));
#endif
}