blob: b0e9877cc78ee16b9b1299591f0b71058bacaf6d [file] [log] [blame]
/* these first three reference functions were taken from
* http://www.hackersdelight.org/HDcode/nlz.cc and
* http://www.hackersdelight.org/HDcode/ntz.cc
*/
#if defined(__NetBSD__)
#include <sys/bitops.h>
#endif
#include <stdio.h>
#include <string.h>
#include <strings.h>
#if defined(__XS1B__)
#include "xcoretestsuite.h"
#endif
#define u 99
int nlz10b(unsigned x) {
static char table[64] =
{32,20,19, u, u,18, u, 7, 10,17, u, u,14, u, 6, u,
u, 9, u,16, u, u, 1,26, u,13, u, u,24, 5, u, u,
u,21, u, 8,11, u,15, u, u, u, u, 2,27, 0,25, u,
22, u,12, u, u, 3,28, u, 23, u, 4,29, u, u,30,31};
x = x | (x >> 1); // Propagate leftmost
x = x | (x >> 2); // 1-bit to the right.
x = x | (x >> 4);
x = x | (x >> 8);
x = x & ~(x >> 16);
x = (x << 9) - x; // Multiply by 511.
x = (x << 11) - x; // Multiply by 2047.
x = (x << 14) - x; // Multiply by 16383.
return table[x >> 26];
}
int nlzll(unsigned long long x) {
if ((x >> 32) == 0)
return nlz10b(x)+32;
return nlz10b(x>>32);
}
int pop(unsigned x) {
x = x - ((x >> 1) & 0x55555555);
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
x = (x + (x >> 4)) & 0x0F0F0F0F;
x = x + (x << 8);
x = x + (x << 16);
return x >> 24;
}
int popll(unsigned long long x) {
return pop(x) + pop(x >> 32);
}
int ntz8(unsigned x) {
static char table[64] =
{32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
30, u, u, u, u,23, u,19, 29, u,22,18,28,17,16, u};
x = (x & -x)*0x0450FBAF;
return table[x >> 26];
}
/* Work with non-gcc compilers and GCC before 4.0 */
#if !defined(__GNUC__) || __GNUC__ < 4
#define __builtin_clz nlz10b
#define __builtin_popcount pop
#define __builtin_ctz ntz8
#define __builtin_clzll nlzll
#define __builtin_popcountll popll
#define __builtin_ffsl __builtin_ffs
#define ffsl ffs
#endif
#if defined(__NetBSD__)
#define ffsl ffs64
#endif
int i;
int main(void) {
long long l;
/* note: we don't test zero, because the _native_ test will get it
* wrong (GCC returns garbage for ctz/clz of 0), and the nightly tester
* will wrongly conclude that LLC is failing.
*/
for(i=10; i<139045193; i*=-3) {
printf("LLVM: n: %d, clz(n): %d, popcount(n): %d, ctz(n): %d\n",
i, __builtin_clz(i), __builtin_popcount(i), __builtin_ctz(i));
printf("REF : n: %d, clz(n): %d, popcount(n): %d, ctz(n): %d\n",
i, nlz10b(i), pop(i), ntz8(i));
printf(" *** \n");
i++;
}
for(l=-10000; l<139045193*10000LL; l*=-3) {
printf("LLVM: n: %lld, clz(n): %d, popcount(n): %d, ctz(n): %d\n",
l, __builtin_clzll(l), __builtin_popcountll(l), __builtin_ctz(l));
printf("REF LO BITS : n: %lld, clz(n): %d, popcount(n): %d, ctz(n): %d\n",
l, nlzll(l), popll(l), ntz8(l));
printf(" *** \n");
l++;
}
// Check some boundary and other cases for FFS call
printf("FFS: 0:%d, 1:%d, 2:%d, 7:%d, 1024:%d, 1234:%d i:%d, l:%d\n",
ffs(0), ffs(1), ffs(2), ffs(7), ffs(1024), ffs(1234), ffs(i), ffsl(l));
printf("__builtin_ffs: 0:%d, 1:%d, 2:%d, 7:%d, 1024:%d, 1234:%d i:%d l:%d\n",
__builtin_ffs(0), __builtin_ffs(1), __builtin_ffs(2), __builtin_ffs(7),
__builtin_ffs(1024), __builtin_ffs(1234), __builtin_ffs(i), __builtin_ffsl(l));
return(0);
}