libecb: PATCH ecb_ld* to use MSC intrinsics
Zsbán Ambrus
ambrus at math.bme.hu
Thu Nov 19 14:32:10 CET 2015
Hello.
I attach an attempt to patch libecb so that it uses the MSC (MS
compiler) intrinsic functions to implement the bit manipulation
functions ecb_ld32, ecb_ld64, ecb_ctz32, ecb_ctz64.
The patch seems to work for me on MSC, but needs a bit more testing on
other compilers to make sure I didn't accidentally break the code.
I suggest using the code below to test the return values of the
functions. Test in both C and C++ mode. It may be worth adding this as a
separate test file distributed with libecb, just like the previous
test file for the alignment stuff.
-- Ambrus
/* Test ctz and ld functions */
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include "ecb.h"
/*
 * Reference count-trailing-zeros for a 32-bit value.
 *
 * The lowest set bit is isolated first; each mask then selects the bit
 * positions whose index has one particular binary digit set, so a hit on
 * mask k contributes 2^k to the index.  Returns 0 when x is 0.
 */
ecb_inline int generic_ctz32(uint32_t x)
{
  const uint32_t index_bit_masks[5] = {
    0xaaaaaaaa, /* index bit 0 */
    0xcccccccc, /* index bit 1 */
    0xf0f0f0f0, /* index bit 2 */
    0xff00ff00, /* index bit 3 */
    0xffff0000  /* index bit 4 */
  };
  uint32_t lowest = x & (~x + 1); /* keep only the lowest set bit */
  int index = 0;
  int k;

  for (k = 0; k < 5; k++)
    {
      if (lowest & index_bit_masks[k])
        index += 1 << k;
    }

  return index;
}
/*
 * Reference integer base-2 logarithm (index of the highest set bit) for a
 * 32-bit value.
 *
 * Works as a binary search: for each width 16, 8, 4, 2, 1, if anything
 * remains above that many bits, drop the low bits and add the width to the
 * result.  Returns 0 when x is 0 or 1.
 */
ecb_inline int generic_ld32(uint32_t x)
{
  int log2 = 0;
  int width;

  for (width = 16; width >= 1; width >>= 1)
    {
      if (x >> width)
        {
          x >>= width;
          log2 += width;
        }
    }

  return log2;
}
/*
 * Reference count-trailing-zeros for a 64-bit value, built on
 * generic_ctz32: scan the low half if it has any bit set, otherwise scan
 * the high half and offset the result by 32.
 */
ecb_inline int generic_ctz64 (uint64_t x)
{
  uint32_t low_half = (uint32_t)(x & 0xffffffffU);

  if (low_half)
    return generic_ctz32 (low_half);

  return generic_ctz32 ((uint32_t)(x >> 32)) + 32;
}
/*
 * Reference integer base-2 logarithm for a 64-bit value, built on
 * generic_ld32: if the high half is non-zero the answer lies there (plus
 * 32), otherwise it is the logarithm of the low half.
 */
ecb_inline int generic_ld64 (uint64_t x)
{
  uint32_t high_half = (uint32_t)(x >> 32);

  return high_half
         ? 32 + generic_ld32 (high_half)
         : generic_ld32 ((uint32_t)x);
}
/*
 * Cross-check the ecb_ctz32/ecb_ld32 and ecb_ctz64/ecb_ld64 functions against
 * the generic_* reference implementations.
 *
 * Every combination of three bit indices (j0, j1, j2) is tried, so the inputs
 * are all values with one, two or three bits set.  On the first mismatch the
 * offending input and both pairs of results are written to stderr and the
 * program aborts.  Set dbg to a non-zero value for progress output.
 */
void
bitscantest_main()
{
  printf("Ctz test start.\n");
  const int dbg = 0;

  /* 32-bit variants */
  for (int j0 = 0; j0 < 32; j0++)
    for (int j1 = 0; j1 < 32; j1++)
      {
        for (int j2 = 0; j2 < 32; j2++)
          {
            uint32_t x = (1u << j0 | 1u << j1 | 1u << j2);
            int cte = generic_ctz32(x); /* expected ctz */
            int ctw = ecb_ctz32(x);     /* ctz under test */
            int lde = generic_ld32(x);  /* expected ld */
            int ldw = ecb_ld32(x);      /* ld under test */

            /* only dump the lowest and highest j0 (0 and 31) to limit output */
            if (dbg && (0 == j0 || 31 == j0))
              printf("X32 %x ctz %d %d ld %d %d.\n",
                     (unsigned int)x, cte, ctw, lde, ldw);

            if (cte != ctw || lde != ldw)
              {
                fprintf(stderr, "FAIL32 %x ctz %d %d ld %d %d.\n",
                        (unsigned int)x, cte, ctw, lde, ldw);
                abort();
              }
          }

        if (dbg)
          printf("T32 %d %d.\n", j0, j1);
      }

  /* 64-bit variants */
  for (int j0 = 0; j0 < 64; j0++)
    for (int j1 = 0; j1 < 64; j1++)
      {
        for (int j2 = 0; j2 < 64; j2++)
          {
            uint64_t x = ((uint64_t)1 << j0 | (uint64_t)1 << j1 |
                          (uint64_t)1 << j2);
            int cte = generic_ctz64(x); /* expected ctz */
            int ctw = ecb_ctz64(x);     /* ctz under test */
            int lde = generic_ld64(x);  /* expected ld */
            int ldw = ecb_ld64(x);      /* ld under test */

            /* only dump the lowest and highest j0 (0 and 63) to limit output */
            if (dbg && (0 == j0 || 63 == j0))
              printf("X64 %llx ctz %d %d ld %d %d.\n",
                     (unsigned long long)x, cte, ctw, lde, ldw);

            if (cte != ctw || lde != ldw)
              {
                fprintf(stderr, "FAIL64 %llx ctz %d %d ld %d %d.\n",
                        (unsigned long long)x, cte, ctw, lde, ldw);
                abort();
              }
          }

        if (dbg)
          printf("T64 %d %d.\n", j0, j1);
      }

  printf("Ctz test ok.\n");
}
/* Program entry point: run the bit-scan self-test and report success. */
int
main()
{
  bitscantest_main();

  /* bitscantest_main aborts on mismatch, so reaching here means success */
  return EXIT_SUCCESS;
}
-------------- next part --------------
Index: ecb.h
===================================================================
--- ecb.h (.../git-master/Development/Swappear/projects/vecket/libecb/ecb.h) (revision 107)
+++ ecb.h (.../wall1/Development/Swappear/projects/vecket/libecb/ecb.h) (revision 125)
@@ -157,6 +157,10 @@
#include <builtins.h>
#endif
+#if 1400 <= _MSC_VER
+ #include <intrin.h> /* fence functions _ReadBarrier, also bit search functions _BitScanReverse */
+#endif
+
#ifndef ECB_MEMORY_FENCE
#if ECB_GCC_VERSION(2,5) || defined __INTEL_COMPILER || (__llvm__ && __GNUC__) || __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110
#if __i386 || __i386__
@@ -445,6 +449,11 @@
ecb_function_ ecb_const int
ecb_ctz32 (uint32_t x)
{
+#if 1400 <= _MSC_VER
+ unsigned long r = 0;
+ _BitScanForward(&r, x);
+ return (int)r;
+#else
int r = 0;
x &= ~x + 1; /* this isolates the lowest bit */
@@ -464,6 +473,7 @@
#endif
return r;
+#endif
}
ecb_function_ ecb_const int ecb_ctz64 (uint64_t x);
@@ -470,8 +480,14 @@
ecb_function_ ecb_const int
ecb_ctz64 (uint64_t x)
{
+#if 1400 <= _MSC_VER
+ unsigned long r = 0;
+ _BitScanForward64(&r, x);
+ return (int)r;
+#else
int shift = x & 0xffffffffU ? 0 : 32;
return ecb_ctz32 (x >> shift) + shift;
+#endif
}
ecb_function_ ecb_const int ecb_popcount32 (uint32_t x);
@@ -489,6 +505,11 @@
ecb_function_ ecb_const int ecb_ld32 (uint32_t x);
ecb_function_ ecb_const int ecb_ld32 (uint32_t x)
{
+#if 1400 <= _MSC_VER
+ unsigned long r = 0;
+ _BitScanReverse(&r, x);
+ return (int)r;
+#else
int r = 0;
if (x >> 16) { x >>= 16; r += 16; }
@@ -498,16 +519,23 @@
if (x >> 1) { r += 1; }
return r;
+#endif
}
ecb_function_ ecb_const int ecb_ld64 (uint64_t x);
ecb_function_ ecb_const int ecb_ld64 (uint64_t x)
{
+#if 1400 <= _MSC_VER
+ unsigned long r = 0;
+ _BitScanReverse64(&r, x);
+ return (int)r;
+#else
int r = 0;
if (x >> 32) { x >>= 32; r += 32; }
return r + ecb_ld32 (x);
+#endif
}
#endif
More information about the libev
mailing list