libecb: PATCH ecb_ld* to use MSC intrinsics

Zsbán Ambrus ambrus at math.bme.hu
Thu Nov 19 14:32:10 CET 2015


Hello.

I attach an attempt to patch libecb so that it uses the MSC (MS
compiler) intrinsic functions to implement the bit manipulation
functions ecb_ld32, ecb_ld64, ecb_ctz32, ecb_ctz64.

The patch seems to work for me on MSC, but needs a bit more testing on
other compilers to make sure I didn't accidentally break the code.

I suggest using the code below to test the return values of the
functions.  Test in both C and C++ mode.  It may be worth adding this
as a separate test file distributed with libecb, just like the previous
test file for the alignment stuff.

-- Ambrus



/* Test ctz and ld functions  */
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include "ecb.h"

/*
 * Reference implementation: index of the lowest set bit of a 32-bit value.
 *
 * The lowest set bit is isolated first; each mask below then tests whether
 * that bit sits at a position whose index has binary digit 1, 2, 4, 8 or 16
 * set, and the matching weights are summed.  Returns 0 when x is 0.
 */
ecb_inline int generic_ctz32(uint32_t x)
{
    /* x & -x in unsigned arithmetic: only the lowest set bit survives */
    uint32_t lowest = x & (~x + 1);
    int index = 0;

    index += (lowest & 0xaaaaaaaa) ?  1 : 0;
    index += (lowest & 0xcccccccc) ?  2 : 0;
    index += (lowest & 0xf0f0f0f0) ?  4 : 0;
    index += (lowest & 0xff00ff00) ?  8 : 0;
    index += (lowest & 0xffff0000) ? 16 : 0;

    return index;
}

/*
 * Reference implementation: index of the highest set bit of a 32-bit value
 * (the integer base-2 logarithm, rounded down).  Returns 0 when x is 0.
 *
 * Works by binary search: for each step width 16, 8, 4, 2, 1, if anything
 * remains above that width, discard the low part and add the width.
 */
ecb_inline int generic_ld32(uint32_t x)
{
    int index = 0;
    int width;

    for (width = 16; width > 0; width >>= 1)
    {
        if (x >> width)
        {
            x >>= width;
            index += width;
        }
    }

    return index;
}

/*
 * Reference implementation: index of the lowest set bit of a 64-bit value,
 * built on generic_ctz32.  If the low 32 bits are all zero, the high half
 * is scanned instead and 32 is added to the result.
 */
ecb_inline int generic_ctz64 (uint64_t x)
{
    int offset = 0;

    if (!(x & 0xffffffffU))
    {
        offset = 32; /* nothing set in the low half: look at the high half */
    }

    /* the argument is truncated to 32 bits by the call */
    return offset + generic_ctz32 (x >> offset);
}

/*
 * Reference implementation: index of the highest set bit of a 64-bit value,
 * built on generic_ld32.  If any bit of the high half is set, the high half
 * decides the result (plus 32); otherwise the low half does.
 */
ecb_inline int generic_ld64 (uint64_t x)
{
    uint64_t high = x >> 32;

    if (high)
    {
        return 32 + generic_ld32 (high);
    }

    /* high half is empty, so x already fits in 32 bits */
    return generic_ld32 (x);
}

/*
 * Compare the ecb_* bit-scan functions against the generic_* reference
 * implementations.
 *
 * Every 32-bit and every 64-bit value formed by OR-ing three single-bit
 * masks is checked (the three positions may coincide, so one to three bits
 * are set; zero is never passed).  The process abort()s on the first
 * disagreement; otherwise a start line and an ok line are printed to stdout.
 * Setting dbg to a non-zero value additionally prints trace lines for the
 * lowest and highest j0 position and after each inner loop.
 */
void
bitscantest_main(void)
{
    const int dbg = 0;

    printf("Ctz test start.\n");
    for (int j0 = 0; j0 < 32; j0++)
    for (int j1 = 0; j1 < 32; j1++)
    {
        for (int j2 = 0; j2 < 32; j2++)
        {
            uint32_t x = (1u << j0 | 1u << j1 | 1u << j2);
            int cte = generic_ctz32(x);
            int ctw = ecb_ctz32(x);
            int lde = generic_ld32(x);
            int ldw = ecb_ld32(x);
            /* trace the first and last bit position (31, mirroring 63 in the 64-bit loop) */
            if (dbg && (0 == j0 || 31 == j0))
                printf("X32 %x ctz %d %d ld %d %d.\n", (unsigned int)x, cte, ctw, lde, ldw);
            if (cte != ctw || lde != ldw) abort();
        }
        if (dbg)
            printf("T32 %d %d.\n", j0, j1);
    }
    for (int j0 = 0; j0 < 64; j0++)
    for (int j1 = 0; j1 < 64; j1++)
    {
        for (int j2 = 0; j2 < 64; j2++)
        {
            uint64_t x = ((uint64_t)1 << j0 | (uint64_t)1 << j1 | (uint64_t)1 << j2);
            int cte = generic_ctz64(x);
            int ctw = ecb_ctz64(x);
            int lde = generic_ld64(x);
            int ldw = ecb_ld64(x);
            if (dbg && (0 == j0 || 63 == j0))
                printf("X64 %llx ctz %d %d ld %d %d.\n", (unsigned long long)x, cte, ctw, lde, ldw);
            if (cte != ctw || lde != ldw) abort();
        }
        if (dbg)
            printf("T64 %d %d.\n", j0, j1);
    }
    printf("Ctz test ok.\n");
}

/* Program entry point: run the bit-scan self-test, then exit with status 0. */
int
main(void)
{
    bitscantest_main();

    return 0;
}
-------------- next part --------------
Index: ecb.h
===================================================================
--- ecb.h	(.../git-master/Development/Swappear/projects/vecket/libecb/ecb.h)	(revision 107)
+++ ecb.h	(.../wall1/Development/Swappear/projects/vecket/libecb/ecb.h)	(revision 125)
@@ -157,6 +157,10 @@
   #include <builtins.h>
 #endif
 
+#if 1400 <= _MSC_VER
+  #include <intrin.h> /* fence functions _ReadBarrier, also bit search functions _BitScanReverse */
+#endif
+
 #ifndef ECB_MEMORY_FENCE
   #if ECB_GCC_VERSION(2,5) || defined __INTEL_COMPILER || (__llvm__ && __GNUC__) || __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110
     #if __i386 || __i386__
@@ -445,6 +449,11 @@
   ecb_function_ ecb_const int
   ecb_ctz32 (uint32_t x)
   {
+#if 1400 <= _MSC_VER
+  	unsigned long r = 0;
+	_BitScanForward(&r, x);
+	return (int)r;
+#else
     int r = 0;
 
     x &= ~x + 1; /* this isolates the lowest bit */
@@ -464,6 +473,7 @@
 #endif
 
     return r;
+#endif
   }
 
   ecb_function_ ecb_const int ecb_ctz64 (uint64_t x);
@@ -470,8 +480,14 @@
   ecb_function_ ecb_const int
   ecb_ctz64 (uint64_t x)
   {
+#if 1400 <= _MSC_VER && (defined _M_X64 || defined _M_ARM64) /* _BitScanForward64 exists only on 64-bit targets; others use the portable split below */
+    unsigned long r = 0;
+    _BitScanForward64(&r, x);
+    return (int)r;
+#else
     int shift = x & 0xffffffffU ? 0 : 32;
     return ecb_ctz32 (x >> shift) + shift;
+#endif
   }
 
   ecb_function_ ecb_const int ecb_popcount32 (uint32_t x);
@@ -489,6 +505,11 @@
   ecb_function_ ecb_const int ecb_ld32 (uint32_t x);
   ecb_function_ ecb_const int ecb_ld32 (uint32_t x)
   {
+#if 1400 <= _MSC_VER
+  	unsigned long r = 0;
+	_BitScanReverse(&r, x);
+	return (int)r;
+#else
     int r = 0;
 
     if (x >> 16) { x >>= 16; r += 16; }
@@ -498,16 +519,23 @@
     if (x >>  1) {           r +=  1; }
 
     return r;
+#endif
   }
 
   ecb_function_ ecb_const int ecb_ld64 (uint64_t x);
   ecb_function_ ecb_const int ecb_ld64 (uint64_t x)
   {
+#if 1400 <= _MSC_VER && (defined _M_X64 || defined _M_ARM64) /* _BitScanReverse64 exists only on 64-bit targets; others use the portable split below */
+    unsigned long r = 0;
+    _BitScanReverse64(&r, x);
+    return (int)r;
+#else
     int r = 0;
 
     if (x >> 32) { x >>= 32; r += 32; }
 
     return r + ecb_ld32 (x);
+#endif
   }
 #endif
 


More information about the libev mailing list