Branch data Line data Source code
1 : : /* Bit and bytes utilities.
2 : :
3 : : Bytes swap functions, reverse order of bytes:
4 : :
5 : : - _Py_bswap16(uint16_t)
6 : : - _Py_bswap32(uint32_t)
7 : : - _Py_bswap64(uint64_t)
8 : : */
9 : :
10 : : #ifndef Py_INTERNAL_BITUTILS_H
11 : : #define Py_INTERNAL_BITUTILS_H
12 : : #ifdef __cplusplus
13 : : extern "C" {
14 : : #endif
15 : :
16 : : #ifndef Py_BUILD_CORE
17 : : # error "this header requires Py_BUILD_CORE define"
18 : : #endif
19 : :
20 : : #if defined(__GNUC__) \
21 : : && ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 8))
22 : : /* __builtin_bswap16() is available since GCC 4.8,
23 : : __builtin_bswap32() is available since GCC 4.3,
24 : : __builtin_bswap64() is available since GCC 4.3. */
25 : : # define _PY_HAVE_BUILTIN_BSWAP
26 : : #endif
27 : :
28 : : #ifdef _MSC_VER
29 : : /* Get _byteswap_ushort(), _byteswap_ulong(), _byteswap_uint64() */
30 : : # include <intrin.h>
31 : : #endif
32 : :
33 : : static inline uint16_t
34 : 17 : _Py_bswap16(uint16_t word)
35 : : {
36 : : #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap16)
37 : 17 : return __builtin_bswap16(word);
38 : : #elif defined(_MSC_VER)
39 : : Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned short));
40 : : return _byteswap_ushort(word);
41 : : #else
42 : : // Portable implementation which doesn't rely on circular bit shift
43 : : return ( ((word & UINT16_C(0x00FF)) << 8)
44 : : | ((word & UINT16_C(0xFF00)) >> 8));
45 : : #endif
46 : : }
47 : :
48 : : static inline uint32_t
49 : 1508338 : _Py_bswap32(uint32_t word)
50 : : {
51 : : #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap32)
52 : 1508338 : return __builtin_bswap32(word);
53 : : #elif defined(_MSC_VER)
54 : : Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned long));
55 : : return _byteswap_ulong(word);
56 : : #else
57 : : // Portable implementation which doesn't rely on circular bit shift
58 : : return ( ((word & UINT32_C(0x000000FF)) << 24)
59 : : | ((word & UINT32_C(0x0000FF00)) << 8)
60 : : | ((word & UINT32_C(0x00FF0000)) >> 8)
61 : : | ((word & UINT32_C(0xFF000000)) >> 24));
62 : : #endif
63 : : }
64 : :
65 : : static inline uint64_t
66 : 754845 : _Py_bswap64(uint64_t word)
67 : : {
68 : : #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap64)
69 : 754845 : return __builtin_bswap64(word);
70 : : #elif defined(_MSC_VER)
71 : : return _byteswap_uint64(word);
72 : : #else
73 : : // Portable implementation which doesn't rely on circular bit shift
74 : : return ( ((word & UINT64_C(0x00000000000000FF)) << 56)
75 : : | ((word & UINT64_C(0x000000000000FF00)) << 40)
76 : : | ((word & UINT64_C(0x0000000000FF0000)) << 24)
77 : : | ((word & UINT64_C(0x00000000FF000000)) << 8)
78 : : | ((word & UINT64_C(0x000000FF00000000)) >> 8)
79 : : | ((word & UINT64_C(0x0000FF0000000000)) >> 24)
80 : : | ((word & UINT64_C(0x00FF000000000000)) >> 40)
81 : : | ((word & UINT64_C(0xFF00000000000000)) >> 56));
82 : : #endif
83 : : }
84 : :
85 : :
86 : : // Population count: count the number of 1's in 'x'
87 : : // (number of bits set to 1), also known as the hamming weight.
88 : : //
89 : : // Implementation note. CPUID is not used, to test if x86 POPCNT instruction
90 : : // can be used, to keep the implementation simple. For example, Visual Studio
91 : : // __popcnt() is not used this reason. The clang and GCC builtin function can
92 : : // use the x86 POPCNT instruction if the target architecture has SSE4a or
93 : : // newer.
94 : : static inline int
95 : 364864 : _Py_popcount32(uint32_t x)
96 : : {
97 : : #if (defined(__clang__) || defined(__GNUC__))
98 : :
99 : : #if SIZEOF_INT >= 4
100 : : Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int));
101 : 364864 : return __builtin_popcount(x);
102 : : #else
103 : : // The C standard guarantees that unsigned long will always be big enough
104 : : // to hold a uint32_t value without losing information.
105 : : Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long));
106 : : return __builtin_popcountl(x);
107 : : #endif
108 : :
109 : : #else
110 : : // 32-bit SWAR (SIMD Within A Register) popcount
111 : :
112 : : // Binary: 0 1 0 1 ...
113 : : const uint32_t M1 = 0x55555555;
114 : : // Binary: 00 11 00 11. ..
115 : : const uint32_t M2 = 0x33333333;
116 : : // Binary: 0000 1111 0000 1111 ...
117 : : const uint32_t M4 = 0x0F0F0F0F;
118 : :
119 : : // Put count of each 2 bits into those 2 bits
120 : : x = x - ((x >> 1) & M1);
121 : : // Put count of each 4 bits into those 4 bits
122 : : x = (x & M2) + ((x >> 2) & M2);
123 : : // Put count of each 8 bits into those 8 bits
124 : : x = (x + (x >> 4)) & M4;
125 : : // Sum of the 4 byte counts.
126 : : // Take care when considering changes to the next line. Portability and
127 : : // correctness are delicate here, thanks to C's "integer promotions" (C99
128 : : // §6.3.1.1p2). On machines where the `int` type has width greater than 32
129 : : // bits, `x` will be promoted to an `int`, and following C's "usual
130 : : // arithmetic conversions" (C99 §6.3.1.8), the multiplication will be
131 : : // performed as a multiplication of two `unsigned int` operands. In this
132 : : // case it's critical that we cast back to `uint32_t` in order to keep only
133 : : // the least significant 32 bits. On machines where the `int` type has
134 : : // width no greater than 32, the multiplication is of two 32-bit unsigned
135 : : // integer types, and the (uint32_t) cast is a no-op. In both cases, we
136 : : // avoid the risk of undefined behaviour due to overflow of a
137 : : // multiplication of signed integer types.
138 : : return (uint32_t)(x * 0x01010101U) >> 24;
139 : : #endif
140 : : }
141 : :
142 : :
143 : : // Return the index of the most significant 1 bit in 'x'. This is the smallest
144 : : // integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0.
145 : : static inline int
146 : 22545182 : _Py_bit_length(unsigned long x)
147 : : {
148 : : #if (defined(__clang__) || defined(__GNUC__))
149 [ + + ]: 22545182 : if (x != 0) {
150 : : // __builtin_clzl() is available since GCC 3.4.
151 : : // Undefined behavior for x == 0.
152 : 22545181 : return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x);
153 : : }
154 : : else {
155 : 1 : return 0;
156 : : }
157 : : #elif defined(_MSC_VER)
158 : : // _BitScanReverse() is documented to search 32 bits.
159 : : Py_BUILD_ASSERT(sizeof(unsigned long) <= 4);
160 : : unsigned long msb;
161 : : if (_BitScanReverse(&msb, x)) {
162 : : return (int)msb + 1;
163 : : }
164 : : else {
165 : : return 0;
166 : : }
167 : : #else
168 : : const int BIT_LENGTH_TABLE[32] = {
169 : : 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
170 : : 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
171 : : };
172 : : int msb = 0;
173 : : while (x >= 32) {
174 : : msb += 6;
175 : : x >>= 6;
176 : : }
177 : : msb += BIT_LENGTH_TABLE[x];
178 : : return msb;
179 : : #endif
180 : : }
181 : :
182 : :
183 : : #ifdef __cplusplus
184 : : }
185 : : #endif
186 : : #endif /* !Py_INTERNAL_BITUTILS_H */
|