|
| 1 | +#pragma once |
| 2 | +/* |
| 3 | +* PCG Random Number Generation for C++ |
| 4 | +* |
| 5 | +* Copyright 2014-2017 Melissa O'Neill <oneill@pcg-random.org>, |
| 6 | +* and the PCG Project contributors. |
| 7 | +* |
| 8 | +* SPDX-License-Identifier: (Apache-2.0 OR MIT) |
| 9 | +* |
| 10 | +* Licensed under the Apache License, Version 2.0 (provided in |
| 11 | +* LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0) |
| 12 | +* or under the MIT license (provided in LICENSE-MIT.txt and at |
| 13 | +* http://opensource.org/licenses/MIT), at your option. This file may not |
| 14 | +* be copied, modified, or distributed except according to those terms. |
| 15 | +* |
| 16 | +* Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either |
| 17 | +* express or implied. See your chosen license for details. |
| 18 | +* |
| 19 | +* For additional information about the PCG random number generation scheme, |
| 20 | +* visit http://www.pcg-random.org/. |
| 21 | +*/ |
| 22 | + |
| 23 | +/* |
| 24 | +* This file provides support code that is useful for random-number generation |
| 25 | +* but not specific to the PCG generation scheme, including: |
| 26 | +* - 128-bit int support for platforms where it isn't available natively |
| 27 | +* - bit twiddling operations |
| 28 | +* - I/O of 128-bit and 8-bit integers |
| 29 | +* - Handling the evilness of SeedSeq |
| 30 | +* - Support for efficiently producing random numbers less than a given |
| 31 | +* bound |
| 32 | +*/ |
| 33 | + |
/*
 * Abstractions for compiler-specific directives
 */

#ifdef __GNUC__
    // GCC/Clang: keep rarely-taken helpers out of line.
    #define PCG_NOINLINE __attribute__((noinline))
#else
    #define PCG_NOINLINE
#endif

#ifdef _MSC_VER
    #define PCG_ALWAYS_INLINE __forceinline
#elif defined(__GNUC__)
    // GCC/Clang require the `inline` specifier together with the
    // always_inline attribute; without it the compiler emits
    // "always_inline function might not be inlinable" (-Wattributes)
    // and may ignore the attribute.  `defined(__GNUC__)` (rather than
    // bare `__GNUC__`) also keeps -Wundef quiet on other compilers.
    #define PCG_ALWAYS_INLINE inline __attribute__((always_inline))
#else
    #define PCG_ALWAYS_INLINE inline
#endif
| 51 | + |
| 52 | + |
/*
 * Some members of the PCG library use 128-bit math.  When compiling on 64-bit
 * platforms, both GCC and Clang provide 128-bit integer types that are ideal
 * for the job.
 *
 * On 32-bit platforms (or with other compilers), we fall back to a C++
 * class that provides 128-bit unsigned integers instead.  It may seem
 * like we're reinventing the wheel here, because libraries already exist
 * that support large integers, but most existing libraries provide a very
 * generic multiprecision code, but here we're operating at a fixed size.
 * Also, most other libraries are fairly heavyweight.  So we use a direct
 * implementation.  Sadly, it's much slower than hand-coded assembly or
 * direct CPU support.
 *
 * Setting PCG_FORCE_EMULATED_128BIT_MATH nonzero forces the emulated
 * fallback even when a native 128-bit type is available (useful for
 * testing the emulation).
 */
// Branch 1: GCC/Clang native 128-bit integer support.
#if __SIZEOF_INT128__ && !PCG_FORCE_EMULATED_128BIT_MATH
namespace pcg_extras {
    using pcg128_t = __uint128_t;
}
// Build a 128-bit constant from two 64-bit halves via shift-and-add.
#define PCG_128BIT_CONSTANT(high,low) \
        ((pcg_extras::pcg128_t(high) << 64) + low)
// Branch 2: MSVC STL's internal 128-bit integer class.
#elif __has_include(<__msvc_int128.hpp>)
#include <__msvc_int128.hpp>
namespace pcg_extras {
    using pcg128_t = std::_Unsigned128;
}
// NOTE: argument order is swapped here — std::_Unsigned128 is
// constructed as (low, high).
#define PCG_128BIT_CONSTANT(high,low) \
        pcg_extras::pcg128_t(low, high)
// Branch 3: portable emulated 128-bit math (class defined in
// pcg_uint128.hpp, constructed as (high, low)).
#else
#include "pcg_uint128.hpp"
#define PCG_128BIT_CONSTANT(high,low) \
        pcg_extras::pcg128_t(high,low)
// Lets client code detect that 128-bit math is emulated (and slower).
#define PCG_EMULATED_128BIT_MATH 1
#endif
| 87 | + |
| 88 | + |
| 89 | +namespace pcg_extras { |
| 90 | + |
| 91 | +/* |
| 92 | + * We often need to represent a "number of bits". When used normally, these |
| 93 | + * numbers are never greater than 128, so an unsigned char is plenty. |
| 94 | + * If you're using a nonstandard generator of a larger size, you can set |
| 95 | + * PCG_BITCOUNT_T to have it define it as a larger size. (Some compilers |
| 96 | + * might produce faster code if you set it to an unsigned int.) |
| 97 | + */ |
| 98 | + |
| 99 | +#ifndef PCG_BITCOUNT_T |
| 100 | + using bitcount_t = uint8_t; |
| 101 | +#else |
| 102 | + using bitcount_t = PCG_BITCOUNT_T; |
| 103 | +#endif |
| 104 | + |
| 105 | +/* |
| 106 | + * Useful bitwise operations. |
| 107 | + */ |
| 108 | + |
| 109 | +/* |
| 110 | + * XorShifts are invertable, but they are someting of a pain to invert. |
| 111 | + * This function backs them out. It's used by the whacky "inside out" |
| 112 | + * generator defined later. |
| 113 | + */ |
| 114 | + |
| 115 | + template <typename itype> |
| 116 | + inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift) |
| 117 | + { |
| 118 | + if (2*shift >= bits) { |
| 119 | + return x ^ (x >> shift); |
| 120 | + } |
| 121 | + itype lowmask1 = (itype(1U) << (bits - shift*2)) - 1; |
| 122 | + itype highmask1 = ~lowmask1; |
| 123 | + itype top1 = x; |
| 124 | + itype bottom1 = x & lowmask1; |
| 125 | + top1 ^= top1 >> shift; |
| 126 | + top1 &= highmask1; |
| 127 | + x = top1 | bottom1; |
| 128 | + itype lowmask2 = (itype(1U) << (bits - shift)) - 1; |
| 129 | + itype bottom2 = x & lowmask2; |
| 130 | + bottom2 = unxorshift(bottom2, bits - shift, shift); |
| 131 | + bottom2 &= lowmask1; |
| 132 | + return top1 | bottom2; |
| 133 | + } |
| 134 | + |
| 135 | + /* |
| 136 | + * Rotate left and right. |
| 137 | + * |
| 138 | + * In ideal world, compilers would spot idiomatic rotate code and convert it |
| 139 | + * to a rotate instruction. Of course, opinions vary on what the correct |
| 140 | + * idiom is and how to spot it. For clang, sometimes it generates better |
| 141 | + * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM. |
| 142 | + */ |
| 143 | + |
| 144 | + template <typename itype> |
| 145 | + inline itype rotl(itype value, bitcount_t rot) |
| 146 | + { |
| 147 | + constexpr bitcount_t bits = sizeof(itype) * 8; |
| 148 | + constexpr bitcount_t mask = bits - 1; |
| 149 | + #if PCG_USE_ZEROCHECK_ROTATE_IDIOM |
| 150 | + return rot ? (value << rot) | (value >> (bits - rot)) : value; |
| 151 | + #else |
| 152 | + return (value << rot) | (value >> ((- rot) & mask)); |
| 153 | + #endif |
| 154 | + } |
| 155 | + |
| 156 | + template <typename itype> |
| 157 | + inline itype rotr(itype value, bitcount_t rot) |
| 158 | + { |
| 159 | + constexpr bitcount_t bits = sizeof(itype) * 8; |
| 160 | + constexpr bitcount_t mask = bits - 1; |
| 161 | + #if PCG_USE_ZEROCHECK_ROTATE_IDIOM |
| 162 | + return rot ? (value >> rot) | (value << (bits - rot)) : value; |
| 163 | + #else |
| 164 | + return (value >> rot) | (value << ((- rot) & mask)); |
| 165 | + #endif |
| 166 | + } |
| 167 | + |
| 168 | + /* Unfortunately, both Clang and GCC sometimes perform poorly when it comes |
| 169 | + * to properly recognizing idiomatic rotate code, so for we also provide |
| 170 | + * assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss. |
| 171 | + * (I hope that these compilers get better so that this code can die.) |
| 172 | + * |
| 173 | + * These overloads will be preferred over the general template code above. |
| 174 | + */ |
| 175 | + #if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__ || __i386__) |
| 176 | + |
| 177 | + inline uint8_t rotr(uint8_t value, bitcount_t rot) |
| 178 | + { |
| 179 | + asm ("rorb %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); |
| 180 | + return value; |
| 181 | + } |
| 182 | + |
| 183 | + inline uint16_t rotr(uint16_t value, bitcount_t rot) |
| 184 | + { |
| 185 | + asm ("rorw %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); |
| 186 | + return value; |
| 187 | + } |
| 188 | + |
| 189 | + inline uint32_t rotr(uint32_t value, bitcount_t rot) |
| 190 | + { |
| 191 | + asm ("rorl %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); |
| 192 | + return value; |
| 193 | + } |
| 194 | + |
| 195 | + #if __x86_64__ |
| 196 | + inline uint64_t rotr(uint64_t value, bitcount_t rot) |
| 197 | + { |
| 198 | + asm ("rorq %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); |
| 199 | + return value; |
| 200 | + } |
| 201 | + #endif // __x86_64__ |
| 202 | + |
| 203 | + #elif defined(_MSC_VER) |
| 204 | + // Use MSVC++ bit rotation intrinsics |
| 205 | + |
| 206 | + #pragma intrinsic(_rotr, _rotr64, _rotr8, _rotr16) |
| 207 | + |
| 208 | + inline uint8_t rotr(uint8_t value, bitcount_t rot) |
| 209 | + { |
| 210 | + return _rotr8(value, rot); |
| 211 | + } |
| 212 | + |
| 213 | + inline uint16_t rotr(uint16_t value, bitcount_t rot) |
| 214 | + { |
| 215 | + return _rotr16(value, rot); |
| 216 | + } |
| 217 | + |
| 218 | + inline uint32_t rotr(uint32_t value, bitcount_t rot) |
| 219 | + { |
| 220 | + return _rotr(value, rot); |
| 221 | + } |
| 222 | + |
| 223 | + inline uint64_t rotr(uint64_t value, bitcount_t rot) |
| 224 | + { |
| 225 | + return _rotr64(value, rot); |
| 226 | + } |
| 227 | + |
| 228 | + #endif // PCG_USE_INLINE_ASM |
| 229 | + |
| 230 | +} |
0 commit comments