all repos — mgba @ 9197e5a1fbc8574ca26ce9d73b575cdeaebedccb

mGBA Game Boy Advance Emulator

src/third-party/discord-rpc/include/rapidjson/internal/diyfp.h (view raw)

  1// Tencent is pleased to support the open source community by making RapidJSON available.
  2// 
  3// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
  4//
  5// Licensed under the MIT License (the "License"); you may not use this file except
  6// in compliance with the License. You may obtain a copy of the License at
  7//
  8// http://opensource.org/licenses/MIT
  9//
 10// Unless required by applicable law or agreed to in writing, software distributed 
 11// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
 12// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
 13// specific language governing permissions and limitations under the License.
 14
 15// This is a C++ header-only implementation of Grisu2 algorithm from the publication:
 16// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
 17// integers." ACM Sigplan Notices 45.6 (2010): 233-243.
 18
 19#ifndef RAPIDJSON_DIYFP_H_
 20#define RAPIDJSON_DIYFP_H_
 21
 22#include "../rapidjson.h"
 23
 24#if defined(_MSC_VER) && defined(_M_AMD64)
 25#include <intrin.h>
 26#pragma intrinsic(_BitScanReverse64)
 27#pragma intrinsic(_umul128)
 28#endif
 29
 30RAPIDJSON_NAMESPACE_BEGIN
 31namespace internal {
 32
 33#ifdef __GNUC__
 34RAPIDJSON_DIAG_PUSH
 35RAPIDJSON_DIAG_OFF(effc++)
 36#endif
 37
 38#ifdef __clang__
 39RAPIDJSON_DIAG_PUSH
 40RAPIDJSON_DIAG_OFF(padded)
 41#endif
 42
 43struct DiyFp {
 44    DiyFp() : f(), e() {}
 45
 46    DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
 47
 48    explicit DiyFp(double d) {
 49        union {
 50            double d;
 51            uint64_t u64;
 52        } u = { d };
 53
 54        int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
 55        uint64_t significand = (u.u64 & kDpSignificandMask);
 56        if (biased_e != 0) {
 57            f = significand + kDpHiddenBit;
 58            e = biased_e - kDpExponentBias;
 59        } 
 60        else {
 61            f = significand;
 62            e = kDpMinExponent + 1;
 63        }
 64    }
 65
 66    DiyFp operator-(const DiyFp& rhs) const {
 67        return DiyFp(f - rhs.f, e);
 68    }
 69
 70    DiyFp operator*(const DiyFp& rhs) const {
 71#if defined(_MSC_VER) && defined(_M_AMD64)
 72        uint64_t h;
 73        uint64_t l = _umul128(f, rhs.f, &h);
 74        if (l & (uint64_t(1) << 63)) // rounding
 75            h++;
 76        return DiyFp(h, e + rhs.e + 64);
 77#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
 78        __extension__ typedef unsigned __int128 uint128;
 79        uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
 80        uint64_t h = static_cast<uint64_t>(p >> 64);
 81        uint64_t l = static_cast<uint64_t>(p);
 82        if (l & (uint64_t(1) << 63)) // rounding
 83            h++;
 84        return DiyFp(h, e + rhs.e + 64);
 85#else
 86        const uint64_t M32 = 0xFFFFFFFF;
 87        const uint64_t a = f >> 32;
 88        const uint64_t b = f & M32;
 89        const uint64_t c = rhs.f >> 32;
 90        const uint64_t d = rhs.f & M32;
 91        const uint64_t ac = a * c;
 92        const uint64_t bc = b * c;
 93        const uint64_t ad = a * d;
 94        const uint64_t bd = b * d;
 95        uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
 96        tmp += 1U << 31;  /// mult_round
 97        return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
 98#endif
 99    }
100
101    DiyFp Normalize() const {
102#if defined(_MSC_VER) && defined(_M_AMD64)
103        unsigned long index;
104        _BitScanReverse64(&index, f);
105        return DiyFp(f << (63 - index), e - (63 - index));
106#elif defined(__GNUC__) && __GNUC__ >= 4
107        int s = __builtin_clzll(f);
108        return DiyFp(f << s, e - s);
109#else
110        DiyFp res = *this;
111        while (!(res.f & (static_cast<uint64_t>(1) << 63))) {
112            res.f <<= 1;
113            res.e--;
114        }
115        return res;
116#endif
117    }
118
119    DiyFp NormalizeBoundary() const {
120        DiyFp res = *this;
121        while (!(res.f & (kDpHiddenBit << 1))) {
122            res.f <<= 1;
123            res.e--;
124        }
125        res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
126        res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
127        return res;
128    }
129
130    void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
131        DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
132        DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
133        mi.f <<= mi.e - pl.e;
134        mi.e = pl.e;
135        *plus = pl;
136        *minus = mi;
137    }
138
139    double ToDouble() const {
140        union {
141            double d;
142            uint64_t u64;
143        }u;
144        const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 : 
145            static_cast<uint64_t>(e + kDpExponentBias);
146        u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
147        return u.d;
148    }
149
150    static const int kDiySignificandSize = 64;
151    static const int kDpSignificandSize = 52;
152    static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
153    static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
154    static const int kDpMinExponent = -kDpExponentBias;
155    static const int kDpDenormalExponent = -kDpExponentBias + 1;
156    static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
157    static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
158    static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
159
160    uint64_t f;
161    int e;
162};
163
164inline DiyFp GetCachedPowerByIndex(size_t index) {
165    // 10^-348, 10^-340, ..., 10^340
166    static const uint64_t kCachedPowers_F[] = {
167        RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
168        RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
169        RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
170        RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
171        RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
172        RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
173        RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
174        RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
175        RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
176        RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
177        RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
178        RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
179        RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
180        RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
181        RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
182        RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
183        RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
184        RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
185        RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
186        RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
187        RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
188        RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
189        RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
190        RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
191        RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
192        RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
193        RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
194        RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
195        RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
196        RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
197        RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
198        RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
199        RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
200        RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
201        RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
202        RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
203        RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
204        RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
205        RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
206        RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
207        RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
208        RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
209        RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
210        RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
211    };
212    static const int16_t kCachedPowers_E[] = {
213        -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007,  -980,
214        -954,  -927,  -901,  -874,  -847,  -821,  -794,  -768,  -741,  -715,
215        -688,  -661,  -635,  -608,  -582,  -555,  -529,  -502,  -475,  -449,
216        -422,  -396,  -369,  -343,  -316,  -289,  -263,  -236,  -210,  -183,
217        -157,  -130,  -103,   -77,   -50,   -24,     3,    30,    56,    83,
218        109,   136,   162,   189,   216,   242,   269,   295,   322,   348,
219        375,   402,   428,   455,   481,   508,   534,   561,   588,   614,
220        641,   667,   694,   720,   747,   774,   800,   827,   853,   880,
221        907,   933,   960,   986,  1013,  1039,  1066
222    };
223    return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
224}
225    
226inline DiyFp GetCachedPower(int e, int* K) {
227
228    //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
229    double dk = (-61 - e) * 0.30102999566398114 + 347;  // dk must be positive, so can do ceiling in positive
230    int k = static_cast<int>(dk);
231    if (dk - k > 0.0)
232        k++;
233
234    unsigned index = static_cast<unsigned>((k >> 3) + 1);
235    *K = -(-348 + static_cast<int>(index << 3));    // decimal exponent no need lookup table
236
237    return GetCachedPowerByIndex(index);
238}
239
240inline DiyFp GetCachedPower10(int exp, int *outExp) {
241     unsigned index = (static_cast<unsigned>(exp) + 348u) / 8u;
242     *outExp = -348 + static_cast<int>(index) * 8;
243     return GetCachedPowerByIndex(index);
244 }
245
246#ifdef __GNUC__
247RAPIDJSON_DIAG_POP
248#endif
249
250#ifdef __clang__
251RAPIDJSON_DIAG_POP
252RAPIDJSON_DIAG_OFF(padded)
253#endif
254
255} // namespace internal
256RAPIDJSON_NAMESPACE_END
257
258#endif // RAPIDJSON_DIYFP_H_