diff --git a/external_tools/WHFC b/external_tools/WHFC index d49400981..5ae2e3664 160000 --- a/external_tools/WHFC +++ b/external_tools/WHFC @@ -1 +1 @@ -Subproject commit d494009813c7a7b2432adc5c3f08189fca37405b +Subproject commit 5ae2e3664391ca0db7fab2c82973e98c48937a08 diff --git a/external_tools/kahypar-shared-resources b/external_tools/kahypar-shared-resources index 13585e093..6d5c8e244 160000 --- a/external_tools/kahypar-shared-resources +++ b/external_tools/kahypar-shared-resources @@ -1 +1 @@ -Subproject commit 13585e09300c3d0ab80568c84e72543d1b8eb084 +Subproject commit 6d5c8e2444e4310667ec1925e995f26179d7ee88 diff --git a/external_tools/pcg/pcg_extras.hpp b/external_tools/pcg/pcg_extras.hpp deleted file mode 100644 index 23a416fde..000000000 --- a/external_tools/pcg/pcg_extras.hpp +++ /dev/null @@ -1,637 +0,0 @@ -/* - * PCG Random Number Generation for C++ - * - * Copyright 2014 Melissa O'Neill - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * For additional information about the PCG random number generation scheme, - * including its license and other licensing options, visit - * - * http://www.pcg-random.org - */ - -/* - * This file provides support code that is useful for random-number generation - * but not specific to the PCG generation scheme, including: - * - 128-bit int support for platforms where it isn't available natively - * - bit twiddling operations - * - I/O of 128-bit and 8-bit integers - * - Handling the evilness of SeedSeq - * - Support for efficiently producing random numbers less than a given - * bound - */ - -#ifndef PCG_EXTRAS_HPP_INCLUDED -#define PCG_EXTRAS_HPP_INCLUDED 1 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef __GNUC__ - #include -#endif - -/* - * Abstractions for compiler-specific directives - */ - -#ifdef __GNUC__ - #define PCG_NOINLINE __attribute__((noinline)) -#else - #define PCG_NOINLINE -#endif - -/* - * Some members of the PCG library use 128-bit math. When compiling on 64-bit - * platforms, both GCC and Clang provide 128-bit integer types that are ideal - * for the job. - * - * On 32-bit platforms (or with other compilers), we fall back to a C++ - * class that provides 128-bit unsigned integers instead. It may seem - * like we're reinventing the wheel here, because libraries already exist - * that support large integers, but most existing libraries provide a very - * generic multiprecision code, but here we're operating at a fixed size. - * Also, most other libraries are fairly heavyweight. So we use a direct - * implementation. Sadly, it's much slower than hand-coded assembly or - * direct CPU support. - * - */ -#if __SIZEOF_INT128__ - namespace pcg_extras { - typedef __uint128_t pcg128_t; - } - #define PCG_128BIT_CONSTANT(high,low) \ - ((pcg128_t(high) << 64) + low) -#else - #include "pcg_uint128.hpp" - namespace pcg_extras { - typedef pcg_extras::uint_x4 pcg128_t; - } - #define PCG_128BIT_CONSTANT(high,low) \ - pcg128_t(high,low) - #define PCG_EMULATED_128BIT_MATH 1 -#endif - - -namespace pcg_extras { - -/* - * We often need to represent a "number of bits". When used normally, these - * numbers are never greater than 128, so an unsigned char is plenty. - * If you're using a nonstandard generator of a larger size, you can set - * PCG_BITCOUNT_T to have it define it as a larger size. (Some compilers - * might produce faster code if you set it to an unsigned int.) - */ - -#ifndef PCG_BITCOUNT_T - typedef uint8_t bitcount_t; -#else - typedef PCG_BITCOUNT_T bitcount_t; -#endif - -/* - * C++ requires us to be able to serialize RNG state by printing or reading - * it from a stream. Because we use 128-bit ints, we also need to be able - * ot print them, so here is code to do so. - * - * This code provides enough functionality to print 128-bit ints in decimal - * and zero-padded in hex. It's not a full-featured implementation. - */ - -template -std::basic_ostream& -operator<<(std::basic_ostream& out, pcg128_t value) -{ - auto desired_base = out.flags() & out.basefield; - bool want_hex = desired_base == out.hex; - - if (want_hex) { - uint64_t highpart = uint64_t(value >> 64); - uint64_t lowpart = uint64_t(value); - auto desired_width = out.width(); - if (desired_width > 16) { - out.width(desired_width - 16); - } - if (highpart != 0 || desired_width > 16) - out << highpart; - CharT oldfill; - if (highpart != 0) { - out.width(16); - oldfill = out.fill('0'); - } - auto oldflags = out.setf(decltype(desired_base){}, out.showbase); - out << lowpart; - out.setf(oldflags); - if (highpart != 0) { - out.fill(oldfill); - } - return out; - } - constexpr size_t MAX_CHARS_128BIT = 40; - - char buffer[MAX_CHARS_128BIT]; - char* pos = buffer+sizeof(buffer); - *(--pos) = '\0'; - constexpr auto BASE = pcg128_t(10ULL); - do { - auto div = value / BASE; - auto mod = uint32_t(value - (div * BASE)); - *(--pos) = '0' + mod; - value = div; - } while(value != pcg128_t(0ULL)); - return out << pos; -} - -template -std::basic_istream& -operator>>(std::basic_istream& in, pcg128_t& value) -{ - typename std::basic_istream::sentry s(in); - - if (!s) - return in; - - constexpr auto BASE = pcg128_t(10ULL); - pcg128_t current(0ULL); - bool did_nothing = true; - bool overflow = false; - for(;;) { - CharT wide_ch = in.get(); - if (!in.good()) - break; - auto ch = in.narrow(wide_ch, '\0'); - if (ch < '0' || ch > '9') { - in.unget(); - break; - } - did_nothing = false; - pcg128_t digit(uint32_t(ch - '0')); - pcg128_t timesbase = current*BASE; - overflow = overflow || timesbase < current; - current = timesbase + digit; - overflow = overflow || current < digit; - } - - if (did_nothing || overflow) { - in.setstate(std::ios::failbit); - if (overflow) - current = ~pcg128_t(0ULL); - } - - value = current; - - return in; -} - -/* - * Likewise, if people use tiny rngs, we'll be serializing uint8_t. - * If we just used the provided IO operators, they'd read/write chars, - * not ints, so we need to define our own. We *can* redefine this operator - * here because we're in our own namespace. - */ - -template -std::basic_ostream& -operator<<(std::basic_ostream&out, uint8_t value) -{ - return out << uint32_t(value); -} - -template -std::basic_istream& -operator>>(std::basic_istream& in, uint8_t& target) -{ - uint32_t value = 0xdecea5edU; - in >> value; - if (!in && value == 0xdecea5edU) - return in; - if (value > uint8_t(~0)) { - in.setstate(std::ios::failbit); - value = ~0U; - } - target = uint8_t(value); - return in; -} - -/* Unfortunately, the above functions don't get found in preference to the - * built in ones, so we create some more specific overloads that will. - * Ugh. - */ - -inline std::ostream& operator<<(std::ostream& out, uint8_t value) -{ - return pcg_extras::operator<< (out, value); -} - -inline std::istream& operator>>(std::istream& in, uint8_t& value) -{ - return pcg_extras::operator>> (in, value); -} - - - -/* - * Useful bitwise operations. - */ - -/* - * XorShifts are invertable, but they are someting of a pain to invert. - * This function backs them out. It's used by the whacky "inside out" - * generator defined later. - */ - -template -inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift) -{ - if (2*shift >= bits) { - return x ^ (x >> shift); - } - itype lowmask1 = (itype(1U) << (bits - shift*2)) - 1; - itype highmask1 = ~lowmask1; - itype top1 = x; - itype bottom1 = x & lowmask1; - top1 ^= top1 >> shift; - top1 &= highmask1; - x = top1 | bottom1; - itype lowmask2 = (itype(1U) << (bits - shift)) - 1; - itype bottom2 = x & lowmask2; - bottom2 = unxorshift(bottom2, bits - shift, shift); - bottom2 &= lowmask1; - return top1 | bottom2; -} - -/* - * Rotate left and right. - * - * In ideal world, compilers would spot idiomatic rotate code and convert it - * to a rotate instruction. Of course, opinions vary on what the correct - * idiom is and how to spot it. For clang, sometimes it generates better - * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM. - */ - -template -inline itype rotl(itype value, bitcount_t rot) -{ - constexpr bitcount_t bits = sizeof(itype) * 8; - constexpr bitcount_t mask = bits - 1; -#if PCG_USE_ZEROCHECK_ROTATE_IDIOM - return rot ? (value << rot) | (value >> (bits - rot)) : value; -#else - return (value << rot) | (value >> ((- rot) & mask)); -#endif -} - -template -inline itype rotr(itype value, bitcount_t rot) -{ - constexpr bitcount_t bits = sizeof(itype) * 8; - constexpr bitcount_t mask = bits - 1; -#if PCG_USE_ZEROCHECK_ROTATE_IDIOM - return rot ? (value >> rot) | (value << (bits - rot)) : value; -#else - return (value >> rot) | (value << ((- rot) & mask)); -#endif -} - -/* Unfortunately, both Clang and GCC sometimes perform poorly when it comes - * to properly recognizing idiomatic rotate code, so for we also provide - * assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss. - * (I hope that these compilers get better so that this code can die.) - * - * These overloads will be preferred over the general template code above. - */ -#if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__ || __i386__) - -inline uint8_t rotr(uint8_t value, bitcount_t rot) -{ - asm ("rorb %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); - return value; -} - -inline uint16_t rotr(uint16_t value, bitcount_t rot) -{ - asm ("rorw %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); - return value; -} - -inline uint32_t rotr(uint32_t value, bitcount_t rot) -{ - asm ("rorl %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); - return value; -} - -#if __x86_64__ -inline uint64_t rotr(uint64_t value, bitcount_t rot) -{ - asm ("rorq %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); - return value; -} -#endif // __x86_64__ - -#endif // PCG_USE_INLINE_ASM - - -/* - * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of - * 32-bit integers with seed data, but sometimes we want to produce - * larger or smaller integers. - * - * The following code handles this annoyance. - * - * uneven_copy will copy an array of 32-bit ints to an array of larger or - * smaller ints (actually, the code is general it only needing forward - * iterators). The copy is identical to the one that would be performed if - * we just did memcpy on a standard little-endian machine, but works - * regardless of the endian of the machine (or the weirdness of the ints - * involved). - * - * generate_to initializes an array of integers using a SeedSeq - * object. It is given the size as a static constant at compile time and - * tries to avoid memory allocation. If we're filling in 32-bit constants - * we just do it directly. If we need a separate buffer and it's small, - * we allocate it on the stack. Otherwise, we fall back to heap allocation. - * Ugh. - * - * generate_one produces a single value of some integral type using a - * SeedSeq object. - */ - - /* uneven_copy helper, case where destination ints are less than 32 bit. */ - -template -SrcIter uneven_copy_impl( - SrcIter src_first, DestIter dest_first, DestIter dest_last, - std::true_type) -{ - typedef typename std::iterator_traits::value_type src_t; - typedef typename std::iterator_traits::value_type dest_t; - - constexpr bitcount_t SRC_SIZE = sizeof(src_t); - constexpr bitcount_t DEST_SIZE = sizeof(dest_t); - constexpr bitcount_t DEST_BITS = DEST_SIZE * 8; - constexpr bitcount_t SCALE = SRC_SIZE / DEST_SIZE; - - size_t count = 0; - src_t value; - - while (dest_first != dest_last) { - if ((count++ % SCALE) == 0) - value = *src_first++; // Get more bits - else - value >>= DEST_BITS; // Move down bits - - *dest_first++ = dest_t(value); // Truncates, ignores high bits. - } - return src_first; -} - - /* uneven_copy helper, case where destination ints are more than 32 bit. */ - -template -SrcIter uneven_copy_impl( - SrcIter src_first, DestIter dest_first, DestIter dest_last, - std::false_type) -{ - typedef typename std::iterator_traits::value_type src_t; - typedef typename std::iterator_traits::value_type dest_t; - - constexpr auto SRC_SIZE = sizeof(src_t); - constexpr auto SRC_BITS = SRC_SIZE * 8; - constexpr auto DEST_SIZE = sizeof(dest_t); - constexpr auto SCALE = (DEST_SIZE+SRC_SIZE-1) / SRC_SIZE; - - while (dest_first != dest_last) { - dest_t value(0UL); - unsigned int shift = 0; - - for (size_t i = 0; i < SCALE; ++i) { - value |= dest_t(*src_first++) << shift; - shift += SRC_BITS; - } - - *dest_first++ = value; - } - return src_first; -} - -/* uneven_copy, call the right code for larger vs. smaller */ - -template -inline SrcIter uneven_copy(SrcIter src_first, - DestIter dest_first, DestIter dest_last) -{ - typedef typename std::iterator_traits::value_type src_t; - typedef typename std::iterator_traits::value_type dest_t; - - constexpr bool DEST_IS_SMALLER = sizeof(dest_t) < sizeof(src_t); - - return uneven_copy_impl(src_first, dest_first, dest_last, - std::integral_constant{}); -} - -/* generate_to, fill in a fixed-size array of integral type using a SeedSeq - * (actually works for any random-access iterator) - */ - -template -inline void generate_to_impl(SeedSeq&& generator, DestIter dest, - std::true_type) -{ - generator.generate(dest, dest+size); -} - -template -void generate_to_impl(SeedSeq&& generator, DestIter dest, - std::false_type) -{ - typedef typename std::iterator_traits::value_type dest_t; - constexpr auto DEST_SIZE = sizeof(dest_t); - constexpr auto GEN_SIZE = sizeof(uint32_t); - - constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE; - constexpr size_t FROM_ELEMS = - GEN_IS_SMALLER - ? size * ((DEST_SIZE+GEN_SIZE-1) / GEN_SIZE) - : (size + (GEN_SIZE / DEST_SIZE) - 1) - / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER); - // this odd code ^^^^^^^^^^^^^^^^^ is work-around for - // a bug: http://llvm.org/bugs/show_bug.cgi?id=21287 - - if (FROM_ELEMS <= 1024) { - uint32_t buffer[FROM_ELEMS]; - generator.generate(buffer, buffer+FROM_ELEMS); - uneven_copy(buffer, dest, dest+size); - } else { - uint32_t* buffer = (uint32_t*) malloc(GEN_SIZE * FROM_ELEMS); - generator.generate(buffer, buffer+FROM_ELEMS); - uneven_copy(buffer, dest, dest+size); - free(buffer); - } -} - -template -inline void generate_to(SeedSeq&& generator, DestIter dest) -{ - typedef typename std::iterator_traits::value_type dest_t; - constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t); - - generate_to_impl(std::forward(generator), dest, - std::integral_constant{}); -} - -/* generate_one, produce a value of integral type using a SeedSeq - * (optionally, we can have it produce more than one and pick which one - * we want) - */ - -template -inline UInt generate_one(SeedSeq&& generator) -{ - UInt result[N]; - generate_to(std::forward(generator), result); - return result[i]; -} - -template -auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound) - -> typename RngType::result_type -{ - typedef typename RngType::result_type rtype; - rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound) - % upper_bound; - for (;;) { - rtype r = rng() - RngType::min(); - if (r >= threshold) - return r % upper_bound; - } -} - -template -void shuffle(Iter from, Iter to, RandType&& rng) -{ - typedef typename std::iterator_traits::difference_type delta_t; - auto count = to - from; - while (count > 1) { - delta_t chosen(bounded_rand(rng, count)); - --count; - --to; - using std::swap; - swap(*(from+chosen), *to); - } -} - -/* - * Although std::seed_seq is useful, it isn't everything. Often we want to - * initialize a random-number generator some other way, such as from a random - * device. - * - * Technically, it does not meet the requirements of a SeedSequence because - * it lacks some of the rarely-used member functions (some of which would - * be impossible to provide). However the C++ standard is quite specific - * that actual engines only called the generate method, so it ought not to be - * a problem in practice. - */ - -template -class seed_seq_from { -private: - RngType rng_; - - typedef uint_least32_t result_type; - -public: - template - seed_seq_from(Args&&... args) : - rng_(std::forward(args)...) - { - // Nothing (else) to do... - } - - template - void generate(Iter start, Iter finish) - { - for (auto i = start; i != finish; ++i) - *i = result_type(rng_()); - } - - constexpr size_t size() const - { - return (sizeof(typename RngType::result_type) > sizeof(result_type) - && RngType::max() > ~size_t(0UL)) - ? ~size_t(0UL) - : size_t(RngType::max()); - } -}; - -/* - * Sometimes you might want a distinct seed based on when the program - * was compiled. That way, a particular instance of the program will - * behave the same way, but when recompiled it'll produce a different - * value. - */ - -template -struct static_arbitrary_seed { -private: - static constexpr IntType fnv(IntType hash, const char* pos) { - return *pos == '\0' - ? hash - : fnv((hash * IntType(16777619U)) ^ *pos, (pos+1)); - } - -public: - static constexpr IntType value = fnv(IntType(2166136261U ^ sizeof(IntType)), - __DATE__ __TIME__ __FILE__); -}; - -// Sometimes, when debugging or testing, it's handy to be able print the name -// of a (in human-readable form). This code allows the idiom: -// -// cout << printable_typename() -// -// to print out my_foo_type_t (or its concrete type if it is a synonym) - -template -struct printable_typename {}; - -template -std::ostream& operator<<(std::ostream& out, printable_typename) { - const char *implementation_typename = typeid(T).name(); -#ifdef __GNUC__ - int status; - const char* pretty_name = - abi::__cxa_demangle(implementation_typename, NULL, NULL, &status); - if (status == 0) - out << pretty_name; - free((void*) pretty_name); - if (status == 0) - return out; -#endif - out << implementation_typename; - return out; -} - -} // namespace pcg_extras - -#endif // PCG_EXTRAS_HPP_INCLUDED diff --git a/external_tools/pcg/pcg_random.hpp b/external_tools/pcg/pcg_random.hpp deleted file mode 100644 index 3f04d854e..000000000 --- a/external_tools/pcg/pcg_random.hpp +++ /dev/null @@ -1,1751 +0,0 @@ -/* - * PCG Random Number Generation for C++ - * - * Copyright 2014 Melissa O'Neill - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * For additional information about the PCG random number generation scheme, - * including its license and other licensing options, visit - * - * http://www.pcg-random.org - */ - -/* - * This code provides the reference implementation of the PCG family of - * random number generators. The code is complex because it implements - * - * - several members of the PCG family, specifically members corresponding - * to the output functions: - * - XSH RR (good for 64-bit state, 32-bit output) - * - XSH RS (good for 64-bit state, 32-bit output) - * - XSL RR (good for 128-bit state, 64-bit output) - * - RXS M XS (statistically most powerful generator) - * - XSL RR RR (good for 128-bit state, 128-bit output) - * - and RXS, RXS M, XSH, XSL (mostly for testing) - * - at potentially *arbitrary* bit sizes - * - with four different techniques for random streams (MCG, one-stream - * LCG, settable-stream LCG, unique-stream LCG) - * - and the extended generation schemes allowing arbitrary periods - * - with all features of C++11 random number generation (and more), - * some of which are somewhat painful, including - * - initializing with a SeedSequence which writes 32-bit values - * to memory, even though the state of the generator may not - * use 32-bit values (it might use smaller or larger integers) - * - I/O for RNGs and a prescribed format, which needs to handle - * the issue that 8-bit and 128-bit integers don't have working - * I/O routines (e.g., normally 8-bit = char, not integer) - * - equality and inequality for RNGs - * - and a number of convenience typedefs to mask all the complexity - * - * The code employes a fairly heavy level of abstraction, and has to deal - * with various C++ minutia. If you're looking to learn about how the PCG - * scheme works, you're probably best of starting with one of the other - * codebases (see www.pcg-random.org). But if you're curious about the - * constants for the various output functions used in those other, simpler, - * codebases, this code shows how they are calculated. - * - * On the positive side, at least there are convenience typedefs so that you - * can say - * - * pcg32 myRNG; - * - * rather than: - * - * pcg_detail::engine< - * uint32_t, // Output Type - * uint64_t, // State Type - * pcg_detail::xsh_rr_mixin, true, // Output Func - * pcg_detail::specific_stream, // Stream Kind - * pcg_detail::default_multiplier // LCG Mult - * > myRNG; - * - */ - -#ifndef PCG_RAND_HPP_INCLUDED -#define PCG_RAND_HPP_INCLUDED 1 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * The pcg_extras namespace contains some support code that is likley to - * be useful for a variety of RNGs, including: - * - 128-bit int support for platforms where it isn't available natively - * - bit twiddling operations - * - I/O of 128-bit and 8-bit integers - * - Handling the evilness of SeedSeq - * - Support for efficiently producing random numbers less than a given - * bound - */ - -#include "pcg_extras.hpp" - -namespace pcg_detail { - -using namespace pcg_extras; - -/* - * The LCG generators need some constants to function. This code lets you - * look up the constant by *type*. For example - * - * default_multiplier::multiplier() - * - * gives you the default multipler for 32-bit integers. We use the name - * of the constant and not a generic word like value to allow these classes - * to be used as mixins. - */ - -template -struct default_multiplier { - // Not defined for an arbitrary type -}; - -template -struct default_increment { - // Not defined for an arbitrary type -}; - -#define PCG_DEFINE_CONSTANT(type, what, kind, constant) \ - template <> \ - struct what ## _ ## kind { \ - static constexpr type kind() { \ - return constant; \ - } \ - }; - -PCG_DEFINE_CONSTANT(uint8_t, default, multiplier, 141U) -PCG_DEFINE_CONSTANT(uint8_t, default, increment, 77U) - -PCG_DEFINE_CONSTANT(uint16_t, default, multiplier, 12829U) -PCG_DEFINE_CONSTANT(uint16_t, default, increment, 47989U) - -PCG_DEFINE_CONSTANT(uint32_t, default, multiplier, 747796405U) -PCG_DEFINE_CONSTANT(uint32_t, default, increment, 2891336453U) - -PCG_DEFINE_CONSTANT(uint64_t, default, multiplier, 6364136223846793005ULL) -PCG_DEFINE_CONSTANT(uint64_t, default, increment, 1442695040888963407ULL) - -PCG_DEFINE_CONSTANT(pcg128_t, default, multiplier, - PCG_128BIT_CONSTANT(2549297995355413924ULL,4865540595714422341ULL)) -PCG_DEFINE_CONSTANT(pcg128_t, default, increment, - PCG_128BIT_CONSTANT(6364136223846793005ULL,1442695040888963407ULL)) - - -/* - * Each PCG generator is available in four variants, based on how it applies - * the additive constant for its underlying LCG; the variations are: - * - * single stream - all instances use the same fixed constant, thus - * the RNG always somewhere in same sequence - * mcg - adds zero, resulting in a single stream and reduced - * period - * specific stream - the constant can be changed at any time, selecting - * a different random sequence - * unique stream - the constant is based on the memory addresss of the - * object, thus every RNG has its own unique sequence - * - * This variation is provided though mixin classes which define a function - * value called increment() that returns the nesessary additive constant. - */ - - - -/* - * unique stream - */ - - -template -class unique_stream { -protected: - static constexpr bool is_mcg = false; - - // Is never called, but is provided for symmetry with specific_stream - void set_stream(...) - { - abort(); - } - -public: - typedef itype state_type; - - constexpr itype increment() const { - return itype(reinterpret_cast(this) | 1); - } - - constexpr itype stream() const - { - return increment() >> 1; - } - - static constexpr bool can_specify_stream = false; - - static constexpr size_t streams_pow2() - { - return (sizeof(itype) < sizeof(size_t) ? sizeof(itype) - : sizeof(size_t))*8 - 1u; - } - -protected: - constexpr unique_stream() = default; -}; - - -/* - * no stream (mcg) - */ - -template -class no_stream { -protected: - static constexpr bool is_mcg = true; - - // Is never called, but is provided for symmetry with specific_stream - void set_stream(...) - { - abort(); - } - -public: - typedef itype state_type; - - static constexpr itype increment() { - return 0; - } - - static constexpr bool can_specify_stream = false; - - static constexpr size_t streams_pow2() - { - return 0u; - } - -protected: - constexpr no_stream() = default; -}; - - -/* - * single stream/sequence (oneseq) - */ - -template -class oneseq_stream : public default_increment { -protected: - static constexpr bool is_mcg = false; - - // Is never called, but is provided for symmetry with specific_stream - void set_stream(...) - { - abort(); - } - -public: - typedef itype state_type; - - static constexpr itype stream() - { - return default_increment::increment() >> 1; - } - - static constexpr bool can_specify_stream = false; - - static constexpr size_t streams_pow2() - { - return 0u; - } - -protected: - constexpr oneseq_stream() = default; -}; - - -/* - * specific stream - */ - -template -class specific_stream { -protected: - static constexpr bool is_mcg = false; - - itype inc_ = default_increment::increment(); - -public: - typedef itype state_type; - typedef itype stream_state; - - constexpr itype increment() const { - return inc_; - } - - itype stream() - { - return inc_ >> 1; - } - - void set_stream(itype specific_seq) - { - inc_ = (specific_seq << 1) | 1; - } - - static constexpr bool can_specify_stream = true; - - static constexpr size_t streams_pow2() - { - return (sizeof(itype)*8) - 1u; - } - -protected: - specific_stream() = default; - - specific_stream(itype specific_seq) - : inc_((specific_seq << 1) | itype(1U)) - { - // Nothing (else) to do. - } -}; - - -/* - * This is where it all comes together. This function joins together three - * mixin classes which define - * - the LCG additive constant (the stream) - * - the LCG multiplier - * - the output function - * in addition, we specify the type of the LCG state, and the result type, - * and whether to use the pre-advance version of the state for the output - * (increasing instruction-level parallelism) or the post-advance version - * (reducing register pressure). - * - * Given the high level of parameterization, the code has to use some - * template-metaprogramming tricks to handle some of the suble variations - * involved. - */ - -template , - typename multiplier_mixin = default_multiplier > -class engine : protected output_mixin, - public stream_mixin, - protected multiplier_mixin { -protected: - itype state_; - - struct can_specify_stream_tag {}; - struct no_specifiable_stream_tag {}; - - using stream_mixin::increment; - using multiplier_mixin::multiplier; - -public: - typedef xtype result_type; - typedef itype state_type; - - static constexpr size_t period_pow2() - { - return sizeof(state_type)*8 - 2*stream_mixin::is_mcg; - } - - // It would be nice to use std::numeric_limits for these, but - // we can't be sure that it'd be defined for the 128-bit types. - - static constexpr result_type min() - { - return result_type(0UL); - } - - static constexpr result_type max() - { - return ~result_type(0UL); - } - -protected: - itype bump(itype state) - { - return state * multiplier() + increment(); - } - - itype base_generate() - { - return state_ = bump(state_); - } - - itype base_generate0() - { - itype old_state = state_; - state_ = bump(state_); - return old_state; - } - -public: - result_type operator()() - { - if (output_previous) - return this->output(base_generate0()); - else - return this->output(base_generate()); - } - - result_type operator()(result_type upper_bound) - { - return bounded_rand(*this, upper_bound); - } - -protected: - static itype advance(itype state, itype delta, - itype cur_mult, itype cur_plus); - - static itype distance(itype cur_state, itype newstate, itype cur_mult, - itype cur_plus, itype mask = ~itype(0U)); - - itype distance(itype newstate, itype mask = ~itype(0U)) const - { - return distance(state_, newstate, multiplier(), increment(), mask); - } - -public: - void advance(itype delta) - { - state_ = advance(state_, delta, this->multiplier(), this->increment()); - } - - void backstep(itype delta) - { - advance(-delta); - } - - void discard(itype delta) - { - advance(delta); - } - - bool wrapped() - { - if (stream_mixin::is_mcg) { - // For MCGs, the low order two bits never change. In this - // implementation, we keep them fixed at 3 to make this test - // easier. - return state_ == 3; - } else { - return state_ == 0; - } - } - - engine(itype state = itype(0xcafef00dd15ea5e5ULL)) - : state_(this->is_mcg ? state|state_type(3U) - : bump(state + this->increment())) - { - // Nothing else to do. - } - - // This function may or may not exist. It thus has to be a template - // to use SFINAE; users don't have to worry about its template-ness. - - template - engine(itype state, typename sm::stream_state stream_seed) - : stream_mixin(stream_seed), - state_(this->is_mcg ? state|state_type(3U) - : bump(state + this->increment())) - { - // Nothing else to do. - } - - template - engine(SeedSeq&& seedSeq, typename std::enable_if< - !stream_mixin::can_specify_stream - && !std::is_convertible::value - && !std::is_convertible::value, - no_specifiable_stream_tag>::type = {}) - : engine(generate_one(std::forward(seedSeq))) - { - // Nothing else to do. - } - - template - engine(SeedSeq&& seedSeq, typename std::enable_if< - stream_mixin::can_specify_stream - && !std::is_convertible::value - && !std::is_convertible::value, - can_specify_stream_tag>::type = {}) - : engine(generate_one(seedSeq), - generate_one(seedSeq)) - { - // Nothing else to do. - } - - - template - void seed(Args&&... args) - { - new (this) engine(std::forward(args)...); - } - - template - friend bool operator==(const engine&, - const engine&); - - template - friend itype1 operator-(const engine&, - const engine&); - - template - friend std::basic_ostream& - operator<<(std::basic_ostream& out, - const engine&); - - template - friend std::basic_istream& - operator>>(std::basic_istream& in, - engine& rng); -}; - -template -std::basic_ostream& -operator<<(std::basic_ostream& out, - const engine& rng) -{ - auto orig_flags = out.flags(std::ios_base::dec | std::ios_base::left); - auto space = out.widen(' '); - auto orig_fill = out.fill(); - - out << rng.multiplier() << space - << rng.increment() << space - << rng.state_; - - out.flags(orig_flags); - out.fill(orig_fill); - return out; -} - - -template -std::basic_istream& -operator>>(std::basic_istream& in, - engine& rng) -{ - auto orig_flags = in.flags(std::ios_base::dec | std::ios_base::skipws); - - itype multiplier, increment, state; - in >> multiplier >> increment >> state; - - if (!in.fail()) { - bool good = true; - if (multiplier != rng.multiplier()) { - good = false; - } else if (rng.can_specify_stream) { - rng.set_stream(increment >> 1); - } else if (increment != rng.increment()) { - good = false; - } - if (good) { - rng.state_ = state; - } else { - in.clear(std::ios::failbit); - } - } - - in.flags(orig_flags); - return in; -} - - -template -itype engine::advance( - itype state, itype delta, itype cur_mult, itype cur_plus) -{ - // The method used here is based on Brown, "Random Number Generation - // with Arbitrary Stride,", Transactions of the American Nuclear - // Society (Nov. 1994). The algorithm is very similar to fast - // exponentiation. - // - // Even though delta is an unsigned integer, we can pass a - // signed integer to go backwards, it just goes "the long way round". - - constexpr itype ZERO = 0u; // itype may be a non-trivial types, so - constexpr itype ONE = 1u; // we define some ugly constants. - itype acc_mult = 1; - itype acc_plus = 0; - while (delta > ZERO) { - if (delta & ONE) { - acc_mult *= cur_mult; - acc_plus = acc_plus*cur_mult + cur_plus; - } - cur_plus = (cur_mult+ONE)*cur_plus; - cur_mult *= cur_mult; - delta >>= 1; - } - return acc_mult * state + acc_plus; -} - -template -itype engine::distance( - itype cur_state, itype newstate, itype cur_mult, itype cur_plus, itype mask) -{ - constexpr itype ONE = 1u; // itype could be weird, so use constant - itype the_bit = stream_mixin::is_mcg ? itype(4u) : itype(1u); - itype distance = 0u; - while ((cur_state & mask) != (newstate & mask)) { - if ((cur_state & the_bit) != (newstate & the_bit)) { - cur_state = cur_state * cur_mult + cur_plus; - distance |= the_bit; - } - assert((cur_state & the_bit) == (newstate & the_bit)); - the_bit <<= 1; - cur_plus = (cur_mult+ONE)*cur_plus; - cur_mult *= cur_mult; - } - return stream_mixin::is_mcg ? distance >> 2 : distance; -} - -template -itype operator-(const engine& lhs, - const engine& rhs) -{ - if (lhs.multiplier() != rhs.multiplier() - || lhs.increment() != rhs.increment()) - throw std::logic_error("incomparable generators"); - return rhs.distance(lhs.state_); -} - - -template -bool operator==(const engine& lhs, - const engine& rhs) -{ - return (lhs.multiplier() == rhs.multiplier()) - && (lhs.increment() == rhs.increment()) - && (lhs.state_ == rhs.state_); -} - -template -inline bool operator!=(const engine& lhs, - const engine& rhs) -{ - return !operator==(lhs,rhs); -} - - -template class output_mixin, - bool output_previous = (sizeof(itype) <= 8)> -using oneseq_base = engine, output_previous, - oneseq_stream >; - -template class output_mixin, - bool output_previous = (sizeof(itype) <= 8)> -using unique_base = engine, output_previous, - unique_stream >; - -template class output_mixin, - bool output_previous = (sizeof(itype) <= 8)> -using setseq_base = engine, output_previous, - specific_stream >; - -template class output_mixin, - bool output_previous = (sizeof(itype) <= 8)> -using mcg_base = engine, output_previous, - no_stream >; - -/* - * OUTPUT FUNCTIONS. - * - * These are the core of the PCG generation scheme. They specify how to - * turn the base LCG's internal state into the output value of the final - * generator. - * - * They're implemented as mixin classes. - * - * All of the classes have code that is written to allow it to be applied - * at *arbitrary* bit sizes, although in practice they'll only be used at - * standard sizes supported by C++. - */ - -/* - * XSH RS -- high xorshift, followed by a random shift - * - * Fast. A good performer. - */ - -template -struct xsh_rs_mixin { - static xtype output(itype internal) - { - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t sparebits = bits - xtypebits; - constexpr bitcount_t opbits = - sparebits-5 >= 64 ? 5 - : sparebits-4 >= 32 ? 4 - : sparebits-3 >= 16 ? 3 - : sparebits-2 >= 4 ? 2 - : sparebits-1 >= 1 ? 1 - : 0; - constexpr bitcount_t mask = (1 << opbits) - 1; - constexpr bitcount_t maxrandshift = mask; - constexpr bitcount_t topspare = opbits; - constexpr bitcount_t bottomspare = sparebits - topspare; - constexpr bitcount_t xshift = topspare + (xtypebits+maxrandshift)/2; - bitcount_t rshift = - opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; - internal ^= internal >> xshift; - xtype result = xtype(internal >> (bottomspare - maxrandshift + rshift)); - return result; - } -}; - -/* - * XSH RR -- high xorshift, followed by a random rotate - * - * Fast. A good performer. Slightly better statistically than XSH RS. - */ - -template -struct xsh_rr_mixin { - static xtype output(itype internal) - { - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype)*8); - constexpr bitcount_t sparebits = bits - xtypebits; - constexpr bitcount_t wantedopbits = - xtypebits >= 128 ? 7 - : xtypebits >= 64 ? 6 - : xtypebits >= 32 ? 5 - : xtypebits >= 16 ? 4 - : 3; - constexpr bitcount_t opbits = - sparebits >= wantedopbits ? wantedopbits - : sparebits; - constexpr bitcount_t amplifier = wantedopbits - opbits; - constexpr bitcount_t mask = (1 << opbits) - 1; - constexpr bitcount_t topspare = opbits; - constexpr bitcount_t bottomspare = sparebits - topspare; - constexpr bitcount_t xshift = (topspare + xtypebits)/2; - bitcount_t rot = opbits ? bitcount_t(internal >> (bits - opbits)) & mask - : 0; - bitcount_t amprot = (rot << amplifier) & mask; - internal ^= internal >> xshift; - xtype result = xtype(internal >> bottomspare); - result = rotr(result, amprot); - return result; - } -}; - -/* - * RXS -- random xorshift - */ - -template -struct rxs_mixin { -static xtype output_rxs(itype internal) - { - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype)*8); - constexpr bitcount_t shift = bits - xtypebits; - constexpr bitcount_t extrashift = (xtypebits - shift)/2; - bitcount_t rshift = shift > 64+8 ? (internal >> (bits - 6)) & 63 - : shift > 32+4 ? (internal >> (bits - 5)) & 31 - : shift > 16+2 ? (internal >> (bits - 4)) & 15 - : shift > 8+1 ? (internal >> (bits - 3)) & 7 - : shift > 4+1 ? (internal >> (bits - 2)) & 3 - : shift > 2+1 ? (internal >> (bits - 1)) & 1 - : 0; - internal ^= internal >> (shift + extrashift - rshift); - xtype result = internal >> rshift; - return result; - } -}; - -/* - * RXS M XS -- random xorshift, mcg multiply, fixed xorshift - * - * The most statistically powerful generator, but all those steps - * make it slower than some of the others. We give it the rottenest jobs. - * - * Because it's usually used in contexts where the state type and the - * result type are the same, it is a permutation and is thus invertable. - * We thus provide a function to invert it. This function is used to - * for the "inside out" generator used by the extended generator. - */ - -/* Defined type-based concepts for the multiplication step. They're actually - * all derived by truncating the 128-bit, which was computed to be a good - * "universal" constant. - */ - -template -struct mcg_multiplier { - // Not defined for an arbitrary type -}; - -template -struct mcg_unmultiplier { - // Not defined for an arbitrary type -}; - -PCG_DEFINE_CONSTANT(uint8_t, mcg, multiplier, 217U) -PCG_DEFINE_CONSTANT(uint8_t, mcg, unmultiplier, 105U) - -PCG_DEFINE_CONSTANT(uint16_t, mcg, multiplier, 62169U) -PCG_DEFINE_CONSTANT(uint16_t, mcg, unmultiplier, 28009U) - -PCG_DEFINE_CONSTANT(uint32_t, mcg, multiplier, 277803737U) -PCG_DEFINE_CONSTANT(uint32_t, mcg, unmultiplier, 2897767785U) - -PCG_DEFINE_CONSTANT(uint64_t, mcg, multiplier, 12605985483714917081ULL) -PCG_DEFINE_CONSTANT(uint64_t, mcg, unmultiplier, 15009553638781119849ULL) - -PCG_DEFINE_CONSTANT(pcg128_t, mcg, multiplier, - PCG_128BIT_CONSTANT(17766728186571221404ULL, 12605985483714917081ULL)) -PCG_DEFINE_CONSTANT(pcg128_t, mcg, unmultiplier, - PCG_128BIT_CONSTANT(14422606686972528997ULL, 15009553638781119849ULL)) - - -template -struct rxs_m_xs_mixin { - static xtype output(itype internal) - { - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t opbits = xtypebits >= 128 ? 6 - : xtypebits >= 64 ? 5 - : xtypebits >= 32 ? 4 - : xtypebits >= 16 ? 3 - : 2; - constexpr bitcount_t shift = bits - xtypebits; - constexpr bitcount_t mask = (1 << opbits) - 1; - bitcount_t rshift = - opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; - internal ^= internal >> (opbits + rshift); - internal *= mcg_multiplier::multiplier(); - xtype result = internal >> shift; - result ^= result >> ((2U*xtypebits+2U)/3U); - return result; - } - - static itype unoutput(itype internal) - { - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t opbits = bits >= 128 ? 6 - : bits >= 64 ? 5 - : bits >= 32 ? 4 - : bits >= 16 ? 3 - : 2; - constexpr bitcount_t mask = (1 << opbits) - 1; - - internal = unxorshift(internal, bits, (2U*bits+2U)/3U); - - internal *= mcg_unmultiplier::unmultiplier(); - - bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; - internal = unxorshift(internal, bits, opbits + rshift); - - return internal; - } -}; - - -/* - * RXS M -- random xorshift, mcg multiply - */ - -template -struct rxs_m_mixin { - static xtype output(itype internal) - { - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t opbits = xtypebits >= 128 ? 6 - : xtypebits >= 64 ? 5 - : xtypebits >= 32 ? 4 - : xtypebits >= 16 ? 3 - : 2; - constexpr bitcount_t shift = bits - xtypebits; - constexpr bitcount_t mask = (1 << opbits) - 1; - bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; - internal ^= internal >> (opbits + rshift); - internal *= mcg_multiplier::multiplier(); - xtype result = internal >> shift; - return result; - } -}; - -/* - * XSL RR -- fixed xorshift (to low bits), random rotate - * - * Useful for 128-bit types that are split across two CPU registers. - */ - -template -struct xsl_rr_mixin { - static xtype output(itype internal) - { - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t sparebits = bits - xtypebits; - constexpr bitcount_t wantedopbits = xtypebits >= 128 ? 7 - : xtypebits >= 64 ? 6 - : xtypebits >= 32 ? 5 - : xtypebits >= 16 ? 4 - : 3; - constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits - : sparebits; - constexpr bitcount_t amplifier = wantedopbits - opbits; - constexpr bitcount_t mask = (1 << opbits) - 1; - constexpr bitcount_t topspare = sparebits; - constexpr bitcount_t bottomspare = sparebits - topspare; - constexpr bitcount_t xshift = (topspare + xtypebits) / 2; - - bitcount_t rot = - opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; - bitcount_t amprot = (rot << amplifier) & mask; - internal ^= internal >> xshift; - xtype result = xtype(internal >> bottomspare); - result = rotr(result, amprot); - return result; - } -}; - - -/* - * XSL RR RR -- fixed xorshift (to low bits), random rotate (both parts) - * - * Useful for 128-bit types that are split across two CPU registers. - * If you really want an invertable 128-bit RNG, I guess this is the one. - */ - -template struct halfsize_trait {}; -template <> struct halfsize_trait { typedef uint64_t type; }; -template <> struct halfsize_trait { typedef uint32_t type; }; -template <> struct halfsize_trait { typedef uint16_t type; }; -template <> struct halfsize_trait { typedef uint8_t type; }; - -template -struct xsl_rr_rr_mixin { - typedef typename halfsize_trait::type htype; - - static itype output(itype internal) - { - constexpr bitcount_t htypebits = bitcount_t(sizeof(htype) * 8); - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t sparebits = bits - htypebits; - constexpr bitcount_t wantedopbits = htypebits >= 128 ? 7 - : htypebits >= 64 ? 6 - : htypebits >= 32 ? 5 - : htypebits >= 16 ? 4 - : 3; - constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits - : sparebits; - constexpr bitcount_t amplifier = wantedopbits - opbits; - constexpr bitcount_t mask = (1 << opbits) - 1; - constexpr bitcount_t topspare = sparebits; - constexpr bitcount_t xshift = (topspare + htypebits) / 2; - - bitcount_t rot = - opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; - bitcount_t amprot = (rot << amplifier) & mask; - internal ^= internal >> xshift; - htype lowbits = htype(internal); - lowbits = rotr(lowbits, amprot); - htype highbits = htype(internal >> topspare); - bitcount_t rot2 = lowbits & mask; - bitcount_t amprot2 = (rot2 << amplifier) & mask; - highbits = rotr(highbits, amprot2); - return (itype(highbits) << topspare) ^ itype(lowbits); - } -}; - - -/* - * XSH -- fixed xorshift (to high bits) - * - * You shouldn't use this at 64-bits or less. - */ - -template -struct xsh_mixin { - static xtype output(itype internal) - { - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t sparebits = bits - xtypebits; - constexpr bitcount_t topspare = 0; - constexpr bitcount_t bottomspare = sparebits - topspare; - constexpr bitcount_t xshift = (topspare + xtypebits) / 2; - - internal ^= internal >> xshift; - xtype result = internal >> bottomspare; - return result; - } -}; - -/* - * XSL -- fixed xorshift (to low bits) - * - * You shouldn't use this at 64-bits or less. - */ - -template -struct xsl_mixin { - inline xtype output(itype internal) - { - constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); - constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); - constexpr bitcount_t sparebits = bits - xtypebits; - constexpr bitcount_t topspare = sparebits; - constexpr bitcount_t bottomspare = sparebits - topspare; - constexpr bitcount_t xshift = (topspare + xtypebits) / 2; - - internal ^= internal >> xshift; - xtype result = internal >> bottomspare; - return result; - } -}; - -/* ---- End of Output Functions ---- */ - - -template -struct inside_out : private baseclass { - inside_out() = delete; - - typedef typename baseclass::result_type result_type; - typedef typename baseclass::state_type state_type; - static_assert(sizeof(result_type) == sizeof(state_type), - "Require a RNG whose output function is a permutation"); - - static bool external_step(result_type& randval, size_t i) - { - state_type state = baseclass::unoutput(randval); - state = state * baseclass::multiplier() + baseclass::increment() - + state_type(i*2); - result_type result = baseclass::output(state); - randval = result; - state_type zero = - baseclass::is_mcg ? state & state_type(3U) : state_type(0U); - return result == zero; - } - - static bool external_advance(result_type& randval, size_t i, - result_type delta, bool forwards = true) - { - state_type state = baseclass::unoutput(randval); - state_type mult = baseclass::multiplier(); - state_type inc = baseclass::increment() + state_type(i*2); - state_type zero = - baseclass::is_mcg ? state & state_type(3U) : state_type(0U); - state_type dist_to_zero = baseclass::distance(state, zero, mult, inc); - bool crosses_zero = - forwards ? dist_to_zero <= delta - : (-dist_to_zero) <= delta; - if (!forwards) - delta = -delta; - state = baseclass::advance(state, delta, mult, inc); - randval = baseclass::output(state); - return crosses_zero; - } -}; - - -template -class extended : public baseclass { -public: - typedef typename baseclass::state_type state_type; - typedef typename baseclass::result_type result_type; - typedef inside_out insideout; - -private: - static constexpr bitcount_t rtypebits = sizeof(result_type)*8; - static constexpr bitcount_t stypebits = sizeof(state_type)*8; - - static constexpr bitcount_t tick_limit_pow2 = 64U; - - static constexpr size_t table_size = 1UL << table_pow2; - static constexpr size_t table_shift = stypebits - table_pow2; - static constexpr state_type table_mask = - (state_type(1U) << table_pow2) - state_type(1U); - - static constexpr bool may_tick = - (advance_pow2 < stypebits) && (advance_pow2 < tick_limit_pow2); - static constexpr size_t tick_shift = stypebits - advance_pow2; - static constexpr state_type tick_mask = - may_tick ? state_type( - (uint64_t(1) << (advance_pow2*may_tick)) - 1) - // ^-- stupidity to appease GCC warnings - : ~state_type(0U); - - static constexpr bool may_tock = stypebits < tick_limit_pow2; - - result_type data_[table_size]; - - PCG_NOINLINE void advance_table(); - - PCG_NOINLINE void advance_table(state_type delta, bool isForwards = true); - - result_type& get_extended_value() - { - state_type state = this->state_; - if (kdd && baseclass::is_mcg) { - // The low order bits of an MCG are constant, so drop them. - state >>= 2; - } - size_t index = kdd ? state & table_mask - : state >> table_shift; - - if (may_tick) { - bool tick = kdd ? (state & tick_mask) == state_type(0u) - : (state >> tick_shift) == state_type(0u); - if (tick) - advance_table(); - } - if (may_tock) { - bool tock = state == state_type(0u); - if (tock) - advance_table(); - } - return data_[index]; - } - -public: - static constexpr size_t period_pow2() - { - return baseclass::period_pow2() + table_size*extvalclass::period_pow2(); - } - - __attribute__((always_inline)) result_type operator()() - { - result_type rhs = get_extended_value(); - result_type lhs = this->baseclass::operator()(); - return lhs ^ rhs; - } - - result_type operator()(result_type upper_bound) - { - return bounded_rand(*this, upper_bound); - } - - void set(result_type wanted) - { - result_type& rhs = get_extended_value(); - result_type lhs = this->baseclass::operator()(); - rhs = lhs ^ wanted; - } - - void advance(state_type distance, bool forwards = true); - - void backstep(state_type distance) - { - advance(distance, false); - } - - extended(const result_type* data) - : baseclass() - { - datainit(data); - } - - extended(const result_type* data, state_type seed) - : baseclass(seed) - { - datainit(data); - } - - // This function may or may not exist. It thus has to be a template - // to use SFINAE; users don't have to worry about its template-ness. - - template - extended(const result_type* data, state_type seed, - typename bc::stream_state stream_seed) - : baseclass(seed, stream_seed) - { - datainit(data); - } - - extended() - : baseclass() - { - selfinit(); - } - - extended(state_type seed) - : baseclass(seed) - { - selfinit(); - } - - // This function may or may not exist. It thus has to be a template - // to use SFINAE; users don't have to worry about its template-ness. - - template - extended(state_type seed, typename bc::stream_state stream_seed) - : baseclass(seed, stream_seed) - { - selfinit(); - } - -private: - void selfinit(); - void datainit(const result_type* data); - -public: - - template::value - && !std::is_convertible::value>::type> - extended(SeedSeq&& seedSeq) - : baseclass(seedSeq) - { - generate_to(seedSeq, data_); - } - - template - void seed(Args&&... args) - { - new (this) extended(std::forward(args)...); - } - - template - friend bool operator==(const extended&, - const extended&); - - template - friend std::basic_ostream& - operator<<(std::basic_ostream& out, - const extended&); - - template - friend std::basic_istream& - operator>>(std::basic_istream& in, - extended&); - -}; - - -template -void extended::datainit( - const result_type* data) -{ - for (size_t i = 0; i < table_size; ++i) - data_[i] = data[i]; -} - -template -void extended::selfinit() -{ - // We need to fill the extended table with something, and we have - // very little provided data, so we use the base generator to - // produce values. Although not ideal (use a seed sequence, folks!), - // unexpected correlations are mitigated by - // - using XOR differences rather than the number directly - // - the way the table is accessed, its values *won't* be accessed - // in the same order the were written. - // - any strange correlations would only be apparent if we - // were to backstep the generator so that the base generator - // was generating the same values again - result_type xdiff = baseclass::operator()() - baseclass::operator()(); - for (size_t i = 0; i < table_size; ++i) { - data_[i] = baseclass::operator()() ^ xdiff; - } -} - -template -bool operator==(const extended& lhs, - const extended& rhs) -{ - auto& base_lhs = static_cast(lhs); - auto& base_rhs = static_cast(rhs); - return base_lhs == base_rhs - && !memcmp((void*) lhs.data_, (void*) rhs.data_, sizeof(lhs.data_)); -} - -template -inline bool operator!=(const extended& lhs, - const extended& rhs) -{ - return lhs != rhs; -} - -template -std::basic_ostream& -operator<<(std::basic_ostream& out, - const extended& rng) -{ - auto orig_flags = out.flags(std::ios_base::dec | std::ios_base::left); - auto space = out.widen(' '); - auto orig_fill = out.fill(); - - out << rng.multiplier() << space - << rng.increment() << space - << rng.state_; - - for (const auto& datum : rng.data_) - out << space << datum; - - out.flags(orig_flags); - out.fill(orig_fill); - return out; -} - -template -std::basic_istream& -operator>>(std::basic_istream& in, - extended& rng) -{ - extended new_rng; - auto& base_rng = static_cast(new_rng); - in >> base_rng; - - if (in.fail()) - return in; - - auto orig_flags = in.flags(std::ios_base::dec | std::ios_base::skipws); - - for (auto& datum : new_rng.data_) { - in >> datum; - if (in.fail()) - goto bail; - } - - rng = new_rng; - -bail: - in.flags(orig_flags); - return in; -} - - - -template -void -extended::advance_table() -{ - bool carry = false; - for (size_t i = 0; i < table_size; ++i) { - if (carry) { - carry = insideout::external_step(data_[i],i+1); - } - bool carry2 = insideout::external_step(data_[i],i+1); - carry = carry || carry2; - } -} - -template -void -extended::advance_table( - state_type delta, bool isForwards) -{ - typedef typename baseclass::state_type base_state_t; - typedef typename extvalclass::state_type ext_state_t; - constexpr bitcount_t basebits = sizeof(base_state_t)*8; - constexpr bitcount_t extbits = sizeof(ext_state_t)*8; - static_assert(basebits <= extbits || advance_pow2 > 0, - "Current implementation might overflow its carry"); - - base_state_t carry = 0; - for (size_t i = 0; i < table_size; ++i) { - base_state_t total_delta = carry + delta; - ext_state_t trunc_delta = ext_state_t(total_delta); - if (basebits > extbits) { - carry = total_delta >> extbits; - } else { - carry = 0; - } - carry += - insideout::external_advance(data_[i],i+1, trunc_delta, isForwards); - } -} - -template -void extended::advance( - state_type distance, bool forwards) -{ - static_assert(kdd, - "Efficient advance is too hard for non-kdd extension. " - "For a weak advance, cast to base class"); - state_type zero = - baseclass::is_mcg ? this->state_ & state_type(3U) : state_type(0U); - if (may_tick) { - state_type ticks = distance >> (advance_pow2*may_tick); - // ^-- stupidity to appease GCC - // warnings - state_type adv_mask = - baseclass::is_mcg ? tick_mask << 2 : tick_mask; - state_type next_advance_distance = this->distance(zero, adv_mask); - if (!forwards) - next_advance_distance = (-next_advance_distance) & tick_mask; - if (next_advance_distance < (distance & tick_mask)) { - ++ticks; - } - if (ticks) - advance_table(ticks, forwards); - } - if (forwards) { - if (may_tock && this->distance(zero) <= distance) - advance_table(); - baseclass::advance(distance); - } else { - if (may_tock && -(this->distance(zero)) <= distance) - advance_table(state_type(1U), false); - baseclass::advance(-distance); - } -} - -} // namespace pcg_detail - -namespace pcg_engines { - -using namespace pcg_detail; - -/* Predefined types for XSH RS */ - -typedef oneseq_base oneseq_xsh_rs_16_8; -typedef oneseq_base oneseq_xsh_rs_32_16; -typedef oneseq_base oneseq_xsh_rs_64_32; -typedef oneseq_base oneseq_xsh_rs_128_64; - -typedef unique_base unique_xsh_rs_16_8; -typedef unique_base unique_xsh_rs_32_16; -typedef unique_base unique_xsh_rs_64_32; -typedef unique_base unique_xsh_rs_128_64; - -typedef setseq_base setseq_xsh_rs_16_8; -typedef setseq_base setseq_xsh_rs_32_16; -typedef setseq_base setseq_xsh_rs_64_32; -typedef setseq_base setseq_xsh_rs_128_64; - -typedef mcg_base mcg_xsh_rs_16_8; -typedef mcg_base mcg_xsh_rs_32_16; -typedef mcg_base mcg_xsh_rs_64_32; -typedef mcg_base mcg_xsh_rs_128_64; - -/* Predefined types for XSH RR */ - -typedef oneseq_base oneseq_xsh_rr_16_8; -typedef oneseq_base oneseq_xsh_rr_32_16; -typedef oneseq_base oneseq_xsh_rr_64_32; -typedef oneseq_base oneseq_xsh_rr_128_64; - -typedef unique_base unique_xsh_rr_16_8; -typedef unique_base unique_xsh_rr_32_16; -typedef unique_base unique_xsh_rr_64_32; -typedef unique_base unique_xsh_rr_128_64; - -typedef setseq_base setseq_xsh_rr_16_8; -typedef setseq_base setseq_xsh_rr_32_16; -typedef setseq_base setseq_xsh_rr_64_32; -typedef setseq_base setseq_xsh_rr_128_64; - -typedef mcg_base mcg_xsh_rr_16_8; -typedef mcg_base mcg_xsh_rr_32_16; -typedef mcg_base mcg_xsh_rr_64_32; -typedef mcg_base mcg_xsh_rr_128_64; - - -/* Predefined types for RXS M XS */ - -typedef oneseq_base oneseq_rxs_m_xs_8_8; -typedef oneseq_base oneseq_rxs_m_xs_16_16; -typedef oneseq_base oneseq_rxs_m_xs_32_32; -typedef oneseq_base oneseq_rxs_m_xs_64_64; -typedef oneseq_base oneseq_rxs_m_xs_128_128; - -typedef unique_base unique_rxs_m_xs_8_8; -typedef unique_base unique_rxs_m_xs_16_16; -typedef unique_base unique_rxs_m_xs_32_32; -typedef unique_base unique_rxs_m_xs_64_64; -typedef unique_base unique_rxs_m_xs_128_128; - -typedef setseq_base setseq_rxs_m_xs_8_8; -typedef setseq_base setseq_rxs_m_xs_16_16; -typedef setseq_base setseq_rxs_m_xs_32_32; -typedef setseq_base setseq_rxs_m_xs_64_64; -typedef setseq_base setseq_rxs_m_xs_128_128; - - // MCG versions don't make sense here, so aren't defined. - -/* Predefined types for XSL RR (only defined for "large" types) */ - -typedef oneseq_base oneseq_xsl_rr_64_32; -typedef oneseq_base oneseq_xsl_rr_128_64; - -typedef unique_base unique_xsl_rr_64_32; -typedef unique_base unique_xsl_rr_128_64; - -typedef setseq_base setseq_xsl_rr_64_32; -typedef setseq_base setseq_xsl_rr_128_64; - -typedef mcg_base mcg_xsl_rr_64_32; -typedef mcg_base mcg_xsl_rr_128_64; - - -/* Predefined types for XSL RR RR (only defined for "large" types) */ - -typedef oneseq_base - oneseq_xsl_rr_rr_64_64; -typedef oneseq_base - oneseq_xsl_rr_rr_128_128; - -typedef unique_base - unique_xsl_rr_rr_64_64; -typedef unique_base - unique_xsl_rr_rr_128_128; - -typedef setseq_base - setseq_xsl_rr_rr_64_64; -typedef setseq_base - setseq_xsl_rr_rr_128_128; - - // MCG versions don't make sense here, so aren't defined. - -/* Extended generators */ - -template -using ext_std8 = extended; - -template -using ext_std16 = extended; - -template -using ext_std32 = extended; - -template -using ext_std64 = extended; - - -template -using ext_oneseq_rxs_m_xs_32_32 = - ext_std32; - -template -using ext_mcg_xsh_rs_64_32 = - ext_std32; - -template -using ext_oneseq_xsh_rs_64_32 = - ext_std32; - -template -using ext_setseq_xsh_rr_64_32 = - ext_std32; - -template -using ext_mcg_xsl_rr_128_64 = - ext_std64; - -template -using ext_oneseq_xsl_rr_128_64 = - ext_std64; - -template -using ext_setseq_xsl_rr_128_64 = - ext_std64; - -} // namespace pcg_engines - -typedef pcg_engines::setseq_xsh_rr_64_32 pcg32; -typedef pcg_engines::oneseq_xsh_rr_64_32 pcg32_oneseq; -typedef pcg_engines::unique_xsh_rr_64_32 pcg32_unique; -typedef pcg_engines::mcg_xsh_rs_64_32 pcg32_fast; - -typedef pcg_engines::setseq_xsl_rr_128_64 pcg64; -typedef pcg_engines::oneseq_xsl_rr_128_64 pcg64_oneseq; -typedef pcg_engines::unique_xsl_rr_128_64 pcg64_unique; -typedef pcg_engines::mcg_xsl_rr_128_64 pcg64_fast; - -typedef pcg_engines::setseq_rxs_m_xs_8_8 pcg8_once_insecure; -typedef pcg_engines::setseq_rxs_m_xs_16_16 pcg16_once_insecure; -typedef pcg_engines::setseq_rxs_m_xs_32_32 pcg32_once_insecure; -typedef pcg_engines::setseq_rxs_m_xs_64_64 pcg64_once_insecure; -typedef pcg_engines::setseq_xsl_rr_rr_128_128 pcg128_once_insecure; - -typedef pcg_engines::oneseq_rxs_m_xs_8_8 pcg8_oneseq_once_insecure; -typedef pcg_engines::oneseq_rxs_m_xs_16_16 pcg16_oneseq_once_insecure; -typedef pcg_engines::oneseq_rxs_m_xs_32_32 pcg32_oneseq_once_insecure; -typedef pcg_engines::oneseq_rxs_m_xs_64_64 pcg64_oneseq_once_insecure; -typedef pcg_engines::oneseq_xsl_rr_rr_128_128 pcg128_oneseq_once_insecure; - - -// These two extended RNGs provide two-dimensionally equidistributed -// 32-bit generators. pcg32_k2_fast occupies the same space as pcg64, -// and can be called twice to generate 64 bits, but does not required -// 128-bit math; on 32-bit systems, it's faster than pcg64 as well. - -typedef pcg_engines::ext_setseq_xsh_rr_64_32<6,16,true> pcg32_k2; -typedef pcg_engines::ext_oneseq_xsh_rs_64_32<6,32,true> pcg32_k2_fast; - -// These eight extended RNGs have about as much state as arc4random -// -// - the k variants are k-dimensionally equidistributed -// - the c variants offer better crypographic security -// -// (just how good the cryptographic security is is an open question) - -typedef pcg_engines::ext_setseq_xsh_rr_64_32<6,16,true> pcg32_k64; -typedef pcg_engines::ext_mcg_xsh_rs_64_32<6,32,true> pcg32_k64_oneseq; -typedef pcg_engines::ext_oneseq_xsh_rs_64_32<6,32,true> pcg32_k64_fast; - -typedef pcg_engines::ext_setseq_xsh_rr_64_32<6,16,false> pcg32_c64; -typedef pcg_engines::ext_oneseq_xsh_rs_64_32<6,32,false> pcg32_c64_oneseq; -typedef pcg_engines::ext_mcg_xsh_rs_64_32<6,32,false> pcg32_c64_fast; - -typedef pcg_engines::ext_setseq_xsl_rr_128_64<5,16,true> pcg64_k32; -typedef pcg_engines::ext_oneseq_xsl_rr_128_64<5,128,true> pcg64_k32_oneseq; -typedef pcg_engines::ext_mcg_xsl_rr_128_64<5,128,true> pcg64_k32_fast; - -typedef pcg_engines::ext_setseq_xsl_rr_128_64<5,16,false> pcg64_c32; -typedef pcg_engines::ext_oneseq_xsl_rr_128_64<5,128,false> pcg64_c32_oneseq; -typedef pcg_engines::ext_mcg_xsl_rr_128_64<5,128,false> pcg64_c32_fast; - -// These eight extended RNGs have more state than the Mersenne twister -// -// - the k variants are k-dimensionally equidistributed -// - the c variants offer better crypographic security -// -// (just how good the cryptographic security is is an open question) - -typedef pcg_engines::ext_setseq_xsh_rr_64_32<10,16,true> pcg32_k1024; -typedef pcg_engines::ext_oneseq_xsh_rs_64_32<10,32,true> pcg32_k1024_fast; - -typedef pcg_engines::ext_setseq_xsh_rr_64_32<10,16,false> pcg32_c1024; -typedef pcg_engines::ext_oneseq_xsh_rs_64_32<10,32,false> pcg32_c1024_fast; - -typedef pcg_engines::ext_setseq_xsl_rr_128_64<10,16,true> pcg64_k1024; -typedef pcg_engines::ext_oneseq_xsl_rr_128_64<10,128,true> pcg64_k1024_fast; - -typedef pcg_engines::ext_setseq_xsl_rr_128_64<10,16,false> pcg64_c1024; -typedef pcg_engines::ext_oneseq_xsl_rr_128_64<10,128,false> pcg64_c1024_fast; - -// These generators have an insanely huge period (2^524352), and is suitable -// for silly party tricks, such as dumping out 64 KB ZIP files at an arbitrary -// point in the future. [Actually, over the full period of the generator, it -// will produce every 64 KB ZIP file 2^64 times!] - -typedef pcg_engines::ext_setseq_xsh_rr_64_32<14,16,true> pcg32_k16384; -typedef pcg_engines::ext_oneseq_xsh_rs_64_32<14,32,true> pcg32_k16384_fast; - -#endif // PCG_RAND_HPP_INCLUDED diff --git a/external_tools/pcg/pcg_uint128.hpp b/external_tools/pcg/pcg_uint128.hpp deleted file mode 100644 index 99b20e780..000000000 --- a/external_tools/pcg/pcg_uint128.hpp +++ /dev/null @@ -1,750 +0,0 @@ -/* - * PCG Random Number Generation for C++ - * - * Copyright 2014 Melissa O'Neill - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * For additional information about the PCG random number generation scheme, - * including its license and other licensing options, visit - * - * http://www.pcg-random.org - */ - -/* - * This code provides a a C++ class that can provide 128-bit (or higher) - * integers. To produce 2K-bit integers, it uses two K-bit integers, - * placed in a union that allowes the code to also see them as four K/2 bit - * integers (and access them either directly name, or by index). - * - * It may seem like we're reinventing the wheel here, because several - * libraries already exist that support large integers, but most existing - * libraries provide a very generic multiprecision code, but here we're - * operating at a fixed size. Also, most other libraries are fairly - * heavyweight. So we use a direct implementation. Sadly, it's much slower - * than hand-coded assembly or direct CPU support. - */ - -#ifndef PCG_UINT128_HPP_INCLUDED -#define PCG_UINT128_HPP_INCLUDED 1 - -#include -#include -#include -#include -#include -#include -#include - -/* - * We want to lay the type out the same way that a native type would be laid - * out, which means we must know the machine's endian, at compile time. - * This ugliness attempts to do so. - */ - -#ifndef PCG_LITTLE_ENDIAN - #if defined(__BYTE_ORDER__) - #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - #define PCG_LITTLE_ENDIAN 1 - #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - #define PCG_LITTLE_ENDIAN 0 - #else - #error __BYTE_ORDER__ does not match a standard endian, pick a side - #endif - #elif __LITTLE_ENDIAN__ || _LITTLE_ENDIAN - #define PCG_LITTLE_ENDIAN 1 - #elif __BIG_ENDIAN__ || _BIG_ENDIAN - #define PCG_LITTLE_ENDIAN 0 - #elif __x86_64 || __x86_64__ || __i386 || __i386__ - #define PCG_LITTLE_ENDIAN 1 - #elif __powerpc__ || __POWERPC__ || __ppc__ || __PPC__ \ - || __m68k__ || __mc68000__ - #define PCG_LITTLE_ENDIAN 0 - #else - #error Unable to determine target endianness - #endif -#endif - -namespace pcg_extras { - -// Recent versions of GCC have intrinsics we can use to quickly calculate -// the number of leading and trailing zeros in a number. If possible, we -// use them, otherwise we fall back to old-fashioned bit twiddling to figure -// them out. - -#ifndef PCG_BITCOUNT_T - typedef uint8_t bitcount_t; -#else - typedef PCG_BITCOUNT_T bitcount_t; -#endif - -/* - * Provide some useful helper functions - * * flog2 floor(log2(x)) - * * trailingzeros number of trailing zero bits - */ - -#ifdef __GNUC__ // Any GNU-compatible compiler supporting C++11 has - // some useful intrinsics we can use. - -inline bitcount_t flog2(uint32_t v) -{ - return 31 - __builtin_clz(v); -} - -inline bitcount_t trailingzeros(uint32_t v) -{ - return __builtin_ctz(v); -} - -inline bitcount_t flog2(uint64_t v) -{ -#if UINT64_MAX == ULONG_MAX - return 63 - __builtin_clzl(v); -#elif UINT64_MAX == ULLONG_MAX - return 63 - __builtin_clzll(v); -#else - #error Cannot find a function for uint64_t -#endif -} - -inline bitcount_t trailingzeros(uint64_t v) -{ -#if UINT64_MAX == ULONG_MAX - return __builtin_ctzl(v); -#elif UINT64_MAX == ULLONG_MAX - return __builtin_ctzll(v); -#else - #error Cannot find a function for uint64_t -#endif -} - -#else // Otherwise, we fall back to bit twiddling - // implementations - -inline bitcount_t flog2(uint32_t v) -{ - // Based on code by Eric Cole and Mark Dickinson, which appears at - // https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn - - static const uint8_t multiplyDeBruijnBitPos[32] = { - 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, - 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 - }; - - v |= v >> 1; // first round down to one less than a power of 2 - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - - return multiplyDeBruijnBitPos[(uint32_t)(v * 0x07C4ACDDU) >> 27]; -} - -inline bitcount_t trailingzeros(uint32_t v) -{ - static const uint8_t multiplyDeBruijnBitPos[32] = { - 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, - 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 - }; - - return multiplyDeBruijnBitPos[((uint32_t)((v & -v) * 0x077CB531U)) >> 27]; -} - -inline bitcount_t flog2(uint64_t v) -{ - uint32_t high = v >> 32; - uint32_t low = uint32_t(v); - - return high ? 32+flog2(high) : flog2(low); -} - -inline bitcount_t trailingzeros(uint64_t v) -{ - uint32_t high = v >> 32; - uint32_t low = uint32_t(v); - - return low ? trailingzeros(low) : trailingzeros(high)+32; -} - -#endif - -template -inline bitcount_t clog2(UInt v) -{ - return flog2(v) + ((v & (-v)) != v); -} - -template -inline UInt addwithcarry(UInt x, UInt y, bool carryin, bool* carryout) -{ - UInt half_result = y + carryin; - UInt result = x + half_result; - *carryout = (half_result < y) || (result < x); - return result; -} - -template -inline UInt subwithcarry(UInt x, UInt y, bool carryin, bool* carryout) -{ - UInt half_result = y + carryin; - UInt result = x - half_result; - *carryout = (half_result < y) || (result > x); - return result; -} - - -template -class uint_x4 { -// private: -public: - union { -#if PCG_LITTLE_ENDIAN - struct { - UInt v0, v1, v2, v3; - } w; - struct { - UIntX2 v01, v23; - } d; -#else - struct { - UInt v3, v2, v1, v0; - } w; - struct { - UIntX2 v23, v01; - } d; -#endif - // For the array access versions, the code that uses the array - // must handle endian itself. Yuck. - UInt wa[4]; - UIntX2 da[2]; - }; - -public: - uint_x4() = default; - - constexpr uint_x4(UInt v3, UInt v2, UInt v1, UInt v0) -#if PCG_LITTLE_ENDIAN - : w{v0, v1, v2, v3} -#else - : w{v3, v2, v1, v0} -#endif - { - // Nothing (else) to do - } - - constexpr uint_x4(UIntX2 v23, UIntX2 v01) -#if PCG_LITTLE_ENDIAN - : d{v01,v23} -#else - : d{v23,v01} -#endif - { - // Nothing (else) to do - } - - template::value - && sizeof(Integral) <= sizeof(UIntX2)) - >::type* = nullptr> - constexpr uint_x4(Integral v01) -#if PCG_LITTLE_ENDIAN - : d{UIntX2(v01),0UL} -#else - : d{0UL,UIntX2(v01)} -#endif - { - // Nothing (else) to do - } - - explicit constexpr operator uint64_t() const - { - return d.v01; - } - - explicit constexpr operator uint32_t() const - { - return w.v0; - } - - explicit constexpr operator int() const - { - return w.v0; - } - - explicit constexpr operator uint16_t() const - { - return w.v0; - } - - explicit constexpr operator uint8_t() const - { - return w.v0; - } - - typedef typename std::conditional::value, - unsigned long long, - unsigned long>::type - uint_missing_t; - - explicit constexpr operator uint_missing_t() const - { - return d.v01; - } - - explicit constexpr operator bool() const - { - return d.v01 || d.v23; - } - - template - friend uint_x4 operator*(const uint_x4&, const uint_x4&); - - template - friend std::pair< uint_x4,uint_x4 > - divmod(const uint_x4&, const uint_x4&); - - template - friend uint_x4 operator+(const uint_x4&, const uint_x4&); - - template - friend uint_x4 operator-(const uint_x4&, const uint_x4&); - - template - friend uint_x4 operator<<(const uint_x4&, const uint_x4&); - - template - friend uint_x4 operator>>(const uint_x4&, const uint_x4&); - - template - friend uint_x4 operator&(const uint_x4&, const uint_x4&); - - template - friend uint_x4 operator|(const uint_x4&, const uint_x4&); - - template - friend uint_x4 operator^(const uint_x4&, const uint_x4&); - - template - friend bool operator==(const uint_x4&, const uint_x4&); - - template - friend bool operator!=(const uint_x4&, const uint_x4&); - - template - friend bool operator<(const uint_x4&, const uint_x4&); - - template - friend bool operator<=(const uint_x4&, const uint_x4&); - - template - friend bool operator>(const uint_x4&, const uint_x4&); - - template - friend bool operator>=(const uint_x4&, const uint_x4&); - - template - friend uint_x4 operator~(const uint_x4&); - - template - friend uint_x4 operator-(const uint_x4&); - - template - friend bitcount_t flog2(const uint_x4&); - - template - friend bitcount_t trailingzeros(const uint_x4&); - - uint_x4& operator*=(const uint_x4& rhs) - { - uint_x4 result = *this * rhs; - return *this = result; - } - - uint_x4& operator/=(const uint_x4& rhs) - { - uint_x4 result = *this / rhs; - return *this = result; - } - - uint_x4& operator%=(const uint_x4& rhs) - { - uint_x4 result = *this % rhs; - return *this = result; - } - - uint_x4& operator+=(const uint_x4& rhs) - { - uint_x4 result = *this + rhs; - return *this = result; - } - - uint_x4& operator-=(const uint_x4& rhs) - { - uint_x4 result = *this - rhs; - return *this = result; - } - - uint_x4& operator&=(const uint_x4& rhs) - { - uint_x4 result = *this & rhs; - return *this = result; - } - - uint_x4& operator|=(const uint_x4& rhs) - { - uint_x4 result = *this | rhs; - return *this = result; - } - - uint_x4& operator^=(const uint_x4& rhs) - { - uint_x4 result = *this ^ rhs; - return *this = result; - } - - uint_x4& operator>>=(bitcount_t shift) - { - uint_x4 result = *this >> shift; - return *this = result; - } - - uint_x4& operator<<=(bitcount_t shift) - { - uint_x4 result = *this << shift; - return *this = result; - } - -}; - -template -bitcount_t flog2(const uint_x4& v) -{ -#if PCG_LITTLE_ENDIAN - for (uint8_t i = 4; i !=0; /* dec in loop */) { - --i; -#else - for (uint8_t i = 0; i < 4; ++i) { -#endif - if (v.wa[i] == 0) - continue; - return flog2(v.wa[i]) + (sizeof(U)*CHAR_BIT)*i; - } - abort(); -} - -template -bitcount_t trailingzeros(const uint_x4& v) -{ -#if PCG_LITTLE_ENDIAN - for (uint8_t i = 0; i < 4; ++i) { -#else - for (uint8_t i = 4; i !=0; /* dec in loop */) { - --i; -#endif - if (v.wa[i] != 0) - return trailingzeros(v.wa[i]) + (sizeof(U)*CHAR_BIT)*i; - } - return (sizeof(U)*CHAR_BIT)*4; -} - -template -std::pair< uint_x4, uint_x4 > - divmod(const uint_x4& orig_dividend, - const uint_x4& divisor) -{ - // If the dividend is less than the divisor, the answer is always zero. - // This takes care of boundary cases like 0/x (which would otherwise be - // problematic because we can't take the log of zero. (The boundary case - // of division by zero is undefined.) - if (orig_dividend < divisor) - return { uint_x4(0UL), orig_dividend }; - - auto dividend = orig_dividend; - - auto log2_divisor = flog2(divisor); - auto log2_dividend = flog2(dividend); - // assert(log2_dividend >= log2_divisor); - bitcount_t logdiff = log2_dividend - log2_divisor; - - constexpr uint_x4 ONE(1UL); - if (logdiff == 0) - return { ONE, dividend - divisor }; - - // Now we change the log difference to - // floor(log2(divisor)) - ceil(log2(dividend)) - // to ensure that we *underestimate* the result. - logdiff -= 1; - - uint_x4 quotient(0UL); - - auto qfactor = ONE << logdiff; - auto factor = divisor << logdiff; - - do { - dividend -= factor; - quotient += qfactor; - while (dividend < factor) { - factor >>= 1; - qfactor >>= 1; - } - } while (dividend >= divisor); - - return { quotient, dividend }; -} - -template -uint_x4 operator/(const uint_x4& dividend, - const uint_x4& divisor) -{ - return divmod(dividend, divisor).first; -} - -template -uint_x4 operator%(const uint_x4& dividend, - const uint_x4& divisor) -{ - return divmod(dividend, divisor).second; -} - - -template -uint_x4 operator*(const uint_x4& a, - const uint_x4& b) -{ - uint_x4 r = {0U, 0U, 0U, 0U}; - bool carryin = false; - bool carryout; - UIntX2 a0b0 = UIntX2(a.w.v0) * UIntX2(b.w.v0); - r.w.v0 = UInt(a0b0); - r.w.v1 = UInt(a0b0 >> 32); - - UIntX2 a1b0 = UIntX2(a.w.v1) * UIntX2(b.w.v0); - r.w.v2 = UInt(a1b0 >> 32); - r.w.v1 = addwithcarry(r.w.v1, UInt(a1b0), carryin, &carryout); - carryin = carryout; - r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); - - UIntX2 a0b1 = UIntX2(a.w.v0) * UIntX2(b.w.v1); - carryin = false; - r.w.v2 = addwithcarry(r.w.v2, UInt(a0b1 >> 32), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); - - carryin = false; - r.w.v1 = addwithcarry(r.w.v1, UInt(a0b1), carryin, &carryout); - carryin = carryout; - r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout); - - UIntX2 a1b1 = UIntX2(a.w.v1) * UIntX2(b.w.v1); - carryin = false; - r.w.v2 = addwithcarry(r.w.v2, UInt(a1b1), carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(r.w.v3, UInt(a1b1 >> 32), carryin, &carryout); - - r.d.v23 += a.d.v01 * b.d.v23 + a.d.v23 * b.d.v01; - - return r; -} - - -template -uint_x4 operator+(const uint_x4& a, - const uint_x4& b) -{ - uint_x4 r = {0U, 0U, 0U, 0U}; - - bool carryin = false; - bool carryout; - r.w.v0 = addwithcarry(a.w.v0, b.w.v0, carryin, &carryout); - carryin = carryout; - r.w.v1 = addwithcarry(a.w.v1, b.w.v1, carryin, &carryout); - carryin = carryout; - r.w.v2 = addwithcarry(a.w.v2, b.w.v2, carryin, &carryout); - carryin = carryout; - r.w.v3 = addwithcarry(a.w.v3, b.w.v3, carryin, &carryout); - - return r; -} - -template -uint_x4 operator-(const uint_x4& a, - const uint_x4& b) -{ - uint_x4 r = {0U, 0U, 0U, 0U}; - - bool carryin = false; - bool carryout; - r.w.v0 = subwithcarry(a.w.v0, b.w.v0, carryin, &carryout); - carryin = carryout; - r.w.v1 = subwithcarry(a.w.v1, b.w.v1, carryin, &carryout); - carryin = carryout; - r.w.v2 = subwithcarry(a.w.v2, b.w.v2, carryin, &carryout); - carryin = carryout; - r.w.v3 = subwithcarry(a.w.v3, b.w.v3, carryin, &carryout); - - return r; -} - - -template -uint_x4 operator&(const uint_x4& a, - const uint_x4& b) -{ - return uint_x4(a.d.v23 & b.d.v23, a.d.v01 & b.d.v01); -} - -template -uint_x4 operator|(const uint_x4& a, - const uint_x4& b) -{ - return uint_x4(a.d.v23 | b.d.v23, a.d.v01 | b.d.v01); -} - -template -uint_x4 operator^(const uint_x4& a, - const uint_x4& b) -{ - return uint_x4(a.d.v23 ^ b.d.v23, a.d.v01 ^ b.d.v01); -} - -template -uint_x4 operator~(const uint_x4& v) -{ - return uint_x4(~v.d.v23, ~v.d.v01); -} - -template -uint_x4 operator-(const uint_x4& v) -{ - return uint_x4(0UL,0UL) - v; -} - -template -bool operator==(const uint_x4& a, const uint_x4& b) -{ - return (a.d.v01 == b.d.v01) && (a.d.v23 == b.d.v23); -} - -template -bool operator!=(const uint_x4& a, const uint_x4& b) -{ - return !operator==(a,b); -} - - -template -bool operator<(const uint_x4& a, const uint_x4& b) -{ - return (a.d.v23 < b.d.v23) - || ((a.d.v23 == b.d.v23) && (a.d.v01 < b.d.v01)); -} - -template -bool operator>(const uint_x4& a, const uint_x4& b) -{ - return operator<(b,a); -} - -template -bool operator<=(const uint_x4& a, const uint_x4& b) -{ - return !(operator<(b,a)); -} - -template -bool operator>=(const uint_x4& a, const uint_x4& b) -{ - return !(operator<(a,b)); -} - - - -template -uint_x4 operator<<(const uint_x4& v, - const bitcount_t shift) -{ - uint_x4 r = {0U, 0U, 0U, 0U}; - const bitcount_t bits = sizeof(UInt) * CHAR_BIT; - const bitcount_t bitmask = bits - 1; - const bitcount_t shiftdiv = shift / bits; - const bitcount_t shiftmod = shift & bitmask; - - if (shiftmod) { - UInt carryover = 0; -#if PCG_LITTLE_ENDIAN - for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { -#else - for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) { - --out, --in; -#endif - r.wa[out] = (v.wa[in] << shiftmod) | carryover; - carryover = (v.wa[in] >> (bits - shiftmod)); - } - } else { -#if PCG_LITTLE_ENDIAN - for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { -#else - for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) { - --out, --in; -#endif - r.wa[out] = v.wa[in]; - } - } - - return r; -} - -template -uint_x4 operator>>(const uint_x4& v, - const bitcount_t shift) -{ - uint_x4 r = {0U, 0U, 0U, 0U}; - const bitcount_t bits = sizeof(UInt) * CHAR_BIT; - const bitcount_t bitmask = bits - 1; - const bitcount_t shiftdiv = shift / bits; - const bitcount_t shiftmod = shift & bitmask; - - if (shiftmod) { - UInt carryover = 0; -#if PCG_LITTLE_ENDIAN - for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) { - --out, --in; -#else - for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { -#endif - r.wa[out] = (v.wa[in] >> shiftmod) | carryover; - carryover = (v.wa[in] << (bits - shiftmod)); - } - } else { -#if PCG_LITTLE_ENDIAN - for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) { - --out, --in; -#else - for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) { -#endif - r.wa[out] = v.wa[in]; - } - } - - return r; -} - -} // namespace pcg_extras - -#endif // PCG_UINT128_HPP_INCLUDED diff --git a/mt-kahypar/datastructures/array.h b/mt-kahypar/datastructures/array.h index 92a9c1da9..d9d5c784b 100644 --- a/mt-kahypar/datastructures/array.h +++ b/mt-kahypar/datastructures/array.h @@ -31,9 +31,9 @@ #include #include -#include "tbb/parallel_for.h" -#include "tbb/scalable_allocator.h" -#include "tbb//parallel_invoke.h" +#include +#include +#include #include "mt-kahypar/macros.h" #include "mt-kahypar/parallel/memory_pool.h" diff --git a/mt-kahypar/datastructures/bitset.h b/mt-kahypar/datastructures/bitset.h index 7a27762e3..2cafbff65 100644 --- a/mt-kahypar/datastructures/bitset.h +++ b/mt-kahypar/datastructures/bitset.h @@ -31,6 +31,7 @@ #include "mt-kahypar/macros.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" +#include "mt-kahypar/utils/bit_ops.h" namespace mt_kahypar { namespace ds { diff --git a/mt-kahypar/datastructures/concurrent_bucket_map.h b/mt-kahypar/datastructures/concurrent_bucket_map.h index 1cd2b41d7..792573121 100644 --- a/mt-kahypar/datastructures/concurrent_bucket_map.h +++ b/mt-kahypar/datastructures/concurrent_bucket_map.h @@ -31,8 +31,8 @@ #include #include -#include "tbb/task_arena.h" -#include "tbb/task_group.h" +#include +#include #include "kahypar-resources/meta/mandatory.h" @@ -81,7 +81,7 @@ class ConcurrentBucketMap { _num_buckets(other._num_buckets), _mod_mask(_num_buckets - 1), _spin_locks(_num_buckets), - _buckets(std::move(other._buffer)) { } + _buckets(std::move(other._buckets)) { } template void doParallelForAllBuckets(const F& f) { diff --git a/mt-kahypar/datastructures/connectivity_info.h b/mt-kahypar/datastructures/connectivity_info.h index 622e94f7c..c7bd2d7bd 100644 --- a/mt-kahypar/datastructures/connectivity_info.h +++ b/mt-kahypar/datastructures/connectivity_info.h @@ -26,7 +26,7 @@ #pragma once -#include "tbb/parallel_invoke.h" +#include #include "mt-kahypar/datastructures/pin_count_in_part.h" #include "mt-kahypar/datastructures/connectivity_set.h" diff --git a/mt-kahypar/datastructures/connectivity_set.h b/mt-kahypar/datastructures/connectivity_set.h index 3f24e5b5d..b5e4a2889 100644 --- a/mt-kahypar/datastructures/connectivity_set.h +++ b/mt-kahypar/datastructures/connectivity_set.h @@ -32,7 +32,7 @@ #include #include -#include "tbb/enumerable_thread_specific.h" +#include #include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/datastructures/array.h" @@ -87,7 +87,9 @@ class ConnectivitySets { _shallow_copy_bitset() { if ( num_hyperedges > 0 ) { _bits.resize("Refinement", "connectivity_set", - static_cast(num_hyperedges) * _num_blocks_per_hyperedge, true, assign_parallel); + static_cast(num_hyperedges) * _num_blocks_per_hyperedge + + 1 /* The nextBlockID() implementation performs a (masked out) load past the end */ + , true, assign_parallel); } } diff --git a/mt-kahypar/datastructures/delta_connectivity_set.h b/mt-kahypar/datastructures/delta_connectivity_set.h index 27f2077c9..0216c1719 100644 --- a/mt-kahypar/datastructures/delta_connectivity_set.h +++ b/mt-kahypar/datastructures/delta_connectivity_set.h @@ -32,7 +32,7 @@ #include #include -#include "tbb/enumerable_thread_specific.h" +#include #include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/datastructures/array.h" diff --git a/mt-kahypar/datastructures/delta_partitioned_graph.h b/mt-kahypar/datastructures/delta_partitioned_graph.h index 43e11c484..9c1f5e5bf 100644 --- a/mt-kahypar/datastructures/delta_partitioned_graph.h +++ b/mt-kahypar/datastructures/delta_partitioned_graph.h @@ -74,7 +74,6 @@ class DeltaPartitionedGraph { _part_weights_delta(context.partition.k, 0), _part_ids_delta(), _dummy_connectivity_set() { - const bool top_level = context.type == ContextType::main; _part_ids_delta.initialize(MAP_SIZE_SMALL); } diff --git a/mt-kahypar/datastructures/delta_partitioned_hypergraph.h b/mt-kahypar/datastructures/delta_partitioned_hypergraph.h index 9eb52ab3d..8c7149e3b 100644 --- a/mt-kahypar/datastructures/delta_partitioned_hypergraph.h +++ b/mt-kahypar/datastructures/delta_partitioned_hypergraph.h @@ -83,7 +83,6 @@ class DeltaPartitionedHypergraph { _part_ids_delta(), _pins_in_part_delta(), _connectivity_set_delta(context.partition.k) { - const bool top_level = context.type == ContextType::main; _part_ids_delta.initialize(MAP_SIZE_SMALL); _pins_in_part_delta.initialize(MAP_SIZE_LARGE); } diff --git a/mt-kahypar/datastructures/dynamic_adjacency_array.h b/mt-kahypar/datastructures/dynamic_adjacency_array.h index 61ea71b6f..09a5d3668 100644 --- a/mt-kahypar/datastructures/dynamic_adjacency_array.h +++ b/mt-kahypar/datastructures/dynamic_adjacency_array.h @@ -29,7 +29,7 @@ #include -#include "tbb/enumerable_thread_specific.h" +#include #include "kahypar-resources/datastructure/fast_reset_flag_array.h" diff --git a/mt-kahypar/datastructures/dynamic_graph.cpp b/mt-kahypar/datastructures/dynamic_graph.cpp index 13f2901c2..74f7dc2d1 100644 --- a/mt-kahypar/datastructures/dynamic_graph.cpp +++ b/mt-kahypar/datastructures/dynamic_graph.cpp @@ -28,11 +28,11 @@ #include "mt-kahypar/datastructures/dynamic_graph.h" -#include "tbb/blocked_range.h" -#include "tbb/parallel_scan.h" -#include "tbb/parallel_sort.h" -#include "tbb/parallel_reduce.h" -#include "tbb/concurrent_queue.h" +#include +#include +#include +#include +#include #include "mt-kahypar/parallel/stl/scalable_queue.h" #include "mt-kahypar/datastructures/concurrent_bucket_map.h" diff --git a/mt-kahypar/datastructures/dynamic_graph.h b/mt-kahypar/datastructures/dynamic_graph.h index 90e867471..464783e25 100644 --- a/mt-kahypar/datastructures/dynamic_graph.h +++ b/mt-kahypar/datastructures/dynamic_graph.h @@ -31,7 +31,7 @@ #include #include -#include "tbb/parallel_for.h" +#include #include "include/libmtkahypartypes.h" diff --git a/mt-kahypar/datastructures/dynamic_graph_factory.h b/mt-kahypar/datastructures/dynamic_graph_factory.h index b82b601c0..03b7e2023 100644 --- a/mt-kahypar/datastructures/dynamic_graph_factory.h +++ b/mt-kahypar/datastructures/dynamic_graph_factory.h @@ -28,7 +28,7 @@ #pragma once -#include "tbb/enumerable_thread_specific.h" +#include #include "mt-kahypar/datastructures/dynamic_graph.h" #include "mt-kahypar/parallel/atomic_wrapper.h" diff --git a/mt-kahypar/datastructures/dynamic_hypergraph.cpp b/mt-kahypar/datastructures/dynamic_hypergraph.cpp index 99afceec6..6502ee96f 100644 --- a/mt-kahypar/datastructures/dynamic_hypergraph.cpp +++ b/mt-kahypar/datastructures/dynamic_hypergraph.cpp @@ -27,11 +27,11 @@ #include "mt-kahypar/datastructures/dynamic_hypergraph.h" -#include "tbb/blocked_range.h" -#include "tbb/parallel_scan.h" -#include "tbb/parallel_sort.h" -#include "tbb/parallel_reduce.h" -#include "tbb/concurrent_queue.h" +#include +#include +#include +#include +#include #include "mt-kahypar/parallel/stl/scalable_queue.h" #include "mt-kahypar/datastructures/concurrent_bucket_map.h" diff --git a/mt-kahypar/datastructures/dynamic_hypergraph.h b/mt-kahypar/datastructures/dynamic_hypergraph.h index c36ac98a4..7130992e4 100644 --- a/mt-kahypar/datastructures/dynamic_hypergraph.h +++ b/mt-kahypar/datastructures/dynamic_hypergraph.h @@ -30,7 +30,7 @@ #include #include -#include "tbb/parallel_for.h" +#include #include "include/libmtkahypartypes.h" diff --git a/mt-kahypar/datastructures/dynamic_hypergraph_factory.cpp b/mt-kahypar/datastructures/dynamic_hypergraph_factory.cpp index 6e31fdea1..d7e335b9a 100644 --- a/mt-kahypar/datastructures/dynamic_hypergraph_factory.cpp +++ b/mt-kahypar/datastructures/dynamic_hypergraph_factory.cpp @@ -27,9 +27,9 @@ #include "mt-kahypar/datastructures/dynamic_hypergraph_factory.h" -#include "tbb/parallel_for.h" -#include "tbb/parallel_invoke.h" -#include "tbb/parallel_scan.h" +#include +#include +#include #include "kahypar-resources/utils/math.h" diff --git a/mt-kahypar/datastructures/dynamic_hypergraph_factory.h b/mt-kahypar/datastructures/dynamic_hypergraph_factory.h index 5e23f9e70..7ecd46d30 100644 --- a/mt-kahypar/datastructures/dynamic_hypergraph_factory.h +++ b/mt-kahypar/datastructures/dynamic_hypergraph_factory.h @@ -27,7 +27,7 @@ #pragma once -#include "tbb/enumerable_thread_specific.h" +#include #include "mt-kahypar/datastructures/dynamic_hypergraph.h" diff --git a/mt-kahypar/datastructures/incident_net_array.h b/mt-kahypar/datastructures/incident_net_array.h index efefa3294..75a5bd605 100644 --- a/mt-kahypar/datastructures/incident_net_array.h +++ b/mt-kahypar/datastructures/incident_net_array.h @@ -29,9 +29,9 @@ #include -#include "tbb/enumerable_thread_specific.h" -#include "tbb/parallel_invoke.h" -#include "tbb/parallel_scan.h" +#include +#include +#include #include "kahypar-resources/datastructure/fast_reset_flag_array.h" diff --git a/mt-kahypar/datastructures/partitioned_graph.h b/mt-kahypar/datastructures/partitioned_graph.h index 1bc3acd9d..c0ea0910c 100644 --- a/mt-kahypar/datastructures/partitioned_graph.h +++ b/mt-kahypar/datastructures/partitioned_graph.h @@ -32,7 +32,7 @@ #include #include -#include "tbb/parallel_invoke.h" +#include #include "kahypar-resources/meta/mandatory.h" diff --git a/mt-kahypar/datastructures/partitioned_hypergraph.h b/mt-kahypar/datastructures/partitioned_hypergraph.h index b8826a080..dc2743508 100644 --- a/mt-kahypar/datastructures/partitioned_hypergraph.h +++ b/mt-kahypar/datastructures/partitioned_hypergraph.h @@ -31,7 +31,7 @@ #include #include -#include "tbb/parallel_invoke.h" +#include #include "kahypar-resources/meta/mandatory.h" diff --git a/mt-kahypar/datastructures/pin_count_in_part.h b/mt-kahypar/datastructures/pin_count_in_part.h index b901391d5..d7306d1e5 100644 --- a/mt-kahypar/datastructures/pin_count_in_part.h +++ b/mt-kahypar/datastructures/pin_count_in_part.h @@ -30,7 +30,7 @@ #include -#include "tbb/enumerable_thread_specific.h" +#include #include "mt-kahypar/macros.h" #include "mt-kahypar/datastructures/hypergraph_common.h" diff --git a/mt-kahypar/datastructures/priority_queue.h b/mt-kahypar/datastructures/priority_queue.h index 6d22d179b..7ddedef68 100644 --- a/mt-kahypar/datastructures/priority_queue.h +++ b/mt-kahypar/datastructures/priority_queue.h @@ -32,7 +32,7 @@ #include #include -#include +#include "mt-kahypar/parallel/stl/scalable_vector.h" namespace mt_kahypar { @@ -67,7 +67,9 @@ static constexpr bool enable_heavy_assert = false; positions[heap.back().id] = 0; heap[0] = heap.back(); heap.pop_back(); - siftDown(0); + if (!empty()) { + siftDown(0); + } } void insert(const IdT e, const KeyT k) { @@ -87,6 +89,9 @@ static constexpr bool enable_heavy_assert = false; positions[heap.back().id] = pos; positions[e] = invalid_position; heap.pop_back(); + if (pos == size()) { + return; + } if (comp(removedKey, lastKey)) { siftUp(pos); } else { diff --git a/mt-kahypar/datastructures/sparse_pin_counts.h b/mt-kahypar/datastructures/sparse_pin_counts.h index 5eb69774d..2677d057c 100644 --- a/mt-kahypar/datastructures/sparse_pin_counts.h +++ b/mt-kahypar/datastructures/sparse_pin_counts.h @@ -34,9 +34,11 @@ #include "mt-kahypar/macros.h" #include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/array.h" +#include "mt-kahypar/datastructures/static_bitset.h" #include "mt-kahypar/datastructures/pin_count_snapshot.h" #include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" +#include "mt-kahypar/utils/range.h" namespace mt_kahypar { diff --git a/mt-kahypar/datastructures/static_graph.h b/mt-kahypar/datastructures/static_graph.h index 0d49ee9ff..18488b0b4 100644 --- a/mt-kahypar/datastructures/static_graph.h +++ b/mt-kahypar/datastructures/static_graph.h @@ -30,7 +30,7 @@ #include -#include "tbb/parallel_for.h" +#include #include "include/libmtkahypartypes.h" diff --git a/mt-kahypar/datastructures/static_graph_factory.h b/mt-kahypar/datastructures/static_graph_factory.h index 1ecc2a4bd..5610cf5ea 100644 --- a/mt-kahypar/datastructures/static_graph_factory.h +++ b/mt-kahypar/datastructures/static_graph_factory.h @@ -28,7 +28,7 @@ #pragma once -#include "tbb/enumerable_thread_specific.h" +#include #include "mt-kahypar/datastructures/static_graph.h" #include "mt-kahypar/parallel/atomic_wrapper.h" diff --git a/mt-kahypar/datastructures/static_hypergraph.h b/mt-kahypar/datastructures/static_hypergraph.h index c0184ec8c..3966d0fa3 100644 --- a/mt-kahypar/datastructures/static_hypergraph.h +++ b/mt-kahypar/datastructures/static_hypergraph.h @@ -28,7 +28,7 @@ #pragma once -#include "tbb/parallel_for.h" +#include #include "include/libmtkahypartypes.h" diff --git a/mt-kahypar/datastructures/static_hypergraph_factory.h b/mt-kahypar/datastructures/static_hypergraph_factory.h index d583035f6..6e9cde25c 100644 --- a/mt-kahypar/datastructures/static_hypergraph_factory.h +++ b/mt-kahypar/datastructures/static_hypergraph_factory.h @@ -27,7 +27,7 @@ #pragma once -#include "tbb/enumerable_thread_specific.h" +#include #include "mt-kahypar/datastructures/static_hypergraph.h" #include "mt-kahypar/parallel/atomic_wrapper.h" diff --git a/mt-kahypar/datastructures/streaming_vector.h b/mt-kahypar/datastructures/streaming_vector.h index d2137670a..55c61ae46 100644 --- a/mt-kahypar/datastructures/streaming_vector.h +++ b/mt-kahypar/datastructures/streaming_vector.h @@ -31,8 +31,8 @@ #include #include -#include "tbb/task_arena.h" -#include "tbb/task_group.h" +#include +#include #include "kahypar-resources/meta/mandatory.h" diff --git a/mt-kahypar/io/hypergraph_io.cpp b/mt-kahypar/io/hypergraph_io.cpp index 1c7705b4b..84415c449 100644 --- a/mt-kahypar/io/hypergraph_io.cpp +++ b/mt-kahypar/io/hypergraph_io.cpp @@ -46,7 +46,7 @@ #endif -#include "tbb/parallel_for.h" +#include #include "mt-kahypar/definitions.h" #include "mt-kahypar/partition/context_enum_classes.h" diff --git a/mt-kahypar/io/partitioning_output.cpp b/mt-kahypar/io/partitioning_output.cpp index 13f1f0160..806c8dc89 100644 --- a/mt-kahypar/io/partitioning_output.cpp +++ b/mt-kahypar/io/partitioning_output.cpp @@ -29,11 +29,11 @@ #include -#include "tbb/blocked_range.h" -#include "tbb/parallel_invoke.h" -#include "tbb/parallel_sort.h" -#include "tbb/parallel_reduce.h" -#include "tbb/enumerable_thread_specific.h" +#include +#include +#include +#include +#include #include "mt-kahypar/definitions.h" #include "mt-kahypar/parallel/memory_pool.h" @@ -763,7 +763,7 @@ namespace mt_kahypar::io { std::sort(internal_pins.begin(), internal_pins.end()); std::sort(internal_degree.begin(), internal_degree.end()); - auto square = [&](size_t x) { return x * x; }; + auto square = [&](double x) { return x * x; }; auto avg_and_std_dev = [&](const std::vector& v) { const double avg = std::accumulate(v.begin(), v.end(), 0.0) / static_cast(v.size()); diff --git a/mt-kahypar/macros.h b/mt-kahypar/macros.h index 0249d5a16..ceb27bc2d 100644 --- a/mt-kahypar/macros.h +++ b/mt-kahypar/macros.h @@ -30,7 +30,7 @@ #if defined(MT_KAHYPAR_LIBRARY_MODE) || \ !defined(KAHYPAR_ENABLE_THREAD_PINNING) || defined(__APPLE__) -#include "tbb/task_arena.h" +#include // If we use the C or Python interface or thread pinning is disabled, the cpu ID // to which the current thread is assigned to is not unique. We therefore use // the slot index of the current task arena as unique thread ID. Note that the @@ -63,11 +63,6 @@ #define MT_KAHYPAR_ATTRIBUTE_ALWAYS_INLINE #endif -#define HEAVY_ASSERT0(cond) \ - !(enable_heavy_assert) ? (void)0 : [&]() { ASSERT(cond); }() -#define HEAVY_ASSERT1(cond, msg) \ - !(enable_heavy_assert) ? (void)0 : [&]() { ASSERT(cond, msg); }() - #ifdef KAHYPAR_ENABLE_HEAVY_PREPROCESSING_ASSERTIONS #define HEAVY_PREPROCESSING_ASSERT_1(cond) ASSERT(cond) #define HEAVY_PREPROCESSING_ASSERT_2(cond, msg) ASSERT(cond, msg) @@ -100,9 +95,6 @@ #define HEAVY_REFINEMENT_ASSERT_2(cond, msg) HEAVY_ASSERT1(cond, msg) #endif -#define HEAVY_ASSERT_(TYPE, N) HEAVY_##TYPE##_ASSERT_##N -#define HEAVY_ASSERT_EVAL(TYPE, N) HEAVY_ASSERT_(TYPE, N) - // Heavy assertions are assertions which increase the complexity of the scope // which they are executed in by an polynomial factor. In debug mode you are // often only interested in certain phase of the multilevel paradigm. However, diff --git a/mt-kahypar/parallel/atomic_wrapper.h b/mt-kahypar/parallel/atomic_wrapper.h index e6d14a5a6..1dcd72dd3 100644 --- a/mt-kahypar/parallel/atomic_wrapper.h +++ b/mt-kahypar/parallel/atomic_wrapper.h @@ -33,6 +33,8 @@ #include #include +#include "mt-kahypar/macros.h" + template class CAtomic : public std::__atomic_base { public: diff --git a/mt-kahypar/parallel/hwloc_topology.h b/mt-kahypar/parallel/hwloc_topology.h index 03a241771..684e51f99 100644 --- a/mt-kahypar/parallel/hwloc_topology.h +++ b/mt-kahypar/parallel/hwloc_topology.h @@ -27,8 +27,12 @@ #pragma once +#include + #include +#include "kahypar-resources/macros.h" + namespace mt_kahypar { namespace parallel { /** diff --git a/mt-kahypar/parallel/memory_pool.h b/mt-kahypar/parallel/memory_pool.h index 798d0b118..0267df9d3 100644 --- a/mt-kahypar/parallel/memory_pool.h +++ b/mt-kahypar/parallel/memory_pool.h @@ -40,8 +40,8 @@ #include #endif -#include "tbb/parallel_for.h" -#include "tbb/scalable_allocator.h" +#include +#include #include "mt-kahypar/macros.h" #include "mt-kahypar/parallel/stl/scalable_unique_ptr.h" diff --git a/mt-kahypar/parallel/stl/scalable_queue.h b/mt-kahypar/parallel/stl/scalable_queue.h index 27d9f6c80..06bbc51c3 100644 --- a/mt-kahypar/parallel/stl/scalable_queue.h +++ b/mt-kahypar/parallel/stl/scalable_queue.h @@ -29,7 +29,7 @@ #include -#include "tbb/scalable_allocator.h" +#include namespace mt_kahypar { namespace parallel { diff --git a/mt-kahypar/parallel/stl/scalable_unique_ptr.h b/mt-kahypar/parallel/stl/scalable_unique_ptr.h index dedec0565..fda013c30 100644 --- a/mt-kahypar/parallel/stl/scalable_unique_ptr.h +++ b/mt-kahypar/parallel/stl/scalable_unique_ptr.h @@ -29,7 +29,7 @@ #include -#include "tbb/scalable_allocator.h" +#include namespace mt_kahypar { namespace parallel { diff --git a/mt-kahypar/parallel/stl/scalable_vector.h b/mt-kahypar/parallel/stl/scalable_vector.h index b74807692..2f0f9f2ba 100644 --- a/mt-kahypar/parallel/stl/scalable_vector.h +++ b/mt-kahypar/parallel/stl/scalable_vector.h @@ -29,9 +29,9 @@ #include -#include "tbb/parallel_for.h" -#include "tbb/parallel_invoke.h" -#include "tbb/scalable_allocator.h" +#include +#include +#include #include "mt-kahypar/macros.h" diff --git a/mt-kahypar/parallel/stl/zero_allocator.h b/mt-kahypar/parallel/stl/zero_allocator.h index 56cfa40fe..3a48051fe 100644 --- a/mt-kahypar/parallel/stl/zero_allocator.h +++ b/mt-kahypar/parallel/stl/zero_allocator.h @@ -26,7 +26,7 @@ #pragma once -#include "tbb/tbb_allocator.h" +#include namespace mt_kahypar { namespace parallel { diff --git a/mt-kahypar/parallel/tbb_initializer.h b/mt-kahypar/parallel/tbb_initializer.h index d0e50d484..0280b60d5 100644 --- a/mt-kahypar/parallel/tbb_initializer.h +++ b/mt-kahypar/parallel/tbb_initializer.h @@ -33,9 +33,9 @@ #include #include -#include "tbb/task_arena.h" -#include "tbb/task_group.h" -#include "tbb/global_control.h" +#include +#include +#include #include "mt-kahypar/macros.h" #include "mt-kahypar/parallel/thread_pinning_observer.h" diff --git a/mt-kahypar/parallel/thread_pinning_observer.h b/mt-kahypar/parallel/thread_pinning_observer.h index ce30572b1..d2e421b13 100644 --- a/mt-kahypar/parallel/thread_pinning_observer.h +++ b/mt-kahypar/parallel/thread_pinning_observer.h @@ -40,7 +40,7 @@ #undef __TBB_ARENA_OBSERVER #define __TBB_ARENA_OBSERVER true -#include "tbb/task_scheduler_observer.h" +#include #undef __TBB_ARENA_OBSERVER #include "mt-kahypar/macros.h" diff --git a/mt-kahypar/parallel/work_stack.h b/mt-kahypar/parallel/work_stack.h index 7c5ea04d8..a4d725ad3 100644 --- a/mt-kahypar/parallel/work_stack.h +++ b/mt-kahypar/parallel/work_stack.h @@ -33,6 +33,7 @@ #include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/utils/memory_tree.h" #include "mt-kahypar/utils/range.h" +#include "mt-kahypar/utils/randomize.h" namespace mt_kahypar { diff --git a/mt-kahypar/partition/coarsening/coarsening_commons.h b/mt-kahypar/partition/coarsening/coarsening_commons.h index 26b6e4d16..3e4351a64 100644 --- a/mt-kahypar/partition/coarsening/coarsening_commons.h +++ b/mt-kahypar/partition/coarsening/coarsening_commons.h @@ -30,6 +30,7 @@ #include "mt-kahypar/partition/context.h" #include "mt-kahypar/utils/timer.h" +#include "mt-kahypar/definitions.h" namespace mt_kahypar { diff --git a/mt-kahypar/partition/coarsening/deterministic_multilevel_coarsener.cpp b/mt-kahypar/partition/coarsening/deterministic_multilevel_coarsener.cpp index a627db6ca..a9575d2fe 100644 --- a/mt-kahypar/partition/coarsening/deterministic_multilevel_coarsener.cpp +++ b/mt-kahypar/partition/coarsening/deterministic_multilevel_coarsener.cpp @@ -29,6 +29,7 @@ #include #include "mt-kahypar/definitions.h" +#include "mt-kahypar/utils/hash.h" namespace mt_kahypar { @@ -148,7 +149,7 @@ void DeterministicMultilevelCoarsener::calculatePreferredTargetClust best_target = u; } else { hashing::SimpleIntHash sih; - hashing::HashRNG hash_prng(sih, u); + hashing::HashRNG> hash_prng(sih, u); size_t pos = std::uniform_int_distribution(0, best_targets.size() - 1)(hash_prng); assert(pos < best_targets.size()); best_target = best_targets[pos]; diff --git a/mt-kahypar/partition/coarsening/multilevel_coarsener.h b/mt-kahypar/partition/coarsening/multilevel_coarsener.h index a89948998..c6dfdfa58 100644 --- a/mt-kahypar/partition/coarsening/multilevel_coarsener.h +++ b/mt-kahypar/partition/coarsening/multilevel_coarsener.h @@ -29,10 +29,10 @@ #include -#include "tbb/concurrent_queue.h" -#include "tbb/task_group.h" -#include "tbb/parallel_for.h" -#include "tbb/parallel_reduce.h" +#include +#include +#include +#include #include "kahypar-resources/meta/mandatory.h" diff --git a/mt-kahypar/partition/coarsening/multilevel_uncoarsener.cpp b/mt-kahypar/partition/coarsening/multilevel_uncoarsener.cpp index 39197062f..2c95bfd25 100644 --- a/mt-kahypar/partition/coarsening/multilevel_uncoarsener.cpp +++ b/mt-kahypar/partition/coarsening/multilevel_uncoarsener.cpp @@ -26,8 +26,7 @@ * SOFTWARE. ******************************************************************************/ -#include - +#include "mt-kahypar/partition/coarsening/multilevel_uncoarsener.h" #include "mt-kahypar/definitions.h" #include "mt-kahypar/io/partitioning_output.h" #include "mt-kahypar/partition/refinement/i_refiner.h" @@ -195,7 +194,11 @@ namespace mt_kahypar { template void MultilevelUncoarsener::refineImpl() { PartitionedHypergraph& partitioned_hypergraph = *_uncoarseningData.partitioned_hg; - const double time_limit = Base::refinementTimeLimit(_context, (_uncoarseningData.hierarchy)[_current_level].coarseningTime()); + double time_limit = std::numeric_limits::max(); + if (_current_level >= 0 && _current_level != _num_levels) { + // there is a refinement run on the coarsest graph before projection. There is no value stored for this run, so we must avoid looking it up. + time_limit = Base::refinementTimeLimit(_context, (_uncoarseningData.hierarchy)[_current_level].coarseningTime()); + } if ( debug && _context.type == ContextType::main ) { io::printHypergraphInfo(partitioned_hypergraph.hypergraph(), diff --git a/mt-kahypar/partition/coarsening/multilevel_vertex_pair_rater.h b/mt-kahypar/partition/coarsening/multilevel_vertex_pair_rater.h index b8a11223f..c99c17cb7 100644 --- a/mt-kahypar/partition/coarsening/multilevel_vertex_pair_rater.h +++ b/mt-kahypar/partition/coarsening/multilevel_vertex_pair_rater.h @@ -32,7 +32,7 @@ #include #include -#include "tbb/enumerable_thread_specific.h" +#include #include "kahypar-resources/datastructure/fast_reset_flag_array.h" #include "kahypar-resources/meta/mandatory.h" diff --git a/mt-kahypar/partition/coarsening/nlevel_coarsener.h b/mt-kahypar/partition/coarsening/nlevel_coarsener.h index ad11f2b66..d05e75495 100644 --- a/mt-kahypar/partition/coarsening/nlevel_coarsener.h +++ b/mt-kahypar/partition/coarsening/nlevel_coarsener.h @@ -28,9 +28,9 @@ #include -#include "tbb/parallel_for.h" -#include "tbb/parallel_sort.h" -#include "tbb/parallel_scan.h" +#include +#include +#include #include "include/libmtkahypartypes.h" diff --git a/mt-kahypar/partition/coarsening/nlevel_coarsener_base.h b/mt-kahypar/partition/coarsening/nlevel_coarsener_base.h index 9543b2616..c61956794 100644 --- a/mt-kahypar/partition/coarsening/nlevel_coarsener_base.h +++ b/mt-kahypar/partition/coarsening/nlevel_coarsener_base.h @@ -26,7 +26,7 @@ #pragma once -#include "tbb/task_group.h" +#include #include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/metrics.h" @@ -34,7 +34,7 @@ #include "mt-kahypar/partition/refinement/flows/i_flow_refiner.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/utils/utilities.h" -#include +#include "mt-kahypar/partition/coarsening/coarsening_commons.h" namespace mt_kahypar { diff --git a/mt-kahypar/partition/coarsening/nlevel_vertex_pair_rater.h b/mt-kahypar/partition/coarsening/nlevel_vertex_pair_rater.h index 0dc381fd9..5c8e79500 100644 --- a/mt-kahypar/partition/coarsening/nlevel_vertex_pair_rater.h +++ b/mt-kahypar/partition/coarsening/nlevel_vertex_pair_rater.h @@ -31,7 +31,7 @@ #include #include -#include "tbb/enumerable_thread_specific.h" +#include #include "kahypar-resources/datastructure/fast_reset_flag_array.h" #include "kahypar-resources/meta/mandatory.h" diff --git a/mt-kahypar/partition/deep_multilevel.cpp b/mt-kahypar/partition/deep_multilevel.cpp index d200a496b..c543629d3 100644 --- a/mt-kahypar/partition/deep_multilevel.cpp +++ b/mt-kahypar/partition/deep_multilevel.cpp @@ -31,7 +31,7 @@ #include #include -#include "tbb/parallel_for.h" +#include #include "mt-kahypar/definitions.h" #include "mt-kahypar/macros.h" diff --git a/mt-kahypar/partition/initial_partitioning/initial_partitioning_commons.h b/mt-kahypar/partition/initial_partitioning/initial_partitioning_commons.h index 7fe8d5978..1c68fefd5 100644 --- a/mt-kahypar/partition/initial_partitioning/initial_partitioning_commons.h +++ b/mt-kahypar/partition/initial_partitioning/initial_partitioning_commons.h @@ -33,7 +33,7 @@ #include "kahypar-resources/datastructure/fast_reset_flag_array.h" -#include "tbb/enumerable_thread_specific.h" +#include #include "mt-kahypar/datastructures/hypergraph_common.h" diff --git a/mt-kahypar/partition/initial_partitioning/initial_partitioning_data_container.h b/mt-kahypar/partition/initial_partitioning/initial_partitioning_data_container.h index 7aa4b1294..e4de87c10 100644 --- a/mt-kahypar/partition/initial_partitioning/initial_partitioning_data_container.h +++ b/mt-kahypar/partition/initial_partitioning/initial_partitioning_data_container.h @@ -29,7 +29,7 @@ #include #include -#include "tbb/enumerable_thread_specific.h" +#include #include "mt-kahypar/partition/initial_partitioning/initial_partitioning_commons.h" diff --git a/mt-kahypar/partition/initial_partitioning/policies/gain_computation_policy.h b/mt-kahypar/partition/initial_partitioning/policies/gain_computation_policy.h index 488f0b554..5e8550de7 100644 --- a/mt-kahypar/partition/initial_partitioning/policies/gain_computation_policy.h +++ b/mt-kahypar/partition/initial_partitioning/policies/gain_computation_policy.h @@ -26,7 +26,7 @@ #pragma once -#include "tbb/task.h" +#include #include "mt-kahypar/partition/initial_partitioning/initial_partitioning_commons.h" diff --git a/mt-kahypar/partition/initial_partitioning/policies/pq_selection_policy.h b/mt-kahypar/partition/initial_partitioning/policies/pq_selection_policy.h index 9ca48638f..085242503 100644 --- a/mt-kahypar/partition/initial_partitioning/policies/pq_selection_policy.h +++ b/mt-kahypar/partition/initial_partitioning/policies/pq_selection_policy.h @@ -26,7 +26,7 @@ #pragma once -#include "tbb/task.h" +#include #include "mt-kahypar/partition/initial_partitioning/initial_partitioning_commons.h" diff --git a/mt-kahypar/partition/initial_partitioning/policies/pseudo_peripheral_start_nodes.h b/mt-kahypar/partition/initial_partitioning/policies/pseudo_peripheral_start_nodes.h index c8f5784c4..da308eef3 100644 --- a/mt-kahypar/partition/initial_partitioning/policies/pseudo_peripheral_start_nodes.h +++ b/mt-kahypar/partition/initial_partitioning/policies/pseudo_peripheral_start_nodes.h @@ -26,7 +26,7 @@ #pragma once -#include "tbb/task.h" +#include #include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/parallel/stl/scalable_queue.h" diff --git a/mt-kahypar/partition/initial_partitioning/pool_initial_partitioner.cpp b/mt-kahypar/partition/initial_partitioning/pool_initial_partitioner.cpp index 8f0c235a3..bd7b6f6ef 100644 --- a/mt-kahypar/partition/initial_partitioning/pool_initial_partitioner.cpp +++ b/mt-kahypar/partition/initial_partitioning/pool_initial_partitioner.cpp @@ -26,7 +26,7 @@ #include "pool_initial_partitioner.h" -#include "tbb/task_group.h" +#include #include "mt-kahypar/definitions.h" #include "mt-kahypar/partition/factories.h" diff --git a/mt-kahypar/partition/initial_partitioning/pool_initial_partitioner.h b/mt-kahypar/partition/initial_partitioning/pool_initial_partitioner.h index 8de09914a..1eb51c18e 100644 --- a/mt-kahypar/partition/initial_partitioning/pool_initial_partitioner.h +++ b/mt-kahypar/partition/initial_partitioning/pool_initial_partitioner.h @@ -26,10 +26,10 @@ #pragma once -#include "tbb/task.h" +#include #include "include/libmtkahypartypes.h" - +#include "mt-kahypar/partition/context.h" namespace mt_kahypar { diff --git a/mt-kahypar/partition/mapping/initial_mapping.cpp b/mt-kahypar/partition/mapping/initial_mapping.cpp index eda22ffdb..7a2ee3730 100644 --- a/mt-kahypar/partition/mapping/initial_mapping.cpp +++ b/mt-kahypar/partition/mapping/initial_mapping.cpp @@ -26,7 +26,7 @@ #include "mt-kahypar/partition/mapping/initial_mapping.h" -#include "tbb/parallel_invoke.h" +#include #include "mt-kahypar/definitions.h" #include "mt-kahypar/partition/mapping/target_graph.h" diff --git a/mt-kahypar/partition/mapping/target_graph.h b/mt-kahypar/partition/mapping/target_graph.h index c02f9213a..c16488aa4 100644 --- a/mt-kahypar/partition/mapping/target_graph.h +++ b/mt-kahypar/partition/mapping/target_graph.h @@ -30,7 +30,7 @@ #include #include -#include "tbb/enumerable_thread_specific.h" +#include #ifdef __linux__ #pragma GCC diagnostic push @@ -40,7 +40,7 @@ #include "data-structures/table_config.hpp" #pragma GCC diagnostic pop #elif defined(_WIN32) or defined(__APPLE__) -#include "tbb/concurrent_unordered_map.h" +#include #endif #include "mt-kahypar/macros.h" @@ -299,9 +299,39 @@ class TargetGraph { return 0; } + + HyperedgeWeight distance(const ds::Bitset&) const { + return 0; + } + + HyperedgeWeight distance(const PartitionID, const PartitionID) const { + return 0; + } + + HyperedgeWeight distanceWithBlock(ds::Bitset&, const PartitionID) const { + return 0; + } + + HyperedgeWeight distanceWithoutBlock(ds::Bitset&, const PartitionID) const { + return 0; + } + + HyperedgeWeight distanceAfterExchangingBlocks(ds::Bitset&, const PartitionID, const PartitionID) const { + return 0; + } + + + const ds::StaticGraph& graph() const { + return _dummy_graph; + } + void printStats() const { } void printStats(std::stringstream&) const { } + + private: + + ds::StaticGraph _dummy_graph; }; #endif diff --git a/mt-kahypar/partition/multilevel.cpp b/mt-kahypar/partition/multilevel.cpp index 124d1ffea..8057d8b1c 100644 --- a/mt-kahypar/partition/multilevel.cpp +++ b/mt-kahypar/partition/multilevel.cpp @@ -29,7 +29,7 @@ #include -#include "tbb/task.h" +#include #include "include/libmtkahypartypes.h" diff --git a/mt-kahypar/partition/partitioner.cpp b/mt-kahypar/partition/partitioner.cpp index 66200838a..b0bba00ae 100644 --- a/mt-kahypar/partition/partitioner.cpp +++ b/mt-kahypar/partition/partitioner.cpp @@ -27,8 +27,8 @@ #include "partitioner.h" -#include "tbb/parallel_sort.h" -#include "tbb/parallel_reduce.h" +#include +#include #include "mt-kahypar/definitions.h" #include "mt-kahypar/io/partitioning_output.h" diff --git a/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.h b/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.h index 6a05c3f14..cceaf9ad8 100644 --- a/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.h +++ b/mt-kahypar/partition/preprocessing/community_detection/local_moving_modularity.h @@ -39,9 +39,6 @@ #include "mt-kahypar/utils/randomize.h" #include "mt-kahypar/utils/reproducible_random.h" - -#include "gtest/gtest_prod.h" - namespace mt_kahypar::metrics { template double modularity(const Graph& graph, const ds::Clustering& communities); @@ -80,7 +77,7 @@ class ParallelLocalMovingModularity { size_t parallelNonDeterministicRound(const Graph& graph, ds::Clustering& communities); size_t synchronousParallelRound(const Graph& graph, ds::Clustering& communities); size_t sequentialRound(const Graph& graph, ds::Clustering& communities); - +public: struct ClearList { vec weights; vec used; @@ -99,6 +96,8 @@ class ParallelLocalMovingModularity { return LargeIncidentClusterWeights(3UL * std::min(_max_degree, _vertex_degree_sampling_threshold), 0); } +public: + // ! Only for testing void initializeClusterVolumes(const Graph& graph, ds::Clustering& communities); @@ -154,6 +153,7 @@ class ParallelLocalMovingModularity { return bestCluster; } +private: inline double modularityGain(const ArcWeight weight_to, const ArcWeight volume_to, @@ -197,18 +197,5 @@ class ParallelLocalMovingModularity { } }; ds::BufferedVector volume_updates_to, volume_updates_from; - - - - FRIEND_TEST(ALouvain, ComputesMaxGainMove1); - FRIEND_TEST(ALouvain, ComputesMaxGainMove2); - FRIEND_TEST(ALouvain, ComputesMaxGainMove3); - FRIEND_TEST(ALouvain, ComputesMaxGainMove4); - FRIEND_TEST(ALouvain, ComputesMaxGainMove5); - FRIEND_TEST(ALouvain, ComputesMaxGainMove6); - FRIEND_TEST(ALouvain, ComputesMaxGainMove7); - FRIEND_TEST(ALouvain, ComputesMaxGainMove8); - FRIEND_TEST(ALouvain, ComputesMaxGainMove9); - FRIEND_TEST(ALouvain, ComputesMaxGainMove10); }; } diff --git a/mt-kahypar/partition/preprocessing/sparsification/degree_zero_hn_remover.h b/mt-kahypar/partition/preprocessing/sparsification/degree_zero_hn_remover.h index 44655ea88..f8aa7f665 100644 --- a/mt-kahypar/partition/preprocessing/sparsification/degree_zero_hn_remover.h +++ b/mt-kahypar/partition/preprocessing/sparsification/degree_zero_hn_remover.h @@ -27,7 +27,7 @@ #pragma once -#include "tbb/parallel_sort.h" +#include #include "mt-kahypar/partition/context.h" #include "mt-kahypar/datastructures/streaming_vector.h" diff --git a/mt-kahypar/partition/recursive_bipartitioning.cpp b/mt-kahypar/partition/recursive_bipartitioning.cpp index 82fae1cfb..c6811cdea 100644 --- a/mt-kahypar/partition/recursive_bipartitioning.cpp +++ b/mt-kahypar/partition/recursive_bipartitioning.cpp @@ -27,7 +27,7 @@ #include "mt-kahypar/partition/recursive_bipartitioning.h" -#include "tbb/task_group.h" +#include #include #include diff --git a/mt-kahypar/partition/refinement/do_nothing_refiner.h b/mt-kahypar/partition/refinement/do_nothing_refiner.h index e86a77cf0..37ca77bc7 100644 --- a/mt-kahypar/partition/refinement/do_nothing_refiner.h +++ b/mt-kahypar/partition/refinement/do_nothing_refiner.h @@ -31,7 +31,7 @@ #include #include -#include "mt-kahypar/partition/refinement/i_refiner.h" +#include "mt-kahypar/partition/refinement/i_rebalancer.h" namespace mt_kahypar { class DoNothingRefiner final : public IRebalancer { diff --git a/mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.cpp b/mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.cpp index 732b4816f..2963906ac 100644 --- a/mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.cpp +++ b/mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.cpp @@ -27,11 +27,11 @@ #include "mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.h" -#include "tbb/blocked_range.h" -#include "tbb/parallel_invoke.h" -#include "tbb/parallel_reduce.h" -#include "tbb/parallel_scan.h" -#include "tbb/parallel_for.h" +#include +#include +#include +#include +#include namespace mt_kahypar { diff --git a/mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.h b/mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.h index 08daf5342..12eae177f 100644 --- a/mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.h +++ b/mt-kahypar/partition/refinement/flows/flow_hypergraph_builder.h @@ -28,7 +28,7 @@ #pragma once #include "datastructure/flow_hypergraph.h" - +#include "mt-kahypar/parallel/stl/scalable_vector.h" #include "mt-kahypar/macros.h" namespace mt_kahypar { diff --git a/mt-kahypar/partition/refinement/flows/flow_refiner.cpp b/mt-kahypar/partition/refinement/flows/flow_refiner.cpp index 35b5b475d..806e3b7ce 100644 --- a/mt-kahypar/partition/refinement/flows/flow_refiner.cpp +++ b/mt-kahypar/partition/refinement/flows/flow_refiner.cpp @@ -29,7 +29,7 @@ #include "mt-kahypar/partition/refinement/gains/gain_definitions.h" #include "mt-kahypar/utils/utilities.h" -#include "tbb/concurrent_queue.h" +#include #include "mt-kahypar/definitions.h" diff --git a/mt-kahypar/partition/refinement/flows/parallel_construction.cpp b/mt-kahypar/partition/refinement/flows/parallel_construction.cpp index 7f9cb7915..7bedf9a57 100644 --- a/mt-kahypar/partition/refinement/flows/parallel_construction.cpp +++ b/mt-kahypar/partition/refinement/flows/parallel_construction.cpp @@ -28,7 +28,7 @@ #include "kahypar-resources/utils/math.h" -#include "tbb/concurrent_queue.h" +#include #include "mt-kahypar/definitions.h" #include "mt-kahypar/parallel/stl/scalable_queue.h" diff --git a/mt-kahypar/partition/refinement/flows/parallel_construction.h b/mt-kahypar/partition/refinement/flows/parallel_construction.h index a49713673..b27ac7de2 100644 --- a/mt-kahypar/partition/refinement/flows/parallel_construction.h +++ b/mt-kahypar/partition/refinement/flows/parallel_construction.h @@ -27,7 +27,7 @@ #pragma once #include -#include "tbb/enumerable_thread_specific.h" +#include #include "algorithm/hyperflowcutter.h" #include "algorithm/parallel_push_relabel.h" diff --git a/mt-kahypar/partition/refinement/flows/problem_construction.cpp b/mt-kahypar/partition/refinement/flows/problem_construction.cpp index c057a0eff..885d9b047 100644 --- a/mt-kahypar/partition/refinement/flows/problem_construction.cpp +++ b/mt-kahypar/partition/refinement/flows/problem_construction.cpp @@ -29,7 +29,7 @@ #include -#include "tbb/parallel_for.h" +#include #include "mt-kahypar/definitions.h" #include "mt-kahypar/partition/mapping/target_graph.h" diff --git a/mt-kahypar/partition/refinement/flows/problem_construction.h b/mt-kahypar/partition/refinement/flows/problem_construction.h index c36cdd80c..863785c2d 100644 --- a/mt-kahypar/partition/refinement/flows/problem_construction.h +++ b/mt-kahypar/partition/refinement/flows/problem_construction.h @@ -26,7 +26,7 @@ #pragma once -#include "tbb/enumerable_thread_specific.h" +#include #include "mt-kahypar/partition/context.h" #include "mt-kahypar/datastructures/sparse_map.h" diff --git a/mt-kahypar/partition/refinement/flows/quotient_graph.cpp b/mt-kahypar/partition/refinement/flows/quotient_graph.cpp index be8833a3f..ea5a71415 100644 --- a/mt-kahypar/partition/refinement/flows/quotient_graph.cpp +++ b/mt-kahypar/partition/refinement/flows/quotient_graph.cpp @@ -29,7 +29,7 @@ #include -#include "tbb/parallel_sort.h" +#include #include "mt-kahypar/definitions.h" #include "mt-kahypar/datastructures/sparse_map.h" diff --git a/mt-kahypar/partition/refinement/flows/quotient_graph.h b/mt-kahypar/partition/refinement/flows/quotient_graph.h index ebafd6b99..a3dbc0fd4 100644 --- a/mt-kahypar/partition/refinement/flows/quotient_graph.h +++ b/mt-kahypar/partition/refinement/flows/quotient_graph.h @@ -26,9 +26,9 @@ #pragma once -#include "tbb/concurrent_queue.h" -#include "tbb/concurrent_vector.h" -#include "tbb/enumerable_thread_specific.h" +#include +#include +#include #include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/refinement/flows/refiner_adapter.h" diff --git a/mt-kahypar/partition/refinement/flows/refiner_adapter.h b/mt-kahypar/partition/refinement/flows/refiner_adapter.h index bd2a6dd4a..d989adaeb 100644 --- a/mt-kahypar/partition/refinement/flows/refiner_adapter.h +++ b/mt-kahypar/partition/refinement/flows/refiner_adapter.h @@ -25,8 +25,8 @@ ******************************************************************************/ #pragma once -#include "tbb/concurrent_vector.h" -#include "tbb/concurrent_queue.h" +#include +#include #include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/refinement/flows/i_flow_refiner.h" diff --git a/mt-kahypar/partition/refinement/fm/fm_commons.h b/mt-kahypar/partition/refinement/fm/fm_commons.h index 97ee1f34b..c6830b264 100644 --- a/mt-kahypar/partition/refinement/fm/fm_commons.h +++ b/mt-kahypar/partition/refinement/fm/fm_commons.h @@ -28,10 +28,10 @@ #include -#include -#include -#include -#include +#include "mt-kahypar/datastructures/concurrent_bucket_map.h" +#include "mt-kahypar/datastructures/priority_queue.h" +#include "mt-kahypar/partition/context.h" +#include "mt-kahypar/parallel/work_stack.h" #include "kahypar-resources/datastructure/fast_reset_flag_array.h" diff --git a/mt-kahypar/partition/refinement/fm/global_rollback.cpp b/mt-kahypar/partition/refinement/fm/global_rollback.cpp index 143b611ba..6931d491a 100644 --- a/mt-kahypar/partition/refinement/fm/global_rollback.cpp +++ b/mt-kahypar/partition/refinement/fm/global_rollback.cpp @@ -26,7 +26,7 @@ #include "mt-kahypar/partition/refinement/fm/global_rollback.h" -#include "tbb/parallel_scan.h" +#include #include "mt-kahypar/definitions.h" #include "mt-kahypar/partition/metrics.h" @@ -434,9 +434,6 @@ namespace mt_kahypar { } }); - - size_t num_unbalanced_slots = 0; - size_t overloaded = 0; for (PartitionID i = 0; i < context.partition.k; ++i) { if (phg.partWeight(i) > maxPartWeights[i]) { @@ -465,10 +462,6 @@ namespace mt_kahypar { overloaded++; } - if (overloaded > 0) { - num_unbalanced_slots++; - } - if (overloaded == 0 && gain_sum > best_gain) { best_index = localMoveID + 1; best_gain = gain_sum; diff --git a/mt-kahypar/partition/refinement/fm/global_rollback.h b/mt-kahypar/partition/refinement/fm/global_rollback.h index 950412ad5..6c801ee36 100644 --- a/mt-kahypar/partition/refinement/fm/global_rollback.h +++ b/mt-kahypar/partition/refinement/fm/global_rollback.h @@ -26,7 +26,7 @@ #pragma once -#include "tbb/parallel_invoke.h" +#include #include "mt-kahypar/partition/refinement/fm/fm_commons.h" diff --git a/mt-kahypar/partition/refinement/fm/localized_kway_fm_core.h b/mt-kahypar/partition/refinement/fm/localized_kway_fm_core.h index aaf009944..c23be30a7 100644 --- a/mt-kahypar/partition/refinement/fm/localized_kway_fm_core.h +++ b/mt-kahypar/partition/refinement/fm/localized_kway_fm_core.h @@ -28,8 +28,8 @@ #pragma once -#include -#include +#include "mt-kahypar/partition/context.h" +#include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/datastructures/sparse_map.h" #include "mt-kahypar/partition/refinement/fm/fm_commons.h" diff --git a/mt-kahypar/partition/refinement/fm/sequential_twoway_fm_refiner.cpp b/mt-kahypar/partition/refinement/fm/sequential_twoway_fm_refiner.cpp index 568cb29f9..6b10bdde0 100644 --- a/mt-kahypar/partition/refinement/fm/sequential_twoway_fm_refiner.cpp +++ b/mt-kahypar/partition/refinement/fm/sequential_twoway_fm_refiner.cpp @@ -220,7 +220,7 @@ void SequentialTwoWayFmRefiner::deltaGainUpdate(const HyperedgeID he if (_phg.edgeSize(he) == 2) { for (const HypernodeID& pin : _phg.pins(he)) { if ( _vertex_state[pin] == VertexState::ACTIVE ) { - const char factor = (_phg.partID(pin) == from ? 2 : -2); + const int factor = (_phg.partID(pin) == from ? 2 : -2); updatePin(pin, factor * he_weight); } } diff --git a/mt-kahypar/partition/refinement/fm/stop_rule.h b/mt-kahypar/partition/refinement/fm/stop_rule.h index 4e2dbf1bd..bf3ac4cb1 100644 --- a/mt-kahypar/partition/refinement/fm/stop_rule.h +++ b/mt-kahypar/partition/refinement/fm/stop_rule.h @@ -27,7 +27,7 @@ #pragma once -#include +#include "mt-kahypar/datastructures/hypergraph_common.h" namespace mt_kahypar { diff --git a/mt-kahypar/partition/refinement/fm/strategies/i_fm_strategy.h b/mt-kahypar/partition/refinement/fm/strategies/i_fm_strategy.h index 303aa71ed..a3cc32da8 100644 --- a/mt-kahypar/partition/refinement/fm/strategies/i_fm_strategy.h +++ b/mt-kahypar/partition/refinement/fm/strategies/i_fm_strategy.h @@ -32,6 +32,7 @@ #include "mt-kahypar/macros.h" #include "mt-kahypar/definitions.h" #include "mt-kahypar/utils/cast.h" +#include "mt-kahypar/partition/refinement/fm/fm_commons.h" namespace mt_kahypar { diff --git a/mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.cpp b/mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.cpp index 50ac8c25e..7d0b09b8a 100644 --- a/mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.cpp +++ b/mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.cpp @@ -26,9 +26,9 @@ #include "mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.h" -#include "tbb/parallel_for.h" -#include "tbb/enumerable_thread_specific.h" -#include "tbb/concurrent_vector.h" +#include +#include +#include #include "mt-kahypar/definitions.h" diff --git a/mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.h b/mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.h index 4b6bb5e47..11ca7ce66 100644 --- a/mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.h +++ b/mt-kahypar/partition/refinement/gains/cut/cut_gain_cache.h @@ -35,6 +35,7 @@ #include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/macros.h" #include "mt-kahypar/utils/range.h" +#include "mt-kahypar/partition/context.h" namespace mt_kahypar { diff --git a/mt-kahypar/partition/refinement/gains/cut/cut_gain_computation.h b/mt-kahypar/partition/refinement/gains/cut/cut_gain_computation.h index 7af5970d0..c57f1fcb6 100644 --- a/mt-kahypar/partition/refinement/gains/cut/cut_gain_computation.h +++ b/mt-kahypar/partition/refinement/gains/cut/cut_gain_computation.h @@ -28,7 +28,7 @@ #include -#include "tbb/enumerable_thread_specific.h" +#include #include "mt-kahypar/partition/refinement/gains/gain_computation_base.h" #include "mt-kahypar/partition/refinement/gains/cut/cut_attributed_gains.h" diff --git a/mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.cpp b/mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.cpp index da1244a14..6aaf12705 100644 --- a/mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.cpp +++ b/mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.cpp @@ -26,9 +26,9 @@ #include "mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.h" -#include "tbb/parallel_for.h" -#include "tbb/enumerable_thread_specific.h" -#include "tbb/concurrent_vector.h" +#include +#include +#include #include "mt-kahypar/definitions.h" diff --git a/mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.h b/mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.h index d98ee8ff0..ec64934f2 100644 --- a/mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.h +++ b/mt-kahypar/partition/refinement/gains/cut_for_graphs/cut_gain_cache_for_graphs.h @@ -35,6 +35,7 @@ #include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/macros.h" #include "mt-kahypar/utils/range.h" +#include "mt-kahypar/partition/context.h" namespace mt_kahypar { diff --git a/mt-kahypar/partition/refinement/gains/gain_computation_base.h b/mt-kahypar/partition/refinement/gains/gain_computation_base.h index dd7833630..a09e1afd8 100644 --- a/mt-kahypar/partition/refinement/gains/gain_computation_base.h +++ b/mt-kahypar/partition/refinement/gains/gain_computation_base.h @@ -30,11 +30,12 @@ #include "kahypar-resources/meta/mandatory.h" -#include "tbb/enumerable_thread_specific.h" +#include #include "mt-kahypar/partition/metrics.h" #include "mt-kahypar/partition/context.h" #include "mt-kahypar/utils/randomize.h" +#include "mt-kahypar/datastructures/sparse_map.h" namespace mt_kahypar { diff --git a/mt-kahypar/partition/refinement/gains/km1/km1_gain_cache.cpp b/mt-kahypar/partition/refinement/gains/km1/km1_gain_cache.cpp index 3f9812d38..a90c61544 100644 --- a/mt-kahypar/partition/refinement/gains/km1/km1_gain_cache.cpp +++ b/mt-kahypar/partition/refinement/gains/km1/km1_gain_cache.cpp @@ -26,9 +26,9 @@ #include "mt-kahypar/partition/refinement/gains/km1/km1_gain_cache.h" -#include "tbb/parallel_for.h" -#include "tbb/enumerable_thread_specific.h" -#include "tbb/concurrent_vector.h" +#include +#include +#include #include "mt-kahypar/definitions.h" diff --git a/mt-kahypar/partition/refinement/gains/km1/km1_gain_cache.h b/mt-kahypar/partition/refinement/gains/km1/km1_gain_cache.h index 36d397a77..e4fb4afa4 100644 --- a/mt-kahypar/partition/refinement/gains/km1/km1_gain_cache.h +++ b/mt-kahypar/partition/refinement/gains/km1/km1_gain_cache.h @@ -37,6 +37,7 @@ #include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/macros.h" #include "mt-kahypar/utils/range.h" +#include "mt-kahypar/partition/context.h" namespace mt_kahypar { diff --git a/mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.cpp b/mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.cpp index 5c4d010ac..9c83d2545 100644 --- a/mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.cpp +++ b/mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.cpp @@ -26,9 +26,9 @@ #include "mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.h" -#include "tbb/parallel_for.h" -#include "tbb/enumerable_thread_specific.h" -#include "tbb/concurrent_vector.h" +#include +#include +#include #include "mt-kahypar/definitions.h" diff --git a/mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.h b/mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.h index 4fcc30a6e..cfd13135b 100644 --- a/mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.h +++ b/mt-kahypar/partition/refinement/gains/soed/soed_gain_cache.h @@ -37,6 +37,7 @@ #include "mt-kahypar/parallel/atomic_wrapper.h" #include "mt-kahypar/macros.h" #include "mt-kahypar/utils/range.h" +#include "mt-kahypar/partition/context.h" namespace mt_kahypar { diff --git a/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.cpp b/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.cpp index ebd0dd195..0bfd31fee 100644 --- a/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.cpp +++ b/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.cpp @@ -26,9 +26,9 @@ #include "mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.h" -#include "tbb/parallel_for.h" -#include "tbb/enumerable_thread_specific.h" -#include "tbb/concurrent_vector.h" +#include +#include +#include #include "mt-kahypar/definitions.h" diff --git a/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.h b/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.h index 2e19423cf..972d72e76 100644 --- a/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.h +++ b/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_cache.h @@ -30,7 +30,7 @@ #include "kahypar-resources/meta/policy_registry.h" -#include "tbb/parallel_invoke.h" +#include #include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/mapping/target_graph.h" diff --git a/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_computation.h b/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_computation.h index 742cc0892..cabd6ebbf 100644 --- a/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_computation.h +++ b/mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_gain_computation.h @@ -28,7 +28,7 @@ #include -#include "tbb/enumerable_thread_specific.h" +#include #include "mt-kahypar/partition/refinement/gains/gain_computation_base.h" #include "mt-kahypar/partition/refinement/gains/steiner_tree/steiner_tree_attributed_gains.h" diff --git a/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.cpp b/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.cpp index 0d809762f..27ce1c04d 100644 --- a/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.cpp +++ b/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.cpp @@ -26,9 +26,9 @@ #include "mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.h" -#include "tbb/parallel_for.h" -#include "tbb/enumerable_thread_specific.h" -#include "tbb/concurrent_vector.h" +#include +#include +#include #include "mt-kahypar/definitions.h" diff --git a/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.h b/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.h index 8e3e61782..f06c9394b 100644 --- a/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.h +++ b/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_cache_for_graphs.h @@ -30,10 +30,11 @@ #include "kahypar-resources/meta/policy_registry.h" -#include "tbb/parallel_invoke.h" -#include "tbb/concurrent_vector.h" +#include +#include #include "mt-kahypar/partition/context_enum_classes.h" +#include "mt-kahypar/partition/context.h" #include "mt-kahypar/partition/mapping/target_graph.h" #include "mt-kahypar/datastructures/hypergraph_common.h" #include "mt-kahypar/datastructures/array.h" diff --git a/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_computation_for_graphs.h b/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_computation_for_graphs.h index 0d5700ec6..4d33756c1 100644 --- a/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_computation_for_graphs.h +++ b/mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_gain_computation_for_graphs.h @@ -28,7 +28,7 @@ #include -#include "tbb/enumerable_thread_specific.h" +#include #include "mt-kahypar/partition/refinement/gains/gain_computation_base.h" #include "mt-kahypar/partition/refinement/gains/steiner_tree_for_graphs/steiner_tree_attributed_gains_for_graphs.h" diff --git a/mt-kahypar/partition/refinement/label_propagation/label_propagation_refiner.cpp b/mt-kahypar/partition/refinement/label_propagation/label_propagation_refiner.cpp index ec32e79d7..cc5c33d8c 100644 --- a/mt-kahypar/partition/refinement/label_propagation/label_propagation_refiner.cpp +++ b/mt-kahypar/partition/refinement/label_propagation/label_propagation_refiner.cpp @@ -27,7 +27,7 @@ #include "mt-kahypar/partition/refinement/label_propagation/label_propagation_refiner.h" -#include "tbb/parallel_for.h" +#include #include "mt-kahypar/definitions.h" #include "mt-kahypar/partition/metrics.h" diff --git a/mt-kahypar/partition/refinement/rebalancing/advanced_rebalancer.cpp b/mt-kahypar/partition/refinement/rebalancing/advanced_rebalancer.cpp index 0a04165c0..21e463c43 100644 --- a/mt-kahypar/partition/refinement/rebalancing/advanced_rebalancer.cpp +++ b/mt-kahypar/partition/refinement/rebalancing/advanced_rebalancer.cpp @@ -28,13 +28,12 @@ #include #include +#include #include "mt-kahypar/partition/refinement/gains/gain_definitions.h" #include "mt-kahypar/utils/cast.h" #include "mt-kahypar/partition/context.h" -#include "pcg_random.hpp" - namespace mt_kahypar { namespace impl { @@ -119,7 +118,7 @@ namespace impl { return result; } - pcg32 rng; + std::mt19937 rng; std::uniform_int_distribution dist; }; @@ -323,7 +322,8 @@ namespace impl { const int seed = phg.initialNumNodes() + task_id; - impl::NextMoveFinder next_move_finder(seed, _context, phg, _gain_cache, _pqs, _target_part, _node_state); + impl::NextMoveFinder next_move_finder( + seed, _context, phg, _gain_cache, _pqs, _target_part, _node_state); while (num_overloaded_blocks > 0 && next_move_finder.findNextMove()) { const Move& m = next_move_finder.next_move; diff --git a/mt-kahypar/partition/refinement/rebalancing/simple_rebalancer.cpp b/mt-kahypar/partition/refinement/rebalancing/simple_rebalancer.cpp index 26285fd32..2d46e346b 100644 --- a/mt-kahypar/partition/refinement/rebalancing/simple_rebalancer.cpp +++ b/mt-kahypar/partition/refinement/rebalancing/simple_rebalancer.cpp @@ -31,8 +31,8 @@ #include #include -#include "tbb/enumerable_thread_specific.h" -#include "tbb/parallel_for.h" +#include +#include #include "mt-kahypar/definitions.h" #include "mt-kahypar/partition/metrics.h" diff --git a/mt-kahypar/utils/bit_ops.h b/mt-kahypar/utils/bit_ops.h index 09da04b3d..2e95c82f1 100644 --- a/mt-kahypar/utils/bit_ops.h +++ b/mt-kahypar/utils/bit_ops.h @@ -27,6 +27,8 @@ #pragma once +#include + namespace mt_kahypar::utils { inline int popcount_64(const uint64_t x) { diff --git a/mt-kahypar/utils/hash.h b/mt-kahypar/utils/hash.h index e484bcb78..f2358c5c7 100644 --- a/mt-kahypar/utils/hash.h +++ b/mt-kahypar/utils/hash.h @@ -31,6 +31,7 @@ #include #include +#include "mt-kahypar/macros.h" namespace mt_kahypar::hashing { diff --git a/mt-kahypar/utils/hypergraph_statistics.h b/mt-kahypar/utils/hypergraph_statistics.h index 01f5860a5..6f28b6f95 100644 --- a/mt-kahypar/utils/hypergraph_statistics.h +++ b/mt-kahypar/utils/hypergraph_statistics.h @@ -25,9 +25,13 @@ * SOFTWARE. ******************************************************************************/ +#include #include -#include "tbb/parallel_reduce.h" +#include +#include + +#include "mt-kahypar/macros.h" namespace mt_kahypar { namespace utils { diff --git a/mt-kahypar/utils/progress_bar.h b/mt-kahypar/utils/progress_bar.h index 8a2db7e76..8395ea64a 100644 --- a/mt-kahypar/utils/progress_bar.h +++ b/mt-kahypar/utils/progress_bar.h @@ -50,7 +50,7 @@ class ProgressBar { public: explicit ProgressBar(const size_t expected_count, - const HyperedgeWeight objective, + const int64_t objective, const bool enable = true) : _display_mutex(), _count(0), @@ -105,11 +105,11 @@ class ProgressBar { return _count; } - void setObjective(const HyperedgeWeight objective) { + void setObjective(const int64_t objective) { _objective = objective; } - void addToObjective(const HyperedgeWeight delta) { + void addToObjective(const int64_t delta) { __atomic_fetch_add(&_objective, delta, __ATOMIC_RELAXED); } @@ -191,7 +191,7 @@ class ProgressBar { std::atomic _next_tic_count; size_t _expected_count; HighResClockTimepoint _start; - HyperedgeWeight _objective; + int64_t _objective; size_t _progress_bar_size; bool _enable; }; diff --git a/mt-kahypar/utils/randomize.h b/mt-kahypar/utils/randomize.h index 60be2a50d..f4ac64b40 100644 --- a/mt-kahypar/utils/randomize.h +++ b/mt-kahypar/utils/randomize.h @@ -32,8 +32,8 @@ #include #include -#include "tbb/task_group.h" -#include "tbb/parallel_for.h" +#include +#include #include "mt-kahypar/macros.h" #include "mt-kahypar/parallel/stl/scalable_vector.h" diff --git a/mt-kahypar/utils/utilities.h b/mt-kahypar/utils/utilities.h index 508e5dfc4..74cf5f444 100644 --- a/mt-kahypar/utils/utilities.h +++ b/mt-kahypar/utils/utilities.h @@ -28,7 +28,7 @@ #include -#include "tbb/concurrent_vector.h" +#include #include "mt-kahypar/macros.h" #include "mt-kahypar/utils/stats.h" diff --git a/tests/datastructures/connectivity_set_test.cc b/tests/datastructures/connectivity_set_test.cc index 79e539d52..faca3b09a 100644 --- a/tests/datastructures/connectivity_set_test.cc +++ b/tests/datastructures/connectivity_set_test.cc @@ -28,7 +28,7 @@ #include #include "gmock/gmock.h" -#include "tbb/task_group.h" +#include #include "mt-kahypar/datastructures/connectivity_set.h" #include "mt-kahypar/datastructures/delta_connectivity_set.h" diff --git a/tests/datastructures/fixed_vertex_support_test.cc b/tests/datastructures/fixed_vertex_support_test.cc index fa9a60ed1..0d2c88f84 100644 --- a/tests/datastructures/fixed_vertex_support_test.cc +++ b/tests/datastructures/fixed_vertex_support_test.cc @@ -27,7 +27,7 @@ #include "gmock/gmock.h" -#include "tbb/task_group.h" +#include #include "mt-kahypar/datastructures/static_hypergraph.h" #include "mt-kahypar/datastructures/static_hypergraph_factory.h" diff --git a/tests/datastructures/partitioned_graph_test.cc b/tests/datastructures/partitioned_graph_test.cc index 1338812c7..ae6674824 100644 --- a/tests/datastructures/partitioned_graph_test.cc +++ b/tests/datastructures/partitioned_graph_test.cc @@ -393,18 +393,26 @@ TYPED_TEST(APartitionedGraph, ComputesGainsCorrectly) { TYPED_TEST(APartitionedGraph, ComputesDeltaAndGainsCorrectlyIfAllNodesMoveConcurrently) { this->gain_cache.initializeGainCache(this->partitioned_hypergraph); - CAtomic delta(0); - auto delta_fun = [&](const SynchronizedEdgeUpdate& sync_update) { - delta.fetch_add(CutAttributedGains::gain(sync_update)); - }; - + std::array deltas { 0 , 0 }; + std::vector> move_deltas(2 , std::vector()); + executeConcurrent([&] { + auto delta_fun = [&](const SynchronizedEdgeUpdate& sync_update) { + HyperedgeWeight d = CutAttributedGains::gain(sync_update); + deltas[0] += d; + move_deltas[0].push_back(d); + }; ASSERT_TRUE(this->partitioned_hypergraph.changeNodePart(this->gain_cache, 4, 1, 2, 5, []{}, delta_fun)); ASSERT_TRUE(this->partitioned_hypergraph.changeNodePart(this->gain_cache, 2, 0, 2, 5, []{}, delta_fun)); ASSERT_TRUE(this->partitioned_hypergraph.changeNodePart(this->gain_cache, 3, 1, 2, 5, []{}, delta_fun)); ASSERT_TRUE(this->partitioned_hypergraph.changeNodePart(this->gain_cache, 4, 2, 0, 5, []{}, delta_fun)); ASSERT_TRUE(this->partitioned_hypergraph.changeNodePart(this->gain_cache, 2, 2, 1, 5, []{}, delta_fun)); }, [&] { + auto delta_fun = [&](const SynchronizedEdgeUpdate& sync_update) { + HyperedgeWeight d = CutAttributedGains::gain(sync_update); + deltas[1] += d; + move_deltas[1].push_back(d); + }; ASSERT_TRUE(this->partitioned_hypergraph.changeNodePart(this->gain_cache, 5, 2, 0, 5, []{}, delta_fun)); ASSERT_TRUE(this->partitioned_hypergraph.changeNodePart(this->gain_cache, 1, 0, 2, 5, []{}, delta_fun)); ASSERT_TRUE(this->partitioned_hypergraph.changeNodePart(this->gain_cache, 6, 2, 0, 5, []{}, delta_fun)); @@ -412,7 +420,8 @@ TYPED_TEST(APartitionedGraph, ComputesDeltaAndGainsCorrectlyIfAllNodesMoveConcur ASSERT_TRUE(this->partitioned_hypergraph.changeNodePart(this->gain_cache, 1, 2, 1, 5, []{}, delta_fun)); }); - ASSERT_EQ(-2, delta.load()); + HyperedgeWeight delta = deltas[0] + deltas[1]; + ASSERT_EQ(-2, delta); this->verifyGains(0, {0, 0, 0}); this->verifyGains(1, {0, 0, -1}); this->verifyGains(2, {-1, 0, 0}); diff --git a/tests/datastructures/partitioned_hypergraph_smoke_test.cc b/tests/datastructures/partitioned_hypergraph_smoke_test.cc index 085ed134c..be9e4b00b 100644 --- a/tests/datastructures/partitioned_hypergraph_smoke_test.cc +++ b/tests/datastructures/partitioned_hypergraph_smoke_test.cc @@ -29,10 +29,10 @@ #include #include "gmock/gmock.h" -#include "tbb/blocked_range.h" -#include "tbb/enumerable_thread_specific.h" -#include "tbb/task_arena.h" -#include "tbb/task_group.h" +#include +#include +#include +#include #include "mt-kahypar/definitions.h" #include "mt-kahypar/io/hypergraph_factory.h" diff --git a/tests/datastructures/pin_count_in_part_test.cc b/tests/datastructures/pin_count_in_part_test.cc index b14715513..5b2e3535b 100644 --- a/tests/datastructures/pin_count_in_part_test.cc +++ b/tests/datastructures/pin_count_in_part_test.cc @@ -30,7 +30,7 @@ #include #include "gmock/gmock.h" -#include "tbb/task_group.h" +#include #include "mt-kahypar/datastructures/pin_count_in_part.h" #ifdef KAHYPAR_ENABLE_LARGE_K_PARTITIONING_FEATURES diff --git a/tests/datastructures/sparse_map_test.cc b/tests/datastructures/sparse_map_test.cc index ed431cf6a..6d21aa934 100644 --- a/tests/datastructures/sparse_map_test.cc +++ b/tests/datastructures/sparse_map_test.cc @@ -29,8 +29,8 @@ #include #include "gmock/gmock.h" -#include "tbb/task_group.h" -#include "tbb/parallel_invoke.h" +#include +#include #include "mt-kahypar/datastructures/sparse_map.h" #include "mt-kahypar/datastructures/concurrent_flat_map.h" diff --git a/tests/interface/interface_test.cc b/tests/interface/interface_test.cc index d2b0c9e74..3a1ed8acd 100644 --- a/tests/interface/interface_test.cc +++ b/tests/interface/interface_test.cc @@ -29,7 +29,7 @@ #include -#include "tbb/parallel_invoke.h" +#include #include "libmtkahypar.h" #include "mt-kahypar/macros.h" diff --git a/tests/parallel/memory_pool_test.cc b/tests/parallel/memory_pool_test.cc index 97e3f64fb..6f5eec1c2 100644 --- a/tests/parallel/memory_pool_test.cc +++ b/tests/parallel/memory_pool_test.cc @@ -27,7 +27,7 @@ #include #include "gmock/gmock.h" -#include "tbb/task_group.h" +#include #include "mt-kahypar/parallel/memory_pool.h" #include "mt-kahypar/parallel/hardware_topology.h" diff --git a/tests/partition/initial_partitioning/flat_initial_partitioner_test.cc b/tests/partition/initial_partitioning/flat_initial_partitioner_test.cc index 7d8c29358..566764181 100644 --- a/tests/partition/initial_partitioning/flat_initial_partitioner_test.cc +++ b/tests/partition/initial_partitioning/flat_initial_partitioner_test.cc @@ -28,7 +28,7 @@ #include -#include "tbb/parallel_invoke.h" +#include #include "tests/datastructures/hypergraph_fixtures.h" #include "mt-kahypar/utils/utilities.h" diff --git a/tests/partition/initial_partitioning/initial_partitioning_data_container_test.cc b/tests/partition/initial_partitioning/initial_partitioning_data_container_test.cc index 1ee896067..cf1be4cb4 100644 --- a/tests/partition/initial_partitioning/initial_partitioning_data_container_test.cc +++ b/tests/partition/initial_partitioning/initial_partitioning_data_container_test.cc @@ -28,7 +28,7 @@ #include -#include "tbb/parallel_invoke.h" +#include #include "tests/datastructures/hypergraph_fixtures.h" #include "mt-kahypar/definitions.h" diff --git a/tests/partition/initial_partitioning/pool_initial_partitioner_test.cc b/tests/partition/initial_partitioning/pool_initial_partitioner_test.cc index f3497535e..adc4c65ff 100644 --- a/tests/partition/initial_partitioning/pool_initial_partitioner_test.cc +++ b/tests/partition/initial_partitioning/pool_initial_partitioner_test.cc @@ -28,7 +28,7 @@ #include -#include "tbb/parallel_invoke.h" +#include #include "mt-kahypar/definitions.h" #include "mt-kahypar/utils/utilities.h" diff --git a/tests/partition/mapping/target_graph_test.cc b/tests/partition/mapping/target_graph_test.cc index 27b0b76b3..999f15288 100644 --- a/tests/partition/mapping/target_graph_test.cc +++ b/tests/partition/mapping/target_graph_test.cc @@ -26,7 +26,7 @@ #include "gmock/gmock.h" -#include "tbb/task_group.h" +#include #include "mt-kahypar/datastructures/static_graph_factory.h" #include "mt-kahypar/partition/mapping/target_graph.h" diff --git a/tests/partition/preprocessing/louvain_test.cc b/tests/partition/preprocessing/louvain_test.cc index aa07ab06f..82be925d3 100644 --- a/tests/partition/preprocessing/louvain_test.cc +++ b/tests/partition/preprocessing/louvain_test.cc @@ -85,8 +85,9 @@ TEST_F(ALouvain, ComputesMaxGainMove1) { ParallelLocalMovingModularity plm(context, graph->numNodes()); ds::Clustering communities = clustering( { 0, 1, 0, 2, 3, 4, 5, 1, 2, 3, 4 } ); plm.initializeClusterVolumes(*graph, communities); + ParallelLocalMovingModularity::ClearList clear_list(graph->numNodes()); PartitionID to = plm.computeMaxGainCluster( - *graph, communities, 7, plm.non_sampling_incident_cluster_weights.local()); + *graph, communities, 7, clear_list); ASSERT_EQ(0, to); } @@ -94,8 +95,9 @@ TEST_F(ALouvain, ComputesMaxGainMove2) { ParallelLocalMovingModularity plm(context, graph->numNodes()); ds::Clustering communities = clustering( { 0, 1, 0, 3, 3, 4, 5, 1, 2, 3, 4 } ); plm.initializeClusterVolumes(*graph, communities); + ParallelLocalMovingModularity::ClearList clear_list(graph->numNodes()); PartitionID to = plm.computeMaxGainCluster( - *graph, communities, 8, plm.non_sampling_incident_cluster_weights.local()); + *graph, communities, 8, clear_list); ASSERT_EQ(3, to); } @@ -103,8 +105,9 @@ TEST_F(ALouvain, ComputesMaxGainMove3) { ParallelLocalMovingModularity plm(context, graph->numNodes()); ds::Clustering communities = clustering( { 0, 1, 0, 2, 3, 4, 5, 1, 2, 3, 4 } ); plm.initializeClusterVolumes(*graph, communities); + ParallelLocalMovingModularity::ClearList clear_list(graph->numNodes()); PartitionID to = plm.computeMaxGainCluster( - *graph, communities, 8, plm.non_sampling_incident_cluster_weights.local()); + *graph, communities, 8, clear_list); ASSERT_EQ(2, to); } @@ -112,8 +115,9 @@ TEST_F(ALouvain, ComputesMaxGainMove4) { ParallelLocalMovingModularity plm(context, graph->numNodes()); ds::Clustering communities = clustering( { 0, 1, 0, 2, 3, 4, 5, 1, 2, 3, 4 } ); plm.initializeClusterVolumes(*graph, communities); + ParallelLocalMovingModularity::ClearList clear_list(graph->numNodes()); PartitionID to = plm.computeMaxGainCluster( - *graph, communities, 9, plm.non_sampling_incident_cluster_weights.local()); + *graph, communities, 9, clear_list); ASSERT_EQ(3, to); } @@ -121,8 +125,9 @@ TEST_F(ALouvain, ComputesMaxGainMove5) { ParallelLocalMovingModularity plm(context, graph->numNodes()); ds::Clustering communities = clustering( { 0, 1, 0, 2, 2, 4, 5, 1, 2, 3, 4 } ); plm.initializeClusterVolumes(*graph, communities); + ParallelLocalMovingModularity::ClearList clear_list(graph->numNodes()); PartitionID to = plm.computeMaxGainCluster( - *graph, communities, 9, plm.non_sampling_incident_cluster_weights.local()); + *graph, communities, 9, clear_list); ASSERT_EQ(2, to); } @@ -130,8 +135,9 @@ TEST_F(ALouvain, ComputesMaxGainMove6) { ParallelLocalMovingModularity plm(context, graph->numNodes()); ds::Clustering communities = clustering( { 0, 1, 0, 2, 2, 4, 5, 1, 2, 3, 4 } ); plm.initializeClusterVolumes(*graph, communities); + ParallelLocalMovingModularity::ClearList clear_list(graph->numNodes()); PartitionID to = plm.computeMaxGainCluster( - *graph, communities, 10, plm.non_sampling_incident_cluster_weights.local()); + *graph, communities, 10, clear_list); ASSERT_EQ(4, to); } @@ -139,8 +145,9 @@ TEST_F(ALouvain, ComputesMaxGainMove7) { ParallelLocalMovingModularity plm(context, graph->numNodes()); ds::Clustering communities = clustering( { 0, 1, 0, 2, 2, 4, 0, 1, 2, 3, 4 } ); plm.initializeClusterVolumes(*graph, communities); + ParallelLocalMovingModularity::ClearList clear_list(graph->numNodes()); PartitionID to = plm.computeMaxGainCluster( - *graph, communities, 10, plm.non_sampling_incident_cluster_weights.local()); + *graph, communities, 10, clear_list); ASSERT_EQ(0, to); } @@ -148,8 +155,9 @@ TEST_F(ALouvain, ComputesMaxGainMove8) { ParallelLocalMovingModularity plm(context, graph->numNodes()); ds::Clustering communities = clustering( { 0, 1, 0, 2, 2, 4, 0, 1, 1, 3, 4 } ); plm.initializeClusterVolumes(*graph, communities); + ParallelLocalMovingModularity::ClearList clear_list(graph->numNodes()); PartitionID to = plm.computeMaxGainCluster( - *graph, communities, 0, plm.non_sampling_incident_cluster_weights.local()); + *graph, communities, 0, clear_list); ASSERT_EQ(1, to); } @@ -157,8 +165,9 @@ TEST_F(ALouvain, ComputesMaxGainMove9) { ParallelLocalMovingModularity plm(context, graph->numNodes()); ds::Clustering communities = clustering( { 0, 1, 0, 2, 2, 4, 0, 1, 3, 3, 4 } ); plm.initializeClusterVolumes(*graph, communities); + ParallelLocalMovingModularity::ClearList clear_list(graph->numNodes()); PartitionID to = plm.computeMaxGainCluster( - *graph, communities, 4, plm.non_sampling_incident_cluster_weights.local()); + *graph, communities, 4, clear_list); ASSERT_EQ(3, to); } @@ -166,8 +175,9 @@ TEST_F(ALouvain, ComputesMaxGainMove10) { ParallelLocalMovingModularity plm(context, graph->numNodes()); ds::Clustering communities = clustering( { 0, 1, 0, 2, 2, 0, 4, 1, 3, 3, 4 } ); plm.initializeClusterVolumes(*graph, communities); + ParallelLocalMovingModularity::ClearList clear_list(graph->numNodes()); PartitionID to = plm.computeMaxGainCluster( - *graph, communities, 6, plm.non_sampling_incident_cluster_weights.local()); + *graph, communities, 6, clear_list); ASSERT_EQ(4, to); } diff --git a/tests/partition/refinement/gain_cache_test.cc b/tests/partition/refinement/gain_cache_test.cc index 915e9a205..fc9e2d0f7 100644 --- a/tests/partition/refinement/gain_cache_test.cc +++ b/tests/partition/refinement/gain_cache_test.cc @@ -27,8 +27,8 @@ #include #include "gmock/gmock.h" -#include "tbb/parallel_for.h" -#include "tbb/enumerable_thread_specific.h" +#include +#include #include "mt-kahypar/definitions.h" #include "mt-kahypar/datastructures/static_graph_factory.h"