/root/bitcoin/src/cuckoocache.h

Source
// Copyright (c) 2016 Jeremy Rubin
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#ifndef BITCOIN_CUCKOOCACHE_H
#define BITCOIN_CUCKOOCACHE_H

#include <util/fastrange.h>
#include <util/overflow.h>

#include <algorithm>
#include <array>
#include <atomic>
#include <cmath>
#include <cstring>
#include <limits>
#include <memory>
#include <utility>
#include <vector>


/** High-performance cache primitives.
 *
 * Summary:
 *
 * 1. @ref bit_packed_atomic_flags is bit-packed atomic flags for garbage collection
 *
 * 2. @ref cache is a cache which is performant in memory usage and lookup speed. It
 * is lockfree for erase operations. Elements are lazily erased on the next insert.
 */
namespace CuckooCache
{
/** @ref bit_packed_atomic_flags implements a container for garbage collection flags
 * that is only thread unsafe on calls to setup. This class bit-packs collection
 * flags for memory efficiency.
 *
 * All operations are `std::memory_order_relaxed` so external mechanisms must
 * ensure that writes and reads are properly synchronized.
 *
 * On setup(n), all bits up to `n` are marked as collected.
 *
 * Under the hood, because it is an 8-bit type, it makes sense to use a multiple
 * of 8 for setup, but it will be safe if that is not the case as well.
 */
class bit_packed_atomic_flags
{
    std::unique_ptr<std::atomic<uint8_t>[]> mem;

public:
    /** No default constructor, as there must be some size. */
    bit_packed_atomic_flags() = delete;

    /**
     * bit_packed_atomic_flags constructor creates memory to sufficiently
     * keep track of garbage collection information for `size` entries.
     *
     * @param size the number of elements to allocate space for
     *
     * @post bit_set, bit_unset, and bit_is_set function properly forall x. x <
     * size
     * @post All calls to bit_is_set (without subsequent bit_unset) will return
     * true.
     */
    explicit bit_packed_atomic_flags(uint32_t size)
    {
        // pad out the size if needed
        size = CeilDiv(size, 8u);
        mem.reset(new std::atomic<uint8_t>[size]);
        for (uint32_t i = 0; i < size; ++i)
            mem[i].store(0xFF);
    };

    /** setup marks all entries and ensures that bit_packed_atomic_flags can store
     * at least `b` entries.
     *
     * @param b the number of elements to allocate space for
     * @post bit_set, bit_unset, and bit_is_set function properly forall x. x <
     * b
     * @post All calls to bit_is_set (without subsequent bit_unset) will return
     * true.
     */
    inline void setup(uint32_t b)
    {
        bit_packed_atomic_flags d(b);
        std::swap(mem, d.mem);
    }

    /** bit_set sets an entry as discardable.
     *
     * @param s the index of the entry to bit_set
     * @post immediately subsequent call (assuming proper external memory
     * ordering) to bit_is_set(s) == true.
     */
    inline void bit_set(uint32_t s)
    {
        mem[s >> 3].fetch_or(uint8_t(1 << (s & 7)), std::memory_order_relaxed);
    }

    /** bit_unset marks an entry as something that should not be overwritten.
     *
     * @param s the index of the entry to bit_unset
     * @post immediately subsequent call (assuming proper external memory
     * ordering) to bit_is_set(s) == false.
     */
    inline void bit_unset(uint32_t s)
    {
        mem[s >> 3].fetch_and(uint8_t(~(1 << (s & 7))), std::memory_order_relaxed);
    }

    /** bit_is_set queries the table for discardability at `s`.
     *
     * @param s the index of the entry to read
     * @returns true if the bit at index `s` was set, false otherwise
     * */
    inline bool bit_is_set(uint32_t s) const
    {
        return (1 << (s & 7)) & mem[s >> 3].load(std::memory_order_relaxed);
    }
};

/** @ref cache implements a cache with properties similar to a cuckoo-set.
 *
 *  The cache is able to hold up to `(~(uint32_t)0) - 1` elements.
 *
 *  Read Operations:
 *      - contains() for `erase=false`
 *
 *  Read+Erase Operations:
 *      - contains() for `erase=true`
 *
 *  Erase Operations:
 *      - allow_erase()
 *
 *  Write Operations:
 *      - setup()
 *      - setup_bytes()
 *      - insert()
 *      - please_keep()
 *
 *  Synchronization Free Operations:
 *      - invalid()
 *      - compute_hashes()
 *
 * User Must Guarantee:
 *
 * 1. Write requires synchronized access (e.g. a lock)
 * 2. Read requires no concurrent Write, synchronized with last insert.
 * 3. Erase requires no concurrent Write, synchronized with last insert.
 * 4. An Erase caller must release all memory before allowing a new Writer.
 *
 *
 * Note on function names:
 *   - The name "allow_erase" is used because the real discard happens later.
 *   - The name "please_keep" is used because elements may be erased anyways on insert.
 *
 * @tparam Element should be a movable and copyable type
 * @tparam Hash should be a function/callable which takes a template parameter
 * hash_select and an Element and extracts a hash from it. Should return
 * high-entropy uint32_t hashes for `Hash h; h<0>(e) ... h<7>(e)`.
 */
template <typename Element, typename Hash>
class cache
{
private:
    /** table stores all the elements */
    std::vector<Element> table;

    /** size stores the total available slots in the hash table */
    uint32_t size{0};

    /** The bit_packed_atomic_flags array is marked mutable because we want
     * garbage collection to be allowed to occur from const methods */
    mutable bit_packed_atomic_flags collection_flags;

    /** epoch_flags tracks how recently an element was inserted into
     * the cache. true denotes recent, false denotes not-recent. See insert()
     * method for full semantics.
     */
    mutable std::vector<bool> epoch_flags;

    /** epoch_heuristic_counter is used to determine when an epoch might be aged
     * & an expensive scan should be done. epoch_heuristic_counter is
     * decremented on insert and reset to the new number of inserts which would
     * cause the epoch to reach epoch_size when it reaches zero.
     */
    uint32_t epoch_heuristic_counter{0};

    /** epoch_size is set to be the number of elements supposed to be in a
     * epoch. When the number of non-erased elements in an epoch
     * exceeds epoch_size, a new epoch should be started and all
     * current entries demoted. epoch_size is set to be 45% of size because
     * we want to keep load around 90%, and we support 3 epochs at once --
     * one "dead" which has been erased, one "dying" which has been marked to be
     * erased next, and one "living" which new inserts add to.
     */
    uint32_t epoch_size{0};

    /** depth_limit determines how many elements insert should try to replace.
     * Should be set to log2(n).
     */
    uint8_t depth_limit{0};

    /** hash_function is a const instance of the hash function. It cannot be
     * static or initialized at call time as it may have internal state (such as
     * a nonce).
     */
    const Hash hash_function;

    /** compute_hashes is convenience for not having to write out this
     * expression everywhere we use the hash values of an Element.
     *
     * We need to map the 32-bit input hash onto a hash bucket in a range [0, size) in a
     *  manner which preserves as much of the hash's uniformity as possible. Ideally
     *  this would be done by bitmasking but the size is usually not a power of two.
     *
     * The naive approach would be to use a mod -- which isn't perfectly uniform but so
     *  long as the hash is much larger than size it is not that bad. Unfortunately,
     *  mod/division is fairly slow on ordinary microprocessors (e.g. 90-ish cycles on
     *  haswell, ARM doesn't even have an instruction for it.); when the divisor is a
     *  constant the compiler will do clever tricks to turn it into a multiply+add+shift,
     *  but size is a run-time value so the compiler can't do that here.
     *
     * One option would be to implement the same trick the compiler uses and compute the
     *  constants for exact division based on the size, as described in "{N}-bit Unsigned
     *  Division via {N}-bit Multiply-Add" by Arch D. Robison in 2005. But that code is
     *  somewhat complicated and the result is still slower than an even simpler option:
     *  see the FastRange32 function in util/fastrange.h.
     *
     * The resulting non-uniformity is also more equally distributed which would be
     *  advantageous for something like linear probing, though it shouldn't matter
     *  one way or the other for a cuckoo table.
     *
     * The primary disadvantage of this approach is increased intermediate precision is
     *  required but for a 32-bit random number we only need the high 32 bits of a
     *  32*32->64 multiply, which means the operation is reasonably fast even on a
     *  typical 32-bit processor.
     *
     * @param e The element whose hashes will be returned
     * @returns Deterministic hashes derived from `e` uniformly mapped onto the range [0, size)
     */
    inline std::array<uint32_t, 8> compute_hashes(const Element& e) const
    {
        return {{FastRange32(hash_function.template operator()<0>(e), size),
                 FastRange32(hash_function.template operator()<1>(e), size),
                 FastRange32(hash_function.template operator()<2>(e), size),
                 FastRange32(hash_function.template operator()<3>(e), size),
                 FastRange32(hash_function.template operator()<4>(e), size),
                 FastRange32(hash_function.template operator()<5>(e), size),
                 FastRange32(hash_function.template operator()<6>(e), size),
                 FastRange32(hash_function.template operator()<7>(e), size)}};
    }

    /** invalid returns a special index that can never be inserted to
     * @returns the special constexpr index that can never be inserted to */
    constexpr uint32_t invalid() const
    {
        return ~(uint32_t)0;
    }

    /** allow_erase marks the element at index `n` as discardable. Threadsafe
     * without any concurrent insert.
     * @param n the index to allow erasure of
     */
    inline void allow_erase(uint32_t n) const
    {
        collection_flags.bit_set(n);
    }

    /** please_keep marks the element at index `n` as an entry that should be kept.
     * Threadsafe without any concurrent insert.
     * @param n the index to prioritize keeping
     */
    inline void please_keep(uint32_t n) const
    {
        collection_flags.bit_unset(n);
    }

    /** epoch_check handles the changing of epochs for elements stored in the
     * cache. epoch_check should be run before every insert.
     *
     * First, epoch_check decrements and checks the cheap heuristic, and then does
     * a more expensive scan if the cheap heuristic runs out. If the expensive
     * scan succeeds, the epochs are aged and old elements are allow_erased. The
     * cheap heuristic is reset to retrigger after the worst case growth of the
     * current epoch's elements would exceed the epoch_size.
     */
    void epoch_check()
    {
        if (epoch_heuristic_counter != 0) {
            --epoch_heuristic_counter;
            return;
        }
        // count the number of elements from the latest epoch which
        // have not been erased.
        uint32_t epoch_unused_count = 0;
        for (uint32_t i = 0; i < size; ++i)
            epoch_unused_count += epoch_flags[i] &&
                                  !collection_flags.bit_is_set(i);
        // If there are more non-deleted entries in the current epoch than the
        // epoch size, then allow_erase on all elements in the old epoch (marked
        // false) and move all elements in the current epoch to the old epoch
        // but do not call allow_erase on their indices.
        if (epoch_unused_count >= epoch_size) {
            for (uint32_t i = 0; i < size; ++i)
                if (epoch_flags[i])
                    epoch_flags[i] = false;
                else
                    allow_erase(i);
            epoch_heuristic_counter = epoch_size;
        } else
            // reset the epoch_heuristic_counter to next do a scan when worst
            // case behavior (no intermittent erases) would exceed epoch size,
            // with a reasonable minimum scan size.
            // Ordinarily, we would have to sanity check std::min(epoch_size,
            // epoch_unused_count), but we already know that `epoch_unused_count
            // < epoch_size` in this branch
            epoch_heuristic_counter = std::max(1u, std::max(epoch_size / 16,
                        epoch_size - epoch_unused_count));
    }

public:
    /** You must always construct a cache with some elements via a subsequent
     * call to setup or setup_bytes, otherwise operations may segfault.
     */
    cache() : table(), collection_flags(0), epoch_flags(), hash_function()
    {
    }

    /** setup initializes the container to store no more than new_size
     * elements and no less than 2 elements.
     *
     * setup should only be called once.
     *
     * @param new_size the desired number of elements to store
     * @returns the maximum number of elements storable
     */
    uint32_t setup(uint32_t new_size)
    {
        // depth_limit must be at least one otherwise errors can occur.
        size = std::max<uint32_t>(2, new_size);
        depth_limit = static_cast<uint8_t>(std::log2(static_cast<float>(size)));
        table.resize(size);
        collection_flags.setup(size);
        epoch_flags.resize(size);
        // Set to 45% as described above
        epoch_size = std::max(uint32_t{1}, (45 * size) / 100);
        // Initially set to wait for a whole epoch
        epoch_heuristic_counter = epoch_size;
        return size;
    }

    /** setup_bytes is a convenience function which accounts for internal memory
     * usage when deciding how many elements to store. It isn't perfect because
     * it doesn't account for any overhead (struct size, MallocUsage, collection
     * and epoch flags). This was done to simplify selecting a power of two
     * size. In the expected use case, an extra two bits per entry should be
     * negligible compared to the size of the elements.
     *
     * @param bytes the approximate number of bytes to use for this data
     * structure
     * @returns A pair of the maximum number of elements storable (see setup()
     * documentation for more detail) and the approximate total size of these
     * elements in bytes.
     */
    std::pair<uint32_t, size_t> setup_bytes(size_t bytes)
    {
        uint32_t requested_num_elems(std::min<size_t>(
            bytes / sizeof(Element),
            std::numeric_limits<uint32_t>::max()));

        auto num_elems = setup(requested_num_elems);

        size_t approx_size_bytes = num_elems * sizeof(Element);
        return std::make_pair(num_elems, approx_size_bytes);
    }

    /** insert loops at most depth_limit times trying to insert a hash
     * at various locations in the table via a variant of the Cuckoo Algorithm
     * with eight hash locations.
     *
     * It drops the last tried element if it runs out of depth before
     * encountering an open slot.
     *
     * Thus:
     *
     * ```
     * insert(x);
     * return contains(x, false);
     * ```
     *
     * is not guaranteed to return true.
     *
     * @param e the element to insert
     * @post one of the following: All previously inserted elements and e are
     * now in the table, one previously inserted element is evicted from the
     * table, the entry attempted to be inserted is evicted.
     */
    inline void insert(Element e)
    {
        epoch_check();
        uint32_t last_loc = invalid();
        bool last_epoch = true;
        std::array<uint32_t, 8> locs = compute_hashes(e);
        // Make sure we have not already inserted this element
        // If we have, make sure that it does not get deleted
        for (const uint32_t loc : locs)
            if (table[loc] == e) {
                please_keep(loc);
                epoch_flags[loc] = last_epoch;
                return;
            }
        for (uint8_t depth = 0; depth < depth_limit; ++depth) {
            // First try to insert to an empty slot, if one exists
            for (const uint32_t loc : locs) {
                if (!collection_flags.bit_is_set(loc))
                    continue;
                table[loc] = std::move(e);
                please_keep(loc);
                epoch_flags[loc] = last_epoch;
                return;
            }
            /** Swap with the element at the location that was
            * not the last one looked at. Example:
            *
            * 1. On first iteration, last_loc == invalid(), find returns last, so
            *    last_loc defaults to locs[0].
            * 2. On further iterations, where last_loc == locs[k], last_loc will
            *    go to locs[k+1 % 8], i.e., next of the 8 indices wrapping around
            *    to 0 if needed.
            *
            * This prevents moving the element we just put in.
            *
            * The swap is not a move -- we must switch onto the evicted element
            * for the next iteration.
            */
            last_loc = locs[(1 + (std::find(locs.begin(), locs.end(), last_loc) - locs.begin())) & 7];
            std::swap(table[last_loc], e);
            // Can't std::swap a std::vector<bool>::reference and a bool&.
            bool epoch = last_epoch;
            last_epoch = epoch_flags[last_loc];
            epoch_flags[last_loc] = epoch;

            // Recompute the locs -- unfortunately happens one too many times!
            locs = compute_hashes(e);
        }
    }

    /** contains iterates through the hash locations for a given element
     * and checks to see if it is present.
     *
     * contains does not check garbage collected state (in other words,
     * garbage is only collected when the space is needed), so:
     *
     * ```
     * insert(x);
     * if (contains(x, true))
     *     return contains(x, false);
     * else
     *     return true;
     * ```
     *
     * executed on a single thread will always return true!
     *
     * This is a great property for re-org performance for example.
     *
     * contains returns a bool set true if the element was found.
     *
     * @param e the element to check
     * @param erase whether to attempt setting the garbage collect flag
     *
     * @post if erase is true and the element is found, then the garbage collect
     * flag is set
     * @returns true if the element is found, false otherwise
     */
    inline bool contains(const Element& e, const bool erase) const
    {
        std::array<uint32_t, 8> locs = compute_hashes(e);
        for (const uint32_t loc : locs)
            if (table[loc] == e) {
                if (erase)
                    allow_erase(loc);
                return true;
            }
        return false;
    }
};
} // namespace CuckooCache

#endif // BITCOIN_CUCKOOCACHE_H

Bitcoin Core Fuzz Coverage Report

Coverage Report

Created: 2026-03-24 13:57

Line	Count	Source
1		// Copyright (c) 2016 Jeremy Rubin
2		// Distributed under the MIT software license, see the accompanying
3		// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4
5		#ifndef BITCOIN_CUCKOOCACHE_H
6		#define BITCOIN_CUCKOOCACHE_H
7
8		#include <util/fastrange.h>
9		#include <util/overflow.h>
10
11		#include <algorithm>
12		#include <array>
13		#include <atomic>
14		#include <cmath>
15		#include <cstring>
16		#include <limits>
17		#include <memory>
18		#include <utility>
19		#include <vector>
20
21
22		/** High-performance cache primitives.
23		*
24		* Summary:
25		*
26		* 1. @ref bit_packed_atomic_flags is bit-packed atomic flags for garbage collection
27		*
28		* 2. @ref cache is a cache which is performant in memory usage and lookup speed. It
29		* is lockfree for erase operations. Elements are lazily erased on the next insert.
30		*/
31		namespace CuckooCache
32		{
33		/** @ref bit_packed_atomic_flags implements a container for garbage collection flags
34		* that is only thread unsafe on calls to setup. This class bit-packs collection
35		* flags for memory efficiency.
36		*
37		* All operations are `std::memory_order_relaxed` so external mechanisms must
38		* ensure that writes and reads are properly synchronized.
39		*
40		* On setup(n), all bits up to `n` are marked as collected.
41		*
42		* Under the hood, because it is an 8-bit type, it makes sense to use a multiple
43		* of 8 for setup, but it will be safe if that is not the case as well.
44		*/
45		class bit_packed_atomic_flags
46		{
47		std::unique_ptr<std::atomic<uint8_t>[]> mem;
48
49		public:
50		/** No default constructor, as there must be some size. */
51		bit_packed_atomic_flags() = delete;
52
53		/**
54		* bit_packed_atomic_flags constructor creates memory to sufficiently
55		* keep track of garbage collection information for `size` entries.
56		*
57		* @param size the number of elements to allocate space for
58		*
59		* @post bit_set, bit_unset, and bit_is_set function properly forall x. x <
60		* size
61		* @post All calls to bit_is_set (without subsequent bit_unset) will return
62		* true.
63		*/
64		explicit bit_packed_atomic_flags(uint32_t size)
65	0	{
66		// pad out the size if needed
67	0	size = CeilDiv(size, 8u);
68	0	mem.reset(new std::atomic<uint8_t>[size]);
69	0	for (uint32_t i = 0; i < size; ++i)
70	0	mem[i].store(0xFF);
71	0	};
72
73		/** setup marks all entries and ensures that bit_packed_atomic_flags can store
74		* at least `b` entries.
75		*
76		* @param b the number of elements to allocate space for
77		* @post bit_set, bit_unset, and bit_is_set function properly forall x. x <
78		* b
79		* @post All calls to bit_is_set (without subsequent bit_unset) will return
80		* true.
81		*/
82		inline void setup(uint32_t b)
83	0	{
84	0	bit_packed_atomic_flags d(b);
85	0	std::swap(mem, d.mem);
86	0	}
87
88		/** bit_set sets an entry as discardable.
89		*
90		* @param s the index of the entry to bit_set
91		* @post immediately subsequent call (assuming proper external memory
92		* ordering) to bit_is_set(s) == true.
93		*/
94		inline void bit_set(uint32_t s)
95	0	{
96	0	mem[s >> 3].fetch_or(uint8_t(1 << (s & 7)), std::memory_order_relaxed);
97	0	}
98
99		/** bit_unset marks an entry as something that should not be overwritten.
100		*
101		* @param s the index of the entry to bit_unset
102		* @post immediately subsequent call (assuming proper external memory
103		* ordering) to bit_is_set(s) == false.
104		*/
105		inline void bit_unset(uint32_t s)
106	0	{
107	0	mem[s >> 3].fetch_and(uint8_t(~(1 << (s & 7))), std::memory_order_relaxed);
108	0	}
109
110		/** bit_is_set queries the table for discardability at `s`.
111		*
112		* @param s the index of the entry to read
113		* @returns true if the bit at index `s` was set, false otherwise
114		* */
115		inline bool bit_is_set(uint32_t s) const
116	0	{
117	0	return (1 << (s & 7)) & mem[s >> 3].load(std::memory_order_relaxed);
118	0	}
119		};
120
121		/** @ref cache implements a cache with properties similar to a cuckoo-set.
122		*
123		* The cache is able to hold up to `(~(uint32_t)0) - 1` elements.
124		*
125		* Read Operations:
126		* - contains() for `erase=false`
127		*
128		* Read+Erase Operations:
129		* - contains() for `erase=true`
130		*
131		* Erase Operations:
132		* - allow_erase()
133		*
134		* Write Operations:
135		* - setup()
136		* - setup_bytes()
137		* - insert()
138		* - please_keep()
139		*
140		* Synchronization Free Operations:
141		* - invalid()
142		* - compute_hashes()
143		*
144		* User Must Guarantee:
145		*
146		* 1. Write requires synchronized access (e.g. a lock)
147		* 2. Read requires no concurrent Write, synchronized with last insert.
148		* 3. Erase requires no concurrent Write, synchronized with last insert.
149		* 4. An Erase caller must release all memory before allowing a new Writer.
150		*
151		*
152		* Note on function names:
153		* - The name "allow_erase" is used because the real discard happens later.
154		* - The name "please_keep" is used because elements may be erased anyways on insert.
155		*
156		* @tparam Element should be a movable and copyable type
157		* @tparam Hash should be a function/callable which takes a template parameter
158		* hash_select and an Element and extracts a hash from it. Should return
159		* high-entropy uint32_t hashes for `Hash h; h<0>(e) ... h<7>(e)`.
160		*/
161		template <typename Element, typename Hash>
162		class cache
163		{
164		private:
165		/** table stores all the elements */
166		std::vector<Element> table;
167
168		/** size stores the total available slots in the hash table */
169		uint32_t size{0};
170
171		/** The bit_packed_atomic_flags array is marked mutable because we want
172		* garbage collection to be allowed to occur from const methods */
173		mutable bit_packed_atomic_flags collection_flags;
174
175		/** epoch_flags tracks how recently an element was inserted into
176		* the cache. true denotes recent, false denotes not-recent. See insert()
177		* method for full semantics.
178		*/
179		mutable std::vector<bool> epoch_flags;
180
181		/** epoch_heuristic_counter is used to determine when an epoch might be aged
182		* & an expensive scan should be done. epoch_heuristic_counter is
183		* decremented on insert and reset to the new number of inserts which would
184		* cause the epoch to reach epoch_size when it reaches zero.
185		*/
186		uint32_t epoch_heuristic_counter{0};
187
188		/** epoch_size is set to be the number of elements supposed to be in a
189		* epoch. When the number of non-erased elements in an epoch
190		* exceeds epoch_size, a new epoch should be started and all
191		* current entries demoted. epoch_size is set to be 45% of size because
192		* we want to keep load around 90%, and we support 3 epochs at once --
193		* one "dead" which has been erased, one "dying" which has been marked to be
194		* erased next, and one "living" which new inserts add to.
195		*/
196		uint32_t epoch_size{0};
197
198		/** depth_limit determines how many elements insert should try to replace.
199		* Should be set to log2(n).
200		*/
201		uint8_t depth_limit{0};
202
203		/** hash_function is a const instance of the hash function. It cannot be
204		* static or initialized at call time as it may have internal state (such as
205		* a nonce).
206		*/
207		const Hash hash_function;
208
209		/** compute_hashes is convenience for not having to write out this
210		* expression everywhere we use the hash values of an Element.
211		*
212		* We need to map the 32-bit input hash onto a hash bucket in a range [0, size) in a
213		* manner which preserves as much of the hash's uniformity as possible. Ideally
214		* this would be done by bitmasking but the size is usually not a power of two.
215		*
216		* The naive approach would be to use a mod -- which isn't perfectly uniform but so
217		* long as the hash is much larger than size it is not that bad. Unfortunately,
218		* mod/division is fairly slow on ordinary microprocessors (e.g. 90-ish cycles on
219		* haswell, ARM doesn't even have an instruction for it.); when the divisor is a
220		* constant the compiler will do clever tricks to turn it into a multiply+add+shift,
221		* but size is a run-time value so the compiler can't do that here.
222		*
223		* One option would be to implement the same trick the compiler uses and compute the
224		* constants for exact division based on the size, as described in "{N}-bit Unsigned
225		* Division via {N}-bit Multiply-Add" by Arch D. Robison in 2005. But that code is
226		* somewhat complicated and the result is still slower than an even simpler option:
227		* see the FastRange32 function in util/fastrange.h.
228		*
229		* The resulting non-uniformity is also more equally distributed which would be
230		* advantageous for something like linear probing, though it shouldn't matter
231		* one way or the other for a cuckoo table.
232		*
233		* The primary disadvantage of this approach is increased intermediate precision is
234		* required but for a 32-bit random number we only need the high 32 bits of a
235		* 32*32->64 multiply, which means the operation is reasonably fast even on a
236		* typical 32-bit processor.
237		*
238		* @param e The element whose hashes will be returned
239		* @returns Deterministic hashes derived from `e` uniformly mapped onto the range [0, size)
240		*/
241		inline std::array<uint32_t, 8> compute_hashes(const Element& e) const
242	0	{
243	0	return {{FastRange32(hash_function.template operator()<0>(e), size),
244	0	FastRange32(hash_function.template operator()<1>(e), size),
245	0	FastRange32(hash_function.template operator()<2>(e), size),
246	0	FastRange32(hash_function.template operator()<3>(e), size),
247	0	FastRange32(hash_function.template operator()<4>(e), size),
248	0	FastRange32(hash_function.template operator()<5>(e), size),
249	0	FastRange32(hash_function.template operator()<6>(e), size),
250	0	FastRange32(hash_function.template operator()<7>(e), size)}};
251	0	} Unexecuted instantiation: cuckoocache.cpp:CuckooCache::cache<int, (anonymous namespace)::RandomHasher>::compute_hashes(int const&) const Unexecuted instantiation: CuckooCache::cache<uint256, SignatureCacheHasher>::compute_hashes(uint256 const&) const
252
253		/** invalid returns a special index that can never be inserted to
254		* @returns the special constexpr index that can never be inserted to */
255		constexpr uint32_t invalid() const
256	0	{
257	0	return ~(uint32_t)0;
258	0	} Unexecuted instantiation: cuckoocache.cpp:CuckooCache::cache<int, (anonymous namespace)::RandomHasher>::invalid() const Unexecuted instantiation: CuckooCache::cache<uint256, SignatureCacheHasher>::invalid() const
259
260		/** allow_erase marks the element at index `n` as discardable. Threadsafe
261		* without any concurrent insert.
262		* @param n the index to allow erasure of
263		*/
264		inline void allow_erase(uint32_t n) const
265	0	{
266	0	collection_flags.bit_set(n);
267	0	} Unexecuted instantiation: cuckoocache.cpp:CuckooCache::cache<int, (anonymous namespace)::RandomHasher>::allow_erase(unsigned int) const Unexecuted instantiation: CuckooCache::cache<uint256, SignatureCacheHasher>::allow_erase(unsigned int) const
268
269		/** please_keep marks the element at index `n` as an entry that should be kept.
270		* Threadsafe without any concurrent insert.
271		* @param n the index to prioritize keeping
272		*/
273		inline void please_keep(uint32_t n) const
274	0	{
275	0	collection_flags.bit_unset(n);
276	0	} Unexecuted instantiation: cuckoocache.cpp:CuckooCache::cache<int, (anonymous namespace)::RandomHasher>::please_keep(unsigned int) const Unexecuted instantiation: CuckooCache::cache<uint256, SignatureCacheHasher>::please_keep(unsigned int) const
277
278		/** epoch_check handles the changing of epochs for elements stored in the
279		* cache. epoch_check should be run before every insert.
280		*
281		* First, epoch_check decrements and checks the cheap heuristic, and then does
282		* a more expensive scan if the cheap heuristic runs out. If the expensive
283		* scan succeeds, the epochs are aged and old elements are allow_erased. The
284		* cheap heuristic is reset to retrigger after the worst case growth of the
285		* current epoch's elements would exceed the epoch_size.
286		*/
287		void epoch_check()
288	0	{
289	0	if (epoch_heuristic_counter != 0) {
290	0	--epoch_heuristic_counter;
291	0	return;
292	0	}
293		// count the number of elements from the latest epoch which
294		// have not been erased.
295	0	uint32_t epoch_unused_count = 0;
296	0	for (uint32_t i = 0; i < size; ++i)
297	0	epoch_unused_count += epoch_flags[i] &&
298	0	!collection_flags.bit_is_set(i);
299		// If there are more non-deleted entries in the current epoch than the
300		// epoch size, then allow_erase on all elements in the old epoch (marked
301		// false) and move all elements in the current epoch to the old epoch
302		// but do not call allow_erase on their indices.
303	0	if (epoch_unused_count >= epoch_size) {
304	0	for (uint32_t i = 0; i < size; ++i)
305	0	if (epoch_flags[i])
306	0	epoch_flags[i] = false;
307	0	else
308	0	allow_erase(i);
309	0	epoch_heuristic_counter = epoch_size;
310	0	} else
311		// reset the epoch_heuristic_counter to next do a scan when worst
312		// case behavior (no intermittent erases) would exceed epoch size,
313		// with a reasonable minimum scan size.
314		// Ordinarily, we would have to sanity check std::min(epoch_size,
315		// epoch_unused_count), but we already know that `epoch_unused_count
316		// < epoch_size` in this branch
317	0	epoch_heuristic_counter = std::max(1u, std::max(epoch_size / 16,
318	0	epoch_size - epoch_unused_count));
319	0	} Unexecuted instantiation: cuckoocache.cpp:CuckooCache::cache<int, (anonymous namespace)::RandomHasher>::epoch_check() Unexecuted instantiation: CuckooCache::cache<uint256, SignatureCacheHasher>::epoch_check()
320
321		public:
322		/** You must always construct a cache with some elements via a subsequent
323		* call to setup or setup_bytes, otherwise operations may segfault.
324		*/
325	0	cache() : table(), collection_flags(0), epoch_flags(), hash_function()
326	0	{
327	0	} Unexecuted instantiation: cuckoocache.cpp:CuckooCache::cache<int, (anonymous namespace)::RandomHasher>::cache() Unexecuted instantiation: CuckooCache::cache<uint256, SignatureCacheHasher>::cache()
328
329		/** setup initializes the container to store no more than new_size
330		* elements and no less than 2 elements.
331		*
332		* setup should only be called once.
333		*
334		* @param new_size the desired number of elements to store
335		* @returns the maximum number of elements storable
336		*/
337		uint32_t setup(uint32_t new_size)
338	0	{
339		// depth_limit must be at least one otherwise errors can occur.
340	0	size = std::max<uint32_t>(2, new_size);
341	0	depth_limit = static_cast<uint8_t>(std::log2(static_cast<float>(size)));
342	0	table.resize(size);
343	0	collection_flags.setup(size);
344	0	epoch_flags.resize(size);
345		// Set to 45% as described above
346	0	epoch_size = std::max(uint32_t{1}, (45 * size) / 100);
347		// Initially set to wait for a whole epoch
348	0	epoch_heuristic_counter = epoch_size;
349	0	return size;
350	0	} Unexecuted instantiation: cuckoocache.cpp:CuckooCache::cache<int, (anonymous namespace)::RandomHasher>::setup(unsigned int) Unexecuted instantiation: CuckooCache::cache<uint256, SignatureCacheHasher>::setup(unsigned int)
351
352		/** setup_bytes is a convenience function which accounts for internal memory
353		* usage when deciding how many elements to store. It isn't perfect because
354		* it doesn't account for any overhead (struct size, MallocUsage, collection
355		* and epoch flags). This was done to simplify selecting a power of two
356		* size. In the expected use case, an extra two bits per entry should be
357		* negligible compared to the size of the elements.
358		*
359		* @param bytes the approximate number of bytes to use for this data
360		* structure
361		* @returns A pair of the maximum number of elements storable (see setup()
362		* documentation for more detail) and the approximate total size of these
363		* elements in bytes.
364		*/
365		std::pair<uint32_t, size_t> setup_bytes(size_t bytes)
366	0	{
367	0	uint32_t requested_num_elems(std::min<size_t>(
368	0	bytes / sizeof(Element),
369	0	std::numeric_limits<uint32_t>::max()));
370
371	0	auto num_elems = setup(requested_num_elems);
372
373	0	size_t approx_size_bytes = num_elems * sizeof(Element);
374	0	return std::make_pair(num_elems, approx_size_bytes);
375	0	} Unexecuted instantiation: cuckoocache.cpp:CuckooCache::cache<int, (anonymous namespace)::RandomHasher>::setup_bytes(unsigned long) Unexecuted instantiation: CuckooCache::cache<uint256, SignatureCacheHasher>::setup_bytes(unsigned long)
376
377		/** insert loops at most depth_limit times trying to insert a hash
378		* at various locations in the table via a variant of the Cuckoo Algorithm
379		* with eight hash locations.
380		*
381		* It drops the last tried element if it runs out of depth before
382		* encountering an open slot.
383		*
384		* Thus:
385		*
386		* ```
387		* insert(x);
388		* return contains(x, false);
389		* ```
390		*
391		* is not guaranteed to return true.
392		*
393		* @param e the element to insert
394		* @post one of the following: All previously inserted elements and e are
395		* now in the table, one previously inserted element is evicted from the
396		* table, the entry attempted to be inserted is evicted.
397		*/
398		inline void insert(Element e)
399	0	{
400	0	epoch_check();
401	0	uint32_t last_loc = invalid();
402	0	bool last_epoch = true;
403	0	std::array<uint32_t, 8> locs = compute_hashes(e);
404		// Make sure we have not already inserted this element
405		// If we have, make sure that it does not get deleted
406	0	for (const uint32_t loc : locs)
407	0	if (table[loc] == e) {
408	0	please_keep(loc);
409	0	epoch_flags[loc] = last_epoch;
410	0	return;
411	0	}
412	0	for (uint8_t depth = 0; depth < depth_limit; ++depth) {
413		// First try to insert to an empty slot, if one exists
414	0	for (const uint32_t loc : locs) {
415	0	if (!collection_flags.bit_is_set(loc))
416	0	continue;
417	0	table[loc] = std::move(e);
418	0	please_keep(loc);
419	0	epoch_flags[loc] = last_epoch;
420	0	return;
421	0	}
422		/** Swap with the element at the location that was
423		* not the last one looked at. Example:
424		*
425		* 1. On first iteration, last_loc == invalid(), find returns last, so
426		* last_loc defaults to locs[0].
427		* 2. On further iterations, where last_loc == locs[k], last_loc will
428		* go to locs[k+1 % 8], i.e., next of the 8 indices wrapping around
429		* to 0 if needed.
430		*
431		* This prevents moving the element we just put in.
432		*
433		* The swap is not a move -- we must switch onto the evicted element
434		* for the next iteration.
435		*/
436	0	last_loc = locs[(1 + (std::find(locs.begin(), locs.end(), last_loc) - locs.begin())) & 7];
437	0	std::swap(table[last_loc], e);
438		// Can't std::swap a std::vector<bool>::reference and a bool&.
439	0	bool epoch = last_epoch;
440	0	last_epoch = epoch_flags[last_loc];
441	0	epoch_flags[last_loc] = epoch;
442
443		// Recompute the locs -- unfortunately happens one too many times!
444	0	locs = compute_hashes(e);
445	0	}
446	0	} Unexecuted instantiation: cuckoocache.cpp:CuckooCache::cache<int, (anonymous namespace)::RandomHasher>::insert(int) Unexecuted instantiation: CuckooCache::cache<uint256, SignatureCacheHasher>::insert(uint256)
447
448		/** contains iterates through the hash locations for a given element
449		* and checks to see if it is present.
450		*
451		* contains does not check garbage collected state (in other words,
452		* garbage is only collected when the space is needed), so:
453		*
454		* ```
455		* insert(x);
456		* if (contains(x, true))
457		* return contains(x, false);
458		* else
459		* return true;
460		* ```
461		*
462		* executed on a single thread will always return true!
463		*
464		* This is a great property for re-org performance for example.
465		*
466		* contains returns a bool set true if the element was found.
467		*
468		* @param e the element to check
469		* @param erase whether to attempt setting the garbage collect flag
470		*
471		* @post if erase is true and the element is found, then the garbage collect
472		* flag is set
473		* @returns true if the element is found, false otherwise
474		*/
475		inline bool contains(const Element& e, const bool erase) const
476	0	{
477	0	std::array<uint32_t, 8> locs = compute_hashes(e);
478	0	for (const uint32_t loc : locs)
479	0	if (table[loc] == e) {
480	0	if (erase)
481	0	allow_erase(loc);
482	0	return true;
483	0	}
484	0	return false;
485	0	} Unexecuted instantiation: cuckoocache.cpp:CuckooCache::cache<int, (anonymous namespace)::RandomHasher>::contains(int const&, bool) const Unexecuted instantiation: CuckooCache::cache<uint256, SignatureCacheHasher>::contains(uint256 const&, bool) const
486		};
487		} // namespace CuckooCache
488
489		#endif // BITCOIN_CUCKOOCACHE_H