Bitcoin Core Fuzz Coverage Report

Coverage Report

Created: 2026-03-24 13:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/bitcoin/src/util/asmap.cpp
Line
Count
Source
1
// Copyright (c) 2019-present The Bitcoin Core developers
2
// Distributed under the MIT software license, see the accompanying
3
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4
5
#include <util/asmap.h>
6
7
#include <clientversion.h>
8
#include <hash.h>
9
#include <serialize.h>
10
#include <streams.h>
11
#include <uint256.h>
12
#include <util/fs.h>
13
#include <util/log.h>
14
15
#include <algorithm>
16
#include <bit>
17
#include <cassert>
18
#include <cstddef>
19
#include <cstdio>
20
#include <span>
21
#include <utility>
22
#include <vector>
23
24
/*
25
 * ASMap (Autonomous System Map) Implementation
26
 *
27
 * Provides a compressed mapping from IP address prefixes to Autonomous System Numbers (ASNs).
28
 * Uses a binary trie structure encoded as bytecode instructions that are interpreted
29
 * at runtime to find the ASN for a given IP address.
30
 *
31
 * The format of the asmap data is a bit-packed binary format where the entire mapping
32
 * is treated as a continuous sequence of bits. Instructions and their arguments are
33
 * encoded using variable numbers of bits and concatenated together without regard for
34
 * byte boundaries. The bits are stored in bytes using little-endian bit ordering.
35
 *
36
 * The data structure internally represents the mapping as a binary trie where:
37
 * - Unassigned subnets (no ASN mapping present) map to 0
38
 * - Subnets mapped entirely to one ASN become leaf nodes
39
 * - Subnets whose lower and upper halves have different mappings branch into subtrees
40
 *
41
 * The encoding uses variable-length integers and four instruction types (RETURN, JUMP,
42
 * MATCH, DEFAULT) to efficiently represent the trie.
43
 */
44
45
namespace {
46
47
// Indicates decoding errors or invalid data
48
constexpr uint32_t INVALID = 0xFFFFFFFF;
49
50
/**
51
 * Extract a single bit from byte array using little-endian bit ordering (LSB first).
52
 * Used for ASMap data.
53
 */
54
inline bool ConsumeBitLE(size_t& bitpos, std::span<const std::byte> bytes) noexcept
55
0
{
56
0
    const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (bitpos % 8)) & 1;
57
0
    ++bitpos;
58
0
    return bit;
59
0
}
60
61
/**
62
 * Extract a single bit from byte array using big-endian bit ordering (MSB first).
63
 * Used for IP addresses to match network byte order conventions.
64
 */
65
inline bool ConsumeBitBE(uint8_t& bitpos, std::span<const std::byte> bytes) noexcept
66
0
{
67
0
    const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (7 - (bitpos % 8))) & 1;
68
0
    ++bitpos;
69
0
    return bit;
70
0
}
71
72
/**
73
 * Variable-length integer decoder using a custom encoding scheme.
74
 *
75
 * The encoding is easiest to describe using an example. Let's say minval=100 and
76
 * bit_sizes=[4,2,2,3]. In that case:
77
 * - x in [100..115]: encoded as [0] + [4-bit BE encoding of (x-100)]
78
 * - x in [116..119]: encoded as [1,0] + [2-bit BE encoding of (x-116)]
79
 * - x in [120..123]: encoded as [1,1,0] + [2-bit BE encoding of (x-120)]
80
 * - x in [124..131]: encoded as [1,1,1] + [3-bit BE encoding of (x-124)]
81
 *
82
 * In general, every number is encoded as:
83
 * - First, k "1"-bits, where k is the class the number falls in
84
 * - Then, a "0"-bit, unless k is the highest class
85
 * - Lastly, bit_sizes[k] bits encoding in big endian the position within that class
86
 */
87
uint32_t DecodeBits(size_t& bitpos, const std::span<const std::byte> data, uint8_t minval, const std::span<const uint8_t> bit_sizes)
88
0
{
89
0
    uint32_t val = minval;  // Start with minimum encodable value
90
0
    bool bit;
91
0
    for (auto bit_sizes_it = bit_sizes.begin(); bit_sizes_it != bit_sizes.end(); ++bit_sizes_it) {
92
        // Read continuation bit to determine if we're in this class
93
0
        if (bit_sizes_it + 1 != bit_sizes.end()) {  // Unless we're in the last class
94
0
            if (bitpos >= data.size() * 8) break;
95
0
            bit = ConsumeBitLE(bitpos, data);
96
0
        } else {
97
0
            bit = 0;  // Last class has no continuation bit
98
0
        }
99
0
        if (bit) {
100
            // If the value will not fit in this class, subtract its range from val,
101
            // emit a "1" bit and continue with the next class
102
0
            val += (1 << *bit_sizes_it);  // Add size of this class
103
0
        } else {
104
            // Decode the position within this class in big endian
105
0
            for (int b = 0; b < *bit_sizes_it; b++) {
106
0
                if (bitpos >= data.size() * 8) return INVALID; // Reached EOF in mantissa
107
0
                bit = ConsumeBitLE(bitpos, data);
108
0
                val += bit << (*bit_sizes_it - 1 - b); // Big-endian within the class
109
0
            }
110
0
            return val;
111
0
        }
112
0
    }
113
0
    return INVALID; // Reached EOF in exponent
114
0
}
115
116
/**
117
 * Instruction Set
118
 *
119
 * The instruction set is designed to efficiently encode a binary trie
120
 * that maps IP prefixes to ASNs. Each instruction type serves a specific
121
 * role in trie traversal and evaluation.
122
 */
123
enum class Instruction : uint32_t
124
{
125
    // A return instruction, encoded as [0], returns a constant ASN.
126
    // It is followed by an integer using the ASN encoding.
127
    RETURN = 0,
128
    // A jump instruction, encoded as [1,0], inspects the next unused bit in the input
129
    // and either continues execution (if 0), or skips a specified number of bits (if 1).
130
    // It is followed by an integer using jump encoding.
131
    JUMP = 1,
132
    // A match instruction, encoded as [1,1,0], inspects 1 or more of the next unused bits
133
    // in the input. If they all match, execution continues. If not, the default ASN is returned
134
    // (or 0 if unset). The match value encodes both the pattern and its length.
135
    MATCH = 2,
136
    // A default instruction, encoded as [1,1,1], sets the default variable to its argument,
137
    // and continues execution. It is followed by an integer in ASN encoding.
138
    DEFAULT = 3,
139
};
140
141
// Instruction type encoding: RETURN=[0], JUMP=[1,0], MATCH=[1,1,0], DEFAULT=[1,1,1]
142
constexpr uint8_t TYPE_BIT_SIZES[]{0, 0, 1};
143
Instruction DecodeType(size_t& bitpos, const std::span<const std::byte> data)
144
0
{
145
0
    return Instruction(DecodeBits(bitpos, data, 0, TYPE_BIT_SIZES));
146
0
}
147
148
// ASN encoding: Can encode ASNs from 1 to ~16.7 million.
149
// Uses variable-length encoding optimized for real-world ASN distribution.
150
// ASN 0 is reserved and used if there isn't a match.
151
constexpr uint8_t ASN_BIT_SIZES[]{15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
152
uint32_t DecodeASN(size_t& bitpos, const std::span<const std::byte> data)
153
0
{
154
0
    return DecodeBits(bitpos, data, 1, ASN_BIT_SIZES);
155
0
}
156
157
// MATCH argument: Values in [2, 511]. The highest set bit determines the match length
158
// n ∈ [1,8]; the lower n-1 bits are the pattern to compare.
159
constexpr uint8_t MATCH_BIT_SIZES[]{1, 2, 3, 4, 5, 6, 7, 8};
160
uint32_t DecodeMatch(size_t& bitpos, const std::span<const std::byte> data)
161
0
{
162
0
    return DecodeBits(bitpos, data, 2, MATCH_BIT_SIZES);
163
0
}
164
165
// JUMP offset: Minimum value 17. Variable-length coded and may be large
166
// for skipping big subtrees.
167
constexpr uint8_t JUMP_BIT_SIZES[]{5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
168
uint32_t DecodeJump(size_t& bitpos, const std::span<const std::byte> data)
169
0
{
170
0
    return DecodeBits(bitpos, data, 17, JUMP_BIT_SIZES);
171
0
}
172
173
} // anonymous namespace
174
175
/**
176
 * Execute the ASMap bytecode to find the ASN for an IP
177
 *
178
 * This function interprets the asmap bytecode and uses bits from the IP
179
 * address to navigate through the encoded trie structure, ultimately
180
 * returning an ASN value.
181
 */
182
uint32_t Interpret(const std::span<const std::byte> asmap, const std::span<const std::byte> ip)
183
0
{
184
0
    size_t pos{0};
185
0
    const size_t endpos{asmap.size() * 8};
186
0
    uint8_t ip_bit{0};
187
0
    const uint8_t ip_bits_end = ip.size() * 8;
188
0
    uint32_t default_asn = 0;
189
0
    while (pos < endpos) {
190
0
        Instruction opcode = DecodeType(pos, asmap);
191
0
        if (opcode == Instruction::RETURN) {
192
            // Found leaf node - return the ASN
193
0
            uint32_t asn = DecodeASN(pos, asmap);
194
0
            if (asn == INVALID) break; // ASN straddles EOF
195
0
            return asn;
196
0
        } else if (opcode == Instruction::JUMP) {
197
            // Binary branch: if IP bit is 1, jump forward; else continue
198
0
            uint32_t jump = DecodeJump(pos, asmap);
199
0
            if (jump == INVALID) break; // Jump offset straddles EOF
200
0
            if (ip_bit == ip_bits_end) break; // No input bits left
201
0
            if (int64_t{jump} >= static_cast<int64_t>(endpos - pos)) break; // Jumping past EOF
202
0
            if (ConsumeBitBE(ip_bit, ip)) {  // Check next IP bit (big-endian)
203
0
                pos += jump;  // Bit = 1: skip to right subtree
204
0
            }
205
            // Bit = 0: fall through to left subtree
206
0
        } else if (opcode == Instruction::MATCH) {
207
            // Compare multiple IP bits against a pattern
208
            // The match value encodes both length and pattern:
209
            // - highest set bit position determines length (bit_width - 1)
210
            // - lower bits contain the pattern to compare
211
0
            uint32_t match = DecodeMatch(pos, asmap);
212
0
            if (match == INVALID) break; // Match bits straddle EOF
213
0
            int matchlen = std::bit_width(match) - 1;  // An n-bit value matches n-1 input bits
214
0
            if ((ip_bits_end - ip_bit) < matchlen) break; // Not enough input bits
215
0
            for (int bit = 0; bit < matchlen; bit++) {
216
0
                if (ConsumeBitBE(ip_bit, ip) != ((match >> (matchlen - 1 - bit)) & 1)) {
217
0
                    return default_asn;  // Pattern mismatch - use default
218
0
                }
219
0
            }
220
            // Pattern matched - continue execution
221
0
        } else if (opcode == Instruction::DEFAULT) {
222
            // Update the default ASN for subsequent MATCH failures
223
0
            default_asn = DecodeASN(pos, asmap);
224
0
            if (default_asn == INVALID) break; // ASN straddles EOF
225
0
        } else {
226
0
            break; // Instruction straddles EOF
227
0
        }
228
0
    }
229
    // Reached EOF without RETURN, or aborted (see any of the breaks above)
230
    // - should have been caught by SanityCheckAsmap below
231
0
    assert(false);
232
0
    return 0; // 0 is not a valid ASN
233
0
}
234
235
/**
236
 * Validates ASMap structure by simulating all possible execution paths.
237
 * Ensures well-formed bytecode, valid jumps, and proper termination.
238
 */
239
bool SanityCheckAsmap(const std::span<const std::byte> asmap, int bits)
240
0
{
241
0
    size_t pos{0};
242
0
    const size_t endpos{asmap.size() * 8};
243
0
    std::vector<std::pair<uint32_t, int>> jumps; // All future positions we may jump to (bit offset in asmap -> bits to consume left)
244
0
    jumps.reserve(bits);
245
0
    Instruction prevopcode = Instruction::JUMP;
246
0
    bool had_incomplete_match = false;  // Track <8 bit matches for efficiency check
247
248
0
    while (pos != endpos) {
249
        // There was a jump into the middle of the previous instruction
250
0
        if (!jumps.empty() && pos >= jumps.back().first) return false;
251
252
0
        Instruction opcode = DecodeType(pos, asmap);
253
0
        if (opcode == Instruction::RETURN) {
254
            // There should not be any RETURN immediately after a DEFAULT (could be combined into just RETURN)
255
0
            if (prevopcode == Instruction::DEFAULT) return false;
256
0
            uint32_t asn = DecodeASN(pos, asmap);
257
0
            if (asn == INVALID) return false; // ASN straddles EOF
258
0
            if (jumps.empty()) {
259
                // Nothing to execute anymore
260
0
                if (endpos - pos > 7) return false; // Excessive padding
261
0
                while (pos != endpos) {
262
0
                    if (ConsumeBitLE(pos, asmap)) return false; // Nonzero padding bit
263
0
                }
264
0
                return true; // Sanely reached EOF
265
0
            } else {
266
                // Continue by pretending we jumped to the next instruction
267
0
                if (pos != jumps.back().first) return false; // Unreachable code
268
0
                bits = jumps.back().second; // Restore the number of bits we would have had left after this jump
269
0
                jumps.pop_back();
270
0
                prevopcode = Instruction::JUMP;
271
0
            }
272
0
        } else if (opcode == Instruction::JUMP) {
273
0
            uint32_t jump = DecodeJump(pos, asmap);
274
0
            if (jump == INVALID) return false; // Jump offset straddles EOF
275
0
            if (int64_t{jump} > static_cast<int64_t>(endpos - pos)) return false; // Jump out of range
276
0
            if (bits == 0) return false; // Consuming bits past the end of the input
277
0
            --bits;
278
0
            uint32_t jump_offset = pos + jump;
279
0
            if (!jumps.empty() && jump_offset >= jumps.back().first) return false; // Intersecting jumps
280
0
            jumps.emplace_back(jump_offset, bits);  // Queue jump target for validation
281
0
            prevopcode = Instruction::JUMP;
282
0
        } else if (opcode == Instruction::MATCH) {
283
0
            uint32_t match = DecodeMatch(pos, asmap);
284
0
            if (match == INVALID) return false; // Match bits straddle EOF
285
0
            int matchlen = std::bit_width(match) - 1;
286
0
            if (prevopcode != Instruction::MATCH) had_incomplete_match = false;
287
            // Within a sequence of matches only at most one should be incomplete
288
0
            if (matchlen < 8 && had_incomplete_match) return false;
289
0
            had_incomplete_match = (matchlen < 8);
290
0
            if (bits < matchlen) return false; // Consuming bits past the end of the input
291
0
            bits -= matchlen;
292
0
            prevopcode = Instruction::MATCH;
293
0
        } else if (opcode == Instruction::DEFAULT) {
294
            // There should not be two successive DEFAULTs (they could be combined into one)
295
0
            if (prevopcode == Instruction::DEFAULT) return false;
296
0
            uint32_t asn = DecodeASN(pos, asmap);
297
0
            if (asn == INVALID) return false; // ASN straddles EOF
298
0
            prevopcode = Instruction::DEFAULT;
299
0
        } else {
300
0
            return false; // Instruction straddles EOF
301
0
        }
302
0
    }
303
0
    return false; // Reached EOF without RETURN instruction
304
0
}
305
306
/**
307
 * Provides a safe interface for validating ASMap data before use.
308
 * Returns true if the data is valid for 128 bits long inputs.
309
 */
310
bool CheckStandardAsmap(const std::span<const std::byte> data)
311
0
{
312
0
    if (!SanityCheckAsmap(data, 128)) {
313
0
        LogWarning("Sanity check of asmap data failed\n");
Line
Count
Source
96
0
#define LogWarning(...) LogPrintLevel_(BCLog::LogFlags::ALL, BCLog::Level::Warning, /*should_ratelimit=*/true, __VA_ARGS__)
Line
Count
Source
89
0
#define LogPrintLevel_(category, level, should_ratelimit, ...) LogPrintFormatInternal(SourceLocation{__func__}, category, level, should_ratelimit, __VA_ARGS__)
314
0
        return false;
315
0
    }
316
0
    return true;
317
0
}
318
319
/**
320
 * Loads an ASMap file from disk and validates it.
321
 */
322
std::vector<std::byte> DecodeAsmap(fs::path path)
323
0
{
324
0
    FILE *filestr = fsbridge::fopen(path, "rb");
325
0
    AutoFile file{filestr};
326
0
    if (file.IsNull()) {
327
0
        LogWarning("Failed to open asmap file from disk");
Line
Count
Source
96
0
#define LogWarning(...) LogPrintLevel_(BCLog::LogFlags::ALL, BCLog::Level::Warning, /*should_ratelimit=*/true, __VA_ARGS__)
Line
Count
Source
89
0
#define LogPrintLevel_(category, level, should_ratelimit, ...) LogPrintFormatInternal(SourceLocation{__func__}, category, level, should_ratelimit, __VA_ARGS__)
328
0
        return {};
329
0
    }
330
0
    int64_t length{file.size()};
331
0
    LogInfo("Opened asmap file %s (%d bytes) from disk", fs::quoted(fs::PathToString(path)), length);
Line
Count
Source
95
0
#define LogInfo(...) LogPrintLevel_(BCLog::LogFlags::ALL, BCLog::Level::Info, /*should_ratelimit=*/true, __VA_ARGS__)
Line
Count
Source
89
0
#define LogPrintLevel_(category, level, should_ratelimit, ...) LogPrintFormatInternal(SourceLocation{__func__}, category, level, should_ratelimit, __VA_ARGS__)
332
333
    // Read entire file into memory
334
0
    std::vector<std::byte> buffer(length);
335
0
    file.read(buffer);
336
337
0
    if (!CheckStandardAsmap(buffer)) {
338
0
        LogWarning("Sanity check of asmap file %s failed", fs::quoted(fs::PathToString(path)));
Line
Count
Source
96
0
#define LogWarning(...) LogPrintLevel_(BCLog::LogFlags::ALL, BCLog::Level::Warning, /*should_ratelimit=*/true, __VA_ARGS__)
Line
Count
Source
89
0
#define LogPrintLevel_(category, level, should_ratelimit, ...) LogPrintFormatInternal(SourceLocation{__func__}, category, level, should_ratelimit, __VA_ARGS__)
339
0
        return {};
340
0
    }
341
342
0
    return buffer;
343
0
}
344
345
/**
346
 * Computes SHA256 hash of ASMap data for versioning and consistency checks.
347
 */
348
uint256 AsmapVersion(const std::span<const std::byte> data)
349
0
{
350
0
    if (data.empty()) return {};
351
352
0
    HashWriter asmap_hasher;
353
0
    asmap_hasher << data;
354
0
    return asmap_hasher.GetHash();
355
0
}