/root/bitcoin/src/util/asmap.cpp
Line | Count | Source |
1 | | // Copyright (c) 2019-present The Bitcoin Core developers |
2 | | // Distributed under the MIT software license, see the accompanying |
3 | | // file COPYING or http://www.opensource.org/licenses/mit-license.php. |
4 | | |
5 | | #include <util/asmap.h> |
6 | | |
7 | | #include <clientversion.h> |
8 | | #include <hash.h> |
9 | | #include <serialize.h> |
10 | | #include <streams.h> |
11 | | #include <uint256.h> |
12 | | #include <util/fs.h> |
13 | | #include <util/log.h> |
14 | | |
15 | | #include <algorithm> |
16 | | #include <bit> |
17 | | #include <cassert> |
18 | | #include <cstddef> |
19 | | #include <cstdio> |
20 | | #include <span> |
21 | | #include <utility> |
22 | | #include <vector> |
23 | | |
24 | | /* |
25 | | * ASMap (Autonomous System Map) Implementation |
26 | | * |
27 | | * Provides a compressed mapping from IP address prefixes to Autonomous System Numbers (ASNs). |
28 | | * Uses a binary trie structure encoded as bytecode instructions that are interpreted |
29 | | * at runtime to find the ASN for a given IP address. |
30 | | * |
31 | | * The format of the asmap data is a bit-packed binary format where the entire mapping |
32 | | * is treated as a continuous sequence of bits. Instructions and their arguments are |
33 | | * encoded using variable numbers of bits and concatenated together without regard for |
34 | | * byte boundaries. The bits are stored in bytes using little-endian bit ordering. |
35 | | * |
36 | | * The data structure internally represents the mapping as a binary trie where: |
37 | | * - Unassigned subnets (no ASN mapping present) map to 0 |
38 | | * - Subnets mapped entirely to one ASN become leaf nodes |
39 | | * - Subnets whose lower and upper halves have different mappings branch into subtrees |
40 | | * |
41 | | * The encoding uses variable-length integers and four instruction types (RETURN, JUMP, |
42 | | * MATCH, DEFAULT) to efficiently represent the trie. |
43 | | */ |
44 | | |
45 | | namespace { |
46 | | |
47 | | // Indicates decoding errors or invalid data |
48 | | constexpr uint32_t INVALID = 0xFFFFFFFF; |
49 | | |
50 | | /** |
51 | | * Extract a single bit from byte array using little-endian bit ordering (LSB first). |
52 | | * Used for ASMap data. |
53 | | */ |
54 | | inline bool ConsumeBitLE(size_t& bitpos, std::span<const std::byte> bytes) noexcept |
55 | 0 | { |
56 | 0 | const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (bitpos % 8)) & 1; |
57 | 0 | ++bitpos; |
58 | 0 | return bit; |
59 | 0 | } |
60 | | |
61 | | /** |
62 | | * Extract a single bit from byte array using big-endian bit ordering (MSB first). |
63 | | * Used for IP addresses to match network byte order conventions. |
64 | | */ |
65 | | inline bool ConsumeBitBE(uint8_t& bitpos, std::span<const std::byte> bytes) noexcept |
66 | 0 | { |
67 | 0 | const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (7 - (bitpos % 8))) & 1; |
68 | 0 | ++bitpos; |
69 | 0 | return bit; |
70 | 0 | } |
71 | | |
72 | | /** |
73 | | * Variable-length integer decoder using a custom encoding scheme. |
74 | | * |
75 | | * The encoding is easiest to describe using an example. Let's say minval=100 and |
76 | | * bit_sizes=[4,2,2,3]. In that case: |
77 | | * - x in [100..115]: encoded as [0] + [4-bit BE encoding of (x-100)] |
78 | | * - x in [116..119]: encoded as [1,0] + [2-bit BE encoding of (x-116)] |
79 | | * - x in [120..123]: encoded as [1,1,0] + [2-bit BE encoding of (x-120)] |
80 | | * - x in [124..131]: encoded as [1,1,1] + [3-bit BE encoding of (x-124)] |
81 | | * |
82 | | * In general, every number is encoded as: |
83 | | * - First, k "1"-bits, where k is the class the number falls in |
84 | | * - Then, a "0"-bit, unless k is the highest class |
85 | | * - Lastly, bit_sizes[k] bits encoding in big endian the position within that class |
86 | | */ |
87 | | uint32_t DecodeBits(size_t& bitpos, const std::span<const std::byte> data, uint8_t minval, const std::span<const uint8_t> bit_sizes) |
88 | 0 | { |
89 | 0 | uint32_t val = minval; // Start with minimum encodable value |
90 | 0 | bool bit; |
91 | 0 | for (auto bit_sizes_it = bit_sizes.begin(); bit_sizes_it != bit_sizes.end(); ++bit_sizes_it) { |
92 | | // Read continuation bit to determine if we're in this class |
93 | 0 | if (bit_sizes_it + 1 != bit_sizes.end()) { // Unless we're in the last class |
94 | 0 | if (bitpos >= data.size() * 8) break; |
95 | 0 | bit = ConsumeBitLE(bitpos, data); |
96 | 0 | } else { |
97 | 0 | bit = 0; // Last class has no continuation bit |
98 | 0 | } |
99 | 0 | if (bit) { |
100 | | // If the value will not fit in this class, subtract its range from val, |
101 | | // emit a "1" bit and continue with the next class |
102 | 0 | val += (1 << *bit_sizes_it); // Add size of this class |
103 | 0 | } else { |
104 | | // Decode the position within this class in big endian |
105 | 0 | for (int b = 0; b < *bit_sizes_it; b++) { |
106 | 0 | if (bitpos >= data.size() * 8) return INVALID; // Reached EOF in mantissa |
107 | 0 | bit = ConsumeBitLE(bitpos, data); |
108 | 0 | val += bit << (*bit_sizes_it - 1 - b); // Big-endian within the class |
109 | 0 | } |
110 | 0 | return val; |
111 | 0 | } |
112 | 0 | } |
113 | 0 | return INVALID; // Reached EOF in exponent |
114 | 0 | } |
115 | | |
116 | | /** |
117 | | * Instruction Set |
118 | | * |
119 | | * The instruction set is designed to efficiently encode a binary trie |
120 | | * that maps IP prefixes to ASNs. Each instruction type serves a specific |
121 | | * role in trie traversal and evaluation. |
122 | | */ |
123 | | enum class Instruction : uint32_t |
124 | | { |
125 | | // A return instruction, encoded as [0], returns a constant ASN. |
126 | | // It is followed by an integer using the ASN encoding. |
127 | | RETURN = 0, |
128 | | // A jump instruction, encoded as [1,0], inspects the next unused bit in the input |
129 | | // and either continues execution (if 0), or skips a specified number of bits (if 1). |
130 | | // It is followed by an integer using jump encoding. |
131 | | JUMP = 1, |
132 | | // A match instruction, encoded as [1,1,0], inspects 1 or more of the next unused bits |
133 | | // in the input. If they all match, execution continues. If not, the default ASN is returned |
134 | | // (or 0 if unset). The match value encodes both the pattern and its length. |
135 | | MATCH = 2, |
136 | | // A default instruction, encoded as [1,1,1], sets the default variable to its argument, |
137 | | // and continues execution. It is followed by an integer in ASN encoding. |
138 | | DEFAULT = 3, |
139 | | }; |
140 | | |
141 | | // Instruction type encoding: RETURN=[0], JUMP=[1,0], MATCH=[1,1,0], DEFAULT=[1,1,1] |
142 | | constexpr uint8_t TYPE_BIT_SIZES[]{0, 0, 1}; |
143 | | Instruction DecodeType(size_t& bitpos, const std::span<const std::byte> data) |
144 | 0 | { |
145 | 0 | return Instruction(DecodeBits(bitpos, data, 0, TYPE_BIT_SIZES)); |
146 | 0 | } |
147 | | |
148 | | // ASN encoding: Can encode ASNs from 1 to ~16.7 million. |
149 | | // Uses variable-length encoding optimized for real-world ASN distribution. |
150 | | // ASN 0 is reserved and used if there isn't a match. |
151 | | constexpr uint8_t ASN_BIT_SIZES[]{15, 16, 17, 18, 19, 20, 21, 22, 23, 24}; |
152 | | uint32_t DecodeASN(size_t& bitpos, const std::span<const std::byte> data) |
153 | 0 | { |
154 | 0 | return DecodeBits(bitpos, data, 1, ASN_BIT_SIZES); |
155 | 0 | } |
156 | | |
157 | | // MATCH argument: Values in [2, 511]. The highest set bit determines the match length |
158 | | // n ∈ [1,8]; the lower n-1 bits are the pattern to compare. |
159 | | constexpr uint8_t MATCH_BIT_SIZES[]{1, 2, 3, 4, 5, 6, 7, 8}; |
160 | | uint32_t DecodeMatch(size_t& bitpos, const std::span<const std::byte> data) |
161 | 0 | { |
162 | 0 | return DecodeBits(bitpos, data, 2, MATCH_BIT_SIZES); |
163 | 0 | } |
164 | | |
165 | | // JUMP offset: Minimum value 17. Variable-length coded and may be large |
166 | | // for skipping big subtrees. |
167 | | constexpr uint8_t JUMP_BIT_SIZES[]{5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30}; |
168 | | uint32_t DecodeJump(size_t& bitpos, const std::span<const std::byte> data) |
169 | 0 | { |
170 | 0 | return DecodeBits(bitpos, data, 17, JUMP_BIT_SIZES); |
171 | 0 | } |
172 | | |
173 | | } // anonymous namespace |
174 | | |
175 | | /** |
176 | | * Execute the ASMap bytecode to find the ASN for an IP |
177 | | * |
178 | | * This function interprets the asmap bytecode and uses bits from the IP |
179 | | * address to navigate through the encoded trie structure, ultimately |
180 | | * returning an ASN value. |
181 | | */ |
182 | | uint32_t Interpret(const std::span<const std::byte> asmap, const std::span<const std::byte> ip) |
183 | 0 | { |
184 | 0 | size_t pos{0}; |
185 | 0 | const size_t endpos{asmap.size() * 8}; |
186 | 0 | uint8_t ip_bit{0}; |
187 | 0 | const uint8_t ip_bits_end = ip.size() * 8; |
188 | 0 | uint32_t default_asn = 0; |
189 | 0 | while (pos < endpos) { |
190 | 0 | Instruction opcode = DecodeType(pos, asmap); |
191 | 0 | if (opcode == Instruction::RETURN) { |
192 | | // Found leaf node - return the ASN |
193 | 0 | uint32_t asn = DecodeASN(pos, asmap); |
194 | 0 | if (asn == INVALID) break; // ASN straddles EOF |
195 | 0 | return asn; |
196 | 0 | } else if (opcode == Instruction::JUMP) { |
197 | | // Binary branch: if IP bit is 1, jump forward; else continue |
198 | 0 | uint32_t jump = DecodeJump(pos, asmap); |
199 | 0 | if (jump == INVALID) break; // Jump offset straddles EOF |
200 | 0 | if (ip_bit == ip_bits_end) break; // No input bits left |
201 | 0 | if (int64_t{jump} >= static_cast<int64_t>(endpos - pos)) break; // Jumping past EOF |
202 | 0 | if (ConsumeBitBE(ip_bit, ip)) { // Check next IP bit (big-endian) |
203 | 0 | pos += jump; // Bit = 1: skip to right subtree |
204 | 0 | } |
205 | | // Bit = 0: fall through to left subtree |
206 | 0 | } else if (opcode == Instruction::MATCH) { |
207 | | // Compare multiple IP bits against a pattern |
208 | | // The match value encodes both length and pattern: |
209 | | // - highest set bit position determines length (bit_width - 1) |
210 | | // - lower bits contain the pattern to compare |
211 | 0 | uint32_t match = DecodeMatch(pos, asmap); |
212 | 0 | if (match == INVALID) break; // Match bits straddle EOF |
213 | 0 | int matchlen = std::bit_width(match) - 1; // An n-bit value matches n-1 input bits |
214 | 0 | if ((ip_bits_end - ip_bit) < matchlen) break; // Not enough input bits |
215 | 0 | for (int bit = 0; bit < matchlen; bit++) { |
216 | 0 | if (ConsumeBitBE(ip_bit, ip) != ((match >> (matchlen - 1 - bit)) & 1)) { |
217 | 0 | return default_asn; // Pattern mismatch - use default |
218 | 0 | } |
219 | 0 | } |
220 | | // Pattern matched - continue execution |
221 | 0 | } else if (opcode == Instruction::DEFAULT) { |
222 | | // Update the default ASN for subsequent MATCH failures |
223 | 0 | default_asn = DecodeASN(pos, asmap); |
224 | 0 | if (default_asn == INVALID) break; // ASN straddles EOF |
225 | 0 | } else { |
226 | 0 | break; // Instruction straddles EOF |
227 | 0 | } |
228 | 0 | } |
229 | | // Reached EOF without RETURN, or aborted (see any of the breaks above) |
230 | | // - should have been caught by SanityCheckAsmap below |
231 | 0 | assert(false); |
232 | 0 | return 0; // 0 is not a valid ASN |
233 | 0 | } |
234 | | |
235 | | /** |
236 | | * Validates ASMap structure by simulating all possible execution paths. |
237 | | * Ensures well-formed bytecode, valid jumps, and proper termination. |
238 | | */ |
239 | | bool SanityCheckAsmap(const std::span<const std::byte> asmap, int bits) |
240 | 0 | { |
241 | 0 | size_t pos{0}; |
242 | 0 | const size_t endpos{asmap.size() * 8}; |
243 | 0 | std::vector<std::pair<uint32_t, int>> jumps; // All future positions we may jump to (bit offset in asmap -> bits to consume left) |
244 | 0 | jumps.reserve(bits); |
245 | 0 | Instruction prevopcode = Instruction::JUMP; |
246 | 0 | bool had_incomplete_match = false; // Track <8 bit matches for efficiency check |
247 | |
|
248 | 0 | while (pos != endpos) { |
249 | | // There was a jump into the middle of the previous instruction |
250 | 0 | if (!jumps.empty() && pos >= jumps.back().first) return false; |
251 | | |
252 | 0 | Instruction opcode = DecodeType(pos, asmap); |
253 | 0 | if (opcode == Instruction::RETURN) { |
254 | | // There should not be any RETURN immediately after a DEFAULT (could be combined into just RETURN) |
255 | 0 | if (prevopcode == Instruction::DEFAULT) return false; |
256 | 0 | uint32_t asn = DecodeASN(pos, asmap); |
257 | 0 | if (asn == INVALID) return false; // ASN straddles EOF |
258 | 0 | if (jumps.empty()) { |
259 | | // Nothing to execute anymore |
260 | 0 | if (endpos - pos > 7) return false; // Excessive padding |
261 | 0 | while (pos != endpos) { |
262 | 0 | if (ConsumeBitLE(pos, asmap)) return false; // Nonzero padding bit |
263 | 0 | } |
264 | 0 | return true; // Sanely reached EOF |
265 | 0 | } else { |
266 | | // Continue by pretending we jumped to the next instruction |
267 | 0 | if (pos != jumps.back().first) return false; // Unreachable code |
268 | 0 | bits = jumps.back().second; // Restore the number of bits we would have had left after this jump |
269 | 0 | jumps.pop_back(); |
270 | 0 | prevopcode = Instruction::JUMP; |
271 | 0 | } |
272 | 0 | } else if (opcode == Instruction::JUMP) { |
273 | 0 | uint32_t jump = DecodeJump(pos, asmap); |
274 | 0 | if (jump == INVALID) return false; // Jump offset straddles EOF |
275 | 0 | if (int64_t{jump} > static_cast<int64_t>(endpos - pos)) return false; // Jump out of range |
276 | 0 | if (bits == 0) return false; // Consuming bits past the end of the input |
277 | 0 | --bits; |
278 | 0 | uint32_t jump_offset = pos + jump; |
279 | 0 | if (!jumps.empty() && jump_offset >= jumps.back().first) return false; // Intersecting jumps |
280 | 0 | jumps.emplace_back(jump_offset, bits); // Queue jump target for validation |
281 | 0 | prevopcode = Instruction::JUMP; |
282 | 0 | } else if (opcode == Instruction::MATCH) { |
283 | 0 | uint32_t match = DecodeMatch(pos, asmap); |
284 | 0 | if (match == INVALID) return false; // Match bits straddle EOF |
285 | 0 | int matchlen = std::bit_width(match) - 1; |
286 | 0 | if (prevopcode != Instruction::MATCH) had_incomplete_match = false; |
287 | | // Within a sequence of matches only at most one should be incomplete |
288 | 0 | if (matchlen < 8 && had_incomplete_match) return false; |
289 | 0 | had_incomplete_match = (matchlen < 8); |
290 | 0 | if (bits < matchlen) return false; // Consuming bits past the end of the input |
291 | 0 | bits -= matchlen; |
292 | 0 | prevopcode = Instruction::MATCH; |
293 | 0 | } else if (opcode == Instruction::DEFAULT) { |
294 | | // There should not be two successive DEFAULTs (they could be combined into one) |
295 | 0 | if (prevopcode == Instruction::DEFAULT) return false; |
296 | 0 | uint32_t asn = DecodeASN(pos, asmap); |
297 | 0 | if (asn == INVALID) return false; // ASN straddles EOF |
298 | 0 | prevopcode = Instruction::DEFAULT; |
299 | 0 | } else { |
300 | 0 | return false; // Instruction straddles EOF |
301 | 0 | } |
302 | 0 | } |
303 | 0 | return false; // Reached EOF without RETURN instruction |
304 | 0 | } |
305 | | |
306 | | /** |
307 | | * Provides a safe interface for validating ASMap data before use. |
308 | | * Returns true if the data is valid for 128 bits long inputs. |
309 | | */ |
310 | | bool CheckStandardAsmap(const std::span<const std::byte> data) |
311 | 0 | { |
312 | 0 | if (!SanityCheckAsmap(data, 128)) { |
313 | 0 | LogWarning("Sanity check of asmap data failed\n");Line | Count | Source | 96 | 0 | #define LogWarning(...) LogPrintLevel_(BCLog::LogFlags::ALL, BCLog::Level::Warning, /*should_ratelimit=*/true, __VA_ARGS__) Line | Count | Source | 89 | 0 | #define LogPrintLevel_(category, level, should_ratelimit, ...) LogPrintFormatInternal(SourceLocation{__func__}, category, level, should_ratelimit, __VA_ARGS__) |
|
|
314 | 0 | return false; |
315 | 0 | } |
316 | 0 | return true; |
317 | 0 | } |
318 | | |
319 | | /** |
320 | | * Loads an ASMap file from disk and validates it. |
321 | | */ |
322 | | std::vector<std::byte> DecodeAsmap(fs::path path) |
323 | 0 | { |
324 | 0 | FILE *filestr = fsbridge::fopen(path, "rb"); |
325 | 0 | AutoFile file{filestr}; |
326 | 0 | if (file.IsNull()) { |
327 | 0 | LogWarning("Failed to open asmap file from disk");Line | Count | Source | 96 | 0 | #define LogWarning(...) LogPrintLevel_(BCLog::LogFlags::ALL, BCLog::Level::Warning, /*should_ratelimit=*/true, __VA_ARGS__) Line | Count | Source | 89 | 0 | #define LogPrintLevel_(category, level, should_ratelimit, ...) LogPrintFormatInternal(SourceLocation{__func__}, category, level, should_ratelimit, __VA_ARGS__) |
|
|
328 | 0 | return {}; |
329 | 0 | } |
330 | 0 | int64_t length{file.size()}; |
331 | 0 | LogInfo("Opened asmap file %s (%d bytes) from disk", fs::quoted(fs::PathToString(path)), length);Line | Count | Source | 95 | 0 | #define LogInfo(...) LogPrintLevel_(BCLog::LogFlags::ALL, BCLog::Level::Info, /*should_ratelimit=*/true, __VA_ARGS__) Line | Count | Source | 89 | 0 | #define LogPrintLevel_(category, level, should_ratelimit, ...) LogPrintFormatInternal(SourceLocation{__func__}, category, level, should_ratelimit, __VA_ARGS__) |
|
|
332 | | |
333 | | // Read entire file into memory |
334 | 0 | std::vector<std::byte> buffer(length); |
335 | 0 | file.read(buffer); |
336 | |
|
337 | 0 | if (!CheckStandardAsmap(buffer)) { |
338 | 0 | LogWarning("Sanity check of asmap file %s failed", fs::quoted(fs::PathToString(path)));Line | Count | Source | 96 | 0 | #define LogWarning(...) LogPrintLevel_(BCLog::LogFlags::ALL, BCLog::Level::Warning, /*should_ratelimit=*/true, __VA_ARGS__) Line | Count | Source | 89 | 0 | #define LogPrintLevel_(category, level, should_ratelimit, ...) LogPrintFormatInternal(SourceLocation{__func__}, category, level, should_ratelimit, __VA_ARGS__) |
|
|
339 | 0 | return {}; |
340 | 0 | } |
341 | | |
342 | 0 | return buffer; |
343 | 0 | } |
344 | | |
345 | | /** |
346 | | * Computes SHA256 hash of ASMap data for versioning and consistency checks. |
347 | | */ |
348 | | uint256 AsmapVersion(const std::span<const std::byte> data) |
349 | 0 | { |
350 | 0 | if (data.empty()) return {}; |
351 | | |
352 | 0 | HashWriter asmap_hasher; |
353 | 0 | asmap_hasher << data; |
354 | 0 | return asmap_hasher.GetHash(); |
355 | 0 | } |