// Copyright (c) 2018-2022 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include <map>

#include <clientversion.h>
#include <common/args.h>
#include <dbwrapper.h>
#include <hash.h>
#include <index/blockfilterindex.h>
#include <logging.h>
#include <node/blockstorage.h>
#include <undo.h>
#include <util/fs_helpers.h>
#include <validation.h>

/* The index database stores three items for each block: the disk location of the encoded filter,
 * its dSHA256 hash, and the header. Those belonging to blocks on the active chain are indexed by
 * height, and those belonging to blocks that have been reorganized out of the active chain are
 * indexed by block hash. This ensures that filter data for any block that becomes part of the
 * active chain can always be retrieved, alleviating timing concerns.
 *
 * The filters themselves are stored in flat files and referenced by the LevelDB entries. This
 * minimizes the amount of data written to LevelDB and keeps the database values constant size. The
 * disk location of the next block filter to be written (represented as a FlatFilePos) is stored
 * under the DB_FILTER_POS key.
 *
 * Keys for the height index have the type [DB_BLOCK_HEIGHT, uint32 (BE)]. The height is represented
 * as big-endian so that sequential reads of filters by height are fast.
 * Keys for the hash index have the type [DB_BLOCK_HASH, uint256].
 */
constexpr uint8_t DB_BLOCK_HASH{'s'};
constexpr uint8_t DB_BLOCK_HEIGHT{'t'};
constexpr uint8_t DB_FILTER_POS{'P'};
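// For example, the height key for block 1000 serializes to the five bytes
// {'t', 0x00, 0x00, 0x03, 0xE8}: the DB_BLOCK_HEIGHT prefix followed by the
// height as a big-endian uint32, so LevelDB's lexicographic key ordering
// matches ascending block height.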

constexpr unsigned int MAX_FLTR_FILE_SIZE = 0x1000000; // 16 MiB
/** The pre-allocation chunk size for fltr?????.dat files */
constexpr unsigned int FLTR_FILE_CHUNK_SIZE = 0x100000; // 1 MiB
/** Maximum size of the cfheaders cache.
 * The cache is bounded so that a bug in filling it cannot turn into an OOM.
 * At 2000 entries, it is big enough for a 2,000,000-block chain, which
 * should be enough until ~2047. */
constexpr size_t CF_HEADERS_CACHE_MAX_SZ{2000};
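// (Headers are cached only at checkpoint heights, i.e. multiples of
// CFCHECKPT_INTERVAL = 1000, so 2000 entries cover heights up to
// 2000 * 1000 = 2,000,000.)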

namespace {

struct DBVal {
    uint256 hash;
    uint256 header;
    FlatFilePos pos;

    SERIALIZE_METHODS(DBVal, obj) { READWRITE(obj.hash, obj.header, obj.pos); }
};

struct DBHeightKey {
    int height;

    explicit DBHeightKey(int height_in) : height(height_in) {}

    template<typename Stream>
    void Serialize(Stream& s) const
    {
        ser_writedata8(s, DB_BLOCK_HEIGHT);
        ser_writedata32be(s, height);
    }

    template<typename Stream>
    void Unserialize(Stream& s)
    {
        const uint8_t prefix{ser_readdata8(s)};
        if (prefix != DB_BLOCK_HEIGHT) {
            throw std::ios_base::failure("Invalid format for block filter index DB height key");
        }
        height = ser_readdata32be(s);
    }
};

struct DBHashKey {
    uint256 hash;

    explicit DBHashKey(const uint256& hash_in) : hash(hash_in) {}

    SERIALIZE_METHODS(DBHashKey, obj) {
        uint8_t prefix{DB_BLOCK_HASH};
        READWRITE(prefix);
        if (prefix != DB_BLOCK_HASH) {
            throw std::ios_base::failure("Invalid format for block filter index DB hash key");
        }

        READWRITE(obj.hash);
    }
};

} // namespace

static std::map<BlockFilterType, BlockFilterIndex> g_filter_indexes;

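// The index lives under <datadir>/indexes/blockfilter/<filter_name>/, with the
// LevelDB database in the db/ subdirectory and the filter data in fltr?????.dat
// flat files alongside it (e.g. indexes/blockfilter/basic/ for the BIP 158
// basic filter type).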
BlockFilterIndex::BlockFilterIndex(std::unique_ptr<interfaces::Chain> chain, BlockFilterType filter_type,
                                   size_t n_cache_size, bool f_memory, bool f_wipe)
    : BaseIndex(std::move(chain), BlockFilterTypeName(filter_type) + " block filter index")
    , m_filter_type(filter_type)
{
    const std::string& filter_name = BlockFilterTypeName(filter_type);
    if (filter_name.empty()) throw std::invalid_argument("unknown filter_type");

    fs::path path = gArgs.GetDataDirNet() / "indexes" / "blockfilter" / fs::u8path(filter_name);
    fs::create_directories(path);

    m_db = std::make_unique<BaseIndex::DB>(path / "db", n_cache_size, f_memory, f_wipe);
    m_filter_fileseq = std::make_unique<FlatFileSeq>(std::move(path), "fltr", FLTR_FILE_CHUNK_SIZE);
}

bool BlockFilterIndex::CustomInit(const std::optional<interfaces::BlockKey>& block)
{
    if (!m_db->Read(DB_FILTER_POS, m_next_filter_pos)) {
        // Check that the cause of the read failure is that the key does not exist. Any other errors
        // indicate database corruption or a disk failure, and starting the index would cause
        // further corruption.
        if (m_db->Exists(DB_FILTER_POS)) {
            return error("%s: Cannot read current %s state; index may be corrupted",
                         __func__, GetName());
        }

        // If the DB_FILTER_POS is not set, then initialize to the first location.
        m_next_filter_pos.nFile = 0;
        m_next_filter_pos.nPos = 0;
    }
    return true;
}

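// Flush the current filter file and record the next write position in the same
// batch, so the database never references filter data that has not reached
// disk.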
bool BlockFilterIndex::CustomCommit(CDBBatch& batch)
{
    const FlatFilePos& pos = m_next_filter_pos;

    // Flush current filter file to disk.
    AutoFile file{m_filter_fileseq->Open(pos)};
    if (file.IsNull()) {
        return error("%s: Failed to open filter file %d", __func__, pos.nFile);
    }
    if (!FileCommit(file.Get())) {
        return error("%s: Failed to commit filter file %d", __func__, pos.nFile);
    }

    batch.Write(DB_FILTER_POS, pos);
    return true;
}

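// Read a filter from the flat files and verify that the dSHA256 hash of the
// encoded filter matches the hash stored in the database entry before
// returning it.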
bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const
{
    AutoFile filein{m_filter_fileseq->Open(pos, true)};
    if (filein.IsNull()) {
        return false;
    }

    // Check that the hash of the encoded_filter matches the one stored in the db.
    uint256 block_hash;
    std::vector<uint8_t> encoded_filter;
    try {
        filein >> block_hash >> encoded_filter;
        if (Hash(encoded_filter) != hash) return error("Checksum mismatch in filter decode.");
        filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter), /*skip_decode_check=*/true);
    }
    catch (const std::exception& e) {
        return error("%s: Failed to deserialize block filter from disk: %s", __func__, e.what());
    }

    return true;
}

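// Append the filter to the flat file sequence at `pos`, rolling over to a new
// file when the write would exceed MAX_FLTR_FILE_SIZE. Returns the number of
// bytes written, or 0 on failure.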
size_t BlockFilterIndex::WriteFilterToDisk(FlatFilePos& pos, const BlockFilter& filter)
{
    assert(filter.GetFilterType() == GetFilterType());

    size_t data_size =
        GetSerializeSize(filter.GetBlockHash(), CLIENT_VERSION) +
        GetSerializeSize(filter.GetEncodedFilter(), CLIENT_VERSION);

    // If writing the filter would overflow the file, flush and move to the next one.
    if (pos.nPos + data_size > MAX_FLTR_FILE_SIZE) {
        AutoFile last_file{m_filter_fileseq->Open(pos)};
        if (last_file.IsNull()) {
            LogPrintf("%s: Failed to open filter file %d\n", __func__, pos.nFile);
            return 0;
        }
        if (!TruncateFile(last_file.Get(), pos.nPos)) {
            LogPrintf("%s: Failed to truncate filter file %d\n", __func__, pos.nFile);
            return 0;
        }
        if (!FileCommit(last_file.Get())) {
            LogPrintf("%s: Failed to commit filter file %d\n", __func__, pos.nFile);
            return 0;
        }

        pos.nFile++;
        pos.nPos = 0;
    }

    // Pre-allocate sufficient space for filter data.
    bool out_of_space;
    m_filter_fileseq->Allocate(pos, data_size, out_of_space);
    if (out_of_space) {
        LogPrintf("%s: out of disk space\n", __func__);
        return 0;
    }

    AutoFile fileout{m_filter_fileseq->Open(pos)};
    if (fileout.IsNull()) {
        LogPrintf("%s: Failed to open filter file %d\n", __func__, pos.nFile);
        return 0;
    }

    fileout << filter.GetBlockHash() << filter.GetEncodedFilter();
    return data_size;
}

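// Build the filter for a newly connected block, persist it to disk, and chain
// its header onto the previous block's filter header before writing the height
// index entry.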
bool BlockFilterIndex::CustomAppend(const interfaces::BlockInfo& block)
{
    CBlockUndo block_undo;
    uint256 prev_header;

    if (block.height > 0) {
        // The pindex variable gives the indexing code access to node internals.
        // It will be removed in an upcoming commit.
        const CBlockIndex* pindex = WITH_LOCK(cs_main, return m_chainstate->m_blockman.LookupBlockIndex(block.hash));
        if (!m_chainstate->m_blockman.UndoReadFromDisk(block_undo, *pindex)) {
            return false;
        }

        std::pair<uint256, DBVal> read_out;
        if (!m_db->Read(DBHeightKey(block.height - 1), read_out)) {
            return false;
        }

        uint256 expected_block_hash = *Assert(block.prev_hash);
        if (read_out.first != expected_block_hash) {
            return error("%s: previous block header belongs to unexpected block %s; expected %s",
                         __func__, read_out.first.ToString(), expected_block_hash.ToString());
        }

        prev_header = read_out.second.header;
    }

    BlockFilter filter(m_filter_type, *Assert(block.data), block_undo);

    size_t bytes_written = WriteFilterToDisk(m_next_filter_pos, filter);
    if (bytes_written == 0) return false;

    std::pair<uint256, DBVal> value;
    value.first = block.hash;
    value.second.hash = filter.GetHash();
    value.second.header = filter.ComputeHeader(prev_header);
    value.second.pos = m_next_filter_pos;

    if (!m_db->Write(DBHeightKey(block.height), value)) {
        return false;
    }

    m_next_filter_pos.nPos += bytes_written;
    return true;
}

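// Copy the height-index entries for heights [start_height, stop_height] into
// the hash index, so that filters for blocks leaving the active chain remain
// reachable by block hash after their height slots are overwritten.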
[[nodiscard]] static bool CopyHeightIndexToHashIndex(CDBIterator& db_it, CDBBatch& batch,
                                                     const std::string& index_name,
                                                     int start_height, int stop_height)
{
    DBHeightKey key(start_height);
    db_it.Seek(key);

    for (int height = start_height; height <= stop_height; ++height) {
        if (!db_it.GetKey(key) || key.height != height) {
            return error("%s: unexpected key in %s: expected (%c, %d)",
                         __func__, index_name, DB_BLOCK_HEIGHT, height);
        }

        std::pair<uint256, DBVal> value;
        if (!db_it.GetValue(value)) {
            return error("%s: unable to read value in %s at key (%c, %d)",
                         __func__, index_name, DB_BLOCK_HEIGHT, height);
        }

        batch.Write(DBHashKey(value.first), std::move(value.second));

        db_it.Next();
    }
    return true;
}

bool BlockFilterIndex::CustomRewind(const interfaces::BlockKey& current_tip, const interfaces::BlockKey& new_tip)
{
    CDBBatch batch(*m_db);
    std::unique_ptr<CDBIterator> db_it(m_db->NewIterator());

    // During a reorg, we need to copy all filters for blocks that are getting disconnected from the
    // height index to the hash index so we can still find them when the height index entries are
    // overwritten.
    if (!CopyHeightIndexToHashIndex(*db_it, batch, m_name, new_tip.height, current_tip.height)) {
        return false;
    }

    // The latest filter position gets written in Commit by the call to BaseIndex::Rewind.
    // But since this creates new references to the filter, the position should get updated here
    // atomically as well in case Commit fails.
    batch.Write(DB_FILTER_POS, m_next_filter_pos);
    if (!m_db->WriteBatch(batch)) return false;

    return true;
}

static bool LookupOne(const CDBWrapper& db, const CBlockIndex* block_index, DBVal& result)
{
    // First check if the result is stored under the height index and the value there matches the
    // block hash. This should be the case if the block is on the active chain.
    std::pair<uint256, DBVal> read_out;
    if (!db.Read(DBHeightKey(block_index->nHeight), read_out)) {
        return false;
    }
    if (read_out.first == block_index->GetBlockHash()) {
        result = std::move(read_out.second);
        return true;
    }

    // If the value at the height index corresponds to a different block, the result will be stored
    // in the hash index.
    return db.Read(DBHashKey(block_index->GetBlockHash()), result);
}

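// Fetch the DB entries for all blocks from start_height up to stop_index,
// preferring the height index and falling back to the hash index for any
// entries that are not on the chain leading to stop_index.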
static bool LookupRange(CDBWrapper& db, const std::string& index_name, int start_height,
                        const CBlockIndex* stop_index, std::vector<DBVal>& results)
{
    if (start_height < 0) {
        return error("%s: start height (%d) is negative", __func__, start_height);
    }
    if (start_height > stop_index->nHeight) {
        return error("%s: start height (%d) is greater than stop height (%d)",
                     __func__, start_height, stop_index->nHeight);
    }

    size_t results_size = static_cast<size_t>(stop_index->nHeight - start_height + 1);
    std::vector<std::pair<uint256, DBVal>> values(results_size);

    DBHeightKey key(start_height);
    std::unique_ptr<CDBIterator> db_it(db.NewIterator());
    db_it->Seek(DBHeightKey(start_height));
    for (int height = start_height; height <= stop_index->nHeight; ++height) {
        if (!db_it->Valid() || !db_it->GetKey(key) || key.height != height) {
            return false;
        }

        size_t i = static_cast<size_t>(height - start_height);
        if (!db_it->GetValue(values[i])) {
            return error("%s: unable to read value in %s at key (%c, %d)",
                         __func__, index_name, DB_BLOCK_HEIGHT, height);
        }

        db_it->Next();
    }

    results.resize(results_size);

    // Iterate backwards through block indexes collecting results in order to access the block hash
    // of each entry in case we need to look it up in the hash index.
    for (const CBlockIndex* block_index = stop_index;
         block_index && block_index->nHeight >= start_height;
         block_index = block_index->pprev) {
        uint256 block_hash = block_index->GetBlockHash();

        size_t i = static_cast<size_t>(block_index->nHeight - start_height);
        if (block_hash == values[i].first) {
            results[i] = std::move(values[i].second);
            continue;
        }

        if (!db.Read(DBHashKey(block_hash), results[i])) {
            return error("%s: unable to read value in %s at key (%c, %s)",
                         __func__, index_name, DB_BLOCK_HASH, block_hash.ToString());
        }
    }

    return true;
}

bool BlockFilterIndex::LookupFilter(const CBlockIndex* block_index, BlockFilter& filter_out) const
{
    DBVal entry;
    if (!LookupOne(*m_db, block_index, entry)) {
        return false;
    }

    return ReadFilterFromDisk(entry.pos, entry.hash, filter_out);
}

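// Look up a filter header, consulting the in-memory cache at checkpoint
// heights (multiples of CFCHECKPT_INTERVAL), since those are the headers
// served in response to BIP 157 getcfcheckpt requests.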
bool BlockFilterIndex::LookupFilterHeader(const CBlockIndex* block_index, uint256& header_out)
{
    LOCK(m_cs_headers_cache);

    bool is_checkpoint{block_index->nHeight % CFCHECKPT_INTERVAL == 0};

    if (is_checkpoint) {
        // Try to find the block in the headers cache if this is a checkpoint height.
        auto header = m_headers_cache.find(block_index->GetBlockHash());
        if (header != m_headers_cache.end()) {
            header_out = header->second;
            return true;
        }
    }

    DBVal entry;
    if (!LookupOne(*m_db, block_index, entry)) {
        return false;
    }

    if (is_checkpoint &&
        m_headers_cache.size() < CF_HEADERS_CACHE_MAX_SZ) {
        // Add to the headers cache if this is a checkpoint height.
        m_headers_cache.emplace(block_index->GetBlockHash(), entry.header);
    }

    header_out = entry.header;
    return true;
}

bool BlockFilterIndex::LookupFilterRange(int start_height, const CBlockIndex* stop_index,
                                         std::vector<BlockFilter>& filters_out) const
{
    std::vector<DBVal> entries;
    if (!LookupRange(*m_db, m_name, start_height, stop_index, entries)) {
        return false;
    }

    filters_out.resize(entries.size());
    auto filter_pos_it = filters_out.begin();
    for (const auto& entry : entries) {
        if (!ReadFilterFromDisk(entry.pos, entry.hash, *filter_pos_it)) {
            return false;
        }
        ++filter_pos_it;
    }

    return true;
}

bool BlockFilterIndex::LookupFilterHashRange(int start_height, const CBlockIndex* stop_index,
                                             std::vector<uint256>& hashes_out) const
{
    std::vector<DBVal> entries;
    if (!LookupRange(*m_db, m_name, start_height, stop_index, entries)) {
        return false;
    }

    hashes_out.clear();
    hashes_out.reserve(entries.size());
    for (const auto& entry : entries) {
        hashes_out.push_back(entry.hash);
    }
    return true;
}

BlockFilterIndex* GetBlockFilterIndex(BlockFilterType filter_type)
{
    auto it = g_filter_indexes.find(filter_type);
    return it != g_filter_indexes.end() ? &it->second : nullptr;
}

void ForEachBlockFilterIndex(std::function<void (BlockFilterIndex&)> fn)
{
    for (auto& entry : g_filter_indexes) fn(entry.second);
}

bool InitBlockFilterIndex(std::function<std::unique_ptr<interfaces::Chain>()> make_chain, BlockFilterType filter_type,
                          size_t n_cache_size, bool f_memory, bool f_wipe)
{
    auto result = g_filter_indexes.emplace(std::piecewise_construct,
                                           std::forward_as_tuple(filter_type),
                                           std::forward_as_tuple(make_chain(), filter_type,
                                                                 n_cache_size, f_memory, f_wipe));
    return result.second;
}

bool DestroyBlockFilterIndex(BlockFilterType filter_type)
{
    return g_filter_indexes.erase(filter_type);
}

void DestroyAllBlockFilterIndexes()
{
    g_filter_indexes.clear();
}