Rewrite previous database scan in terms of database crawlers

This commit is contained in:
Piotr Wójcik 2024-09-26 16:50:14 +02:00
commit fb12ea9e47
2 changed files with 18 additions and 44 deletions

View file

@ -1,5 +1,6 @@
#include <nano/lib/utility.hpp>
#include <nano/node/bootstrap_ascending/database_scan.hpp>
#include <nano/node/bootstrap_ascending/iterators.hpp>
#include <nano/secure/common.hpp>
#include <nano/secure/ledger.hpp>
#include <nano/secure/ledger_set_any.hpp>
@ -64,24 +65,24 @@ std::unique_ptr<nano::container_info_component> nano::bootstrap_ascending::datab
}
/*
* account_database_iterator
* account_database_scanner
*/
std::deque<nano::account> nano::bootstrap_ascending::account_database_scanner::next_batch (nano::store::transaction & transaction, size_t batch_size)
{
std::deque<nano::account> result;
auto it = ledger.store.account.begin (transaction, next);
auto const end = ledger.store.account.end ();
account_database_crawler crawler{ ledger.store, transaction, next };
for (size_t count = 0; it != end && count < batch_size; ++it, ++count)
for (size_t count = 0; crawler.current && count < batch_size; crawler.advance (), ++count)
{
auto const & account = it->first;
auto const & [account, info] = crawler.current.value ();
result.push_back (account);
next = account.number () + 1;
next = account.number () + 1; // TODO: Handle account number overflow
}
if (it == end)
// Empty current value indicates the end of the table
if (!crawler.current)
{
// Reset for the next ledger iteration
next = { 0 };
@ -92,54 +93,27 @@ std::deque<nano::account> nano::bootstrap_ascending::account_database_scanner::n
}
/*
* pending_database_iterator
* pending_database_scanner
*/
std::deque<nano::account> nano::bootstrap_ascending::pending_database_scanner::next_batch (nano::store::transaction & transaction, size_t batch_size)
{
std::deque<nano::account> result;
auto it = ledger.store.pending.begin (transaction, next);
auto const end = ledger.store.pending.end ();
pending_database_crawler crawler{ ledger.store, transaction, next };
// TODO: This pending iteration heuristic should be encapsulated in a pending_iterator class and reused across other components
// The heuristic is to advance the iterator sequentially until we reach a new account, falling back to a fresh lookup if the account has too many pending blocks
// This avoids the overhead of performing a fresh lookup for every pending account, as the majority of accounts have only a few pending blocks
auto advance_iterator = [&] () {
auto const starting_account = it->first.account;
// For RocksDB, sequential access is ~10x faster than performing a fresh lookup (tested on my machine)
const size_t sequential_attempts = 10;
// First try advancing sequentially
for (size_t count = 0; count < sequential_attempts && it != end; ++count, ++it)
{
if (it->first.account != starting_account)
{
break;
}
}
// If we didn't advance to the next account, perform a fresh lookup
if (it != end && it->first.account != starting_account)
{
it = ledger.store.pending.begin (transaction, { starting_account.number () + 1, 0 });
}
debug_assert (it == end || it->first.account != starting_account);
};
for (size_t count = 0; it != end && count < batch_size; advance_iterator (), ++count)
for (size_t count = 0; crawler.current && count < batch_size; crawler.advance (), ++count)
{
auto const & account = it->first.account;
result.push_back (account);
next = { account.number () + 1, 0 };
auto const & [key, info] = crawler.current.value ();
result.push_back (key.account);
next = key.account.number () + 1; // TODO: Handle account number overflow
}
if (it == end)
// Empty current value indicates the end of the table
if (!crawler.current)
{
// Reset for the next ledger iteration
next = { 0, 0 };
next = { 0 };
++completed;
}

View file

@ -24,7 +24,7 @@ struct pending_database_scanner
std::deque<nano::account> next_batch (nano::store::transaction &, size_t batch_size);
nano::pending_key next{ 0, 0 };
nano::account next{ 0 };
size_t completed{ 0 };
};