Merge pull request #4872 from pwojcikdev/bootstrap-stale-elections

Bootstrap stale elections
This commit is contained in:
Piotr Wójcik 2025-04-11 13:46:17 +02:00 committed by Piotr Wójcik
commit 0827da7183
12 changed files with 128 additions and 60 deletions

View file

@ -1482,3 +1482,40 @@ TEST (active_elections, broadcast_block_on_activation)
ASSERT_TIMELY (5s, node1->active.active (send1->qualified_root ()));
ASSERT_TIMELY (5s, node2->block_or_pruned_exists (send1->hash ()));
}
// Checks that, once an election has been started and left running, the `bootstrap_stale` counter becomes non-zero within the timeout
TEST (active_elections, bootstrap_stale)
{
	nano::test::system system;

	// A short stale threshold keeps the test fast
	nano::node_config config = system.default_config ();
	config.active_elections.bootstrap_stale_threshold = 2s;
	auto & node = *system.add_node (config);

	// Reads the current value of the stale election counter
	auto const stale_count = [&node] () {
		return node.stats.count (nano::stat::type::active_elections, nano::stat::detail::bootstrap_stale);
	};

	// Build a send block on top of genesis
	nano::keypair destination;
	auto const & genesis_key = nano::dev::genesis_key;
	nano::state_block_builder block_builder;
	auto send_block = block_builder.make_block ()
	                  .account (genesis_key.pub)
	                  .previous (nano::dev::genesis->hash ())
	                  .representative (genesis_key.pub)
	                  .balance (nano::dev::constants.genesis_amount - nano::Knano_ratio)
	                  .link (destination.pub)
	                  .sign (genesis_key.prv, genesis_key.pub)
	                  .work (*system.work.generate (nano::dev::genesis->hash ()))
	                  .build ();

	// Hand the block to the node and wait until an election for its root exists
	node.process_active (send_block);
	std::shared_ptr<nano::election> started_election;
	ASSERT_TIMELY (5s, (started_election = node.active.election (send_block->qualified_root ())) != nullptr);

	// Nothing has been reported as stale yet
	ASSERT_EQ (0, stale_count ());

	// After the stale threshold passes the counter must become non-zero
	ASSERT_TIMELY (5s, stale_count () > 0);
}

View file

@ -84,7 +84,7 @@ public:
default_rpc_port (45000),
default_ipc_port (46000),
default_websocket_port (47000),
aec_loop_interval_ms (300), // Update AEC ~3 times per second
aec_loop_interval (300ms), // Update AEC ~3 times per second
cleanup_period (default_cleanup_period),
merge_period (std::chrono::milliseconds (250)),
keepalive_period (std::chrono::seconds (15)),
@ -120,7 +120,7 @@ public:
}
else if (is_dev_network ())
{
aec_loop_interval_ms = 20;
aec_loop_interval = 20ms;
cleanup_period = std::chrono::seconds (1);
merge_period = std::chrono::milliseconds (10);
keepalive_period = std::chrono::seconds (1);
@ -147,7 +147,7 @@ public:
uint16_t default_rpc_port;
uint16_t default_ipc_port;
uint16_t default_websocket_port;
unsigned aec_loop_interval_ms;
std::chrono::milliseconds aec_loop_interval;
std::chrono::seconds cleanup_period;
std::chrono::milliseconds cleanup_period_half () const

View file

@ -490,6 +490,7 @@ enum class detail
stopped,
confirm_dependent,
forks_cached,
bootstrap_stale,
// unchecked
put,

View file

@ -46,8 +46,8 @@ std::string nano::thread_role::get_string (nano::thread_role::name role)
case nano::thread_role::name::ledger_notifications:
thread_role_name_string = "Ledger notif";
break;
case nano::thread_role::name::request_loop:
thread_role_name_string = "Request loop";
case nano::thread_role::name::aec_loop:
thread_role_name_string = "AEC";
break;
case nano::thread_role::name::wallet_actions:
thread_role_name_string = "Wallet actions";

View file

@ -20,7 +20,7 @@ enum class name
vote_rebroadcasting,
block_processing,
ledger_notifications,
request_loop,
aec_loop,
wallet_actions,
bootstrap_initiator,
bootstrap_connections,

View file

@ -4,6 +4,7 @@
#include <nano/lib/numbers.hpp>
#include <nano/lib/threading.hpp>
#include <nano/node/active_elections.hpp>
#include <nano/node/bootstrap/bootstrap_service.hpp>
#include <nano/node/confirmation_solicitor.hpp>
#include <nano/node/confirming_set.hpp>
#include <nano/node/election.hpp>
@ -94,8 +95,8 @@ void nano::active_elections::start ()
debug_assert (!thread.joinable ());
thread = std::thread ([this] () {
nano::thread_role::set (nano::thread_role::name::request_loop);
request_loop ();
nano::thread_role::set (nano::thread_role::name::aec_loop);
run ();
});
}
@ -110,6 +111,28 @@ void nano::active_elections::stop ()
clear ();
}
void nano::active_elections::run ()
{
nano::unique_lock<nano::mutex> lock{ mutex };
while (!stopped)
{
auto const stamp = std::chrono::steady_clock::now ();
node.stats.inc (nano::stat::type::active, nano::stat::detail::loop);
tick_elections (lock);
debug_assert (!lock.owns_lock ());
lock.lock ();
auto const min_sleep = node.network_params.network.aec_loop_interval / 2;
auto const wakeup = std::max (stamp + node.network_params.network.aec_loop_interval, std::chrono::steady_clock::now () + min_sleep);
condition.wait_until (lock, wakeup, [this, wakeup] {
return stopped || std::chrono::steady_clock::now () >= wakeup;
});
}
}
auto nano::active_elections::block_cemented (std::shared_ptr<nano::block> const & block, nano::block_hash const & confirmation_root, std::shared_ptr<nano::election> const & source_election) -> block_cemented_result
{
debug_assert (!mutex.try_lock ());
@ -245,41 +268,43 @@ int64_t nano::active_elections::vacancy (nano::election_behavior behavior) const
return std::min (election_vacancy (behavior), election_winners_vacancy ());
}
// NOTE(review): this span looks like a diff hunk with the +/- markers stripped, so pre-change
// (`request_confirm`, `lock_a`, `*_l` names) and post-change (`tick_elections`) lines appear
// side by side - confirm against the real file before relying on it.
//
// One pass over the active elections. Must be entered with the mutex held; the list of
// elections is copied under the lock and the lock is then released for the rest of the pass.
// For every election `transition_time` is called with the solicitor; elections for which it
// returns true are erased, the others are collected as stale when their duration exceeds
// `config.bootstrap_stale_threshold`. Stale elections are then counted in the
// `bootstrap_stale` statistic and their accounts handed to `node.bootstrap.prioritize`.
void nano::active_elections::request_confirm (nano::unique_lock<nano::mutex> & lock_a)
void nano::active_elections::tick_elections (nano::unique_lock<nano::mutex> & lock)
{
debug_assert (lock_a.owns_lock ());
debug_assert (lock.owns_lock ());
std::size_t const this_loop_target_l (roots.size ());
auto const elections_l{ list_active_impl (this_loop_target_l) };
auto const election_list = list_active_impl ();
// The snapshot above is all that needs the mutex; everything below runs unlocked
lock_a.unlock ();
lock.unlock ();
nano::confirmation_solicitor solicitor (node.network, node.config);
solicitor.prepare (node.rep_crawler.principal_representatives (std::numeric_limits<std::size_t>::max ()));
std::size_t unconfirmed_count_l (0);
nano::timer<std::chrono::milliseconds> elapsed (nano::timer_state::started);
/*
* Loop through active elections in descending order of proof-of-work difficulty, requesting confirmation
*
* Only up to a certain amount of elections are queued for confirmation request and block rebroadcasting. The remaining elections can still be confirmed if votes arrive
* Elections extending the soft config.size limit are flushed after a certain time-to-live cutoff
* Flushed elections are later re-activated via frontier confirmation
*/
for (auto const & election_l : elections_l)
// Elections that were not erased in this pass and have been running longer than the stale threshold
std::deque<std::shared_ptr<nano::election>> stale_elections;
for (auto const & election : election_list)
{
bool const confirmed_l (election_l->confirmed ());
unconfirmed_count_l += !confirmed_l;
if (election_l->transition_time (solicitor))
if (election->transition_time (solicitor))
{
erase (election_l->qualified_root);
erase (election->qualified_root);
}
else if (election->duration () > config.bootstrap_stale_threshold)
{
stale_elections.push_back (election);
}
}
solicitor.flush ();
lock_a.lock ();
// Rate limit the stale handling to half the threshold
// (presumably `elapse` returns true at most once per given period - confirm against nano::interval)
if (bootstrap_stale_interval.elapse (config.bootstrap_stale_threshold / 2))
{
node.stats.add (nano::stat::type::active_elections, nano::stat::detail::bootstrap_stale, stale_elections.size ());
for (auto const & election : stale_elections)
{
node.bootstrap.prioritize (election->account ());
}
}
}
void nano::active_elections::cleanup_election (nano::unique_lock<nano::mutex> & lock_a, std::shared_ptr<nano::election> election)
@ -342,20 +367,19 @@ void nano::active_elections::cleanup_election (nano::unique_lock<nano::mutex> &
}
}
// Acquires `mutex` and returns the result of `list_active_impl` for the given maximum count.
// NOTE(review): the `max_a` and `max_count` lines appear to be the old and new spelling of the
// same statements in a diff rendered without +/- markers - confirm against the real file.
std::vector<std::shared_ptr<nano::election>> nano::active_elections::list_active (std::size_t max_a)
std::vector<std::shared_ptr<nano::election>> nano::active_elections::list_active (std::size_t max_count)
{
nano::lock_guard<nano::mutex> guard{ mutex };
return list_active_impl (max_a);
return list_active_impl (max_count);
}
std::vector<std::shared_ptr<nano::election>> nano::active_elections::list_active_impl (std::size_t max_a) const
std::vector<std::shared_ptr<nano::election>> nano::active_elections::list_active_impl (std::size_t max_count) const
{
std::vector<std::shared_ptr<nano::election>> result_l;
result_l.reserve (std::min (max_a, roots.size ()));
result_l.reserve (std::min (max_count, roots.size ()));
{
auto & sorted_roots_l (roots.get<tag_sequenced> ());
std::size_t count_l{ 0 };
for (auto i = sorted_roots_l.begin (), n = sorted_roots_l.end (); i != n && count_l < max_a; ++i, ++count_l)
for (auto i = sorted_roots_l.begin (), n = sorted_roots_l.end (); i != n && result_l.size () < max_count; ++i)
{
result_l.push_back (i->election);
}
@ -363,27 +387,6 @@ std::vector<std::shared_ptr<nano::election>> nano::active_elections::list_active
return result_l;
}
// Thread loop that calls `request_confirm` once per iteration and then waits on the condition
// variable until the next iteration is due or `stopped` is set.
// NOTE(review): this appears to be the pre-change loop that `run` replaces in this commit - confirm.
void nano::active_elections::request_loop ()
{
nano::unique_lock<nano::mutex> lock{ mutex };
while (!stopped)
{
auto const stamp_l = std::chrono::steady_clock::now ();
node.stats.inc (nano::stat::type::active, nano::stat::detail::loop);
request_confirm (lock);
// Unlike `run`, this loop expects the callee to hand the mutex back locked
debug_assert (lock.owns_lock ());
if (!stopped)
{
// Wake at the later of: one full interval after the iteration started, or half an interval from now
auto const min_sleep_l = std::chrono::milliseconds (node.network_params.network.aec_loop_interval_ms / 2);
auto const wakeup_l = std::max (stamp_l + std::chrono::milliseconds (node.network_params.network.aec_loop_interval_ms), std::chrono::steady_clock::now () + min_sleep_l);
condition.wait_until (lock, wakeup_l, [&wakeup_l, &stopped = stopped] { return stopped || std::chrono::steady_clock::now () >= wakeup_l; });
}
}
}
nano::election_insertion_result nano::active_elections::insert (std::shared_ptr<nano::block> const & block_a, nano::election_behavior election_behavior_a, erased_callback_t erased_callback_a)
{
debug_assert (block_a);
@ -639,7 +642,8 @@ nano::error nano::active_elections_config::serialize (nano::tomlconfig & toml) c
toml.put ("optimistic_limit_percentage", optimistic_limit_percentage, "Limit of optimistic elections as percentage of `active_elections_size`. \ntype:uint64");
toml.put ("confirmation_history_size", confirmation_history_size, "Maximum confirmation history size. If tracking the rate of block confirmations, the websocket feature is recommended instead. \ntype:uint64");
toml.put ("confirmation_cache", confirmation_cache, "Maximum number of confirmed elections kept in cache to prevent restarting an election. \ntype:uint64");
toml.put ("max_election_winners", max_election_winners, "Maximum size of election winner details set. \ntype:uint64");
toml.put ("bootstrap_stale_threshold", bootstrap_stale_threshold.count (), "Time after which additional bootstrap attempts are made to find missing blocks for an election. \ntype:seconds");
return toml.get_error ();
}
@ -650,6 +654,8 @@ nano::error nano::active_elections_config::deserialize (nano::tomlconfig & toml)
toml.get ("optimistic_limit_percentage", optimistic_limit_percentage);
toml.get ("confirmation_history_size", confirmation_history_size);
toml.get ("confirmation_cache", confirmation_cache);
toml.get ("max_election_winners", max_election_winners);
toml.get_duration ("bootstrap_stale_threshold", bootstrap_stale_threshold);
return toml.get_error ();
}

View file

@ -1,6 +1,7 @@
#pragma once
#include <nano/lib/enum_util.hpp>
#include <nano/lib/interval.hpp>
#include <nano/lib/numbers.hpp>
#include <nano/lib/observer_set.hpp>
#include <nano/node/election_behavior.hpp>
@ -50,6 +51,8 @@ public:
std::size_t confirmation_cache{ 65536 };
// Maximum size of election winner details set
std::size_t max_election_winners{ 1024 * 16 };
std::chrono::seconds bootstrap_stale_threshold{ 60s };
};
/**
@ -129,8 +132,9 @@ public: // Events
nano::observer_set<> vacancy_updated;
private:
void request_loop ();
void request_confirm (nano::unique_lock<nano::mutex> &);
void run ();
void tick_elections (nano::unique_lock<nano::mutex> &);
// Erase all blocks from active and, if not confirmed, clear digests from network filters
void cleanup_election (nano::unique_lock<nano::mutex> & lock_a, std::shared_ptr<nano::election>);
@ -139,7 +143,7 @@ private:
void notify_observers (nano::secure::transaction const &, nano::election_status const & status, std::vector<nano::vote_with_weight_info> const & votes) const;
std::shared_ptr<nano::election> election_impl (nano::qualified_root const &) const;
std::vector<std::shared_ptr<nano::election>> list_active_impl (std::size_t max_count) const;
std::vector<std::shared_ptr<nano::election>> list_active_impl (std::size_t max_count = std::numeric_limits<std::size_t>::max ()) const;
private: // Dependencies
active_elections_config const & config;
@ -163,6 +167,8 @@ private:
bool stopped{ false };
std::thread thread;
nano::interval bootstrap_stale_interval;
friend class election;
public: // Tests

View file

@ -163,6 +163,12 @@ void nano::bootstrap_service::reset ()
throttle.reset ();
}
// Marks `account` as a priority in the bootstrap account sets
void nano::bootstrap_service::prioritize (nano::account const & account)
{
	// Hold the service mutex for the duration of the update
	nano::lock_guard<nano::mutex> guard{ mutex };
	accounts.priority_set (account);
}
bool nano::bootstrap_service::send (std::shared_ptr<nano::transport::channel> const & channel, async_tag tag)
{
debug_assert (tag.type != query_type::invalid);

View file

@ -46,6 +46,11 @@ public:
*/
void reset ();
/**
* Adds an account to the priority set
*/
void prioritize (nano::account const & account);
std::size_t blocked_size () const;
std::size_t priority_size () const;
std::size_t score_size () const;

View file

@ -805,6 +805,12 @@ void nano::election::force_confirm ()
confirm_once (lock);
}
// Returns the account of the block currently recorded as the winner in the election status
nano::account nano::election::account () const
{
	// `status` is read under the election mutex
	nano::lock_guard<nano::mutex> lock{ mutex };
	auto const & winner = status.winner;
	return winner->account ();
}
std::unordered_map<nano::block_hash, std::shared_ptr<nano::block>> nano::election::blocks () const
{
nano::lock_guard<nano::mutex> guard{ mutex };

View file

@ -142,6 +142,7 @@ public: // Information
nano::root const root;
nano::qualified_root const qualified_root;
nano::account account () const;
std::vector<nano::vote_with_weight_info> votes_with_weight () const;
nano::election_behavior behavior () const;
nano::election_state state () const;

View file

@ -1776,7 +1776,7 @@ TEST (node, mass_block_new)
nano::node_config node_config = system.default_config ();
node_config.backlog_scan.enable = false;
auto & node = *system.add_node (node_config);
node.network_params.network.aec_loop_interval_ms = 500;
node.network_params.network.aec_loop_interval = 500ms;
#ifndef NDEBUG
auto const num_blocks = 5000;