Fix stale election notification

This commit is contained in:
Piotr Wójcik 2025-09-07 12:26:55 +02:00
commit ccd3affbb5
7 changed files with 117 additions and 46 deletions

View file

@ -1580,13 +1580,13 @@ TEST (active_elections, broadcast_block_on_activation)
ASSERT_TIMELY (5s, node2->block_or_pruned_exists (send1->hash ()));
}
TEST (active_elections, bootstrap_stale)
TEST (active_elections, stale_election)
{
nano::test::system system;
// Configure node with short stale threshold for testing
nano::node_config node_config = system.default_config ();
node_config.active_elections.bootstrap_stale_threshold = 2s; // Short threshold for faster testing
node_config.active_elections.stale_threshold = 2s; // Short threshold for faster testing
auto & node = *system.add_node (node_config);
@ -1603,6 +1603,12 @@ TEST (active_elections, bootstrap_stale)
.work (*system.work.generate (nano::dev::genesis->hash ()))
.build ();
std::atomic<bool> stale_detected{ false };
node.active.election_stale.add ([&] (auto const & election) {
EXPECT_EQ (send->qualified_root (), election->qualified_root);
stale_detected = true;
});
// Process the block and start an election
node.process_active (send);
@ -1611,10 +1617,59 @@ TEST (active_elections, bootstrap_stale)
ASSERT_TIMELY (5s, (election = node.active.election (send->qualified_root ())) != nullptr);
// Check initial state
ASSERT_EQ (0, node.stats.count (nano::stat::type::active_elections, nano::stat::detail::bootstrap_stale));
ASSERT_EQ (0, node.stats.count (nano::stat::type::active_elections, nano::stat::detail::stale));
// Wait for bootstrap_stale_threshold to pass and the statistic to be incremented
ASSERT_TIMELY (5s, node.stats.count (nano::stat::type::active_elections, nano::stat::detail::bootstrap_stale) > 0);
// Wait for stale_threshold to pass and stats to be incremented
ASSERT_TIMELY (5s, stale_detected);
ASSERT_TIMELY (5s, node.stats.count (nano::stat::type::active_elections, nano::stat::detail::stale) > 0);
}
TEST (active_elections, stale_election_multiple)
{
nano::test::system system;
// Configure node with short stale threshold for testing
nano::node_config node_config = system.default_config ();
node_config.active_elections.stale_threshold = 2s; // Short threshold for faster testing
auto & node = *system.add_node (node_config);
// Create 10 independent blocks that will each have their own election
auto blocks = nano::test::setup_independent_blocks (system, node, 10);
// Track which elections had stale events fired
nano::locked<std::set<nano::qualified_root>> stale_detected;
node.active.election_stale.add ([&] (auto const & election) {
stale_detected.lock ()->insert (election->qualified_root);
});
// Start elections for all blocks
ASSERT_TRUE (nano::test::start_elections (system, node, blocks));
// Ensure all elections are active
ASSERT_TIMELY_EQ (5s, node.active.size (), blocks.size ());
for (auto const & block : blocks)
{
ASSERT_TRUE (node.active.active (block->qualified_root ()));
}
// Check initial state
ASSERT_EQ (0, node.stats.count (nano::stat::type::active_elections, nano::stat::detail::stale));
// Wait for stale_threshold to pass (2s) plus some buffer
// The stale event should fire for ALL elections that are stale
ASSERT_TIMELY (5s, node.stats.count (nano::stat::type::active_elections, nano::stat::detail::stale) >= blocks.size ());
// Check that all elections had their stale event fired
ASSERT_TIMELY_EQ (5s, blocks.size (), stale_detected.lock ()->size ());
// Verify each block's election was marked as stale
for (auto const & block : blocks)
{
ASSERT_TRUE (stale_detected.lock ()->count (block->qualified_root ()) > 0)
<< "Election for block " << block->hash ().to_string () << " was not marked as stale";
}
}
TEST (active_elections, transition_optimistic_to_priority)

View file

@ -519,6 +519,7 @@ enum class detail
cancel_dependent,
cancel_checkup,
forks_cached,
stale,
bootstrap_stale,
// unchecked

View file

@ -4,7 +4,6 @@
#include <nano/lib/numbers.hpp>
#include <nano/lib/threading.hpp>
#include <nano/node/active_elections.hpp>
#include <nano/node/bootstrap/bootstrap_service.hpp>
#include <nano/node/cementing_set.hpp>
#include <nano/node/confirmation_solicitor.hpp>
#include <nano/node/election.hpp>
@ -507,9 +506,6 @@ void nano::active_elections::tick_elections (nano::unique_lock<nano::mutex> & lo
nano::confirmation_solicitor solicitor (node.network, node.config);
solicitor.prepare (node.rep_crawler.principal_representatives (std::numeric_limits<std::size_t>::max ()));
nano::timer<std::chrono::milliseconds> elapsed (nano::timer_state::started);
std::deque<std::shared_ptr<nano::election>> stale_elections;
for (auto const & election : election_list)
{
bool tick_result = election->tick (solicitor);
@ -517,33 +513,9 @@ void nano::active_elections::tick_elections (nano::unique_lock<nano::mutex> & lo
{
erase (election->qualified_root);
}
else if (election->duration () > config.stale_threshold)
{
stale_elections.push_back (election);
}
}
solicitor.flush ();
if (stale_interval.elapse (config.stale_threshold / 2))
{
node.stats.add (nano::stat::type::active_elections, nano::stat::detail::bootstrap_stale, stale_elections.size ());
for (auto const & election : stale_elections)
{
node.logger.debug (nano::log::type::active_elections, "Bootstrapping account: {} with stale election with root: {}, blocks: {} (behavior: {}, state: {}, voters: {}, blocks: {}, duration: {}ms)",
election->account,
election->qualified_root,
fmt::join (election->blocks_hashes (), ", "), // TODO: Lazy eval
to_string (election->behavior ()),
to_string (election->state ()),
election->voter_count (),
election->block_count (),
election->duration ().count ());
node.bootstrap.prioritize (election->account);
}
}
}
bool nano::active_elections::predicate () const
@ -601,6 +573,8 @@ void nano::active_elections::checkup_elections (nano::unique_lock<nano::mutex> &
auto const now = std::chrono::steady_clock::now ();
auto const min_duration = node.network_params.network.aec_loop_interval * 3;
std::deque<std::shared_ptr<nano::election>> stale_elections;
for (auto const & election : all_elections)
{
// Only cancel elections if they have been running for a minimum duration of time
@ -621,6 +595,33 @@ void nano::active_elections::checkup_elections (nano::unique_lock<nano::mutex> &
election->duration ().count ());
}
}
else if (election->duration () > config.stale_threshold)
{
stale_elections.push_back (election);
}
}
node.logger.debug (nano::log::type::active_elections, "Checkup found {} stale elections", stale_elections.size ());
// Notify about stale elections at most once per half stale threshold, avoid too frequent notifications
if (stale_interval.elapse (config.stale_threshold / 2))
{
node.stats.add (nano::stat::type::active_elections, nano::stat::detail::stale, stale_elections.size ());
for (auto const & election : stale_elections)
{
node.logger.debug (nano::log::type::active_elections, "Stale election for account: {} with root: {} blocks: {} (behavior: {}, state: {}, voters: {}, blocks: {}, duration: {}ms)",
election->account,
election->qualified_root,
fmt::join (election->blocks_hashes (), ", "), // TODO: Lazy eval
to_string (election->behavior ()),
to_string (election->state ()),
election->voter_count (),
election->block_count (),
election->duration ().count ());
election_stale.notify (election);
}
}
}

View file

@ -111,6 +111,7 @@ public:
public: // Events
nano::observer_set<> vacancy_updated;
nano::observer_set<std::shared_ptr<nano::election>> election_stale;
private:
bool predicate () const;

View file

@ -18,6 +18,7 @@
#include <nano/node/bucketing.hpp>
#include <nano/node/cementing_set.hpp>
#include <nano/node/daemonconfig.hpp>
#include <nano/node/election.hpp>
#include <nano/node/election_status.hpp>
#include <nano/node/endpoint.hpp>
#include <nano/node/fork_cache.hpp>
@ -217,6 +218,11 @@ nano::node::node (std::shared_ptr<boost::asio::io_context> io_ctx_a, std::filesy
return ledger.weight (rep);
};
// Prioritize bootstrapping accounts with stale elections to find alternative forks
active.election_stale.add ([this] (auto const & election) {
bootstrap.prioritize (election->account);
});
// TODO: Hook this direclty in the schedulers
backlog_scan.batch_activated.add ([this] (auto const & batch) {
auto transaction = ledger.tx_begin_read ();

View file

@ -337,6 +337,8 @@ std::error_code nano::test::system::poll (std::chrono::nanoseconds const & wait_
}
}
std::this_thread::yield ();
std::error_code ec;
if (std::chrono::steady_clock::now () > deadline)
{

View file

@ -57,25 +57,30 @@
}
/** Expects that the condition becomes true within the deadline */
#define EXPECT_TIMELY(time, condition) \
system.deadline_set (time); \
{ \
std::error_code _ec; \
while (!(condition) && !(_ec = system.poll ())) \
{ \
} \
EXPECT_NO_ERROR (_ec); \
#define EXPECT_TIMELY(time, condition) \
system.deadline_set (time); \
{ \
std::error_code _ec; \
while (!_ec && !(condition)) \
{ \
_ec = system.poll (); \
} \
EXPECT_NO_ERROR (_ec); \
}
/*
* Asserts that the `val1 == val2` condition becomes true within the deadline
* Condition must hold for at least 2 consecutive reads
*/
#define ASSERT_TIMELY_EQ(time, val1, val2) \
system.deadline_set (time); \
while (!((val1) == (val2)) && !system.poll ()) \
{ \
} \
#define ASSERT_TIMELY_EQ(time, val1, val2) \
system.deadline_set (time); \
{ \
std::error_code _ec; \
while (!_ec && !((val1) == (val2))) \
{ \
_ec = system.poll (); \
} \
} \
ASSERT_EQ (val1, val2);
/*