From ccd3affbb5239e5d0c42902c424badca9ddeb189 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Wo=CC=81jcik?= <3044353+pwojcikdev@users.noreply.github.com> Date: Sun, 7 Sep 2025 12:26:55 +0200 Subject: [PATCH] Fix stale election notification --- nano/core_test/active_elections.cpp | 65 ++++++++++++++++++++++++++--- nano/lib/stats_enums.hpp | 1 + nano/node/active_elections.cpp | 57 ++++++++++++------------- nano/node/active_elections.hpp | 1 + nano/node/node.cpp | 6 +++ nano/test_common/system.cpp | 2 + nano/test_common/testutil.hpp | 31 ++++++++------ 7 files changed, 117 insertions(+), 46 deletions(-) diff --git a/nano/core_test/active_elections.cpp b/nano/core_test/active_elections.cpp index ef6b1f228..bf319a828 100644 --- a/nano/core_test/active_elections.cpp +++ b/nano/core_test/active_elections.cpp @@ -1580,13 +1580,13 @@ TEST (active_elections, broadcast_block_on_activation) ASSERT_TIMELY (5s, node2->block_or_pruned_exists (send1->hash ())); } -TEST (active_elections, bootstrap_stale) +TEST (active_elections, stale_election) { nano::test::system system; // Configure node with short stale threshold for testing nano::node_config node_config = system.default_config (); - node_config.active_elections.bootstrap_stale_threshold = 2s; // Short threshold for faster testing + node_config.active_elections.stale_threshold = 2s; // Short threshold for faster testing auto & node = *system.add_node (node_config); @@ -1603,6 +1603,12 @@ TEST (active_elections, bootstrap_stale) .work (*system.work.generate (nano::dev::genesis->hash ())) .build (); + std::atomic stale_detected{ false }; + node.active.election_stale.add ([&] (auto const & election) { + EXPECT_EQ (send->qualified_root (), election->qualified_root); + stale_detected = true; + }); + // Process the block and start an election node.process_active (send); @@ -1611,10 +1617,59 @@ TEST (active_elections, bootstrap_stale) ASSERT_TIMELY (5s, (election = node.active.election (send->qualified_root ())) != nullptr); // Check initial state - ASSERT_EQ (0, node.stats.count (nano::stat::type::active_elections, nano::stat::detail::bootstrap_stale)); + ASSERT_EQ (0, node.stats.count (nano::stat::type::active_elections, nano::stat::detail::stale)); - // Wait for bootstrap_stale_threshold to pass and the statistic to be incremented - ASSERT_TIMELY (5s, node.stats.count (nano::stat::type::active_elections, nano::stat::detail::bootstrap_stale) > 0); + // Wait for stale_threshold to pass and stats to be incremented + ASSERT_TIMELY (5s, stale_detected); + ASSERT_TIMELY (5s, node.stats.count (nano::stat::type::active_elections, nano::stat::detail::stale) > 0); +} + +TEST (active_elections, stale_election_multiple) +{ + nano::test::system system; + + // Configure node with short stale threshold for testing + nano::node_config node_config = system.default_config (); + node_config.active_elections.stale_threshold = 2s; // Short threshold for faster testing + + auto & node = *system.add_node (node_config); + + // Create 10 independent blocks that will each have their own election + auto blocks = nano::test::setup_independent_blocks (system, node, 10); + + // Track which elections had stale events fired + nano::locked> stale_detected; + + node.active.election_stale.add ([&] (auto const & election) { + stale_detected.lock ()->insert (election->qualified_root); + }); + + // Start elections for all blocks + ASSERT_TRUE (nano::test::start_elections (system, node, blocks)); + + // Ensure all elections are active + ASSERT_TIMELY_EQ (5s, node.active.size (), blocks.size ()); + for (auto const & block : blocks) + { + ASSERT_TRUE (node.active.active (block->qualified_root ())); + } + + // Check initial state + ASSERT_EQ (0, node.stats.count (nano::stat::type::active_elections, nano::stat::detail::stale)); + + // Wait for stale_threshold to pass (2s) plus some buffer + // The stale event should fire for ALL elections that are stale + ASSERT_TIMELY (5s, node.stats.count (nano::stat::type::active_elections, nano::stat::detail::stale) >= blocks.size ()); + + // Check that all elections had their stale event fired + ASSERT_TIMELY_EQ (5s, blocks.size (), stale_detected.lock ()->size ()); + + // Verify each block's election was marked as stale + for (auto const & block : blocks) + { + ASSERT_TRUE (stale_detected.lock ()->count (block->qualified_root ()) > 0) + << "Election for block " << block->hash ().to_string () << " was not marked as stale"; + } } TEST (active_elections, transition_optimistic_to_priority) diff --git a/nano/lib/stats_enums.hpp b/nano/lib/stats_enums.hpp index a8c7d3fc8..1542205c3 100644 --- a/nano/lib/stats_enums.hpp +++ b/nano/lib/stats_enums.hpp @@ -519,6 +519,7 @@ enum class detail cancel_dependent, cancel_checkup, forks_cached, + stale, bootstrap_stale, // unchecked diff --git a/nano/node/active_elections.cpp b/nano/node/active_elections.cpp index 0a0688387..0eb7f69a9 100644 --- a/nano/node/active_elections.cpp +++ b/nano/node/active_elections.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -507,9 +506,6 @@ void nano::active_elections::tick_elections (nano::unique_lock & lo nano::confirmation_solicitor solicitor (node.network, node.config); solicitor.prepare (node.rep_crawler.principal_representatives (std::numeric_limits::max ())); - nano::timer elapsed (nano::timer_state::started); - - std::deque> stale_elections; for (auto const & election : election_list) { bool tick_result = election->tick (solicitor); @@ -517,33 +513,9 @@ void nano::active_elections::tick_elections (nano::unique_lock & lo { erase (election->qualified_root); } - else if (election->duration () > config.stale_threshold) - { - stale_elections.push_back (election); - } } solicitor.flush (); - - if (stale_interval.elapse (config.stale_threshold / 2)) - { - node.stats.add (nano::stat::type::active_elections, nano::stat::detail::bootstrap_stale, stale_elections.size ()); - - for (auto const & election : stale_elections) - { - node.logger.debug (nano::log::type::active_elections, "Bootstrapping account: {} with stale election with root: {}, blocks: {} (behavior: {}, state: {}, voters: {}, blocks: {}, duration: {}ms)", - election->account, - election->qualified_root, - fmt::join (election->blocks_hashes (), ", "), // TODO: Lazy eval - to_string (election->behavior ()), - to_string (election->state ()), - election->voter_count (), - election->block_count (), - election->duration ().count ()); - - node.bootstrap.prioritize (election->account); - } - } } bool nano::active_elections::predicate () const @@ -601,6 +573,8 @@ void nano::active_elections::checkup_elections (nano::unique_lock & auto const now = std::chrono::steady_clock::now (); auto const min_duration = node.network_params.network.aec_loop_interval * 3; + std::deque> stale_elections; + for (auto const & election : all_elections) { // Only cancel elections if they have been running for a minimum duration of time @@ -621,6 +595,33 @@ void nano::active_elections::checkup_elections (nano::unique_lock & election->duration ().count ()); } } + else if (election->duration () > config.stale_threshold) + { + stale_elections.push_back (election); + } + } + + node.logger.debug (nano::log::type::active_elections, "Checkup found {} stale elections", stale_elections.size ()); + + // Notify about stale elections at most once per half stale threshold, avoid too frequent notifications + if (stale_interval.elapse (config.stale_threshold / 2)) + { + node.stats.add (nano::stat::type::active_elections, nano::stat::detail::stale, stale_elections.size ()); + + for (auto const & election : stale_elections) + { + node.logger.debug (nano::log::type::active_elections, "Stale election for account: {} with root: {} blocks: {} (behavior: {}, state: {}, voters: {}, blocks: {}, duration: {}ms)", + election->account, + election->qualified_root, + fmt::join (election->blocks_hashes (), ", "), // TODO: Lazy eval + to_string (election->behavior ()), + to_string (election->state ()), + election->voter_count (), + election->block_count (), + election->duration ().count ()); + + election_stale.notify (election); + } } } diff --git a/nano/node/active_elections.hpp b/nano/node/active_elections.hpp index f82792542..2ab810728 100644 --- a/nano/node/active_elections.hpp +++ b/nano/node/active_elections.hpp @@ -111,6 +111,7 @@ public: public: // Events nano::observer_set<> vacancy_updated; + nano::observer_set> election_stale; private: bool predicate () const; diff --git a/nano/node/node.cpp b/nano/node/node.cpp index d0616746c..98edb92b4 100644 --- a/nano/node/node.cpp +++ b/nano/node/node.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -217,6 +218,11 @@ nano::node::node (std::shared_ptr io_ctx_a, std::filesy return ledger.weight (rep); }; + // Prioritize bootstrapping accounts with stale elections to find alternative forks + active.election_stale.add ([this] (auto const & election) { + bootstrap.prioritize (election->account); + }); + // TODO: Hook this direclty in the schedulers backlog_scan.batch_activated.add ([this] (auto const & batch) { auto transaction = ledger.tx_begin_read (); diff --git a/nano/test_common/system.cpp b/nano/test_common/system.cpp index 5a947ac92..3bdc8a485 100644 --- a/nano/test_common/system.cpp +++ b/nano/test_common/system.cpp @@ -337,6 +337,8 @@ std::error_code nano::test::system::poll (std::chrono::nanoseconds const & wait_ } } + std::this_thread::yield (); + std::error_code ec; if (std::chrono::steady_clock::now () > deadline) { diff --git a/nano/test_common/testutil.hpp b/nano/test_common/testutil.hpp index 7dc58f0b1..ddf3e162f 100644 --- a/nano/test_common/testutil.hpp +++ b/nano/test_common/testutil.hpp @@ -57,25 +57,30 @@ } /** Expects that the condition becomes true within the deadline */ -#define EXPECT_TIMELY(time, condition) \ - system.deadline_set (time); \ - { \ - std::error_code _ec; \ - while (!(condition) && !(_ec = system.poll ())) \ - { \ - } \ - EXPECT_NO_ERROR (_ec); \ +#define EXPECT_TIMELY(time, condition) \ + system.deadline_set (time); \ + { \ + std::error_code _ec; \ + while (!_ec && !(condition)) \ + { \ + _ec = system.poll (); \ + } \ + EXPECT_NO_ERROR (_ec); \ } /* * Asserts that the `val1 == val2` condition becomes true within the deadline * Condition must hold for at least 2 consecutive reads */ -#define ASSERT_TIMELY_EQ(time, val1, val2) \ - system.deadline_set (time); \ - while (!((val1) == (val2)) && !system.poll ()) \ - { \ - } \ +#define ASSERT_TIMELY_EQ(time, val1, val2) \ + system.deadline_set (time); \ + { \ + std::error_code _ec; \ + while (!_ec && !((val1) == (val2))) \ + { \ + _ec = system.poll (); \ + } \ + } \ ASSERT_EQ (val1, val2); /*