Blockprocessor stall workaround (#4240)
This commit is contained in:
parent
b1c1d88394
commit
b2ab8cf413
8 changed files with 67 additions and 2 deletions
|
@ -38,4 +38,33 @@ TEST (block_processor, broadcast_block_on_arrival)
|
|||
node1->process_active (send1);
|
||||
// Checks whether the block was broadcast.
|
||||
ASSERT_TIMELY (5s, node2->ledger.block_or_pruned_exists (send1->hash ()));
|
||||
}
|
||||
|
||||
TEST (block_processor, add_blocking_invalid_block)
|
||||
{
|
||||
nano::test::system system;
|
||||
|
||||
nano::node_config config = system.default_config ();
|
||||
config.block_process_timeout = std::chrono::seconds{ 1 };
|
||||
auto & node = *system.add_node (config);
|
||||
|
||||
nano::state_block_builder builder;
|
||||
auto send1 = builder.make_block ()
|
||||
.account (nano::dev::genesis_key.pub)
|
||||
.previous (nano::dev::genesis->hash ())
|
||||
.representative (nano::dev::genesis_key.pub)
|
||||
.balance (nano::dev::constants.genesis_amount - nano::Gxrb_ratio)
|
||||
.link (nano::dev::genesis_key.pub)
|
||||
.sign (nano::dev::genesis_key.prv, nano::dev::genesis_key.pub)
|
||||
.work (*system.work.generate (nano::dev::genesis->hash ()))
|
||||
.build_shared ();
|
||||
|
||||
send1->signature.clear ();
|
||||
|
||||
auto background = std::async (std::launch::async, [&] () {
|
||||
return node.process_local (send1);
|
||||
});
|
||||
|
||||
ASSERT_TIMELY (5s, background.wait_for (std::chrono::seconds (0)) == std::future_status::ready);
|
||||
ASSERT_FALSE (background.get ().has_value ());
|
||||
}
|
|
@ -155,6 +155,7 @@ TEST (toml, daemon_config_deserialize_defaults)
|
|||
ASSERT_EQ (conf.node.bootstrap_bandwidth_limit, defaults.node.bootstrap_bandwidth_limit);
|
||||
ASSERT_EQ (conf.node.bootstrap_bandwidth_burst_ratio, defaults.node.bootstrap_bandwidth_burst_ratio);
|
||||
ASSERT_EQ (conf.node.block_processor_batch_max_time, defaults.node.block_processor_batch_max_time);
|
||||
ASSERT_EQ (conf.node.block_process_timeout, defaults.node.block_process_timeout);
|
||||
ASSERT_EQ (conf.node.bootstrap_connections, defaults.node.bootstrap_connections);
|
||||
ASSERT_EQ (conf.node.bootstrap_connections_max, defaults.node.bootstrap_connections_max);
|
||||
ASSERT_EQ (conf.node.bootstrap_initiator_threads, defaults.node.bootstrap_initiator_threads);
|
||||
|
@ -169,6 +170,7 @@ TEST (toml, daemon_config_deserialize_defaults)
|
|||
ASSERT_EQ (conf.node.io_threads, defaults.node.io_threads);
|
||||
ASSERT_EQ (conf.node.max_work_generate_multiplier, defaults.node.max_work_generate_multiplier);
|
||||
ASSERT_EQ (conf.node.network_threads, defaults.node.network_threads);
|
||||
ASSERT_EQ (conf.node.background_threads, defaults.node.background_threads);
|
||||
ASSERT_EQ (conf.node.secondary_work_peers, defaults.node.secondary_work_peers);
|
||||
ASSERT_EQ (conf.node.online_weight_minimum, defaults.node.online_weight_minimum);
|
||||
ASSERT_EQ (conf.node.rep_crawler_weight_minimum, defaults.node.rep_crawler_weight_minimum);
|
||||
|
@ -403,6 +405,7 @@ TEST (toml, daemon_config_deserialize_no_defaults)
|
|||
bootstrap_bandwidth_limit = 999
|
||||
bootstrap_bandwidth_burst_ratio = 999.9
|
||||
block_processor_batch_max_time = 999
|
||||
block_process_timeout = 999
|
||||
bootstrap_connections = 999
|
||||
bootstrap_connections_max = 999
|
||||
bootstrap_initiator_threads = 999
|
||||
|
@ -417,6 +420,7 @@ TEST (toml, daemon_config_deserialize_no_defaults)
|
|||
io_threads = 999
|
||||
lmdb_max_dbs = 999
|
||||
network_threads = 999
|
||||
background_threads = 999
|
||||
online_weight_minimum = "999"
|
||||
rep_crawler_weight_minimum = "999"
|
||||
election_hint_weight_percent = 19
|
||||
|
@ -578,6 +582,7 @@ TEST (toml, daemon_config_deserialize_no_defaults)
|
|||
ASSERT_NE (conf.node.bootstrap_bandwidth_limit, defaults.node.bootstrap_bandwidth_limit);
|
||||
ASSERT_NE (conf.node.bootstrap_bandwidth_burst_ratio, defaults.node.bootstrap_bandwidth_burst_ratio);
|
||||
ASSERT_NE (conf.node.block_processor_batch_max_time, defaults.node.block_processor_batch_max_time);
|
||||
ASSERT_NE (conf.node.block_process_timeout, defaults.node.block_process_timeout);
|
||||
ASSERT_NE (conf.node.bootstrap_connections, defaults.node.bootstrap_connections);
|
||||
ASSERT_NE (conf.node.bootstrap_connections_max, defaults.node.bootstrap_connections_max);
|
||||
ASSERT_NE (conf.node.bootstrap_initiator_threads, defaults.node.bootstrap_initiator_threads);
|
||||
|
@ -593,6 +598,7 @@ TEST (toml, daemon_config_deserialize_no_defaults)
|
|||
ASSERT_NE (conf.node.max_work_generate_multiplier, defaults.node.max_work_generate_multiplier);
|
||||
ASSERT_NE (conf.node.frontiers_confirmation, defaults.node.frontiers_confirmation);
|
||||
ASSERT_NE (conf.node.network_threads, defaults.node.network_threads);
|
||||
ASSERT_NE (conf.node.background_threads, defaults.node.background_threads);
|
||||
ASSERT_NE (conf.node.secondary_work_peers, defaults.node.secondary_work_peers);
|
||||
ASSERT_NE (conf.node.max_pruning_age, defaults.node.max_pruning_age);
|
||||
ASSERT_NE (conf.node.max_pruning_depth, defaults.node.max_pruning_depth);
|
||||
|
|
|
@ -50,3 +50,13 @@ bool nano::blocking_observer::exists (std::shared_ptr<nano::block> block)
|
|||
auto existing = blocking.find (block);
|
||||
return existing != blocking.end ();
|
||||
}
|
||||
|
||||
void nano::blocking_observer::erase (std::shared_ptr<nano::block> block)
|
||||
{
|
||||
nano::lock_guard<nano::mutex> lock{ mutex };
|
||||
auto existing = blocking.find (block);
|
||||
if (existing != blocking.end ())
|
||||
{
|
||||
blocking.erase (existing);
|
||||
}
|
||||
}
|
|
@ -22,6 +22,7 @@ public:
|
|||
void observe (nano::process_return const & result, std::shared_ptr<nano::block> block);
|
||||
[[nodiscard]] std::future<nano::process_return> insert (std::shared_ptr<nano::block> block);
|
||||
bool exists (std::shared_ptr<nano::block> block);
|
||||
void erase (std::shared_ptr<nano::block> block);
|
||||
|
||||
private:
|
||||
std::unordered_multimap<std::shared_ptr<nano::block>, std::promise<nano::process_return>> blocking;
|
||||
|
|
|
@ -106,7 +106,16 @@ std::optional<nano::process_return> nano::block_processor::add_blocking (std::sh
|
|||
std::optional<nano::process_return> result;
|
||||
try
|
||||
{
|
||||
result = future.get ();
|
||||
auto status = future.wait_for (node.config.block_process_timeout);
|
||||
debug_assert (status != std::future_status::deferred);
|
||||
if (status == std::future_status::ready)
|
||||
{
|
||||
result = future.get ();
|
||||
}
|
||||
else
|
||||
{
|
||||
blocking.erase (block);
|
||||
}
|
||||
}
|
||||
catch (std::future_error const &)
|
||||
{
|
||||
|
|
|
@ -148,7 +148,7 @@ nano::node::node (boost::asio::io_context & io_ctx_a, boost::filesystem::path co
|
|||
config (config_a),
|
||||
network_params{ config.network_params },
|
||||
stats (config.stats_config),
|
||||
workers (std::max (3u, config.io_threads / 4), nano::thread_role::name::worker),
|
||||
workers{ config.background_threads, nano::thread_role::name::worker },
|
||||
bootstrap_workers{ config.bootstrap_serving_threads, nano::thread_role::name::bootstrap_worker },
|
||||
flags (flags_a),
|
||||
work (work_a),
|
||||
|
|
|
@ -94,6 +94,7 @@ nano::error nano::node_config::serialize_toml (nano::tomlconfig & toml) const
|
|||
toml.put ("io_threads", io_threads, "Number of threads dedicated to I/O operations. Defaults to the number of CPU threads, and at least 4.\ntype:uint64");
|
||||
toml.put ("network_threads", network_threads, "Number of threads dedicated to processing network messages. Defaults to the number of CPU threads, and at least 4.\ntype:uint64");
|
||||
toml.put ("work_threads", work_threads, "Number of threads dedicated to CPU generated work. Defaults to all available CPU threads.\ntype:uint64");
|
||||
toml.put ("background_threads", background_threads, "Number of threads dedicated to background node work, including handling of RPC requests. Defaults to all available CPU threads.\ntype:uint64");
|
||||
toml.put ("signature_checker_threads", signature_checker_threads, "Number of additional threads dedicated to signature verification. Defaults to number of CPU threads / 2.\ntype:uint64");
|
||||
toml.put ("enable_voting", enable_voting, "Enable or disable voting. Enabling this option requires additional system resources, namely increased CPU, bandwidth and disk usage.\ntype:bool");
|
||||
toml.put ("bootstrap_connections", bootstrap_connections, "Number of outbound bootstrap connections. Must be a power of 2. Defaults to 4.\nWarning: a larger amount of connections may use substantially more system memory.\ntype:uint64");
|
||||
|
@ -102,6 +103,7 @@ nano::error nano::node_config::serialize_toml (nano::tomlconfig & toml) const
|
|||
toml.put ("bootstrap_serving_threads", bootstrap_serving_threads, "Number of threads dedicated to serving bootstrap data to other peers. Defaults to half the number of CPU threads, and at least 2.\ntype:uint64");
|
||||
toml.put ("bootstrap_frontier_request_count", bootstrap_frontier_request_count, "Number frontiers per bootstrap frontier request. Defaults to 1048576.\ntype:uint32,[1024..4294967295]");
|
||||
toml.put ("block_processor_batch_max_time", block_processor_batch_max_time.count (), "The maximum time the block processor can continuously process blocks for.\ntype:milliseconds");
|
||||
toml.put ("block_process_timeout", block_process_timeout.count (), "Time to wait for block processing result.\ntype:seconds");
|
||||
toml.put ("allow_local_peers", allow_local_peers, "Enable or disable local host peering.\ntype:bool");
|
||||
toml.put ("vote_minimum", vote_minimum.to_string_dec (), "Local representatives do not vote if the delegated weight is under this threshold. Saves on system resources.\ntype:string,amount,raw");
|
||||
toml.put ("vote_generator_delay", vote_generator_delay.count (), "Delay before votes are sent to allow for efficient bundling of hashes in votes.\ntype:milliseconds");
|
||||
|
@ -339,6 +341,10 @@ nano::error nano::node_config::deserialize_toml (nano::tomlconfig & toml)
|
|||
toml.get ("block_processor_batch_max_time", block_processor_batch_max_time_l);
|
||||
block_processor_batch_max_time = std::chrono::milliseconds (block_processor_batch_max_time_l);
|
||||
|
||||
auto block_process_timeout_l = block_process_timeout.count ();
|
||||
toml.get ("block_process_timeout", block_process_timeout_l);
|
||||
block_process_timeout = std::chrono::seconds{ block_process_timeout_l };
|
||||
|
||||
auto unchecked_cutoff_time_l = static_cast<unsigned long> (unchecked_cutoff_time.count ());
|
||||
toml.get ("unchecked_cutoff_time", unchecked_cutoff_time_l);
|
||||
unchecked_cutoff_time = std::chrono::seconds (unchecked_cutoff_time_l);
|
||||
|
@ -360,6 +366,7 @@ nano::error nano::node_config::deserialize_toml (nano::tomlconfig & toml)
|
|||
toml.get<unsigned> ("io_threads", io_threads);
|
||||
toml.get<unsigned> ("work_threads", work_threads);
|
||||
toml.get<unsigned> ("network_threads", network_threads);
|
||||
toml.get<unsigned> ("background_threads", background_threads);
|
||||
toml.get<unsigned> ("bootstrap_connections", bootstrap_connections);
|
||||
toml.get<unsigned> ("bootstrap_connections_max", bootstrap_connections_max);
|
||||
toml.get<unsigned> ("bootstrap_initiator_threads", bootstrap_initiator_threads);
|
||||
|
|
|
@ -64,6 +64,7 @@ public:
|
|||
unsigned io_threads{ std::max (4u, nano::hardware_concurrency ()) };
|
||||
unsigned network_threads{ std::max (4u, nano::hardware_concurrency ()) };
|
||||
unsigned work_threads{ std::max (4u, nano::hardware_concurrency ()) };
|
||||
unsigned background_threads{ std::max (4u, nano::hardware_concurrency ()) };
|
||||
/* Use half available threads on the system for signature checking. The calling thread does checks as well, so these are extra worker threads */
|
||||
unsigned signature_checker_threads{ std::max (2u, nano::hardware_concurrency () / 2) };
|
||||
bool enable_voting{ false };
|
||||
|
@ -84,6 +85,8 @@ public:
|
|||
std::string external_address;
|
||||
uint16_t external_port{ 0 };
|
||||
std::chrono::milliseconds block_processor_batch_max_time{ std::chrono::milliseconds (500) };
|
||||
/** Time to wait for block processing result */
|
||||
std::chrono::seconds block_process_timeout{ 15 };
|
||||
std::chrono::seconds unchecked_cutoff_time{ std::chrono::seconds (4 * 60 * 60) }; // 4 hours
|
||||
/** Timeout for initiated async operations */
|
||||
std::chrono::seconds tcp_io_timeout{ (network_params.network.is_dev_network () && !is_sanitizer_build ()) ? std::chrono::seconds (5) : std::chrono::seconds (15) };
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue