Fix vote_generator loop inconsistency during high load (#2095)

* Fix vote_generator loop inconsistency during high load. * Increase deadline for bulk_pull_account.basics test * Revert "Increase deadline for bulk_pull_account.basics test" This reverts commit 4404c4c5f49b2f12eb3d0ea3b27aaf6a0338e696. * Simplify config option * Use a flag as the wait predicate to prevent getting stuck with hashes between threshold and 11
2019-06-27 01:17:35 +01:00 · 2019-06-27 01:17:35 +01:00 · d404dcf3d0
commit d404dcf3d0
parent 305d2bab34
5 changed files with 35 additions and 29 deletions
--- a/nano/core_test/node.cpp
+++ b/nano/core_test/node.cpp
@ -716,6 +716,7 @@ TEST (node_config, v16_v17_upgrade)
 	ASSERT_FALSE (tree.get_optional_child ("external_port"));
 	ASSERT_FALSE (tree.get_optional_child ("tcp_incoming_connections_max"));
 	ASSERT_FALSE (tree.get_optional_child ("vote_generator_delay"));
+	ASSERT_FALSE (tree.get_optional_child ("vote_generator_threshold"));
 	ASSERT_FALSE (tree.get_optional_child ("diagnostics"));
 	ASSERT_FALSE (tree.get_optional_child ("use_memory_pools"));
 	ASSERT_FALSE (tree.get_optional_child ("confirmation_history_size"));
@ -731,6 +732,7 @@ TEST (node_config, v16_v17_upgrade)
 	ASSERT_TRUE (!!tree.get_optional_child ("external_port"));
 	ASSERT_TRUE (!!tree.get_optional_child ("tcp_incoming_connections_max"));
 	ASSERT_TRUE (!!tree.get_optional_child ("vote_generator_delay"));
+	ASSERT_TRUE (!!tree.get_optional_child ("vote_generator_threshold"));
 	ASSERT_TRUE (!!tree.get_optional_child ("diagnostics"));
 	ASSERT_TRUE (!!tree.get_optional_child ("use_memory_pools"));
 	ASSERT_TRUE (!!tree.get_optional_child ("confirmation_history_size"));
@ -763,6 +765,7 @@ TEST (node_config, v17_values)
 		tree.put ("external_port", 0);
 		tree.put ("tcp_incoming_connections_max", 1);
 		tree.put ("vote_generator_delay", 50);
+		tree.put ("vote_generator_threshold", 3);
 		nano::jsonconfig txn_tracking_l;
 		txn_tracking_l.put ("enable", false);
 		txn_tracking_l.put ("min_read_txn_time", 0);
@ -802,6 +805,7 @@ TEST (node_config, v17_values)
 	tree.put ("external_port", std::numeric_limits<uint16_t>::max () - 1);
 	tree.put ("tcp_incoming_connections_max", std::numeric_limits<unsigned>::max ());
 	tree.put ("vote_generator_delay", std::numeric_limits<unsigned long>::max () - 100);
+	tree.put ("vote_generator_threshold", 10);
 	nano::jsonconfig txn_tracking_l;
 	txn_tracking_l.put ("enable", true);
 	txn_tracking_l.put ("min_read_txn_time", 1234);
@ -825,6 +829,7 @@ TEST (node_config, v17_values)
 	ASSERT_EQ (config.external_port, std::numeric_limits<uint16_t>::max () - 1);
 	ASSERT_EQ (config.tcp_incoming_connections_max, std::numeric_limits<unsigned>::max ());
 	ASSERT_EQ (config.vote_generator_delay.count (), std::numeric_limits<unsigned long>::max () - 100);
+	ASSERT_EQ (config.vote_generator_threshold, 10);
 	ASSERT_TRUE (config.diagnostics_config.txn_tracking.enable);
 	ASSERT_EQ (config.diagnostics_config.txn_tracking.min_read_txn_time.count (), 1234);
 	ASSERT_EQ (config.tcp_incoming_connections_max, std::numeric_limits<unsigned>::max ());
@ -2862,7 +2867,7 @@ TEST (confirmation_height, prioritize_frontiers)
 	nano::send_block send5 (send4.hash (), key3.pub, node->config.online_weight_minimum.number () + 6500, nano::test_genesis_key.prv, nano::test_genesis_key.pub, system.work.generate (send4.hash ()));
 	nano::send_block send6 (send5.hash (), key4.pub, node->config.online_weight_minimum.number () + 6000, nano::test_genesis_key.prv, nano::test_genesis_key.pub, system.work.generate (send5.hash ()));

-	// Open all accounts and add other sends to get different uncemented counts (as well as some which are the same) 
+	// Open all accounts and add other sends to get different uncemented counts (as well as some which are the same)
 	nano::open_block open1 (send1.hash (), nano::genesis_account, key1.pub, key1.prv, key1.pub, system.work.generate (key1.pub));
 	nano::send_block send7 (open1.hash (), nano::test_genesis_key.pub, 500, key1.prv, key1.pub, system.work.generate (open1.hash ()));

--- a/nano/node/nodeconfig.cpp
+++ b/nano/node/nodeconfig.cpp
@ -114,6 +114,7 @@ nano::error nano::node_config::serialize_json (nano::jsonconfig & json) const
 	json.put ("allow_local_peers", allow_local_peers);
 	json.put ("vote_minimum", vote_minimum.to_string_dec ());
 	json.put ("vote_generator_delay", vote_generator_delay.count ());
+	json.put ("vote_generator_threshold", vote_generator_threshold);
 	json.put ("unchecked_cutoff_time", unchecked_cutoff_time.count ());
 	json.put ("tcp_io_timeout", tcp_io_timeout.count ());
 	json.put ("pow_sleep_interval", pow_sleep_interval.count ());
@ -249,6 +250,7 @@ bool nano::node_config::upgrade_json (unsigned version_a, nano::jsonconfig & jso
 			json.put ("external_port", external_port);
 			json.put ("tcp_incoming_connections_max", tcp_incoming_connections_max);
 			json.put ("vote_generator_delay", vote_generator_delay.count ());
+			json.put ("vote_generator_threshold", vote_generator_threshold);
 			json.put ("use_memory_pools", use_memory_pools);
 			json.put ("confirmation_history_size", confirmation_history_size);
 			json.put ("active_elections_size", active_elections_size);
@ -353,6 +355,8 @@ nano::error nano::node_config::deserialize_json (bool & upgraded_a, nano::jsonco
 		json.get<unsigned long> ("vote_generator_delay", delay_l);
 		vote_generator_delay = std::chrono::milliseconds (delay_l);

+		json.get<unsigned> ("vote_generator_threshold", vote_generator_threshold);
+
 		auto block_processor_batch_max_time_l (json.get<unsigned long> ("block_processor_batch_max_time"));
 		block_processor_batch_max_time = std::chrono::milliseconds (block_processor_batch_max_time_l);
 		auto unchecked_cutoff_time_l = static_cast<unsigned long> (unchecked_cutoff_time.count ());
@ -418,7 +422,7 @@ nano::error nano::node_config::deserialize_json (bool & upgraded_a, nano::jsonco
 		}
 		if (password_fanout < 16 || password_fanout > 1024 * 1024)
 		{
-			json.get_error ().set ("password_fanout must a number between 16 and 1048576");
+			json.get_error ().set ("password_fanout must be a number between 16 and 1048576");
 		}
 		if (io_threads == 0)
 		{
@ -432,6 +436,10 @@ nano::error nano::node_config::deserialize_json (bool & upgraded_a, nano::jsonco
 		{
 			json.get_error ().set ("bandwidth_limit unbounded = 0, default = 1572864, max = 18446744073709551615");
 		}
+		if (vote_generator_threshold < 1 || vote_generator_threshold > 12)
+		{
+			json.get_error ().set ("vote_generator_threshold must be a number between 1 and 12");
+		}
 	}
 	catch (std::runtime_error const & ex)
 	{
--- a/nano/node/nodeconfig.hpp
+++ b/nano/node/nodeconfig.hpp
@ -38,6 +38,7 @@ public:
 	nano::amount receive_minimum{ nano::xrb_ratio };
 	nano::amount vote_minimum{ nano::Gxrb_ratio };
 	std::chrono::milliseconds vote_generator_delay{ std::chrono::milliseconds (50) };
+	unsigned vote_generator_threshold{ 3 };
 	nano::amount online_weight_minimum{ 60000 * nano::Gxrb_ratio };
 	unsigned online_weight_quorum{ 50 };
 	unsigned password_fanout{ 1024 };
--- a/nano/node/voting.cpp
+++ b/nano/node/voting.cpp
@ -5,8 +5,6 @@

 nano::vote_generator::vote_generator (nano::node & node_a) :
 node (node_a),
-stopped (false),
-started (false),
 thread ([this]() { run (); })
 {
 	std::unique_lock<std::mutex> lock (mutex);
@ -18,17 +16,22 @@ thread ([this]() { run (); })

 void nano::vote_generator::add (nano::block_hash const & hash_a)
 {
+	std::unique_lock<std::mutex> lock (mutex);
+	hashes.push_back (hash_a);
+	if (hashes.size () >= node.config.vote_generator_threshold)
 	{
-		std::lock_guard<std::mutex> lock (mutex);
-		hashes.push_back (hash_a);
+		// Potentially high load, notify to wait for more hashes
+		wakeup = true;
+		lock.unlock ();
+		condition.notify_all ();
 	}
-	condition.notify_all ();
 }

 void nano::vote_generator::stop ()
 {
 	std::unique_lock<std::mutex> lock (mutex);
 	stopped = true;
+	wakeup = true;

 	lock.unlock ();
 	condition.notify_all ();
@ -68,34 +71,22 @@ void nano::vote_generator::run ()
 	lock.unlock ();
 	condition.notify_all ();
 	lock.lock ();
-	auto min (std::numeric_limits<std::chrono::steady_clock::time_point>::min ());
-	auto cutoff (min);
 	while (!stopped)
 	{
-		auto now (std::chrono::steady_clock::now ());
 		if (hashes.size () >= 12)
 		{
 			send (lock);
 		}
-		else if (cutoff == min) // && hashes.size () < 12
+		else
 		{
-			cutoff = now + node.config.vote_generator_delay;
-			condition.wait_until (lock, cutoff);
-		}
-		else if (now < cutoff) // && hashes.size () < 12
-		{
-			condition.wait_until (lock, cutoff);
-		}
-		else // now >= cutoff && hashes.size () < 12
-		{
-			cutoff = min;
-			if (!hashes.empty ())
+			wakeup = false;
+			if (!condition.wait_for (lock, node.config.vote_generator_delay, [this]() { return this->wakeup; }))
 			{
-				send (lock);
-			}
-			else
-			{
-				condition.wait (lock);
+				// Did not wake up early. Likely not under high load, ok to send lower number of hashes
+				if (!hashes.empty ())
+				{
+					send (lock);
+				}
 			}
 		}
 	}
--- a/nano/node/voting.hpp
+++ b/nano/node/voting.hpp
@ -34,8 +34,9 @@ private:
 	std::condition_variable condition;
 	std::deque<nano::block_hash> hashes;
 	nano::network_params network_params;
-	bool stopped;
-	bool started;
+	bool stopped{ false };
+	bool started{ false };
+	bool wakeup{ false };
 	boost::thread thread;

 	friend std::unique_ptr<seq_con_info_component> collect_seq_con_info (vote_generator & vote_generator, const std::string & name);