Fix intermittent node telemetry test failures (#2576)
This commit is contained in:
		
					parent
					
						
							
								ae3c3e396a
							
						
					
				
			
			
				commit
				
					
						8c3ae38f1c
					
				
			
		
					 7 changed files with 30 additions and 22 deletions
				
			
		| 
						 | 
					@ -258,10 +258,11 @@ namespace nano
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
TEST (node_telemetry, basic)
 | 
					TEST (node_telemetry, basic)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	nano::system system (2);
 | 
						nano::system system;
 | 
				
			||||||
 | 
						nano::node_flags node_flags;
 | 
				
			||||||
	auto node_client = system.nodes.front ();
 | 
						node_flags.disable_ongoing_telemetry_requests = true;
 | 
				
			||||||
	auto node_server = system.nodes.back ();
 | 
						auto node_client = system.add_node (node_flags);
 | 
				
			||||||
 | 
						auto node_server = system.add_node (node_flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	wait_peer_connections (system);
 | 
						wait_peer_connections (system);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -304,8 +305,6 @@ TEST (node_telemetry, basic)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// Wait the cache period and check cache is not used
 | 
						// Wait the cache period and check cache is not used
 | 
				
			||||||
	std::this_thread::sleep_for (nano::telemetry_cache_cutoffs::test);
 | 
						std::this_thread::sleep_for (nano::telemetry_cache_cutoffs::test);
 | 
				
			||||||
	// Arbitrarily change something so that we can confirm different metrics were used
 | 
					 | 
				
			||||||
	node_server->ledger.cache.block_count = 100;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	std::atomic<bool> done{ false };
 | 
						std::atomic<bool> done{ false };
 | 
				
			||||||
	node_client->telemetry.get_metrics_peers_async ([&done, &all_telemetry_data_time_pairs](nano::telemetry_data_responses const & responses_a) {
 | 
						node_client->telemetry.get_metrics_peers_async ([&done, &all_telemetry_data_time_pairs](nano::telemetry_data_responses const & responses_a) {
 | 
				
			||||||
| 
						 | 
					@ -448,10 +447,12 @@ namespace nano
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
TEST (node_telemetry, single_request)
 | 
					TEST (node_telemetry, single_request)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	nano::system system (2);
 | 
						nano::system system;
 | 
				
			||||||
 | 
						nano::node_flags node_flags;
 | 
				
			||||||
 | 
						node_flags.disable_ongoing_telemetry_requests = true;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	auto node_client = system.nodes.front ();
 | 
						auto node_client = system.add_node (node_flags);
 | 
				
			||||||
	auto node_server = system.nodes.back ();
 | 
						auto node_server = system.add_node (node_flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	wait_peer_connections (system);
 | 
						wait_peer_connections (system);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -702,10 +703,11 @@ TEST (node_telemetry, disconnects)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
TEST (node_telemetry, batch_use_single_request_cache)
 | 
					TEST (node_telemetry, batch_use_single_request_cache)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	nano::system system (2);
 | 
						nano::system system;
 | 
				
			||||||
 | 
						nano::node_flags node_flags;
 | 
				
			||||||
	auto node_client = system.nodes.front ();
 | 
						node_flags.disable_ongoing_telemetry_requests = true;
 | 
				
			||||||
	auto node_server = system.nodes.back ();
 | 
						auto node_client = system.add_node (node_flags);
 | 
				
			||||||
 | 
						auto node_server = system.add_node (node_flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	wait_peer_connections (system);
 | 
						wait_peer_connections (system);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -756,6 +758,8 @@ TEST (node_telemetry, batch_use_single_request_cache)
 | 
				
			||||||
		ASSERT_NO_ERROR (system.poll ());
 | 
							ASSERT_NO_ERROR (system.poll ());
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						std::this_thread::sleep_for (nano::telemetry_cache_cutoffs::test);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	system.deadline_set (10s);
 | 
						system.deadline_set (10s);
 | 
				
			||||||
	std::atomic<bool> done{ false };
 | 
						std::atomic<bool> done{ false };
 | 
				
			||||||
	node_client->telemetry.get_metrics_peers_async ([&done, &telemetry_data_time_pair](nano::telemetry_data_responses const & responses_a) {
 | 
						node_client->telemetry.get_metrics_peers_async ([&done, &telemetry_data_time_pair](nano::telemetry_data_responses const & responses_a) {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -456,7 +456,7 @@ public:
 | 
				
			||||||
class telemetry_cache_cutoffs
 | 
					class telemetry_cache_cutoffs
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
	static std::chrono::seconds constexpr test{ 2 };
 | 
						static std::chrono::seconds constexpr test{ 3 };
 | 
				
			||||||
	static std::chrono::seconds constexpr beta{ 15 };
 | 
						static std::chrono::seconds constexpr beta{ 15 };
 | 
				
			||||||
	static std::chrono::seconds constexpr live{ 60 };
 | 
						static std::chrono::seconds constexpr live{ 60 };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -126,7 +126,7 @@ gap_cache (*this),
 | 
				
			||||||
ledger (store, stats, flags_a.generate_cache),
 | 
					ledger (store, stats, flags_a.generate_cache),
 | 
				
			||||||
checker (config.signature_checker_threads),
 | 
					checker (config.signature_checker_threads),
 | 
				
			||||||
network (*this, config.peering_port),
 | 
					network (*this, config.peering_port),
 | 
				
			||||||
telemetry (network, alarm, worker),
 | 
					telemetry (network, alarm, worker, flags.disable_ongoing_telemetry_requests),
 | 
				
			||||||
bootstrap_initiator (*this),
 | 
					bootstrap_initiator (*this),
 | 
				
			||||||
bootstrap (config.peering_port, *this),
 | 
					bootstrap (config.peering_port, *this),
 | 
				
			||||||
application_path (application_path_a),
 | 
					application_path (application_path_a),
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -126,6 +126,7 @@ public:
 | 
				
			||||||
	bool disable_unchecked_drop{ true };
 | 
						bool disable_unchecked_drop{ true };
 | 
				
			||||||
	bool disable_providing_telemetry_metrics{ false };
 | 
						bool disable_providing_telemetry_metrics{ false };
 | 
				
			||||||
	bool disable_block_processor_unchecked_deletion{ false };
 | 
						bool disable_block_processor_unchecked_deletion{ false };
 | 
				
			||||||
 | 
						bool disable_ongoing_telemetry_requests{ false };
 | 
				
			||||||
	bool fast_bootstrap{ false };
 | 
						bool fast_bootstrap{ false };
 | 
				
			||||||
	bool read_only{ false };
 | 
						bool read_only{ false };
 | 
				
			||||||
	nano::confirmation_height_mode confirmation_height_processor_mode{ nano::confirmation_height_mode::automatic };
 | 
						nano::confirmation_height_mode confirmation_height_processor_mode{ nano::confirmation_height_mode::automatic };
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -14,9 +14,7 @@
 | 
				
			||||||
#include <numeric>
 | 
					#include <numeric>
 | 
				
			||||||
#include <set>
 | 
					#include <set>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
std::chrono::seconds constexpr nano::telemetry_impl::alarm_cutoff;
 | 
					nano::telemetry::telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a, bool disable_ongoing_requests_a) :
 | 
				
			||||||
 | 
					 | 
				
			||||||
nano::telemetry::telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a) :
 | 
					 | 
				
			||||||
network (network_a),
 | 
					network (network_a),
 | 
				
			||||||
alarm (alarm_a),
 | 
					alarm (alarm_a),
 | 
				
			||||||
worker (worker_a),
 | 
					worker (worker_a),
 | 
				
			||||||
| 
						 | 
					@ -59,7 +57,10 @@ batch_request (std::make_shared<nano::telemetry_impl> (network, alarm, worker))
 | 
				
			||||||
		finished_single_requests.clear ();
 | 
							finished_single_requests.clear ();
 | 
				
			||||||
	};
 | 
						};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (!disable_ongoing_requests_a)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
		ongoing_req_all_peers ();
 | 
							ongoing_req_all_peers ();
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void nano::telemetry::stop ()
 | 
					void nano::telemetry::stop ()
 | 
				
			||||||
| 
						 | 
					@ -286,6 +287,7 @@ size_t nano::telemetry::finished_single_requests_size ()
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
nano::telemetry_impl::telemetry_impl (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a) :
 | 
					nano::telemetry_impl::telemetry_impl (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a) :
 | 
				
			||||||
 | 
					alarm_cutoff (is_sanitizer_build || nano::running_within_valgrind () ? 6 : 3),
 | 
				
			||||||
network (network_a),
 | 
					network (network_a),
 | 
				
			||||||
alarm (alarm_a),
 | 
					alarm (alarm_a),
 | 
				
			||||||
worker (worker_a)
 | 
					worker (worker_a)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -68,7 +68,7 @@ private:
 | 
				
			||||||
	nano::network_params network_params;
 | 
						nano::network_params network_params;
 | 
				
			||||||
	// Anything older than this requires requesting metrics from other nodes.
 | 
						// Anything older than this requires requesting metrics from other nodes.
 | 
				
			||||||
	std::chrono::seconds const cache_cutoff{ nano::telemetry_cache_cutoffs::network_to_time (network_params.network) };
 | 
						std::chrono::seconds const cache_cutoff{ nano::telemetry_cache_cutoffs::network_to_time (network_params.network) };
 | 
				
			||||||
	static std::chrono::seconds constexpr alarm_cutoff{ 3 };
 | 
						std::chrono::seconds const alarm_cutoff;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// All data in this chunk is protected by this mutex
 | 
						// All data in this chunk is protected by this mutex
 | 
				
			||||||
	std::mutex mutex;
 | 
						std::mutex mutex;
 | 
				
			||||||
| 
						 | 
					@ -116,7 +116,7 @@ std::unique_ptr<nano::container_info_component> collect_container_info (telemetr
 | 
				
			||||||
class telemetry
 | 
					class telemetry
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
	telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a);
 | 
						telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a, bool disable_ongoing_requests_a);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Add telemetry metrics received from this endpoint.
 | 
						 * Add telemetry metrics received from this endpoint.
 | 
				
			||||||
| 
						 | 
					@ -188,6 +188,7 @@ private:
 | 
				
			||||||
	void ongoing_req_all_peers ();
 | 
						void ongoing_req_all_peers ();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	friend class node_telemetry_multiple_single_request_clearing_Test;
 | 
						friend class node_telemetry_multiple_single_request_clearing_Test;
 | 
				
			||||||
 | 
						friend class node_telemetry_ongoing_requests_Test;
 | 
				
			||||||
	friend std::unique_ptr<container_info_component> collect_container_info (telemetry &, const std::string &);
 | 
						friend std::unique_ptr<container_info_component> collect_container_info (telemetry &, const std::string &);
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -908,7 +908,7 @@ TEST (node_telemetry, ongoing_requests)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// Wait till the next ongoing will be called, and add a 1s buffer for the actual processing
 | 
						// Wait till the next ongoing will be called, and add a 1s buffer for the actual processing
 | 
				
			||||||
	auto time = std::chrono::steady_clock::now ();
 | 
						auto time = std::chrono::steady_clock::now ();
 | 
				
			||||||
	while (std::chrono::steady_clock::now () < (time + nano::telemetry_cache_cutoffs::test + nano::telemetry_impl::alarm_cutoff + 1s))
 | 
						while (std::chrono::steady_clock::now () < (time + nano::telemetry_cache_cutoffs::test + node_client->telemetry.batch_request->alarm_cutoff + 1s))
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		ASSERT_NO_ERROR (system.poll ());
 | 
							ASSERT_NO_ERROR (system.poll ());
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue