Fix intermittent node telemetry test failures (#2576)
This commit is contained in:
		
					parent
					
						
							
								ae3c3e396a
							
						
					
				
			
			
				commit
				
					
						8c3ae38f1c
					
				
			
		
					 7 changed files with 30 additions and 22 deletions
				
			
		|  | @ -258,10 +258,11 @@ namespace nano | |||
| { | ||||
| TEST (node_telemetry, basic) | ||||
| { | ||||
| 	nano::system system (2); | ||||
| 
 | ||||
| 	auto node_client = system.nodes.front (); | ||||
| 	auto node_server = system.nodes.back (); | ||||
| 	nano::system system; | ||||
| 	nano::node_flags node_flags; | ||||
| 	node_flags.disable_ongoing_telemetry_requests = true; | ||||
| 	auto node_client = system.add_node (node_flags); | ||||
| 	auto node_server = system.add_node (node_flags); | ||||
| 
 | ||||
| 	wait_peer_connections (system); | ||||
| 
 | ||||
|  | @ -304,8 +305,6 @@ TEST (node_telemetry, basic) | |||
| 
 | ||||
| 	// Wait the cache period and check cache is not used
 | ||||
| 	std::this_thread::sleep_for (nano::telemetry_cache_cutoffs::test); | ||||
| 	// Arbitrarily change something so that we can confirm different metrics were used
 | ||||
| 	node_server->ledger.cache.block_count = 100; | ||||
| 
 | ||||
| 	std::atomic<bool> done{ false }; | ||||
| 	node_client->telemetry.get_metrics_peers_async ([&done, &all_telemetry_data_time_pairs](nano::telemetry_data_responses const & responses_a) { | ||||
|  | @ -448,10 +447,12 @@ namespace nano | |||
| { | ||||
| TEST (node_telemetry, single_request) | ||||
| { | ||||
| 	nano::system system (2); | ||||
| 	nano::system system; | ||||
| 	nano::node_flags node_flags; | ||||
| 	node_flags.disable_ongoing_telemetry_requests = true; | ||||
| 
 | ||||
| 	auto node_client = system.nodes.front (); | ||||
| 	auto node_server = system.nodes.back (); | ||||
| 	auto node_client = system.add_node (node_flags); | ||||
| 	auto node_server = system.add_node (node_flags); | ||||
| 
 | ||||
| 	wait_peer_connections (system); | ||||
| 
 | ||||
|  | @ -702,10 +703,11 @@ TEST (node_telemetry, disconnects) | |||
| 
 | ||||
| TEST (node_telemetry, batch_use_single_request_cache) | ||||
| { | ||||
| 	nano::system system (2); | ||||
| 
 | ||||
| 	auto node_client = system.nodes.front (); | ||||
| 	auto node_server = system.nodes.back (); | ||||
| 	nano::system system; | ||||
| 	nano::node_flags node_flags; | ||||
| 	node_flags.disable_ongoing_telemetry_requests = true; | ||||
| 	auto node_client = system.add_node (node_flags); | ||||
| 	auto node_server = system.add_node (node_flags); | ||||
| 
 | ||||
| 	wait_peer_connections (system); | ||||
| 
 | ||||
|  | @ -756,6 +758,8 @@ TEST (node_telemetry, batch_use_single_request_cache) | |||
| 		ASSERT_NO_ERROR (system.poll ()); | ||||
| 	} | ||||
| 
 | ||||
| 	std::this_thread::sleep_for (nano::telemetry_cache_cutoffs::test); | ||||
| 
 | ||||
| 	system.deadline_set (10s); | ||||
| 	std::atomic<bool> done{ false }; | ||||
| 	node_client->telemetry.get_metrics_peers_async ([&done, &telemetry_data_time_pair](nano::telemetry_data_responses const & responses_a) { | ||||
|  |  | |||
|  | @ -456,7 +456,7 @@ public: | |||
| class telemetry_cache_cutoffs | ||||
| { | ||||
| public: | ||||
| 	static std::chrono::seconds constexpr test{ 2 }; | ||||
| 	static std::chrono::seconds constexpr test{ 3 }; | ||||
| 	static std::chrono::seconds constexpr beta{ 15 }; | ||||
| 	static std::chrono::seconds constexpr live{ 60 }; | ||||
| 
 | ||||
|  |  | |||
|  | @ -126,7 +126,7 @@ gap_cache (*this), | |||
| ledger (store, stats, flags_a.generate_cache), | ||||
| checker (config.signature_checker_threads), | ||||
| network (*this, config.peering_port), | ||||
| telemetry (network, alarm, worker), | ||||
| telemetry (network, alarm, worker, flags.disable_ongoing_telemetry_requests), | ||||
| bootstrap_initiator (*this), | ||||
| bootstrap (config.peering_port, *this), | ||||
| application_path (application_path_a), | ||||
|  |  | |||
|  | @ -126,6 +126,7 @@ public: | |||
| 	bool disable_unchecked_drop{ true }; | ||||
| 	bool disable_providing_telemetry_metrics{ false }; | ||||
| 	bool disable_block_processor_unchecked_deletion{ false }; | ||||
| 	bool disable_ongoing_telemetry_requests{ false }; | ||||
| 	bool fast_bootstrap{ false }; | ||||
| 	bool read_only{ false }; | ||||
| 	nano::confirmation_height_mode confirmation_height_processor_mode{ nano::confirmation_height_mode::automatic }; | ||||
|  |  | |||
|  | @ -14,9 +14,7 @@ | |||
| #include <numeric> | ||||
| #include <set> | ||||
| 
 | ||||
| std::chrono::seconds constexpr nano::telemetry_impl::alarm_cutoff; | ||||
| 
 | ||||
| nano::telemetry::telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a) : | ||||
| nano::telemetry::telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a, bool disable_ongoing_requests_a) : | ||||
| network (network_a), | ||||
| alarm (alarm_a), | ||||
| worker (worker_a), | ||||
|  | @ -59,7 +57,10 @@ batch_request (std::make_shared<nano::telemetry_impl> (network, alarm, worker)) | |||
| 		finished_single_requests.clear (); | ||||
| 	}; | ||||
| 
 | ||||
| 	ongoing_req_all_peers (); | ||||
| 	if (!disable_ongoing_requests_a) | ||||
| 	{ | ||||
| 		ongoing_req_all_peers (); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| void nano::telemetry::stop () | ||||
|  | @ -286,6 +287,7 @@ size_t nano::telemetry::finished_single_requests_size () | |||
| } | ||||
| 
 | ||||
| nano::telemetry_impl::telemetry_impl (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a) : | ||||
| alarm_cutoff (is_sanitizer_build || nano::running_within_valgrind () ? 6 : 3), | ||||
| network (network_a), | ||||
| alarm (alarm_a), | ||||
| worker (worker_a) | ||||
|  |  | |||
|  | @ -68,7 +68,7 @@ private: | |||
| 	nano::network_params network_params; | ||||
| 	// Anything older than this requires requesting metrics from other nodes.
 | ||||
| 	std::chrono::seconds const cache_cutoff{ nano::telemetry_cache_cutoffs::network_to_time (network_params.network) }; | ||||
| 	static std::chrono::seconds constexpr alarm_cutoff{ 3 }; | ||||
| 	std::chrono::seconds const alarm_cutoff; | ||||
| 
 | ||||
| 	// All data in this chunk is protected by this mutex
 | ||||
| 	std::mutex mutex; | ||||
|  | @ -116,7 +116,7 @@ std::unique_ptr<nano::container_info_component> collect_container_info (telemetr | |||
| class telemetry | ||||
| { | ||||
| public: | ||||
| 	telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a); | ||||
| 	telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a, bool disable_ongoing_requests_a); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Add telemetry metrics received from this endpoint. | ||||
|  | @ -188,6 +188,7 @@ private: | |||
| 	void ongoing_req_all_peers (); | ||||
| 
 | ||||
| 	friend class node_telemetry_multiple_single_request_clearing_Test; | ||||
| 	friend class node_telemetry_ongoing_requests_Test; | ||||
| 	friend std::unique_ptr<container_info_component> collect_container_info (telemetry &, const std::string &); | ||||
| }; | ||||
| 
 | ||||
|  |  | |||
|  | @ -908,7 +908,7 @@ TEST (node_telemetry, ongoing_requests) | |||
| 
 | ||||
| 	// Wait till the next ongoing will be called, and add a 1s buffer for the actual processing
 | ||||
| 	auto time = std::chrono::steady_clock::now (); | ||||
| 	while (std::chrono::steady_clock::now () < (time + nano::telemetry_cache_cutoffs::test + nano::telemetry_impl::alarm_cutoff + 1s)) | ||||
| 	while (std::chrono::steady_clock::now () < (time + nano::telemetry_cache_cutoffs::test + node_client->telemetry.batch_request->alarm_cutoff + 1s)) | ||||
| 	{ | ||||
| 		ASSERT_NO_ERROR (system.poll ()); | ||||
| 	} | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Wesley Shillingford
				Wesley Shillingford