Fix intermittent node telemetry test failures (#2576)
This commit is contained in:
parent
ae3c3e396a
commit
8c3ae38f1c
7 changed files with 30 additions and 22 deletions
|
@ -258,10 +258,11 @@ namespace nano
|
|||
{
|
||||
TEST (node_telemetry, basic)
|
||||
{
|
||||
nano::system system (2);
|
||||
|
||||
auto node_client = system.nodes.front ();
|
||||
auto node_server = system.nodes.back ();
|
||||
nano::system system;
|
||||
nano::node_flags node_flags;
|
||||
node_flags.disable_ongoing_telemetry_requests = true;
|
||||
auto node_client = system.add_node (node_flags);
|
||||
auto node_server = system.add_node (node_flags);
|
||||
|
||||
wait_peer_connections (system);
|
||||
|
||||
|
@ -304,8 +305,6 @@ TEST (node_telemetry, basic)
|
|||
|
||||
// Wait for the cache period to elapse, then check that the cache is not used
|
||||
std::this_thread::sleep_for (nano::telemetry_cache_cutoffs::test);
|
||||
// Arbitrarily change something so that we can confirm different metrics were used
|
||||
node_server->ledger.cache.block_count = 100;
|
||||
|
||||
std::atomic<bool> done{ false };
|
||||
node_client->telemetry.get_metrics_peers_async ([&done, &all_telemetry_data_time_pairs](nano::telemetry_data_responses const & responses_a) {
|
||||
|
@ -448,10 +447,12 @@ namespace nano
|
|||
{
|
||||
TEST (node_telemetry, single_request)
|
||||
{
|
||||
nano::system system (2);
|
||||
nano::system system;
|
||||
nano::node_flags node_flags;
|
||||
node_flags.disable_ongoing_telemetry_requests = true;
|
||||
|
||||
auto node_client = system.nodes.front ();
|
||||
auto node_server = system.nodes.back ();
|
||||
auto node_client = system.add_node (node_flags);
|
||||
auto node_server = system.add_node (node_flags);
|
||||
|
||||
wait_peer_connections (system);
|
||||
|
||||
|
@ -702,10 +703,11 @@ TEST (node_telemetry, disconnects)
|
|||
|
||||
TEST (node_telemetry, batch_use_single_request_cache)
|
||||
{
|
||||
nano::system system (2);
|
||||
|
||||
auto node_client = system.nodes.front ();
|
||||
auto node_server = system.nodes.back ();
|
||||
nano::system system;
|
||||
nano::node_flags node_flags;
|
||||
node_flags.disable_ongoing_telemetry_requests = true;
|
||||
auto node_client = system.add_node (node_flags);
|
||||
auto node_server = system.add_node (node_flags);
|
||||
|
||||
wait_peer_connections (system);
|
||||
|
||||
|
@ -756,6 +758,8 @@ TEST (node_telemetry, batch_use_single_request_cache)
|
|||
ASSERT_NO_ERROR (system.poll ());
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for (nano::telemetry_cache_cutoffs::test);
|
||||
|
||||
system.deadline_set (10s);
|
||||
std::atomic<bool> done{ false };
|
||||
node_client->telemetry.get_metrics_peers_async ([&done, &telemetry_data_time_pair](nano::telemetry_data_responses const & responses_a) {
|
||||
|
|
|
@ -456,7 +456,7 @@ public:
|
|||
class telemetry_cache_cutoffs
|
||||
{
|
||||
public:
|
||||
static std::chrono::seconds constexpr test{ 2 };
|
||||
static std::chrono::seconds constexpr test{ 3 };
|
||||
static std::chrono::seconds constexpr beta{ 15 };
|
||||
static std::chrono::seconds constexpr live{ 60 };
|
||||
|
||||
|
|
|
@ -126,7 +126,7 @@ gap_cache (*this),
|
|||
ledger (store, stats, flags_a.generate_cache),
|
||||
checker (config.signature_checker_threads),
|
||||
network (*this, config.peering_port),
|
||||
telemetry (network, alarm, worker),
|
||||
telemetry (network, alarm, worker, flags.disable_ongoing_telemetry_requests),
|
||||
bootstrap_initiator (*this),
|
||||
bootstrap (config.peering_port, *this),
|
||||
application_path (application_path_a),
|
||||
|
|
|
@ -126,6 +126,7 @@ public:
|
|||
bool disable_unchecked_drop{ true };
|
||||
bool disable_providing_telemetry_metrics{ false };
|
||||
bool disable_block_processor_unchecked_deletion{ false };
|
||||
bool disable_ongoing_telemetry_requests{ false };
|
||||
bool fast_bootstrap{ false };
|
||||
bool read_only{ false };
|
||||
nano::confirmation_height_mode confirmation_height_processor_mode{ nano::confirmation_height_mode::automatic };
|
||||
|
|
|
@ -14,9 +14,7 @@
|
|||
#include <numeric>
|
||||
#include <set>
|
||||
|
||||
std::chrono::seconds constexpr nano::telemetry_impl::alarm_cutoff;
|
||||
|
||||
nano::telemetry::telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a) :
|
||||
nano::telemetry::telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a, bool disable_ongoing_requests_a) :
|
||||
network (network_a),
|
||||
alarm (alarm_a),
|
||||
worker (worker_a),
|
||||
|
@ -59,7 +57,10 @@ batch_request (std::make_shared<nano::telemetry_impl> (network, alarm, worker))
|
|||
finished_single_requests.clear ();
|
||||
};
|
||||
|
||||
ongoing_req_all_peers ();
|
||||
if (!disable_ongoing_requests_a)
|
||||
{
|
||||
ongoing_req_all_peers ();
|
||||
}
|
||||
}
|
||||
|
||||
void nano::telemetry::stop ()
|
||||
|
@ -286,6 +287,7 @@ size_t nano::telemetry::finished_single_requests_size ()
|
|||
}
|
||||
|
||||
nano::telemetry_impl::telemetry_impl (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a) :
|
||||
alarm_cutoff (is_sanitizer_build || nano::running_within_valgrind () ? 6 : 3),
|
||||
network (network_a),
|
||||
alarm (alarm_a),
|
||||
worker (worker_a)
|
||||
|
|
|
@ -68,7 +68,7 @@ private:
|
|||
nano::network_params network_params;
|
||||
// Anything older than this requires requesting metrics from other nodes.
|
||||
std::chrono::seconds const cache_cutoff{ nano::telemetry_cache_cutoffs::network_to_time (network_params.network) };
|
||||
static std::chrono::seconds constexpr alarm_cutoff{ 3 };
|
||||
std::chrono::seconds const alarm_cutoff;
|
||||
|
||||
// All data in this chunk is protected by this mutex
|
||||
std::mutex mutex;
|
||||
|
@ -116,7 +116,7 @@ std::unique_ptr<nano::container_info_component> collect_container_info (telemetr
|
|||
class telemetry
|
||||
{
|
||||
public:
|
||||
telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a);
|
||||
telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a, bool disable_ongoing_requests_a);
|
||||
|
||||
/*
|
||||
* Add telemetry metrics received from this endpoint.
|
||||
|
@ -188,6 +188,7 @@ private:
|
|||
void ongoing_req_all_peers ();
|
||||
|
||||
friend class node_telemetry_multiple_single_request_clearing_Test;
|
||||
friend class node_telemetry_ongoing_requests_Test;
|
||||
friend std::unique_ptr<container_info_component> collect_container_info (telemetry &, const std::string &);
|
||||
};
|
||||
|
||||
|
|
|
@ -908,7 +908,7 @@ TEST (node_telemetry, ongoing_requests)
|
|||
|
||||
// Wait until the next ongoing request should have been made, plus a 1s buffer for the actual processing
|
||||
auto time = std::chrono::steady_clock::now ();
|
||||
while (std::chrono::steady_clock::now () < (time + nano::telemetry_cache_cutoffs::test + nano::telemetry_impl::alarm_cutoff + 1s))
|
||||
while (std::chrono::steady_clock::now () < (time + nano::telemetry_cache_cutoffs::test + node_client->telemetry.batch_request->alarm_cutoff + 1s))
|
||||
{
|
||||
ASSERT_NO_ERROR (system.poll ());
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue