Fix intermittent node telemetry test failures (#2576)
This commit is contained in:
parent
ae3c3e396a
commit
8c3ae38f1c
7 changed files with 30 additions and 22 deletions
|
|
@ -258,10 +258,11 @@ namespace nano
|
||||||
{
|
{
|
||||||
TEST (node_telemetry, basic)
|
TEST (node_telemetry, basic)
|
||||||
{
|
{
|
||||||
nano::system system (2);
|
nano::system system;
|
||||||
|
nano::node_flags node_flags;
|
||||||
auto node_client = system.nodes.front ();
|
node_flags.disable_ongoing_telemetry_requests = true;
|
||||||
auto node_server = system.nodes.back ();
|
auto node_client = system.add_node (node_flags);
|
||||||
|
auto node_server = system.add_node (node_flags);
|
||||||
|
|
||||||
wait_peer_connections (system);
|
wait_peer_connections (system);
|
||||||
|
|
||||||
|
|
@ -304,8 +305,6 @@ TEST (node_telemetry, basic)
|
||||||
|
|
||||||
// Wait the cache period and check cache is not used
|
// Wait the cache period and check cache is not used
|
||||||
std::this_thread::sleep_for (nano::telemetry_cache_cutoffs::test);
|
std::this_thread::sleep_for (nano::telemetry_cache_cutoffs::test);
|
||||||
// Arbitrarily change something so that we can confirm different metrics were used
|
|
||||||
node_server->ledger.cache.block_count = 100;
|
|
||||||
|
|
||||||
std::atomic<bool> done{ false };
|
std::atomic<bool> done{ false };
|
||||||
node_client->telemetry.get_metrics_peers_async ([&done, &all_telemetry_data_time_pairs](nano::telemetry_data_responses const & responses_a) {
|
node_client->telemetry.get_metrics_peers_async ([&done, &all_telemetry_data_time_pairs](nano::telemetry_data_responses const & responses_a) {
|
||||||
|
|
@ -448,10 +447,12 @@ namespace nano
|
||||||
{
|
{
|
||||||
TEST (node_telemetry, single_request)
|
TEST (node_telemetry, single_request)
|
||||||
{
|
{
|
||||||
nano::system system (2);
|
nano::system system;
|
||||||
|
nano::node_flags node_flags;
|
||||||
|
node_flags.disable_ongoing_telemetry_requests = true;
|
||||||
|
|
||||||
auto node_client = system.nodes.front ();
|
auto node_client = system.add_node (node_flags);
|
||||||
auto node_server = system.nodes.back ();
|
auto node_server = system.add_node (node_flags);
|
||||||
|
|
||||||
wait_peer_connections (system);
|
wait_peer_connections (system);
|
||||||
|
|
||||||
|
|
@ -702,10 +703,11 @@ TEST (node_telemetry, disconnects)
|
||||||
|
|
||||||
TEST (node_telemetry, batch_use_single_request_cache)
|
TEST (node_telemetry, batch_use_single_request_cache)
|
||||||
{
|
{
|
||||||
nano::system system (2);
|
nano::system system;
|
||||||
|
nano::node_flags node_flags;
|
||||||
auto node_client = system.nodes.front ();
|
node_flags.disable_ongoing_telemetry_requests = true;
|
||||||
auto node_server = system.nodes.back ();
|
auto node_client = system.add_node (node_flags);
|
||||||
|
auto node_server = system.add_node (node_flags);
|
||||||
|
|
||||||
wait_peer_connections (system);
|
wait_peer_connections (system);
|
||||||
|
|
||||||
|
|
@ -756,6 +758,8 @@ TEST (node_telemetry, batch_use_single_request_cache)
|
||||||
ASSERT_NO_ERROR (system.poll ());
|
ASSERT_NO_ERROR (system.poll ());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::this_thread::sleep_for (nano::telemetry_cache_cutoffs::test);
|
||||||
|
|
||||||
system.deadline_set (10s);
|
system.deadline_set (10s);
|
||||||
std::atomic<bool> done{ false };
|
std::atomic<bool> done{ false };
|
||||||
node_client->telemetry.get_metrics_peers_async ([&done, &telemetry_data_time_pair](nano::telemetry_data_responses const & responses_a) {
|
node_client->telemetry.get_metrics_peers_async ([&done, &telemetry_data_time_pair](nano::telemetry_data_responses const & responses_a) {
|
||||||
|
|
|
||||||
|
|
@ -456,7 +456,7 @@ public:
|
||||||
class telemetry_cache_cutoffs
|
class telemetry_cache_cutoffs
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
static std::chrono::seconds constexpr test{ 2 };
|
static std::chrono::seconds constexpr test{ 3 };
|
||||||
static std::chrono::seconds constexpr beta{ 15 };
|
static std::chrono::seconds constexpr beta{ 15 };
|
||||||
static std::chrono::seconds constexpr live{ 60 };
|
static std::chrono::seconds constexpr live{ 60 };
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -126,7 +126,7 @@ gap_cache (*this),
|
||||||
ledger (store, stats, flags_a.generate_cache),
|
ledger (store, stats, flags_a.generate_cache),
|
||||||
checker (config.signature_checker_threads),
|
checker (config.signature_checker_threads),
|
||||||
network (*this, config.peering_port),
|
network (*this, config.peering_port),
|
||||||
telemetry (network, alarm, worker),
|
telemetry (network, alarm, worker, flags.disable_ongoing_telemetry_requests),
|
||||||
bootstrap_initiator (*this),
|
bootstrap_initiator (*this),
|
||||||
bootstrap (config.peering_port, *this),
|
bootstrap (config.peering_port, *this),
|
||||||
application_path (application_path_a),
|
application_path (application_path_a),
|
||||||
|
|
|
||||||
|
|
@ -126,6 +126,7 @@ public:
|
||||||
bool disable_unchecked_drop{ true };
|
bool disable_unchecked_drop{ true };
|
||||||
bool disable_providing_telemetry_metrics{ false };
|
bool disable_providing_telemetry_metrics{ false };
|
||||||
bool disable_block_processor_unchecked_deletion{ false };
|
bool disable_block_processor_unchecked_deletion{ false };
|
||||||
|
bool disable_ongoing_telemetry_requests{ false };
|
||||||
bool fast_bootstrap{ false };
|
bool fast_bootstrap{ false };
|
||||||
bool read_only{ false };
|
bool read_only{ false };
|
||||||
nano::confirmation_height_mode confirmation_height_processor_mode{ nano::confirmation_height_mode::automatic };
|
nano::confirmation_height_mode confirmation_height_processor_mode{ nano::confirmation_height_mode::automatic };
|
||||||
|
|
|
||||||
|
|
@ -14,9 +14,7 @@
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
|
||||||
std::chrono::seconds constexpr nano::telemetry_impl::alarm_cutoff;
|
nano::telemetry::telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a, bool disable_ongoing_requests_a) :
|
||||||
|
|
||||||
nano::telemetry::telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a) :
|
|
||||||
network (network_a),
|
network (network_a),
|
||||||
alarm (alarm_a),
|
alarm (alarm_a),
|
||||||
worker (worker_a),
|
worker (worker_a),
|
||||||
|
|
@ -59,7 +57,10 @@ batch_request (std::make_shared<nano::telemetry_impl> (network, alarm, worker))
|
||||||
finished_single_requests.clear ();
|
finished_single_requests.clear ();
|
||||||
};
|
};
|
||||||
|
|
||||||
ongoing_req_all_peers ();
|
if (!disable_ongoing_requests_a)
|
||||||
|
{
|
||||||
|
ongoing_req_all_peers ();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void nano::telemetry::stop ()
|
void nano::telemetry::stop ()
|
||||||
|
|
@ -286,6 +287,7 @@ size_t nano::telemetry::finished_single_requests_size ()
|
||||||
}
|
}
|
||||||
|
|
||||||
nano::telemetry_impl::telemetry_impl (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a) :
|
nano::telemetry_impl::telemetry_impl (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a) :
|
||||||
|
alarm_cutoff (is_sanitizer_build || nano::running_within_valgrind () ? 6 : 3),
|
||||||
network (network_a),
|
network (network_a),
|
||||||
alarm (alarm_a),
|
alarm (alarm_a),
|
||||||
worker (worker_a)
|
worker (worker_a)
|
||||||
|
|
|
||||||
|
|
@ -68,7 +68,7 @@ private:
|
||||||
nano::network_params network_params;
|
nano::network_params network_params;
|
||||||
// Anything older than this requires requesting metrics from other nodes.
|
// Anything older than this requires requesting metrics from other nodes.
|
||||||
std::chrono::seconds const cache_cutoff{ nano::telemetry_cache_cutoffs::network_to_time (network_params.network) };
|
std::chrono::seconds const cache_cutoff{ nano::telemetry_cache_cutoffs::network_to_time (network_params.network) };
|
||||||
static std::chrono::seconds constexpr alarm_cutoff{ 3 };
|
std::chrono::seconds const alarm_cutoff;
|
||||||
|
|
||||||
// All data in this chunk is protected by this mutex
|
// All data in this chunk is protected by this mutex
|
||||||
std::mutex mutex;
|
std::mutex mutex;
|
||||||
|
|
@ -116,7 +116,7 @@ std::unique_ptr<nano::container_info_component> collect_container_info (telemetr
|
||||||
class telemetry
|
class telemetry
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a);
|
telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a, bool disable_ongoing_requests_a);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Add telemetry metrics received from this endpoint.
|
* Add telemetry metrics received from this endpoint.
|
||||||
|
|
@ -188,6 +188,7 @@ private:
|
||||||
void ongoing_req_all_peers ();
|
void ongoing_req_all_peers ();
|
||||||
|
|
||||||
friend class node_telemetry_multiple_single_request_clearing_Test;
|
friend class node_telemetry_multiple_single_request_clearing_Test;
|
||||||
|
friend class node_telemetry_ongoing_requests_Test;
|
||||||
friend std::unique_ptr<container_info_component> collect_container_info (telemetry &, const std::string &);
|
friend std::unique_ptr<container_info_component> collect_container_info (telemetry &, const std::string &);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -908,7 +908,7 @@ TEST (node_telemetry, ongoing_requests)
|
||||||
|
|
||||||
// Wait till the next ongoing will be called, and add a 1s buffer for the actual processing
|
// Wait till the next ongoing will be called, and add a 1s buffer for the actual processing
|
||||||
auto time = std::chrono::steady_clock::now ();
|
auto time = std::chrono::steady_clock::now ();
|
||||||
while (std::chrono::steady_clock::now () < (time + nano::telemetry_cache_cutoffs::test + nano::telemetry_impl::alarm_cutoff + 1s))
|
while (std::chrono::steady_clock::now () < (time + nano::telemetry_cache_cutoffs::test + node_client->telemetry.batch_request->alarm_cutoff + 1s))
|
||||||
{
|
{
|
||||||
ASSERT_NO_ERROR (system.poll ());
|
ASSERT_NO_ERROR (system.poll ());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue