Fix intermittent node telemetry test failures (#2576)

This commit is contained in:
Wesley Shillingford 2020-02-20 19:28:58 +00:00 committed by GitHub
commit 8c3ae38f1c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 30 additions and 22 deletions

View file

@ -258,10 +258,11 @@ namespace nano
{
TEST (node_telemetry, basic)
{
nano::system system (2);
auto node_client = system.nodes.front ();
auto node_server = system.nodes.back ();
nano::system system;
nano::node_flags node_flags;
node_flags.disable_ongoing_telemetry_requests = true;
auto node_client = system.add_node (node_flags);
auto node_server = system.add_node (node_flags);
wait_peer_connections (system);
@ -304,8 +305,6 @@ TEST (node_telemetry, basic)
// Wait the cache period and check cache is not used
std::this_thread::sleep_for (nano::telemetry_cache_cutoffs::test);
// Arbitrarily change something so that we can confirm different metrics were used
node_server->ledger.cache.block_count = 100;
std::atomic<bool> done{ false };
node_client->telemetry.get_metrics_peers_async ([&done, &all_telemetry_data_time_pairs](nano::telemetry_data_responses const & responses_a) {
@ -448,10 +447,12 @@ namespace nano
{
TEST (node_telemetry, single_request)
{
nano::system system (2);
nano::system system;
nano::node_flags node_flags;
node_flags.disable_ongoing_telemetry_requests = true;
auto node_client = system.nodes.front ();
auto node_server = system.nodes.back ();
auto node_client = system.add_node (node_flags);
auto node_server = system.add_node (node_flags);
wait_peer_connections (system);
@ -702,10 +703,11 @@ TEST (node_telemetry, disconnects)
TEST (node_telemetry, batch_use_single_request_cache)
{
nano::system system (2);
auto node_client = system.nodes.front ();
auto node_server = system.nodes.back ();
nano::system system;
nano::node_flags node_flags;
node_flags.disable_ongoing_telemetry_requests = true;
auto node_client = system.add_node (node_flags);
auto node_server = system.add_node (node_flags);
wait_peer_connections (system);
@ -756,6 +758,8 @@ TEST (node_telemetry, batch_use_single_request_cache)
ASSERT_NO_ERROR (system.poll ());
}
std::this_thread::sleep_for (nano::telemetry_cache_cutoffs::test);
system.deadline_set (10s);
std::atomic<bool> done{ false };
node_client->telemetry.get_metrics_peers_async ([&done, &telemetry_data_time_pair](nano::telemetry_data_responses const & responses_a) {

View file

@ -456,7 +456,7 @@ public:
class telemetry_cache_cutoffs
{
public:
static std::chrono::seconds constexpr test{ 2 };
static std::chrono::seconds constexpr test{ 3 };
static std::chrono::seconds constexpr beta{ 15 };
static std::chrono::seconds constexpr live{ 60 };

View file

@ -126,7 +126,7 @@ gap_cache (*this),
ledger (store, stats, flags_a.generate_cache),
checker (config.signature_checker_threads),
network (*this, config.peering_port),
telemetry (network, alarm, worker),
telemetry (network, alarm, worker, flags.disable_ongoing_telemetry_requests),
bootstrap_initiator (*this),
bootstrap (config.peering_port, *this),
application_path (application_path_a),

View file

@ -126,6 +126,7 @@ public:
bool disable_unchecked_drop{ true };
bool disable_providing_telemetry_metrics{ false };
bool disable_block_processor_unchecked_deletion{ false };
bool disable_ongoing_telemetry_requests{ false };
bool fast_bootstrap{ false };
bool read_only{ false };
nano::confirmation_height_mode confirmation_height_processor_mode{ nano::confirmation_height_mode::automatic };

View file

@ -14,9 +14,7 @@
#include <numeric>
#include <set>
std::chrono::seconds constexpr nano::telemetry_impl::alarm_cutoff;
nano::telemetry::telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a) :
nano::telemetry::telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a, bool disable_ongoing_requests_a) :
network (network_a),
alarm (alarm_a),
worker (worker_a),
@ -59,7 +57,10 @@ batch_request (std::make_shared<nano::telemetry_impl> (network, alarm, worker))
finished_single_requests.clear ();
};
ongoing_req_all_peers ();
if (!disable_ongoing_requests_a)
{
ongoing_req_all_peers ();
}
}
void nano::telemetry::stop ()
@ -286,6 +287,7 @@ size_t nano::telemetry::finished_single_requests_size ()
}
nano::telemetry_impl::telemetry_impl (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a) :
alarm_cutoff (is_sanitizer_build || nano::running_within_valgrind () ? 6 : 3),
network (network_a),
alarm (alarm_a),
worker (worker_a)

View file

@ -68,7 +68,7 @@ private:
nano::network_params network_params;
// Anything older than this requires requesting metrics from other nodes.
std::chrono::seconds const cache_cutoff{ nano::telemetry_cache_cutoffs::network_to_time (network_params.network) };
static std::chrono::seconds constexpr alarm_cutoff{ 3 };
std::chrono::seconds const alarm_cutoff;
// All data in this chunk is protected by this mutex
std::mutex mutex;
@ -116,7 +116,7 @@ std::unique_ptr<nano::container_info_component> collect_container_info (telemetr
class telemetry
{
public:
telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a);
telemetry (nano::network & network_a, nano::alarm & alarm_a, nano::worker & worker_a, bool disable_ongoing_requests_a);
/*
* Add telemetry metrics received from this endpoint.
@ -188,6 +188,7 @@ private:
void ongoing_req_all_peers ();
friend class node_telemetry_multiple_single_request_clearing_Test;
friend class node_telemetry_ongoing_requests_Test;
friend std::unique_ptr<container_info_component> collect_container_info (telemetry &, const std::string &);
};

View file

@ -908,7 +908,7 @@ TEST (node_telemetry, ongoing_requests)
// Wait till the next ongoing will be called, and add a 1s buffer for the actual processing
auto time = std::chrono::steady_clock::now ();
while (std::chrono::steady_clock::now () < (time + nano::telemetry_cache_cutoffs::test + nano::telemetry_impl::alarm_cutoff + 1s))
while (std::chrono::steady_clock::now () < (time + nano::telemetry_cache_cutoffs::test + node_client->telemetry.batch_request->alarm_cutoff + 1s))
{
ASSERT_NO_ERROR (system.poll ());
}