Increase server timeout to receive TCP header (#2022)

- increase server timeout to receive TCP header to prevent TCP realtime network server disconnections
- increase idle timeout for bootstrap client & realtime TCP sockets
- make idle_timeout node_constant
This commit is contained in:
Sergey Kroshnin 2019-05-25 22:16:05 +03:00 committed by GitHub
commit 9e7d2c0fd1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 31 additions and 31 deletions

View file

@ -2109,7 +2109,6 @@ TEST (bootstrap, tcp_listener_timeout_empty)
{
nano::system system (24000, 1);
auto node0 (system.nodes[0]);
node0->config.tcp_idle_timeout = std::chrono::seconds (1);
auto socket (std::make_shared<nano::socket> (node0));
std::atomic<bool> connected (false);
socket->async_connect (node0->bootstrap.endpoint (), [&connected](boost::system::error_code const & ec) {
@ -2137,7 +2136,6 @@ TEST (bootstrap, tcp_listener_timeout_node_id_handshake)
{
nano::system system (24000, 1);
auto node0 (system.nodes[0]);
node0->config.tcp_idle_timeout = std::chrono::seconds (1);
auto socket (std::make_shared<nano::socket> (node0));
auto cookie (node0->network.tcp_channels.assign_syn_cookie (node0->bootstrap.endpoint ()));
nano::node_id_handshake node_id_handshake (cookie, boost::none);

View file

@ -701,7 +701,6 @@ TEST (node_config, v16_v17_upgrade)
config.logging.init (path);
// These config options should not be present
ASSERT_FALSE (tree.get_optional_child ("tcp_io_timeout"));
ASSERT_FALSE (tree.get_optional_child ("tcp_idle_timeout"));
ASSERT_FALSE (tree.get_optional_child ("pow_sleep_interval"));
ASSERT_FALSE (tree.get_optional_child ("external_address"));
ASSERT_FALSE (tree.get_optional_child ("external_port"));
@ -711,7 +710,6 @@ TEST (node_config, v16_v17_upgrade)
config.deserialize_json (upgraded, tree);
// The config options should be added after the upgrade
ASSERT_TRUE (!!tree.get_optional_child ("tcp_io_timeout"));
ASSERT_TRUE (!!tree.get_optional_child ("tcp_idle_timeout"));
ASSERT_TRUE (!!tree.get_optional_child ("pow_sleep_interval"));
ASSERT_TRUE (!!tree.get_optional_child ("external_address"));
ASSERT_TRUE (!!tree.get_optional_child ("external_port"));
@ -738,7 +736,6 @@ TEST (node_config, v17_values)
// Check config is correct
{
tree.put ("tcp_io_timeout", 1);
tree.put ("tcp_idle_timeout", 0);
tree.put ("pow_sleep_interval", 0);
tree.put ("external_address", "::1");
tree.put ("external_port", 0);
@ -756,7 +753,6 @@ TEST (node_config, v17_values)
config.deserialize_json (upgraded, tree);
ASSERT_FALSE (upgraded);
ASSERT_EQ (config.tcp_io_timeout.count (), 1);
ASSERT_EQ (config.tcp_idle_timeout.count (), 0);
ASSERT_EQ (config.pow_sleep_interval.count (), 0);
ASSERT_EQ (config.external_address, boost::asio::ip::address_v6::from_string ("::1"));
ASSERT_EQ (config.external_port, 0);
@ -768,7 +764,6 @@ TEST (node_config, v17_values)
// Check config is correct with other values
tree.put ("tcp_io_timeout", std::numeric_limits<unsigned long>::max () - 100);
tree.put ("tcp_idle_timeout", std::numeric_limits<unsigned>::max ());
tree.put ("pow_sleep_interval", std::numeric_limits<unsigned long>::max () - 100);
tree.put ("external_address", "::ffff:192.168.1.1");
tree.put ("external_port", std::numeric_limits<uint16_t>::max () - 1);
@ -786,7 +781,6 @@ TEST (node_config, v17_values)
config.deserialize_json (upgraded, tree);
ASSERT_FALSE (upgraded);
ASSERT_EQ (config.tcp_io_timeout.count (), std::numeric_limits<unsigned long>::max () - 100);
ASSERT_EQ (config.tcp_idle_timeout.count (), std::numeric_limits<unsigned>::max ());
ASSERT_EQ (config.pow_sleep_interval.count (), std::numeric_limits<unsigned long>::max () - 100);
ASSERT_EQ (config.external_address, boost::asio::ip::address_v6::from_string ("::ffff:192.168.1.1"));
ASSERT_EQ (config.external_port, std::numeric_limits<uint16_t>::max () - 1);

View file

@ -1162,6 +1162,9 @@ void nano::bootstrap_attempt::pool_connection (std::shared_ptr<nano::bootstrap_c
std::lock_guard<std::mutex> lock (mutex);
if (!stopped && !client_a->pending_stop)
{
// Idle bootstrap client socket
client_a->channel->socket->start_timer (node->network_params.node.idle_timeout);
// Push into idle deque
idle.push_front (client_a);
}
condition.notify_all ();
@ -1930,6 +1933,8 @@ node (node_a)
void nano::bootstrap_server::receive ()
{
// Increase timeout to receive TCP header (idle server socket)
socket->set_timeout (node->network_params.node.idle_timeout);
auto this_l (shared_from_this ());
socket->async_read (receive_buffer, 8, [this_l](boost::system::error_code const & ec, size_t size_a) {
// Set remote_endpoint
@ -1937,6 +1942,8 @@ void nano::bootstrap_server::receive ()
{
this_l->remote_endpoint = this_l->socket->remote_endpoint ();
}
// Decrease timeout to default
this_l->socket->set_timeout (this_l->node->config.tcp_io_timeout);
// Receive header
this_l->receive_header_action (ec, size_a);
});

View file

@ -115,7 +115,6 @@ nano::error nano::node_config::serialize_json (nano::jsonconfig & json) const
json.put ("vote_minimum", vote_minimum.to_string_dec ());
json.put ("unchecked_cutoff_time", unchecked_cutoff_time.count ());
json.put ("tcp_io_timeout", tcp_io_timeout.count ());
json.put ("tcp_idle_timeout", tcp_idle_timeout.count ());
json.put ("pow_sleep_interval", pow_sleep_interval.count ());
json.put ("external_address", external_address.to_string ());
json.put ("external_port", external_port);
@ -240,7 +239,6 @@ bool nano::node_config::upgrade_json (unsigned version_a, nano::jsonconfig & jso
diagnostics_config.serialize_json (diagnostics_l);
json.put_child ("diagnostics", diagnostics_l);
json.put ("tcp_io_timeout", tcp_io_timeout.count ());
json.put ("tcp_idle_timeout", tcp_idle_timeout.count ());
json.put (pow_sleep_interval_key, pow_sleep_interval.count ());
json.put ("external_address", external_address.to_string ());
json.put ("external_port", external_port);
@ -349,9 +347,6 @@ nano::error nano::node_config::deserialize_json (bool & upgraded_a, nano::jsonco
auto tcp_io_timeout_l = static_cast<unsigned long> (tcp_io_timeout.count ());
json.get ("tcp_io_timeout", tcp_io_timeout_l);
tcp_io_timeout = std::chrono::seconds (tcp_io_timeout_l);
auto tcp_idle_timeout_l = static_cast<unsigned long> (tcp_idle_timeout.count ());
json.get ("tcp_idle_timeout", tcp_idle_timeout_l);
tcp_idle_timeout = std::chrono::seconds (tcp_idle_timeout_l);
auto ipc_config_l (json.get_optional_child ("ipc"));
if (ipc_config_l)

View file

@ -64,8 +64,6 @@ public:
std::chrono::seconds unchecked_cutoff_time{ std::chrono::seconds (4 * 60 * 60) }; // 4 hours
/** Timeout for initiated async operations */
std::chrono::seconds tcp_io_timeout{ network_params.network.is_test_network () ? std::chrono::seconds (5) : std::chrono::seconds (15) };
/** Default maximum idle time for a socket before it's automatically closed */
std::chrono::seconds tcp_idle_timeout{ std::chrono::minutes (2) };
std::chrono::nanoseconds pow_sleep_interval{ 0 };
/** Default maximum incoming TCP connections, including realtime network & bootstrap */
unsigned tcp_incoming_connections_max{ 1024 };

View file

@ -3,18 +3,18 @@
#include <limits>
nano::socket::socket (std::shared_ptr<nano::node> node_a, boost::optional<std::chrono::seconds> max_idle_time_a, nano::socket::concurrency concurrency_a) :
nano::socket::socket (std::shared_ptr<nano::node> node_a, boost::optional<std::chrono::seconds> io_timeout_a, nano::socket::concurrency concurrency_a) :
strand (node_a->io_ctx.get_executor ()),
tcp_socket (node_a->io_ctx),
node (node_a),
writer_concurrency (concurrency_a),
next_deadline (std::numeric_limits<uint64_t>::max ()),
last_completion_time (0),
max_idle_time (max_idle_time_a)
io_timeout (io_timeout_a)
{
if (!max_idle_time)
if (!io_timeout)
{
max_idle_time = node_a->config.tcp_idle_timeout;
io_timeout = node_a->config.tcp_io_timeout;
}
}
@ -125,6 +125,11 @@ void nano::socket::write_queued_messages ()
{
this_l->write_queued_messages ();
}
else if (this_l->send_queue.empty ())
{
// Idle TCP realtime client socket after writes
this_l->start_timer (node->network_params.node.idle_timeout);
}
}
}
}
@ -136,7 +141,7 @@ void nano::socket::start_timer ()
{
if (auto node_l = node.lock ())
{
start_timer (node_l->config.tcp_io_timeout);
start_timer (io_timeout.get ());
}
}
@ -185,11 +190,11 @@ bool nano::socket::has_timed_out () const
return timed_out;
}
void nano::socket::set_max_idle_timeout (std::chrono::seconds max_idle_time_a)
void nano::socket::set_timeout (std::chrono::seconds io_timeout_a)
{
auto this_l (shared_from_this ());
boost::asio::dispatch (strand, boost::asio::bind_executor (strand, [this_l, max_idle_time_a]() {
this_l->max_idle_time = max_idle_time_a;
boost::asio::dispatch (strand, boost::asio::bind_executor (strand, [this_l, io_timeout_a]() {
this_l->io_timeout = io_timeout_a;
}));
}
@ -207,7 +212,7 @@ void nano::socket::close_internal ()
if (!closed)
{
closed = true;
max_idle_time = boost::none;
io_timeout = boost::none;
boost::system::error_code ec;
// Ignore error code for shutdown as it is best-effort
@ -292,7 +297,7 @@ void nano::server_socket::on_connection (std::function<bool(std::shared_ptr<nano
// Make sure the new connection doesn't idle. Note that in most cases, the callback is going to start
// an IO operation immediately, which will start a timer.
new_connection->checkup ();
new_connection->start_timer (node_l->network_params.network.is_test_network () ? std::chrono::seconds (2) : node_l->config.tcp_idle_timeout);
new_connection->start_timer (node_l->network_params.network.is_test_network () ? std::chrono::seconds (2) : node_l->network_params.node.idle_timeout);
node_l->stats.inc (nano::stat::type::tcp, nano::stat::detail::tcp_accept_success, nano::stat::dir::in);
this_l->connections.push_back (new_connection);
this_l->evict_dead_connections ();

View file

@ -35,10 +35,10 @@ public:
/**
* Constructor
* @param node Owning node
* @param max_idle_time If no activity occurs within the idle time, the socket is closed. If not set, the tcp_idle_time config option is used.
* @param io_timeout If tcp async operation is not completed within the timeout, the socket is closed. If not set, the tcp_io_timeout config option is used.
* @param concurrency write concurrency
*/
explicit socket (std::shared_ptr<nano::node> node, boost::optional<std::chrono::seconds> max_idle_time = boost::none, concurrency = concurrency::single_writer);
explicit socket (std::shared_ptr<nano::node> node, boost::optional<std::chrono::seconds> io_timeout = boost::none, concurrency = concurrency::single_writer);
virtual ~socket ();
void async_connect (boost::asio::ip::tcp::endpoint const &, std::function<void(boost::system::error_code const &)>);
void async_read (std::shared_ptr<std::vector<uint8_t>>, size_t, std::function<void(boost::system::error_code const &, size_t)>);
@ -49,7 +49,8 @@ public:
/** Returns true if the socket has timed out */
bool has_timed_out () const;
/** This can be called to change the maximum idle time, e.g. based on the type of traffic detected. */
void set_max_idle_timeout (std::chrono::seconds max_idle_time_a);
void set_timeout (std::chrono::seconds io_timeout_a);
void start_timer (std::chrono::seconds deadline_a);
/** Change write concurrent */
void set_writer_concurrency (concurrency writer_concurrency_a);
@ -75,14 +76,13 @@ protected:
std::atomic<uint64_t> next_deadline;
std::atomic<uint64_t> last_completion_time;
std::atomic<bool> timed_out{ false };
boost::optional<std::chrono::seconds> max_idle_time;
boost::optional<std::chrono::seconds> io_timeout;
/** Set by close() - completion handlers must check this. This is more reliable than checking
error codes as the OS may have already completed the async operation. */
std::atomic<bool> closed{ false };
void close_internal ();
void write_queued_messages ();
void start_timer (std::chrono::seconds deadline_a);
void start_timer ();
void stop_timer ();
void checkup ();

View file

@ -465,7 +465,7 @@ void nano::transport::tcp_channels::ongoing_keepalive ()
channel->send (message);
}
std::weak_ptr<nano::node> node_w (node.shared ());
node.alarm.add (std::chrono::steady_clock::now () + node.network_params.node.period, [node_w]() {
node.alarm.add (std::chrono::steady_clock::now () + node.network_params.node.period / 2, [node_w]() {
if (auto node_l = node_w.lock ())
{
node_l->network.tcp_channels.ongoing_keepalive ();

View file

@ -103,6 +103,7 @@ nano::random_constants::random_constants ()
nano::node_constants::node_constants (nano::network_constants & network_constants)
{
period = network_constants.is_test_network () ? std::chrono::seconds (1) : std::chrono::seconds (60);
idle_timeout = network_constants.is_test_network () ? period * 15 : period * 2;
cutoff = period * 5;
syn_cookie_cutoff = std::chrono::seconds (5);
backup_interval = std::chrono::minutes (5);

View file

@ -348,6 +348,8 @@ class node_constants
public:
node_constants (nano::network_constants & network_constants);
std::chrono::seconds period;
/** Default maximum idle time for a socket before it's automatically closed */
std::chrono::seconds idle_timeout;
std::chrono::seconds cutoff;
std::chrono::seconds syn_cookie_cutoff;
std::chrono::minutes backup_interval;