Add some RocksDB TOML config options to tune memory usage (#2316)
* Add some RocksDB config options to control memory usage
* Fix some comments
This commit is contained in:
parent
b50c9d74f3
commit
58cacfe886
7 changed files with 138 additions and 51 deletions
|
@ -239,6 +239,15 @@ TEST (toml, daemon_config_deserialize_defaults)
|
|||
ASSERT_EQ (conf.node.stat_config.log_samples_filename, defaults.node.stat_config.log_samples_filename);
|
||||
|
||||
ASSERT_EQ (conf.node.rocksdb_config.enable, defaults.node.rocksdb_config.enable);
|
||||
ASSERT_EQ (conf.node.rocksdb_config.bloom_filter_bits, defaults.node.rocksdb_config.bloom_filter_bits);
|
||||
ASSERT_EQ (conf.node.rocksdb_config.block_cache, defaults.node.rocksdb_config.block_cache);
|
||||
ASSERT_EQ (conf.node.rocksdb_config.io_threads, defaults.node.rocksdb_config.io_threads);
|
||||
ASSERT_EQ (conf.node.rocksdb_config.enable_pipelined_write, defaults.node.rocksdb_config.enable_pipelined_write);
|
||||
ASSERT_EQ (conf.node.rocksdb_config.cache_index_and_filter_blocks, defaults.node.rocksdb_config.cache_index_and_filter_blocks);
|
||||
ASSERT_EQ (conf.node.rocksdb_config.block_size, defaults.node.rocksdb_config.block_size);
|
||||
ASSERT_EQ (conf.node.rocksdb_config.memtable_size, defaults.node.rocksdb_config.memtable_size);
|
||||
ASSERT_EQ (conf.node.rocksdb_config.num_memtables, defaults.node.rocksdb_config.num_memtables);
|
||||
ASSERT_EQ (conf.node.rocksdb_config.total_memtable_size, defaults.node.rocksdb_config.total_memtable_size);
|
||||
}
|
||||
|
||||
TEST (toml, optional_child)
|
||||
|
@ -473,6 +482,15 @@ TEST (toml, daemon_config_deserialize_no_defaults)
|
|||
|
||||
[node.rocksdb]
|
||||
enable = true
|
||||
bloom_filter_bits = 10
|
||||
block_cache = 512
|
||||
io_threads = 99
|
||||
enable_pipelined_write = true
|
||||
cache_index_and_filter_blocks = true
|
||||
block_size = 16
|
||||
memtable_size = 128
|
||||
num_memtables = 3
|
||||
total_memtable_size = 0
|
||||
|
||||
[opencl]
|
||||
device = 999
|
||||
|
@ -603,6 +621,15 @@ TEST (toml, daemon_config_deserialize_no_defaults)
|
|||
ASSERT_NE (conf.node.stat_config.log_samples_filename, defaults.node.stat_config.log_samples_filename);
|
||||
|
||||
ASSERT_NE (conf.node.rocksdb_config.enable, defaults.node.rocksdb_config.enable);
|
||||
ASSERT_NE (conf.node.rocksdb_config.bloom_filter_bits, defaults.node.rocksdb_config.bloom_filter_bits);
|
||||
ASSERT_NE (conf.node.rocksdb_config.block_cache, defaults.node.rocksdb_config.block_cache);
|
||||
ASSERT_NE (conf.node.rocksdb_config.io_threads, defaults.node.rocksdb_config.io_threads);
|
||||
ASSERT_NE (conf.node.rocksdb_config.enable_pipelined_write, defaults.node.rocksdb_config.enable_pipelined_write);
|
||||
ASSERT_NE (conf.node.rocksdb_config.cache_index_and_filter_blocks, defaults.node.rocksdb_config.cache_index_and_filter_blocks);
|
||||
ASSERT_NE (conf.node.rocksdb_config.block_size, defaults.node.rocksdb_config.block_size);
|
||||
ASSERT_NE (conf.node.rocksdb_config.memtable_size, defaults.node.rocksdb_config.memtable_size);
|
||||
ASSERT_NE (conf.node.rocksdb_config.num_memtables, defaults.node.rocksdb_config.num_memtables);
|
||||
ASSERT_NE (conf.node.rocksdb_config.total_memtable_size, defaults.node.rocksdb_config.total_memtable_size);
|
||||
}
|
||||
|
||||
/** There should be no required values **/
|
||||
|
|
|
@ -4,11 +4,56 @@
|
|||
/**
 * Persist every RocksDB tuning option to @p toml, each with its inline help
 * text (description plus "type:" hint) used when generating config files.
 * @return any error accumulated on the tomlconfig.
 */
nano::error nano::rocksdb_config::serialize_toml (nano::tomlconfig & toml) const
{
	// Backend toggle
	toml.put ("enable", enable, "Whether to use the RocksDB backend for the ledger database\ntype:bool");
	// Write-path behaviour
	toml.put ("enable_pipelined_write", enable_pipelined_write, "Whether to use 2 separate write queues for memtable/WAL, true is recommended.\ntype:bool");
	// Read-path / cache tuning
	toml.put ("cache_index_and_filter_blocks", cache_index_and_filter_blocks, "Whether index and filter blocks are stored in block_cache, true is recommended.\ntype:bool");
	toml.put ("bloom_filter_bits", bloom_filter_bits, "Number of bits to use with a bloom filter. Helps with point reads but uses more memory. 0 disables the bloom filter, 10 is recommended\ntype:uint32");
	toml.put ("block_cache", block_cache, "Size (MB) of the block cache; A larger number will increase performance of read operations. At least 512MB is recommended.\ntype:uint64");
	// Background work
	toml.put ("io_threads", io_threads, "Number of threads to use with the background compaction and flushing. Number of hardware threads is recommended.\ntype:uint32");
	toml.put ("block_size", block_size, "Uncompressed data (KBs) per block. Increasing block size decreases memory usage and space amplification, but increases read amplification. 16 is recommended.\ntype:uint32");
	// Memtable sizing
	toml.put ("num_memtables", num_memtables, "Number of memtables to keep in memory per column family. 2 is the minimum, 3 is recommended.\ntype:uint32");
	toml.put ("memtable_size", memtable_size, "Amount of memory (MB) to build up before flushing to disk for an individual column family. Large values increase performance. 64 or 128 is recommended\ntype:uint32");
	toml.put ("total_memtable_size", total_memtable_size, "Total memory (MB) which can be used across all memtables, set to 0 for unconstrained.\ntype:uint32");
	return toml.get_error ();
}
|
||||
|
||||
/**
 * Read RocksDB tuning options from @p toml, then validate ranges.
 * Values absent from the TOML keep their current (default) values, so
 * validation below runs on the merged result. Validation failures are
 * recorded on the tomlconfig's error, which is returned.
 */
nano::error nano::rocksdb_config::deserialize_toml (nano::tomlconfig & toml)
{
	toml.get_optional<bool> ("enable", enable);
	toml.get_optional<bool> ("enable_pipelined_write", enable_pipelined_write);
	toml.get_optional<bool> ("cache_index_and_filter_blocks", cache_index_and_filter_blocks);
	toml.get_optional<unsigned> ("bloom_filter_bits", bloom_filter_bits);
	toml.get_optional<uint64_t> ("block_cache", block_cache);
	toml.get_optional<unsigned> ("io_threads", io_threads);
	toml.get_optional<unsigned> ("block_size", block_size);
	toml.get_optional<unsigned> ("num_memtables", num_memtables);
	toml.get_optional<unsigned> ("memtable_size", memtable_size);
	toml.get_optional<unsigned> ("total_memtable_size", total_memtable_size);

	// Validate ranges
	if (bloom_filter_bits > 100)
	{
		toml.get_error ().set ("bloom_filter_bits is too high");
	}
	if (num_memtables < 2)
	{
		toml.get_error ().set ("num_memtables must be at least 2");
	}
	if (memtable_size == 0)
	{
		toml.get_error ().set ("memtable_size must be non-zero");
	}
	// Widen to 64-bit before multiplying so a very large memtable_size cannot
	// overflow unsigned arithmetic and silently skip this check. 0 means unconstrained.
	if ((total_memtable_size != 0) && (static_cast<uint64_t> (total_memtable_size) < static_cast<uint64_t> (memtable_size) * 8))
	{
		toml.get_error ().set ("total_memtable_size should be at least 8 times greater than memtable_size or be set to 0");
	}
	if (io_threads == 0)
	{
		toml.get_error ().set ("io_threads must be non-zero");
	}
	if (block_size == 0)
	{
		toml.get_error ().set ("block_size must be non-zero");
	}

	return toml.get_error ();
}
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
#include <nano/lib/errors.hpp>
|
||||
|
||||
#include <thread>
|
||||
|
||||
namespace nano
|
||||
{
|
||||
class tomlconfig;
|
||||
|
@ -14,5 +16,14 @@ public:
|
|||
nano::error deserialize_toml (nano::tomlconfig & toml_a);
|
||||
|
||||
bool enable{ false };
|
||||
unsigned bloom_filter_bits{ 0 };
|
||||
uint64_t block_cache{ 64 }; // MB
|
||||
unsigned io_threads{ std::thread::hardware_concurrency () };
|
||||
bool enable_pipelined_write{ false };
|
||||
bool cache_index_and_filter_blocks{ false };
|
||||
unsigned block_size{ 4 }; // KB
|
||||
unsigned memtable_size{ 32 }; // MB
|
||||
unsigned num_memtables{ 2 }; // Need a minimum of 2
|
||||
unsigned total_memtable_size{ 512 }; // MB
|
||||
};
|
||||
}
|
||||
|
|
|
@ -123,7 +123,7 @@ alarm (alarm_a),
|
|||
work (work_a),
|
||||
distributed_work (*this),
|
||||
logger (config_a.logging.min_time_between_log_output),
|
||||
store_impl (nano::make_store (logger, application_path_a, flags.read_only, true, config_a.diagnostics_config.txn_tracking, config_a.block_processor_batch_max_time, config_a.lmdb_max_dbs, flags.sideband_batch_size, config_a.backup_before_upgrade, config_a.rocksdb_config.enable)),
|
||||
store_impl (nano::make_store (logger, application_path_a, flags.read_only, true, config_a.rocksdb_config, config_a.diagnostics_config.txn_tracking, config_a.block_processor_batch_max_time, config_a.lmdb_max_dbs, flags.sideband_batch_size, config_a.backup_before_upgrade, config_a.rocksdb_config.enable)),
|
||||
store (*store_impl),
|
||||
wallets_store_impl (std::make_unique<nano::mdb_wallets_store> (application_path_a / "wallets.ldb", config_a.lmdb_max_dbs)),
|
||||
wallets_store (*wallets_store_impl),
|
||||
|
@ -1356,11 +1356,11 @@ nano::node_flags const & nano::inactive_node_flag_defaults ()
|
|||
return node_flags;
|
||||
}
|
||||
|
||||
std::unique_ptr<nano::block_store> nano::make_store (nano::logger_mt & logger, boost::filesystem::path const & path, bool read_only, bool add_db_postfix, nano::txn_tracking_config const & txn_tracking_config_a, std::chrono::milliseconds block_processor_batch_max_time_a, int lmdb_max_dbs, size_t batch_size, bool backup_before_upgrade, bool use_rocksdb_backend)
|
||||
std::unique_ptr<nano::block_store> nano::make_store (nano::logger_mt & logger, boost::filesystem::path const & path, bool read_only, bool add_db_postfix, nano::rocksdb_config const & rocksdb_config, nano::txn_tracking_config const & txn_tracking_config_a, std::chrono::milliseconds block_processor_batch_max_time_a, int lmdb_max_dbs, size_t batch_size, bool backup_before_upgrade, bool use_rocksdb_backend)
|
||||
{
|
||||
#if NANO_ROCKSDB
|
||||
auto make_rocksdb = [&logger, add_db_postfix, &path, read_only]() {
|
||||
return std::make_unique<nano::rocksdb_store> (logger, add_db_postfix ? path / "rocksdb" : path, read_only);
|
||||
auto make_rocksdb = [&logger, add_db_postfix, &path, &rocksdb_config, read_only]() {
|
||||
return std::make_unique<nano::rocksdb_store> (logger, add_db_postfix ? path / "rocksdb" : path, rocksdb_config, read_only);
|
||||
};
|
||||
#endif
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include <nano/crypto_lib/random_pool.hpp>
|
||||
#include <nano/lib/rocksdbconfig.hpp>
|
||||
#include <nano/node/rocksdb/rocksdb.hpp>
|
||||
#include <nano/node/rocksdb/rocksdb_iterator.hpp>
|
||||
#include <nano/node/rocksdb/rocksdb_txn.hpp>
|
||||
|
@ -40,8 +41,9 @@ void rocksdb_val::convert_buffer_to_value ()
|
|||
}
|
||||
}
|
||||
|
||||
nano::rocksdb_store::rocksdb_store (nano::logger_mt & logger_a, boost::filesystem::path const & path_a, bool open_read_only_a) :
|
||||
logger (logger_a)
|
||||
nano::rocksdb_store::rocksdb_store (nano::logger_mt & logger_a, boost::filesystem::path const & path_a, nano::rocksdb_config const & rocksdb_config_a, bool open_read_only_a) :
|
||||
logger (logger_a),
|
||||
rocksdb_config (rocksdb_config_a)
|
||||
{
|
||||
boost::system::error_code error_mkdir, error_chmod;
|
||||
boost::filesystem::create_directories (path_a, error_mkdir);
|
||||
|
@ -466,38 +468,40 @@ rocksdb::Options nano::rocksdb_store::get_db_options () const
|
|||
// Start aggressively flushing WAL files when they reach over 1GB
|
||||
db_options.max_total_wal_size = 1 * 1024 * 1024 * 1024LL;
|
||||
|
||||
if (!low_end_system ())
|
||||
{
|
||||
// Adds a separate write queue for memtable/WAL
|
||||
db_options.enable_pipelined_write = true;
|
||||
// Optimize RocksDB. This is the easiest way to get RocksDB to perform well
|
||||
db_options.IncreaseParallelism (rocksdb_config.io_threads);
|
||||
db_options.OptimizeLevelStyleCompaction ();
|
||||
|
||||
// Adds a separate write queue for memtable/WAL
|
||||
db_options.enable_pipelined_write = rocksdb_config.enable_pipelined_write;
|
||||
|
||||
// Total size of memtables across column families. This can be used to manage the total memory used by memtables.
|
||||
db_options.db_write_buffer_size = rocksdb_config.total_memtable_size;
|
||||
|
||||
// Optimize RocksDB. This is the easiest way to get RocksDB to perform well
|
||||
db_options.IncreaseParallelism (std::thread::hardware_concurrency ());
|
||||
db_options.OptimizeLevelStyleCompaction ();
|
||||
}
|
||||
return db_options;
|
||||
}
|
||||
|
||||
/** A heuristic based on the number of hardware threads, used to decide whether the more aggressive tuning options should be applied */
|
||||
bool nano::rocksdb_store::low_end_system () const
|
||||
{
|
||||
return (std::thread::hardware_concurrency () < 2);
|
||||
}
|
||||
|
||||
rocksdb::BlockBasedTableOptions nano::rocksdb_store::get_table_options () const
|
||||
{
|
||||
rocksdb::BlockBasedTableOptions table_options;
|
||||
if (!low_end_system ())
|
||||
{
|
||||
// 512MB block cache
|
||||
table_options.block_cache = rocksdb::NewLRUCache (512 * 1024 * 1024LL);
|
||||
|
||||
// Bloom filter to help with point reads
|
||||
table_options.filter_policy.reset (rocksdb::NewBloomFilterPolicy (10, false));
|
||||
table_options.block_size = 16 * 1024;
|
||||
table_options.cache_index_and_filter_blocks = true;
|
||||
table_options.pin_l0_filter_and_index_blocks_in_cache = true;
|
||||
// Block cache for reads
|
||||
table_options.block_cache = rocksdb::NewLRUCache (rocksdb_config.block_cache * 1024 * 1024ULL);
|
||||
|
||||
// Bloom filter to help with point reads
|
||||
auto bloom_filter_bits = rocksdb_config.bloom_filter_bits;
|
||||
if (bloom_filter_bits > 0)
|
||||
{
|
||||
table_options.filter_policy.reset (rocksdb::NewBloomFilterPolicy (bloom_filter_bits, false));
|
||||
}
|
||||
|
||||
// Increasing block_size decreases memory usage and space amplification, but increases read amplification.
|
||||
table_options.block_size = rocksdb_config.block_size * 1024ULL;
|
||||
|
||||
// Whether index and filter blocks are stored in block_cache. These settings should be synced
|
||||
table_options.cache_index_and_filter_blocks = rocksdb_config.cache_index_and_filter_blocks;
|
||||
table_options.pin_l0_filter_and_index_blocks_in_cache = rocksdb_config.cache_index_and_filter_blocks;
|
||||
|
||||
return table_options;
|
||||
}
|
||||
|
||||
|
@ -506,31 +510,29 @@ rocksdb::ColumnFamilyOptions nano::rocksdb_store::get_cf_options () const
|
|||
rocksdb::ColumnFamilyOptions cf_options;
|
||||
cf_options.table_factory = table_factory;
|
||||
|
||||
if (!low_end_system ())
|
||||
{
|
||||
cf_options.level_compaction_dynamic_level_bytes = true;
|
||||
// Number of files in level which triggers compaction. Size of L0 and L1 should be kept similar as this is the only compaction which is single threaded
|
||||
cf_options.level0_file_num_compaction_trigger = 4;
|
||||
|
||||
// Number of files in level which triggers compaction. Size of L0 and L1 should be kept similar as this is the only compaction which is single threaded
|
||||
cf_options.level0_file_num_compaction_trigger = 4;
|
||||
// L1 size, compaction is triggered for L0 at this size (4 SST files in L1)
|
||||
cf_options.max_bytes_for_level_base = 1024ULL * 1024 * 4 * rocksdb_config.memtable_size;
|
||||
|
||||
// L1 size, compaction is triggered for L0 at this size (512MB)
|
||||
cf_options.max_bytes_for_level_base = 512 * 1024 * 1024LL;
|
||||
// Each level is a multiple of the above. If L1 is 512MB. L2 will be 512 * 8 = 2GB. L3 will be 2GB * 8 = 16GB, and so on...
|
||||
cf_options.max_bytes_for_level_multiplier = 8;
|
||||
|
||||
// Each level is a multiple of the above. L1 will be 512MB. L2 will be 512 * 8 = 2GB. L3 will be 2GB * 8 = 16GB, and so on...
|
||||
cf_options.max_bytes_for_level_multiplier = 8;
|
||||
// Files older than this (1 day) will be scheduled for compaction when there is no other background work. This can lead to more writes however.
|
||||
cf_options.ttl = 1 * 24 * 60 * 60;
|
||||
|
||||
// Size of level 1 sst files (128MB)
|
||||
cf_options.target_file_size_base = 128 * 1024 * 1024LL;
|
||||
// Size of level 1 sst files
|
||||
cf_options.target_file_size_base = 1024ULL * 1024 * rocksdb_config.memtable_size;
|
||||
|
||||
// Size of each memtable (128MB)
|
||||
cf_options.write_buffer_size = 128 * 1024 * 1024LL;
|
||||
// Size of each memtable
|
||||
cf_options.write_buffer_size = 1024ULL * 1024 * rocksdb_config.memtable_size;
|
||||
|
||||
// Number of memtables to keep in memory (1 active, rest inactive/immutable)
|
||||
cf_options.max_write_buffer_number = 3;
|
||||
// Size target of levels are changed dynamically based on size of the last level
|
||||
cf_options.level_compaction_dynamic_level_bytes = true;
|
||||
|
||||
// Files older than this (1 day) will be scheduled for compaction when there is no other background work. This can lead to more writes however.
|
||||
cf_options.ttl = 1 * 24 * 60 * 60;
|
||||
}
|
||||
// Number of memtables to keep in memory (1 active, rest inactive/immutable)
|
||||
cf_options.max_write_buffer_number = rocksdb_config.num_memtables;
|
||||
|
||||
return cf_options;
|
||||
}
|
||||
|
@ -602,7 +604,7 @@ bool nano::rocksdb_store::copy_db (boost::filesystem::path const & destination_p
|
|||
// Open it so that it flushes all WAL files
|
||||
if (status.ok ())
|
||||
{
|
||||
nano::rocksdb_store rocksdb_store (logger, destination_path.string (), false);
|
||||
nano::rocksdb_store rocksdb_store (logger, destination_path.string (), rocksdb_config, false);
|
||||
return !rocksdb_store.init_error ();
|
||||
}
|
||||
return false;
|
||||
|
|
|
@ -18,13 +18,14 @@
|
|||
namespace nano
|
||||
{
|
||||
class logging_mt;
|
||||
class rocksdb_config;
|
||||
/**
|
||||
* rocksdb implementation of the block store
|
||||
*/
|
||||
class rocksdb_store : public block_store_partial<rocksdb::Slice, rocksdb_store>
|
||||
{
|
||||
public:
|
||||
rocksdb_store (nano::logger_mt &, boost::filesystem::path const &, bool open_read_only = false);
|
||||
rocksdb_store (nano::logger_mt &, boost::filesystem::path const &, nano::rocksdb_config const & = nano::rocksdb_config{}, bool open_read_only = false);
|
||||
~rocksdb_store ();
|
||||
nano::write_transaction tx_begin_write (std::vector<nano::tables> const & tables_requiring_lock = {}, std::vector<nano::tables> const & tables_no_lock = {}) override;
|
||||
nano::read_transaction tx_begin_read () override;
|
||||
|
@ -98,10 +99,10 @@ private:
|
|||
|
||||
int increment (nano::write_transaction const & transaction_a, tables table_a, nano::rocksdb_val const & key_a, uint64_t amount_a);
|
||||
int decrement (nano::write_transaction const & transaction_a, tables table_a, nano::rocksdb_val const & key_a, uint64_t amount_a);
|
||||
bool low_end_system () const;
|
||||
rocksdb::ColumnFamilyOptions get_cf_options () const;
|
||||
void construct_column_family_mutexes ();
|
||||
rocksdb::Options get_db_options () const;
|
||||
rocksdb::BlockBasedTableOptions get_table_options () const;
|
||||
nano::rocksdb_config rocksdb_config;
|
||||
};
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include <nano/lib/diagnosticsconfig.hpp>
|
||||
#include <nano/lib/logger_mt.hpp>
|
||||
#include <nano/lib/memory.hpp>
|
||||
#include <nano/lib/rocksdbconfig.hpp>
|
||||
#include <nano/secure/common.hpp>
|
||||
#include <nano/secure/versioning.hpp>
|
||||
|
||||
|
@ -729,7 +730,7 @@ public:
|
|||
virtual nano::read_transaction tx_begin_read () = 0;
|
||||
};
|
||||
|
||||
std::unique_ptr<nano::block_store> make_store (nano::logger_mt & logger, boost::filesystem::path const & path, bool open_read_only = false, bool add_db_postfix = false, nano::txn_tracking_config const & txn_tracking_config_a = nano::txn_tracking_config{}, std::chrono::milliseconds block_processor_batch_max_time_a = std::chrono::milliseconds (5000), int lmdb_max_dbs = 128, size_t batch_size = 512, bool backup_before_upgrade = false, bool rocksdb_backend = false);
|
||||
std::unique_ptr<nano::block_store> make_store (nano::logger_mt & logger, boost::filesystem::path const & path, bool open_read_only = false, bool add_db_postfix = false, nano::rocksdb_config const & rocksdb_config = nano::rocksdb_config{}, nano::txn_tracking_config const & txn_tracking_config_a = nano::txn_tracking_config{}, std::chrono::milliseconds block_processor_batch_max_time_a = std::chrono::milliseconds (5000), int lmdb_max_dbs = 128, size_t batch_size = 512, bool backup_before_upgrade = false, bool rocksdb_backend = false);
|
||||
}
|
||||
|
||||
namespace std
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue