diff --git a/nano/core_test/toml.cpp b/nano/core_test/toml.cpp index 19e4c9d3..a975e731 100644 --- a/nano/core_test/toml.cpp +++ b/nano/core_test/toml.cpp @@ -239,6 +239,15 @@ TEST (toml, daemon_config_deserialize_defaults) ASSERT_EQ (conf.node.stat_config.log_samples_filename, defaults.node.stat_config.log_samples_filename); ASSERT_EQ (conf.node.rocksdb_config.enable, defaults.node.rocksdb_config.enable); + ASSERT_EQ (conf.node.rocksdb_config.bloom_filter_bits, defaults.node.rocksdb_config.bloom_filter_bits); + ASSERT_EQ (conf.node.rocksdb_config.block_cache, defaults.node.rocksdb_config.block_cache); + ASSERT_EQ (conf.node.rocksdb_config.io_threads, defaults.node.rocksdb_config.io_threads); + ASSERT_EQ (conf.node.rocksdb_config.enable_pipelined_write, defaults.node.rocksdb_config.enable_pipelined_write); + ASSERT_EQ (conf.node.rocksdb_config.cache_index_and_filter_blocks, defaults.node.rocksdb_config.cache_index_and_filter_blocks); + ASSERT_EQ (conf.node.rocksdb_config.block_size, defaults.node.rocksdb_config.block_size); + ASSERT_EQ (conf.node.rocksdb_config.memtable_size, defaults.node.rocksdb_config.memtable_size); + ASSERT_EQ (conf.node.rocksdb_config.num_memtables, defaults.node.rocksdb_config.num_memtables); + ASSERT_EQ (conf.node.rocksdb_config.total_memtable_size, defaults.node.rocksdb_config.total_memtable_size); } TEST (toml, optional_child) @@ -473,6 +482,15 @@ TEST (toml, daemon_config_deserialize_no_defaults) [node.rocksdb] enable = true + bloom_filter_bits = 10 + block_cache = 512 + io_threads = 99 + enable_pipelined_write = true + cache_index_and_filter_blocks = true + block_size = 16 + memtable_size = 128 + num_memtables = 3 + total_memtable_size = 0 [opencl] device = 999 @@ -603,6 +621,15 @@ TEST (toml, daemon_config_deserialize_no_defaults) ASSERT_NE (conf.node.stat_config.log_samples_filename, defaults.node.stat_config.log_samples_filename); ASSERT_NE (conf.node.rocksdb_config.enable, defaults.node.rocksdb_config.enable); + ASSERT_NE 
(conf.node.rocksdb_config.bloom_filter_bits, defaults.node.rocksdb_config.bloom_filter_bits); + ASSERT_NE (conf.node.rocksdb_config.block_cache, defaults.node.rocksdb_config.block_cache); + ASSERT_NE (conf.node.rocksdb_config.io_threads, defaults.node.rocksdb_config.io_threads); + ASSERT_NE (conf.node.rocksdb_config.enable_pipelined_write, defaults.node.rocksdb_config.enable_pipelined_write); + ASSERT_NE (conf.node.rocksdb_config.cache_index_and_filter_blocks, defaults.node.rocksdb_config.cache_index_and_filter_blocks); + ASSERT_NE (conf.node.rocksdb_config.block_size, defaults.node.rocksdb_config.block_size); + ASSERT_NE (conf.node.rocksdb_config.memtable_size, defaults.node.rocksdb_config.memtable_size); + ASSERT_NE (conf.node.rocksdb_config.num_memtables, defaults.node.rocksdb_config.num_memtables); + ASSERT_NE (conf.node.rocksdb_config.total_memtable_size, defaults.node.rocksdb_config.total_memtable_size); } /** There should be no required values **/ diff --git a/nano/lib/rocksdbconfig.cpp b/nano/lib/rocksdbconfig.cpp index 34955cd9..4008f8af 100644 --- a/nano/lib/rocksdbconfig.cpp +++ b/nano/lib/rocksdbconfig.cpp @@ -4,11 +4,56 @@ nano::error nano::rocksdb_config::serialize_toml (nano::tomlconfig & toml) const { toml.put ("enable", enable, "Whether to use the RocksDB backend for the ledger database\ntype:bool"); + toml.put ("enable_pipelined_write", enable_pipelined_write, "Whether to use 2 separate write queues for memtable/WAL, true is recommended.\ntype:bool"); + toml.put ("cache_index_and_filter_blocks", cache_index_and_filter_blocks, "Whether index and filter blocks are stored in block_cache, true is recommended.\ntype:bool"); + toml.put ("bloom_filter_bits", bloom_filter_bits, "Number of bits to use with a bloom filter. Helps with point reads but uses more memory. 
0 disables the bloom filter, 10 is recommended\ntype:uint32"); + toml.put ("block_cache", block_cache, "Size (MB) of the block cache; A larger number will increase performance of read operations. At least 512MB is recommended.\ntype:uint64"); + toml.put ("io_threads", io_threads, "Number of threads to use with the background compaction and flushing. Number of hardware threads is recommended.\ntype:uint32"); + toml.put ("block_size", block_size, "Uncompressed data (KBs) per block. Increasing block size decreases memory usage and space amplification, but increases read amplification. 16 is recommended.\ntype:uint32"); + toml.put ("num_memtables", num_memtables, "Number of memtables to keep in memory per column family. 2 is the minimum, 3 is recommended.\ntype:uint32"); + toml.put ("memtable_size", memtable_size, "Amount of memory (MB) to build up before flushing to disk for an individual column family. Large values increase performance. 64 or 128 is recommended\ntype:uint32"); + toml.put ("total_memtable_size", total_memtable_size, "Total memory (MB) which can be used across all memtables, set to 0 for unconstrained.\ntype:uint32"); return toml.get_error (); } nano::error nano::rocksdb_config::deserialize_toml (nano::tomlconfig & toml) { toml.get_optional ("enable", enable); + toml.get_optional ("enable_pipelined_write", enable_pipelined_write); + toml.get_optional ("cache_index_and_filter_blocks", cache_index_and_filter_blocks); + toml.get_optional ("bloom_filter_bits", bloom_filter_bits); + toml.get_optional ("block_cache", block_cache); + toml.get_optional ("io_threads", io_threads); + toml.get_optional ("block_size", block_size); + toml.get_optional ("num_memtables", num_memtables); + toml.get_optional ("memtable_size", memtable_size); + toml.get_optional ("total_memtable_size", total_memtable_size); + + // Validate ranges + if (bloom_filter_bits > 100) + { + toml.get_error ().set ("bloom_filter_bits is too high"); + } + if (num_memtables < 2) + { + 
toml.get_error ().set ("num_memtables must be at least 2"); + } + if (memtable_size == 0) + { + toml.get_error ().set ("memtable_size must be non-zero"); + } + if ((total_memtable_size < memtable_size * 8) && (total_memtable_size != 0)) + { + toml.get_error ().set ("total_memtable_size should be at least 8 times greater than memtable_size or be set to 0"); + } + if (io_threads == 0) + { + toml.get_error ().set ("io_threads must be non-zero"); + } + if (block_size == 0) + { + toml.get_error ().set ("block_size must be non-zero"); + } + return toml.get_error (); } diff --git a/nano/lib/rocksdbconfig.hpp b/nano/lib/rocksdbconfig.hpp index a02ff749..e47873b3 100644 --- a/nano/lib/rocksdbconfig.hpp +++ b/nano/lib/rocksdbconfig.hpp @@ -2,6 +2,8 @@ #include +#include + namespace nano { class tomlconfig; @@ -14,5 +16,14 @@ public: nano::error deserialize_toml (nano::tomlconfig & toml_a); bool enable{ false }; + unsigned bloom_filter_bits{ 0 }; + uint64_t block_cache{ 64 }; // MB + unsigned io_threads{ std::thread::hardware_concurrency () }; + bool enable_pipelined_write{ false }; + bool cache_index_and_filter_blocks{ false }; + unsigned block_size{ 4 }; // KB + unsigned memtable_size{ 32 }; // MB + unsigned num_memtables{ 2 }; // Need a minimum of 2 + unsigned total_memtable_size{ 512 }; // MB }; } diff --git a/nano/node/node.cpp b/nano/node/node.cpp index cf952c59..82749df3 100644 --- a/nano/node/node.cpp +++ b/nano/node/node.cpp @@ -123,7 +123,7 @@ alarm (alarm_a), work (work_a), distributed_work (*this), logger (config_a.logging.min_time_between_log_output), -store_impl (nano::make_store (logger, application_path_a, flags.read_only, true, config_a.diagnostics_config.txn_tracking, config_a.block_processor_batch_max_time, config_a.lmdb_max_dbs, flags.sideband_batch_size, config_a.backup_before_upgrade, config_a.rocksdb_config.enable)), +store_impl (nano::make_store (logger, application_path_a, flags.read_only, true, config_a.rocksdb_config, 
config_a.diagnostics_config.txn_tracking, config_a.block_processor_batch_max_time, config_a.lmdb_max_dbs, flags.sideband_batch_size, config_a.backup_before_upgrade, config_a.rocksdb_config.enable)), store (*store_impl), wallets_store_impl (std::make_unique (application_path_a / "wallets.ldb", config_a.lmdb_max_dbs)), wallets_store (*wallets_store_impl), @@ -1356,11 +1356,11 @@ nano::node_flags const & nano::inactive_node_flag_defaults () return node_flags; } -std::unique_ptr nano::make_store (nano::logger_mt & logger, boost::filesystem::path const & path, bool read_only, bool add_db_postfix, nano::txn_tracking_config const & txn_tracking_config_a, std::chrono::milliseconds block_processor_batch_max_time_a, int lmdb_max_dbs, size_t batch_size, bool backup_before_upgrade, bool use_rocksdb_backend) +std::unique_ptr nano::make_store (nano::logger_mt & logger, boost::filesystem::path const & path, bool read_only, bool add_db_postfix, nano::rocksdb_config const & rocksdb_config, nano::txn_tracking_config const & txn_tracking_config_a, std::chrono::milliseconds block_processor_batch_max_time_a, int lmdb_max_dbs, size_t batch_size, bool backup_before_upgrade, bool use_rocksdb_backend) { #if NANO_ROCKSDB - auto make_rocksdb = [&logger, add_db_postfix, &path, read_only]() { - return std::make_unique (logger, add_db_postfix ? path / "rocksdb" : path, read_only); + auto make_rocksdb = [&logger, add_db_postfix, &path, &rocksdb_config, read_only]() { + return std::make_unique (logger, add_db_postfix ? 
path / "rocksdb" : path, rocksdb_config, read_only); }; #endif diff --git a/nano/node/rocksdb/rocksdb.cpp b/nano/node/rocksdb/rocksdb.cpp index 235ad015..c600184c 100644 --- a/nano/node/rocksdb/rocksdb.cpp +++ b/nano/node/rocksdb/rocksdb.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -40,8 +41,9 @@ void rocksdb_val::convert_buffer_to_value () } } -nano::rocksdb_store::rocksdb_store (nano::logger_mt & logger_a, boost::filesystem::path const & path_a, bool open_read_only_a) : -logger (logger_a) +nano::rocksdb_store::rocksdb_store (nano::logger_mt & logger_a, boost::filesystem::path const & path_a, nano::rocksdb_config const & rocksdb_config_a, bool open_read_only_a) : +logger (logger_a), +rocksdb_config (rocksdb_config_a) { boost::system::error_code error_mkdir, error_chmod; boost::filesystem::create_directories (path_a, error_mkdir); @@ -466,38 +468,40 @@ rocksdb::Options nano::rocksdb_store::get_db_options () const // Start agressively flushing WAL files when they reach over 1GB db_options.max_total_wal_size = 1 * 1024 * 1024 * 1024LL; - if (!low_end_system ()) - { - // Adds a separate write queue for memtable/WAL - db_options.enable_pipelined_write = true; + // Optimize RocksDB. This is the easiest way to get RocksDB to perform well + db_options.IncreaseParallelism (rocksdb_config.io_threads); + db_options.OptimizeLevelStyleCompaction (); + + // Adds a separate write queue for memtable/WAL + db_options.enable_pipelined_write = rocksdb_config.enable_pipelined_write; + + // Total size of memtables across column families (config value is in MB, RocksDB expects bytes; 0 remains "unconstrained"). + db_options.db_write_buffer_size = rocksdb_config.total_memtable_size * 1024 * 1024ULL; - // Optimize RocksDB. 
This is the easiest way to get RocksDB to perform well - db_options.IncreaseParallelism (std::thread::hardware_concurrency ()); - db_options.OptimizeLevelStyleCompaction (); - } return db_options; } -/** As options are currently hardcoded, a heuristic is taken based off the number of cores to modify config options */ -bool nano::rocksdb_store::low_end_system () const -{ - return (std::thread::hardware_concurrency () < 2); -} - rocksdb::BlockBasedTableOptions nano::rocksdb_store::get_table_options () const { rocksdb::BlockBasedTableOptions table_options; - if (!low_end_system ()) - { - // 512MB block cache - table_options.block_cache = rocksdb::NewLRUCache (512 * 1024 * 1024LL); - // Bloom filter to help with point reads - table_options.filter_policy.reset (rocksdb::NewBloomFilterPolicy (10, false)); - table_options.block_size = 16 * 1024; - table_options.cache_index_and_filter_blocks = true; - table_options.pin_l0_filter_and_index_blocks_in_cache = true; + // Block cache for reads + table_options.block_cache = rocksdb::NewLRUCache (rocksdb_config.block_cache * 1024 * 1024ULL); + + // Bloom filter to help with point reads + auto bloom_filter_bits = rocksdb_config.bloom_filter_bits; + if (bloom_filter_bits > 0) + { + table_options.filter_policy.reset (rocksdb::NewBloomFilterPolicy (bloom_filter_bits, false)); } + + // Increasing block_size decreases memory usage and space amplification, but increases read amplification. + table_options.block_size = rocksdb_config.block_size * 1024ULL; + + // Whether index and filter blocks are stored in block_cache. 
These settings should be synced + table_options.cache_index_and_filter_blocks = rocksdb_config.cache_index_and_filter_blocks; + table_options.pin_l0_filter_and_index_blocks_in_cache = rocksdb_config.cache_index_and_filter_blocks; + return table_options; } @@ -506,31 +510,29 @@ rocksdb::ColumnFamilyOptions nano::rocksdb_store::get_cf_options () const rocksdb::ColumnFamilyOptions cf_options; cf_options.table_factory = table_factory; - if (!low_end_system ()) - { - cf_options.level_compaction_dynamic_level_bytes = true; + // Number of files in level which triggers compaction. Size of L0 and L1 should be kept similar as this is the only compaction which is single threaded + cf_options.level0_file_num_compaction_trigger = 4; - // Number of files in level which triggers compaction. Size of L0 and L1 should be kept similar as this is the only compaction which is single threaded - cf_options.level0_file_num_compaction_trigger = 4; + // L1 size, compaction is triggered for L0 at this size (4 SST files in L1) + cf_options.max_bytes_for_level_base = 1024ULL * 1024 * 4 * rocksdb_config.memtable_size; - // L1 size, compaction is triggered for L0 at this size (512MB) - cf_options.max_bytes_for_level_base = 512 * 1024 * 1024LL; + // Each level is a multiple of the above. If L1 is 512MB. L2 will be 512 * 8 = 2GB. L3 will be 2GB * 8 = 16GB, and so on... + cf_options.max_bytes_for_level_multiplier = 8; - // Each level is a multiple of the above. L1 will be 512MB. Le will be 512 * 8 = 2GB. L3 will be 2GB * 8 = 16GB, and so on... - cf_options.max_bytes_for_level_multiplier = 8; + // Files older than this (1 day) will be scheduled for compaction when there is no other background work. This can lead to more writes however. 
+ cf_options.ttl = 1 * 24 * 60 * 60; - // Size of level 1 sst files (128MB) - cf_options.target_file_size_base = 128 * 1024 * 1024LL; + // Size of level 1 sst files + cf_options.target_file_size_base = 1024ULL * 1024 * rocksdb_config.memtable_size; - // Size of each memtable (128MB) - cf_options.write_buffer_size = 128 * 1024 * 1024LL; + // Size of each memtable + cf_options.write_buffer_size = 1024ULL * 1024 * rocksdb_config.memtable_size; - // Number of memtables to keep in memory (1 active, rest inactive/immutable) - cf_options.max_write_buffer_number = 3; + // Size target of levels are changed dynamically based on size of the last level + cf_options.level_compaction_dynamic_level_bytes = true; - // Files older than this (1 day) will be scheduled for compaction when there is no other background work. This can lead to more writes however. - cf_options.ttl = 1 * 24 * 60 * 60; - } + // Number of memtables to keep in memory (1 active, rest inactive/immutable) + cf_options.max_write_buffer_number = rocksdb_config.num_memtables; return cf_options; } @@ -602,7 +604,7 @@ bool nano::rocksdb_store::copy_db (boost::filesystem::path const & destination_p // Open it so that it flushes all WAL files if (status.ok ()) { - nano::rocksdb_store rocksdb_store (logger, destination_path.string (), false); + nano::rocksdb_store rocksdb_store (logger, destination_path.string (), rocksdb_config, false); return !rocksdb_store.init_error (); } return false; diff --git a/nano/node/rocksdb/rocksdb.hpp b/nano/node/rocksdb/rocksdb.hpp index 9086bbf2..8d414647 100644 --- a/nano/node/rocksdb/rocksdb.hpp +++ b/nano/node/rocksdb/rocksdb.hpp @@ -18,13 +18,14 @@ namespace nano { class logging_mt; +class rocksdb_config; /** * rocksdb implementation of the block store */ class rocksdb_store : public block_store_partial { public: - rocksdb_store (nano::logger_mt &, boost::filesystem::path const &, bool open_read_only = false); + rocksdb_store (nano::logger_mt &, boost::filesystem::path const &, 
nano::rocksdb_config const & = nano::rocksdb_config{}, bool open_read_only = false); ~rocksdb_store (); nano::write_transaction tx_begin_write (std::vector const & tables_requiring_lock = {}, std::vector const & tables_no_lock = {}) override; nano::read_transaction tx_begin_read () override; @@ -98,10 +99,10 @@ private: int increment (nano::write_transaction const & transaction_a, tables table_a, nano::rocksdb_val const & key_a, uint64_t amount_a); int decrement (nano::write_transaction const & transaction_a, tables table_a, nano::rocksdb_val const & key_a, uint64_t amount_a); - bool low_end_system () const; rocksdb::ColumnFamilyOptions get_cf_options () const; void construct_column_family_mutexes (); rocksdb::Options get_db_options () const; rocksdb::BlockBasedTableOptions get_table_options () const; + nano::rocksdb_config rocksdb_config; }; } diff --git a/nano/secure/blockstore.hpp b/nano/secure/blockstore.hpp index e4cad372..718efa1b 100644 --- a/nano/secure/blockstore.hpp +++ b/nano/secure/blockstore.hpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -729,7 +730,7 @@ public: virtual nano::read_transaction tx_begin_read () = 0; }; -std::unique_ptr make_store (nano::logger_mt & logger, boost::filesystem::path const & path, bool open_read_only = false, bool add_db_postfix = false, nano::txn_tracking_config const & txn_tracking_config_a = nano::txn_tracking_config{}, std::chrono::milliseconds block_processor_batch_max_time_a = std::chrono::milliseconds (5000), int lmdb_max_dbs = 128, size_t batch_size = 512, bool backup_before_upgrade = false, bool rocksdb_backend = false); +std::unique_ptr make_store (nano::logger_mt & logger, boost::filesystem::path const & path, bool open_read_only = false, bool add_db_postfix = false, nano::rocksdb_config const & rocksdb_config = nano::rocksdb_config{}, nano::txn_tracking_config const & txn_tracking_config_a = nano::txn_tracking_config{}, std::chrono::milliseconds block_processor_batch_max_time_a = 
std::chrono::milliseconds (5000), int lmdb_max_dbs = 128, size_t batch_size = 512, bool backup_before_upgrade = false, bool rocksdb_backend = false); } namespace std