From a3324c6abc9a1b8846fc44f6f7b6dd3b780195be Mon Sep 17 00:00:00 2001 From: Wesley Shillingford Date: Mon, 27 Jul 2020 11:40:54 +0100 Subject: [PATCH] [RocksDB] Do not fill block cache from iterators (#2858) --- nano/node/rocksdb/rocksdb.cpp | 77 +++++++++++++------------- nano/node/rocksdb/rocksdb.hpp | 9 ++- nano/node/rocksdb/rocksdb_iterator.hpp | 48 ++++++---------- nano/node/rocksdb/rocksdb_txn.cpp | 10 +++- nano/secure/blockstore_partial.hpp | 2 +- 5 files changed, 71 insertions(+), 75 deletions(-) diff --git a/nano/node/rocksdb/rocksdb.cpp b/nano/node/rocksdb/rocksdb.cpp index 6f0ec6f02..9ece686b4 100644 --- a/nano/node/rocksdb/rocksdb.cpp +++ b/nano/node/rocksdb/rocksdb.cpp @@ -62,16 +62,6 @@ rocksdb_config (rocksdb_config_a) } } -nano::rocksdb_store::~rocksdb_store () -{ - for (auto handle : handles) - { - delete handle; - } - - delete db; -} - void nano::rocksdb_store::open (bool & error_a, boost::filesystem::path const & path_a, bool open_read_only_a) { std::initializer_list names{ rocksdb::kDefaultColumnFamilyName.c_str (), "frontiers", "accounts", "blocks", "pending", "representation", "unchecked", "vote", "online_weight", "meta", "peers", "cached_counts", "confirmation_height" }; @@ -84,19 +74,28 @@ void nano::rocksdb_store::open (bool & error_a, boost::filesystem::path const & auto options = get_db_options (); rocksdb::Status s; + std::vector handles_l; if (open_read_only_a) { - s = rocksdb::DB::OpenForReadOnly (options, path_a.string (), column_families, &handles, &db); + rocksdb::DB * db_l; + s = rocksdb::DB::OpenForReadOnly (options, path_a.string (), column_families, &handles_l, &db_l); + db.reset (db_l); } else { - s = rocksdb::OptimisticTransactionDB::Open (options, path_a.string (), column_families, &handles, &optimistic_db); + s = rocksdb::OptimisticTransactionDB::Open (options, path_a.string (), column_families, &handles_l, &optimistic_db); if (optimistic_db) { - db = optimistic_db; + db.reset (optimistic_db); } } + handles.resize (handles_l.size ()); + for (auto i = 0; i < handles_l.size (); ++i) + { + handles[i].reset (handles_l[i]); + } + // Assign handles to supplied error_a |= !s.ok (); @@ -134,7 +133,7 @@ nano::write_transaction nano::rocksdb_store::tx_begin_write (std::vector (db) }; + return nano::read_transaction{ std::make_unique (db.get ()) }; } std::string nano::rocksdb_store::vendor_get () const @@ -146,11 +145,11 @@ rocksdb::ColumnFamilyHandle * nano::rocksdb_store::table_to_column_family (table { auto & handles_l = handles; auto get_handle = [&handles_l](const char * name) { - auto iter = std::find_if (handles_l.begin (), handles_l.end (), [name](auto handle) { + auto iter = std::find_if (handles_l.begin (), handles_l.end (), [name](auto & handle) { return (handle->GetName () == name); }); debug_assert (iter != handles_l.end ()); - return *iter; + return (*iter).get (); }; switch (table_a) @@ -194,6 +193,7 @@ bool nano::rocksdb_store::exists (nano::transaction const & transaction_a, table else { rocksdb::ReadOptions options; + options.fill_cache = false; status = tx (transaction_a)->Get (options, table_to_column_family (table_a), key_a, &slice); } @@ -423,14 +423,15 @@ int nano::rocksdb_store::clear (rocksdb::ColumnFamilyHandle * column_family) auto name = column_family->GetName (); auto status = db->DropColumnFamily (column_family); release_assert (status.ok ()); - delete column_family; // Need to add it back as we just want to clear the contents - auto handle_it = std::find (handles.begin (), handles.end (), column_family); + auto handle_it = std::find_if (handles.begin (), handles.end (), [column_family](auto & handle) { + return handle.get () == column_family; + }); debug_assert (handle_it != handles.cend ()); status = db->CreateColumnFamily (get_cf_options (), name, &column_family); release_assert (status.ok ()); - *handle_it = column_family; + handle_it->reset (column_family); return status.code (); } @@ -540,7 +541,7 @@ bool nano::rocksdb_store::copy_db (boost::filesystem::path const & destination_p } } - auto status = backup_engine->CreateNewBackup (db); + auto status = backup_engine->CreateNewBackup (db.get ()); if (!status.ok ()) { return false; @@ -558,27 +559,29 @@ bool nano::rocksdb_store::copy_db (boost::filesystem::path const & destination_p } } - rocksdb::BackupEngineReadOnly * backup_engine_read; - status = rocksdb::BackupEngineReadOnly::Open (rocksdb::Env::Default (), rocksdb::BackupableDBOptions (destination_path.string ()), &backup_engine_read); - if (!status.ok ()) { - delete backup_engine_read; - return false; - } - - // First remove all files (not directories) in the destination - for (boost::filesystem::directory_iterator end_dir_it, it (destination_path); it != end_dir_it; ++it) - { - auto path = it->path (); - if (boost::filesystem::is_regular_file (path)) + std::unique_ptr backup_engine_read; { - boost::filesystem::remove (it->path ()); + rocksdb::BackupEngineReadOnly * backup_engine_read_raw; + status = rocksdb::BackupEngineReadOnly::Open (rocksdb::Env::Default (), rocksdb::BackupableDBOptions (destination_path.string ()), &backup_engine_read_raw); + } + if (!status.ok ()) + { + return false; } - } - // Now generate the relevant files from the backup - status = backup_engine->RestoreDBFromLatestBackup (destination_path.string (), destination_path.string ()); - delete backup_engine_read; + // First remove all files (not directories) in the destination + for (auto const & path : boost::make_iterator_range (boost::filesystem::directory_iterator (destination_path))) + { + if (boost::filesystem::is_regular_file (path)) + { + boost::filesystem::remove (path); + } + } + + // Now generate the relevant files from the backup + status = backup_engine->RestoreDBFromLatestBackup (destination_path.string (), destination_path.string ()); + } // Open it so that it flushes all WAL files if (status.ok ()) diff --git a/nano/node/rocksdb/rocksdb.hpp b/nano/node/rocksdb/rocksdb.hpp index c901dfbaa..55da1d2d4 100644 --- a/nano/node/rocksdb/rocksdb.hpp +++ b/nano/node/rocksdb/rocksdb.hpp @@ -27,7 +27,6 @@ class rocksdb_store : public block_store_partial { public: rocksdb_store (nano::logger_mt &, boost::filesystem::path const &, nano::rocksdb_config const & = nano::rocksdb_config{}, bool open_read_only = false); - ~rocksdb_store (); nano::write_transaction tx_begin_write (std::vector const & tables_requiring_lock = {}, std::vector const & tables_no_lock = {}) override; nano::read_transaction tx_begin_read () override; @@ -52,13 +51,13 @@ public: template nano::store_iterator make_iterator (nano::transaction const & transaction_a, tables table_a) const { - return nano::store_iterator (std::make_unique> (db, transaction_a, table_to_column_family (table_a))); + return nano::store_iterator (std::make_unique> (db.get (), transaction_a, table_to_column_family (table_a))); } template nano::store_iterator make_iterator (nano::transaction const & transaction_a, tables table_a, nano::rocksdb_val const & key) const { - return nano::store_iterator (std::make_unique> (db, transaction_a, table_to_column_family (table_a), key)); + return nano::store_iterator (std::make_unique> (db.get (), transaction_a, table_to_column_family (table_a), &key)); } bool init_error () const override; @@ -66,10 +65,10 @@ public: private: bool error{ false }; nano::logger_mt & logger; - std::vector handles; // Optimistic transactions are used in write mode rocksdb::OptimisticTransactionDB * optimistic_db = nullptr; - rocksdb::DB * db = nullptr; + std::unique_ptr db; + std::vector> handles; std::shared_ptr table_factory; std::unordered_map write_lock_mutexes; diff --git a/nano/node/rocksdb/rocksdb_iterator.hpp b/nano/node/rocksdb/rocksdb_iterator.hpp index ff10f0217..fa4cb9b12 100644 --- a/nano/node/rocksdb/rocksdb_iterator.hpp +++ b/nano/node/rocksdb/rocksdb_iterator.hpp @@ -16,10 +16,10 @@ inline bool is_read (nano::transaction const & transaction_a) return (dynamic_cast (&transaction_a) != nullptr); } -inline rocksdb::ReadOptions const & snapshot_options (nano::transaction const & transaction_a) +inline rocksdb::ReadOptions & snapshot_options (nano::transaction const & transaction_a) { debug_assert (is_read (transaction_a)); - return *static_cast (transaction_a.get_handle ()); + return *static_cast (transaction_a.get_handle ()); } } @@ -31,50 +31,33 @@ template class rocksdb_iterator : public store_iterator_impl { public: - rocksdb_iterator (rocksdb::DB * db, nano::transaction const & transaction_a, rocksdb::ColumnFamilyHandle * handle_a) + rocksdb_iterator () = default; + + rocksdb_iterator (rocksdb::DB * db, nano::transaction const & transaction_a, rocksdb::ColumnFamilyHandle * handle_a, rocksdb_val const * val_a) { + // Don't fill the block cache for any blocks read as a result of an iterator rocksdb::Iterator * iter; if (is_read (transaction_a)) { - iter = db->NewIterator (snapshot_options (transaction_a), handle_a); + auto & read_options = snapshot_options (transaction_a); + read_options.fill_cache = false; + cursor.reset (db->NewIterator (read_options, handle_a)); } else { rocksdb::ReadOptions ropts; ropts.fill_cache = false; - iter = tx (transaction_a)->GetIterator (ropts, handle_a); + cursor.reset (tx (transaction_a)->GetIterator (ropts, handle_a)); } - cursor.reset (iter); - cursor->SeekToFirst (); - - if (cursor->Valid ()) + if (val_a) { - current.first.value = cursor->key (); - current.second.value = cursor->value (); + cursor->Seek (*val_a); } else { - clear (); + cursor->SeekToFirst (); } - } - - rocksdb_iterator () = default; - - rocksdb_iterator (rocksdb::DB * db, nano::transaction const & transaction_a, rocksdb::ColumnFamilyHandle * handle_a, rocksdb_val const & val_a) - { - rocksdb::Iterator * iter; - if (is_read (transaction_a)) - { - iter = db->NewIterator (snapshot_options (transaction_a), handle_a); - } - else - { - iter = tx (transaction_a)->GetIterator (rocksdb::ReadOptions (), handle_a); - } - - cursor.reset (iter); - cursor->Seek (val_a); if (cursor->Valid ()) { @@ -87,6 +70,11 @@ public: } } + rocksdb_iterator (rocksdb::DB * db, nano::transaction const & transaction_a, rocksdb::ColumnFamilyHandle * handle_a) : + rocksdb_iterator (db, transaction_a, handle_a, nullptr) + { + } + rocksdb_iterator (nano::rocksdb_iterator && other_a) { cursor = other_a.cursor; diff --git a/nano/node/rocksdb/rocksdb_txn.cpp b/nano/node/rocksdb/rocksdb_txn.cpp index 6e7abfd72..fc10cd542 100644 --- a/nano/node/rocksdb/rocksdb_txn.cpp +++ b/nano/node/rocksdb/rocksdb_txn.cpp @@ -3,7 +3,10 @@ nano::read_rocksdb_txn::read_rocksdb_txn (rocksdb::DB * db_a) : db (db_a) { - options.snapshot = db_a->GetSnapshot (); + if (db_a) + { + options.snapshot = db_a->GetSnapshot (); + } } nano::read_rocksdb_txn::~read_rocksdb_txn () @@ -13,7 +16,10 @@ nano::read_rocksdb_txn::~read_rocksdb_txn () void nano::read_rocksdb_txn::reset () { - db->ReleaseSnapshot (options.snapshot); + if (db) + { + db->ReleaseSnapshot (options.snapshot); + } } void nano::read_rocksdb_txn::renew () diff --git a/nano/secure/blockstore_partial.hpp b/nano/secure/blockstore_partial.hpp index 0f98bc013..422136e34 100644 --- a/nano/secure/blockstore_partial.hpp +++ b/nano/secure/blockstore_partial.hpp @@ -360,7 +360,7 @@ public: nano::db_val data; auto status = get (transaction_a, tables::meta, nano::db_val (version_key), data); int result (minimum_version); - if (!not_found (status)) + if (success (status)) { nano::uint256_union version_value (data); debug_assert (version_value.qwords[2] == 0 && version_value.qwords[1] == 0 && version_value.qwords[0] == 0);