[RocksDB] Do not fill block cache from iterators (#2858)

This commit is contained in:
Wesley Shillingford 2020-07-27 11:40:54 +01:00 committed by GitHub
commit a3324c6abc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 71 additions and 75 deletions

View file

@ -62,16 +62,6 @@ rocksdb_config (rocksdb_config_a)
} }
} }
nano::rocksdb_store::~rocksdb_store ()
{
for (auto handle : handles)
{
delete handle;
}
delete db;
}
void nano::rocksdb_store::open (bool & error_a, boost::filesystem::path const & path_a, bool open_read_only_a) void nano::rocksdb_store::open (bool & error_a, boost::filesystem::path const & path_a, bool open_read_only_a)
{ {
std::initializer_list<const char *> names{ rocksdb::kDefaultColumnFamilyName.c_str (), "frontiers", "accounts", "blocks", "pending", "representation", "unchecked", "vote", "online_weight", "meta", "peers", "cached_counts", "confirmation_height" }; std::initializer_list<const char *> names{ rocksdb::kDefaultColumnFamilyName.c_str (), "frontiers", "accounts", "blocks", "pending", "representation", "unchecked", "vote", "online_weight", "meta", "peers", "cached_counts", "confirmation_height" };
@ -84,19 +74,28 @@ void nano::rocksdb_store::open (bool & error_a, boost::filesystem::path const &
auto options = get_db_options (); auto options = get_db_options ();
rocksdb::Status s; rocksdb::Status s;
std::vector<rocksdb::ColumnFamilyHandle *> handles_l;
if (open_read_only_a) if (open_read_only_a)
{ {
s = rocksdb::DB::OpenForReadOnly (options, path_a.string (), column_families, &handles, &db); rocksdb::DB * db_l;
s = rocksdb::DB::OpenForReadOnly (options, path_a.string (), column_families, &handles_l, &db_l);
db.reset (db_l);
} }
else else
{ {
s = rocksdb::OptimisticTransactionDB::Open (options, path_a.string (), column_families, &handles, &optimistic_db); s = rocksdb::OptimisticTransactionDB::Open (options, path_a.string (), column_families, &handles_l, &optimistic_db);
if (optimistic_db) if (optimistic_db)
{ {
db = optimistic_db; db.reset (optimistic_db);
} }
} }
handles.resize (handles_l.size ());
for (auto i = 0; i < handles_l.size (); ++i)
{
handles[i].reset (handles_l[i]);
}
// Assign handles to supplied // Assign handles to supplied
error_a |= !s.ok (); error_a |= !s.ok ();
@ -134,7 +133,7 @@ nano::write_transaction nano::rocksdb_store::tx_begin_write (std::vector<nano::t
nano::read_transaction nano::rocksdb_store::tx_begin_read () nano::read_transaction nano::rocksdb_store::tx_begin_read ()
{ {
return nano::read_transaction{ std::make_unique<nano::read_rocksdb_txn> (db) }; return nano::read_transaction{ std::make_unique<nano::read_rocksdb_txn> (db.get ()) };
} }
std::string nano::rocksdb_store::vendor_get () const std::string nano::rocksdb_store::vendor_get () const
@ -146,11 +145,11 @@ rocksdb::ColumnFamilyHandle * nano::rocksdb_store::table_to_column_family (table
{ {
auto & handles_l = handles; auto & handles_l = handles;
auto get_handle = [&handles_l](const char * name) { auto get_handle = [&handles_l](const char * name) {
auto iter = std::find_if (handles_l.begin (), handles_l.end (), [name](auto handle) { auto iter = std::find_if (handles_l.begin (), handles_l.end (), [name](auto & handle) {
return (handle->GetName () == name); return (handle->GetName () == name);
}); });
debug_assert (iter != handles_l.end ()); debug_assert (iter != handles_l.end ());
return *iter; return (*iter).get ();
}; };
switch (table_a) switch (table_a)
@ -194,6 +193,7 @@ bool nano::rocksdb_store::exists (nano::transaction const & transaction_a, table
else else
{ {
rocksdb::ReadOptions options; rocksdb::ReadOptions options;
options.fill_cache = false;
status = tx (transaction_a)->Get (options, table_to_column_family (table_a), key_a, &slice); status = tx (transaction_a)->Get (options, table_to_column_family (table_a), key_a, &slice);
} }
@ -423,14 +423,15 @@ int nano::rocksdb_store::clear (rocksdb::ColumnFamilyHandle * column_family)
auto name = column_family->GetName (); auto name = column_family->GetName ();
auto status = db->DropColumnFamily (column_family); auto status = db->DropColumnFamily (column_family);
release_assert (status.ok ()); release_assert (status.ok ());
delete column_family;
// Need to add it back as we just want to clear the contents // Need to add it back as we just want to clear the contents
auto handle_it = std::find (handles.begin (), handles.end (), column_family); auto handle_it = std::find_if (handles.begin (), handles.end (), [column_family](auto & handle) {
return handle.get () == column_family;
});
debug_assert (handle_it != handles.cend ()); debug_assert (handle_it != handles.cend ());
status = db->CreateColumnFamily (get_cf_options (), name, &column_family); status = db->CreateColumnFamily (get_cf_options (), name, &column_family);
release_assert (status.ok ()); release_assert (status.ok ());
*handle_it = column_family; handle_it->reset (column_family);
return status.code (); return status.code ();
} }
@ -540,7 +541,7 @@ bool nano::rocksdb_store::copy_db (boost::filesystem::path const & destination_p
} }
} }
auto status = backup_engine->CreateNewBackup (db); auto status = backup_engine->CreateNewBackup (db.get ());
if (!status.ok ()) if (!status.ok ())
{ {
return false; return false;
@ -558,27 +559,29 @@ bool nano::rocksdb_store::copy_db (boost::filesystem::path const & destination_p
} }
} }
rocksdb::BackupEngineReadOnly * backup_engine_read;
status = rocksdb::BackupEngineReadOnly::Open (rocksdb::Env::Default (), rocksdb::BackupableDBOptions (destination_path.string ()), &backup_engine_read);
if (!status.ok ())
{ {
delete backup_engine_read; std::unique_ptr<rocksdb::BackupEngineReadOnly> backup_engine_read;
return false;
}
// First remove all files (not directories) in the destination
for (boost::filesystem::directory_iterator end_dir_it, it (destination_path); it != end_dir_it; ++it)
{
auto path = it->path ();
if (boost::filesystem::is_regular_file (path))
{ {
boost::filesystem::remove (it->path ()); rocksdb::BackupEngineReadOnly * backup_engine_read_raw;
status = rocksdb::BackupEngineReadOnly::Open (rocksdb::Env::Default (), rocksdb::BackupableDBOptions (destination_path.string ()), &backup_engine_read_raw);
}
if (!status.ok ())
{
return false;
} }
}
// Now generate the relevant files from the backup // First remove all files (not directories) in the destination
status = backup_engine->RestoreDBFromLatestBackup (destination_path.string (), destination_path.string ()); for (auto const & path : boost::make_iterator_range (boost::filesystem::directory_iterator (destination_path)))
delete backup_engine_read; {
if (boost::filesystem::is_regular_file (path))
{
boost::filesystem::remove (path);
}
}
// Now generate the relevant files from the backup
status = backup_engine->RestoreDBFromLatestBackup (destination_path.string (), destination_path.string ());
}
// Open it so that it flushes all WAL files // Open it so that it flushes all WAL files
if (status.ok ()) if (status.ok ())

View file

@ -27,7 +27,6 @@ class rocksdb_store : public block_store_partial<rocksdb::Slice, rocksdb_store>
{ {
public: public:
rocksdb_store (nano::logger_mt &, boost::filesystem::path const &, nano::rocksdb_config const & = nano::rocksdb_config{}, bool open_read_only = false); rocksdb_store (nano::logger_mt &, boost::filesystem::path const &, nano::rocksdb_config const & = nano::rocksdb_config{}, bool open_read_only = false);
~rocksdb_store ();
nano::write_transaction tx_begin_write (std::vector<nano::tables> const & tables_requiring_lock = {}, std::vector<nano::tables> const & tables_no_lock = {}) override; nano::write_transaction tx_begin_write (std::vector<nano::tables> const & tables_requiring_lock = {}, std::vector<nano::tables> const & tables_no_lock = {}) override;
nano::read_transaction tx_begin_read () override; nano::read_transaction tx_begin_read () override;
@ -52,13 +51,13 @@ public:
template <typename Key, typename Value> template <typename Key, typename Value>
nano::store_iterator<Key, Value> make_iterator (nano::transaction const & transaction_a, tables table_a) const nano::store_iterator<Key, Value> make_iterator (nano::transaction const & transaction_a, tables table_a) const
{ {
return nano::store_iterator<Key, Value> (std::make_unique<nano::rocksdb_iterator<Key, Value>> (db, transaction_a, table_to_column_family (table_a))); return nano::store_iterator<Key, Value> (std::make_unique<nano::rocksdb_iterator<Key, Value>> (db.get (), transaction_a, table_to_column_family (table_a)));
} }
template <typename Key, typename Value> template <typename Key, typename Value>
nano::store_iterator<Key, Value> make_iterator (nano::transaction const & transaction_a, tables table_a, nano::rocksdb_val const & key) const nano::store_iterator<Key, Value> make_iterator (nano::transaction const & transaction_a, tables table_a, nano::rocksdb_val const & key) const
{ {
return nano::store_iterator<Key, Value> (std::make_unique<nano::rocksdb_iterator<Key, Value>> (db, transaction_a, table_to_column_family (table_a), key)); return nano::store_iterator<Key, Value> (std::make_unique<nano::rocksdb_iterator<Key, Value>> (db.get (), transaction_a, table_to_column_family (table_a), &key));
} }
bool init_error () const override; bool init_error () const override;
@ -66,10 +65,10 @@ public:
private: private:
bool error{ false }; bool error{ false };
nano::logger_mt & logger; nano::logger_mt & logger;
std::vector<rocksdb::ColumnFamilyHandle *> handles;
// Optimistic transactions are used in write mode // Optimistic transactions are used in write mode
rocksdb::OptimisticTransactionDB * optimistic_db = nullptr; rocksdb::OptimisticTransactionDB * optimistic_db = nullptr;
rocksdb::DB * db = nullptr; std::unique_ptr<rocksdb::DB> db;
std::vector<std::unique_ptr<rocksdb::ColumnFamilyHandle>> handles;
std::shared_ptr<rocksdb::TableFactory> table_factory; std::shared_ptr<rocksdb::TableFactory> table_factory;
std::unordered_map<nano::tables, std::mutex> write_lock_mutexes; std::unordered_map<nano::tables, std::mutex> write_lock_mutexes;

View file

@ -16,10 +16,10 @@ inline bool is_read (nano::transaction const & transaction_a)
return (dynamic_cast<const nano::read_transaction *> (&transaction_a) != nullptr); return (dynamic_cast<const nano::read_transaction *> (&transaction_a) != nullptr);
} }
inline rocksdb::ReadOptions const & snapshot_options (nano::transaction const & transaction_a) inline rocksdb::ReadOptions & snapshot_options (nano::transaction const & transaction_a)
{ {
debug_assert (is_read (transaction_a)); debug_assert (is_read (transaction_a));
return *static_cast<const rocksdb::ReadOptions *> (transaction_a.get_handle ()); return *static_cast<rocksdb::ReadOptions *> (transaction_a.get_handle ());
} }
} }
@ -31,50 +31,33 @@ template <typename T, typename U>
class rocksdb_iterator : public store_iterator_impl<T, U> class rocksdb_iterator : public store_iterator_impl<T, U>
{ {
public: public:
rocksdb_iterator (rocksdb::DB * db, nano::transaction const & transaction_a, rocksdb::ColumnFamilyHandle * handle_a) rocksdb_iterator () = default;
rocksdb_iterator (rocksdb::DB * db, nano::transaction const & transaction_a, rocksdb::ColumnFamilyHandle * handle_a, rocksdb_val const * val_a)
{ {
// Don't fill the block cache for any blocks read as a result of an iterator
rocksdb::Iterator * iter; rocksdb::Iterator * iter;
if (is_read (transaction_a)) if (is_read (transaction_a))
{ {
iter = db->NewIterator (snapshot_options (transaction_a), handle_a); auto & read_options = snapshot_options (transaction_a);
read_options.fill_cache = false;
cursor.reset (db->NewIterator (read_options, handle_a));
} }
else else
{ {
rocksdb::ReadOptions ropts; rocksdb::ReadOptions ropts;
ropts.fill_cache = false; ropts.fill_cache = false;
iter = tx (transaction_a)->GetIterator (ropts, handle_a); cursor.reset (tx (transaction_a)->GetIterator (ropts, handle_a));
} }
cursor.reset (iter); if (val_a)
cursor->SeekToFirst ();
if (cursor->Valid ())
{ {
current.first.value = cursor->key (); cursor->Seek (*val_a);
current.second.value = cursor->value ();
} }
else else
{ {
clear (); cursor->SeekToFirst ();
} }
}
rocksdb_iterator () = default;
rocksdb_iterator (rocksdb::DB * db, nano::transaction const & transaction_a, rocksdb::ColumnFamilyHandle * handle_a, rocksdb_val const & val_a)
{
rocksdb::Iterator * iter;
if (is_read (transaction_a))
{
iter = db->NewIterator (snapshot_options (transaction_a), handle_a);
}
else
{
iter = tx (transaction_a)->GetIterator (rocksdb::ReadOptions (), handle_a);
}
cursor.reset (iter);
cursor->Seek (val_a);
if (cursor->Valid ()) if (cursor->Valid ())
{ {
@ -87,6 +70,11 @@ public:
} }
} }
rocksdb_iterator (rocksdb::DB * db, nano::transaction const & transaction_a, rocksdb::ColumnFamilyHandle * handle_a) :
rocksdb_iterator (db, transaction_a, handle_a, nullptr)
{
}
rocksdb_iterator (nano::rocksdb_iterator<T, U> && other_a) rocksdb_iterator (nano::rocksdb_iterator<T, U> && other_a)
{ {
cursor = other_a.cursor; cursor = other_a.cursor;

View file

@ -3,7 +3,10 @@
nano::read_rocksdb_txn::read_rocksdb_txn (rocksdb::DB * db_a) : nano::read_rocksdb_txn::read_rocksdb_txn (rocksdb::DB * db_a) :
db (db_a) db (db_a)
{ {
options.snapshot = db_a->GetSnapshot (); if (db_a)
{
options.snapshot = db_a->GetSnapshot ();
}
} }
nano::read_rocksdb_txn::~read_rocksdb_txn () nano::read_rocksdb_txn::~read_rocksdb_txn ()
@ -13,7 +16,10 @@ nano::read_rocksdb_txn::~read_rocksdb_txn ()
void nano::read_rocksdb_txn::reset () void nano::read_rocksdb_txn::reset ()
{ {
db->ReleaseSnapshot (options.snapshot); if (db)
{
db->ReleaseSnapshot (options.snapshot);
}
} }
void nano::read_rocksdb_txn::renew () void nano::read_rocksdb_txn::renew ()

View file

@ -360,7 +360,7 @@ public:
nano::db_val<Val> data; nano::db_val<Val> data;
auto status = get (transaction_a, tables::meta, nano::db_val<Val> (version_key), data); auto status = get (transaction_a, tables::meta, nano::db_val<Val> (version_key), data);
int result (minimum_version); int result (minimum_version);
if (!not_found (status)) if (success (status))
{ {
nano::uint256_union version_value (data); nano::uint256_union version_value (data);
debug_assert (version_value.qwords[2] == 0 && version_value.qwords[1] == 0 && version_value.qwords[0] == 0); debug_assert (version_value.qwords[2] == 0 && version_value.qwords[1] == 0 && version_value.qwords[0] == 0);