Upgrade to RocksDB 5.8.8 and tune settings to reduce space amplification (#7348)

* kvdb-rocksdb: update to RocksDB 5.8.8

* kvdb-rocksdb: tune RocksDB options

* Switch to level-style compaction
* Increase default block size (16K), and use bigger blocks for HDDs (64K)
* Increase default file size base (64MB SSDs, 256MB HDDs)
* Create a single block cache shared across all column families
* Tune compaction settings using RocksDB helper functions, taking into account
  memory budget spread across all columns
* Configure background jobs based on the number of CPUs
* Set some default recommended settings

* ethcore: remove unused config blockchain.db_cache_size

* parity: increase default value for db_cache_size

* kvdb-rocksdb: enable compression on all levels

* kvdb-rocksdb: set global db_write_buffer_size

* kvdb-rocksdb: reduce db_write_buffer_size to force earlier flushing

* kvdb-rocksdb: use master branch for rust-rocksdb dependency
This commit is contained in:
André Silva
2018-01-03 10:00:37 +00:00
committed by Tomasz Drwięga
parent 8405edab41
commit e114b0b28d
13 changed files with 93 additions and 93 deletions

View File

@@ -18,15 +18,17 @@
extern crate log;
extern crate elastic_array;
extern crate interleaved_ordered;
extern crate num_cpus;
extern crate parking_lot;
extern crate regex;
extern crate rocksdb;
extern crate interleaved_ordered;
extern crate ethcore_bigint as bigint;
extern crate kvdb;
extern crate rlp;
use std::cmp;
use std::collections::HashMap;
use std::marker::PhantomData;
use std::path::{PathBuf, Path};
@@ -35,7 +37,7 @@ use std::{mem, fs, io};
use parking_lot::{Mutex, MutexGuard, RwLock};
use rocksdb::{
DB, Writable, WriteBatch, WriteOptions, IteratorMode, DBIterator,
Options, DBCompactionStyle, BlockBasedOptions, Direction, Cache, Column, ReadOptions
Options, BlockBasedOptions, Direction, Cache, Column, ReadOptions
};
use interleaved_ordered::{interleave_ordered, InterleaveOrdered};
@@ -50,9 +52,7 @@ use std::process::Command;
#[cfg(target_os = "linux")]
use std::fs::File;
const DB_BACKGROUND_FLUSHES: i32 = 2;
const DB_BACKGROUND_COMPACTIONS: i32 = 2;
const DB_WRITE_BUFFER_SIZE: usize = 2048 * 1000;
const DB_DEFAULT_MEMORY_BUDGET_MB: usize = 128;
enum KeyState {
Insert(DBValue),
@@ -65,8 +65,8 @@ enum KeyState {
pub struct CompactionProfile {
/// L0-L1 target file size
pub initial_file_size: u64,
/// L2-LN target file size multiplier
pub file_size_multiplier: i32,
/// block size
pub block_size: usize,
/// rate limiter for background flushes and compactions, bytes/sec, if any
pub write_rate_limit: Option<u64>,
}
@@ -136,8 +136,8 @@ impl CompactionProfile {
/// Default profile suitable for SSD storage
pub fn ssd() -> CompactionProfile {
CompactionProfile {
initial_file_size: 32 * 1024 * 1024,
file_size_multiplier: 2,
initial_file_size: 64 * 1024 * 1024,
block_size: 16 * 1024,
write_rate_limit: None,
}
}
@@ -145,9 +145,9 @@ impl CompactionProfile {
/// Slow HDD compaction profile
pub fn hdd() -> CompactionProfile {
CompactionProfile {
initial_file_size: 192 * 1024 * 1024,
file_size_multiplier: 1,
write_rate_limit: Some(8 * 1024 * 1024),
initial_file_size: 256 * 1024 * 1024,
block_size: 64 * 1024,
write_rate_limit: Some(16 * 1024 * 1024),
}
}
}
@@ -157,8 +157,8 @@ impl CompactionProfile {
pub struct DatabaseConfig {
/// Max number of open files.
pub max_open_files: i32,
/// Cache sizes (in MiB) for specific columns.
pub cache_sizes: HashMap<Option<u32>, usize>,
/// Memory budget (in MiB) used for setting block cache size, write buffer size.
pub memory_budget: Option<usize>,
/// Compaction profile
pub compaction: CompactionProfile,
/// Set number of columns
@@ -176,17 +176,20 @@ impl DatabaseConfig {
config
}
/// Set the column cache size in MiB.
pub fn set_cache(&mut self, col: Option<u32>, size: usize) {
self.cache_sizes.insert(col, size);
/// Returns the memory budget in bytes: the configured `memory_budget` (in MiB),
/// or `DB_DEFAULT_MEMORY_BUDGET_MB` when it is `None`, multiplied by 1024 * 1024.
pub fn memory_budget(&self) -> usize {
self.memory_budget.unwrap_or(DB_DEFAULT_MEMORY_BUDGET_MB) * 1024 * 1024
}
/// Returns the total memory budget in bytes divided evenly by the number of
/// columns; a `None` column count is treated as a single column.
// NOTE(review): `columns == Some(0)` would divide by zero here — confirm callers never set it.
pub fn memory_budget_per_col(&self) -> usize {
self.memory_budget() / self.columns.unwrap_or(1) as usize
}
}
impl Default for DatabaseConfig {
fn default() -> DatabaseConfig {
DatabaseConfig {
cache_sizes: HashMap::new(),
max_open_files: 512,
memory_budget: None,
compaction: CompactionProfile::default(),
columns: None,
wal: true,
@@ -217,27 +220,24 @@ struct DBAndColumns {
}
// get column family configuration from database config.
fn col_config(col: u32, config: &DatabaseConfig) -> Options {
// default cache size for columns not specified.
const DEFAULT_CACHE: usize = 2;
fn col_config(config: &DatabaseConfig, block_opts: &BlockBasedOptions) -> Result<Options> {
let mut opts = Options::new();
opts.set_compaction_style(DBCompactionStyle::DBUniversalCompaction);
opts.set_parsed_options("level_compaction_dynamic_level_bytes=true")?;
opts.set_block_based_table_factory(block_opts);
opts.set_parsed_options(
&format!("block_based_table_factory={{{};{}}}",
"cache_index_and_filter_blocks=true",
"pin_l0_filter_and_index_blocks_in_cache=true"))?;
opts.optimize_level_style_compaction(config.memory_budget_per_col() as i32);
opts.set_target_file_size_base(config.compaction.initial_file_size);
opts.set_target_file_size_multiplier(config.compaction.file_size_multiplier);
opts.set_db_write_buffer_size(DB_WRITE_BUFFER_SIZE);
let col_opt = config.columns.map(|_| col);
opts.set_parsed_options("compression_per_level=")?;
{
let cache_size = config.cache_sizes.get(&col_opt).cloned().unwrap_or(DEFAULT_CACHE);
let mut block_opts = BlockBasedOptions::new();
// all goes to read cache.
block_opts.set_cache(Cache::new(cache_size * 1024 * 1024));
opts.set_block_based_table_factory(&block_opts);
}
opts
Ok(opts)
}
/// Key-Value database.
@@ -246,6 +246,7 @@ pub struct Database {
config: DatabaseConfig,
write_opts: WriteOptions,
read_opts: ReadOptions,
block_opts: BlockBasedOptions,
path: String,
// Dirty values added with `write_buffered`. Cleaned on `flush`.
overlay: RwLock<Vec<HashMap<ElasticArray32<u8>, KeyState>>>,
@@ -265,31 +266,35 @@ impl Database {
/// Open database file. Creates if it does not exist.
pub fn open(config: &DatabaseConfig, path: &str) -> Result<Database> {
let mut opts = Options::new();
if let Some(rate_limit) = config.compaction.write_rate_limit {
opts.set_parsed_options(&format!("rate_limiter_bytes_per_sec={}", rate_limit))?;
}
opts.set_parsed_options(&format!("max_total_wal_size={}", 64 * 1024 * 1024))?;
opts.set_parsed_options("verify_checksums_in_compaction=0")?;
opts.set_parsed_options("keep_log_file_num=1")?;
opts.set_max_open_files(config.max_open_files);
opts.create_if_missing(true);
opts.set_use_fsync(false);
opts.set_db_write_buffer_size(DB_WRITE_BUFFER_SIZE);
opts.create_if_missing(true);
opts.set_max_open_files(config.max_open_files);
opts.set_parsed_options("keep_log_file_num=1")?;
opts.set_parsed_options("bytes_per_sync=1048576")?;
opts.set_db_write_buffer_size(config.memory_budget_per_col() / 2);
opts.increase_parallelism(cmp::max(1, ::num_cpus::get() as i32 / 2));
opts.set_max_background_flushes(DB_BACKGROUND_FLUSHES);
opts.set_max_background_compactions(DB_BACKGROUND_COMPACTIONS);
let mut block_opts = BlockBasedOptions::new();
// compaction settings
opts.set_compaction_style(DBCompactionStyle::DBUniversalCompaction);
opts.set_target_file_size_base(config.compaction.initial_file_size);
opts.set_target_file_size_multiplier(config.compaction.file_size_multiplier);
{
block_opts.set_block_size(config.compaction.block_size);
let cache_size = cmp::max(8, config.memory_budget() / 3);
let cache = Cache::new(cache_size);
block_opts.set_cache(cache);
}
let mut cf_options = Vec::with_capacity(config.columns.unwrap_or(0) as usize);
let cfnames: Vec<_> = (0..config.columns.unwrap_or(0)).map(|c| format!("col{}", c)).collect();
let columns = config.columns.unwrap_or(0) as usize;
let mut cf_options = Vec::with_capacity(columns);
let cfnames: Vec<_> = (0..columns).map(|c| format!("col{}", c)).collect();
let cfnames: Vec<&str> = cfnames.iter().map(|n| n as &str).collect();
for col in 0 .. config.columns.unwrap_or(0) {
cf_options.push(col_config(col, &config));
for _ in 0 .. config.columns.unwrap_or(0) {
cf_options.push(col_config(&config, &block_opts)?);
}
let mut write_opts = WriteOptions::new();
@@ -348,6 +353,7 @@ impl Database {
flushing_lock: Mutex::new((false)),
path: path.to_owned(),
read_opts: read_opts,
block_opts: block_opts,
})
}
@@ -632,7 +638,7 @@ impl Database {
Some(DBAndColumns { ref mut db, ref mut cfs }) => {
let col = cfs.len() as u32;
let name = format!("col{}", col);
cfs.push(db.create_cf(&name, &col_config(col, &self.config))?);
cfs.push(db.create_cf(&name, &col_config(&self.config, &self.block_opts)?)?);
Ok(())
},
None => Ok(()),