// Copyright 2015-2018 Parity Technologies (UK) Ltd.
// This file is part of Parity.
// Parity is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Parity is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Parity. If not, see .
#[macro_use]
extern crate log;
extern crate elastic_array;
extern crate fs_swap;
extern crate interleaved_ordered;
extern crate num_cpus;
extern crate parking_lot;
extern crate regex;
extern crate rocksdb;
extern crate ethereum_types;
extern crate kvdb;
use std::collections::HashMap;
use std::marker::PhantomData;
use std::{cmp, fs, io, mem, result, error};
use std::path::Path;
use parking_lot::{Mutex, MutexGuard, RwLock};
use rocksdb::{
DB, Writable, WriteBatch, WriteOptions, IteratorMode, DBIterator,
Options, BlockBasedOptions, Direction, Cache, Column, ReadOptions
};
use interleaved_ordered::{interleave_ordered, InterleaveOrdered};
use elastic_array::ElasticArray32;
use fs_swap::{swap, swap_nonatomic};
use kvdb::{KeyValueDB, DBTransaction, DBValue, DBOp};
#[cfg(target_os = "linux")]
use regex::Regex;
#[cfg(target_os = "linux")]
use std::process::Command;
#[cfg(target_os = "linux")]
use std::fs::File;
#[cfg(target_os = "linux")]
use std::path::PathBuf;
fn other_io_err(e: E) -> io::Error where E: Into> {
io::Error::new(io::ErrorKind::Other, e)
}
const DB_DEFAULT_MEMORY_BUDGET_MB: usize = 128;
enum KeyState {
Insert(DBValue),
Delete,
}
/// Compaction profile for the database settings
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct CompactionProfile {
/// L0-L1 target file size
pub initial_file_size: u64,
/// block size
pub block_size: usize,
/// rate limiter for background flushes and compactions, bytes/sec, if any
pub write_rate_limit: Option,
}
impl Default for CompactionProfile {
/// Default profile suitable for most storage
fn default() -> CompactionProfile {
CompactionProfile::ssd()
}
}
/// Given output of df command return Linux rotational flag file path.
#[cfg(target_os = "linux")]
pub fn rotational_from_df_output(df_out: Vec) -> Option {
use std::str;
str::from_utf8(df_out.as_slice())
.ok()
// Get the drive name.
.and_then(|df_str| Regex::new(r"/dev/(sd[:alpha:]{1,2})")
.ok()
.and_then(|re| re.captures(df_str))
.and_then(|captures| captures.get(1)))
// Generate path e.g. /sys/block/sda/queue/rotational
.map(|drive_path| {
let mut p = PathBuf::from("/sys/block");
p.push(drive_path.as_str());
p.push("queue/rotational");
p
})
}
impl CompactionProfile {
/// Attempt to determine the best profile automatically, only Linux for now.
#[cfg(target_os = "linux")]
pub fn auto(db_path: &Path) -> CompactionProfile {
use std::io::Read;
let hdd_check_file = db_path
.to_str()
.and_then(|path_str| Command::new("df").arg(path_str).output().ok())
.and_then(|df_res| match df_res.status.success() {
true => Some(df_res.stdout),
false => None,
})
.and_then(rotational_from_df_output);
// Read out the file and match compaction profile.
if let Some(hdd_check) = hdd_check_file {
if let Ok(mut file) = File::open(hdd_check.as_path()) {
let mut buffer = [0; 1];
if file.read_exact(&mut buffer).is_ok() {
// 0 means not rotational.
if buffer == [48] { return Self::ssd(); }
// 1 means rotational.
if buffer == [49] { return Self::hdd(); }
}
}
}
// Fallback if drive type was not determined.
Self::default()
}
/// Just default for other platforms.
#[cfg(not(target_os = "linux"))]
pub fn auto(_db_path: &Path) -> CompactionProfile {
Self::default()
}
/// Default profile suitable for SSD storage
pub fn ssd() -> CompactionProfile {
CompactionProfile {
initial_file_size: 64 * 1024 * 1024,
block_size: 16 * 1024,
write_rate_limit: None,
}
}
/// Slow HDD compaction profile
pub fn hdd() -> CompactionProfile {
CompactionProfile {
initial_file_size: 256 * 1024 * 1024,
block_size: 64 * 1024,
write_rate_limit: Some(16 * 1024 * 1024),
}
}
}
/// Database configuration
#[derive(Clone)]
pub struct DatabaseConfig {
/// Max number of open files.
pub max_open_files: i32,
/// Memory budget (in MiB) used for setting block cache size, write buffer size.
pub memory_budget: Option,
/// Compaction profile
pub compaction: CompactionProfile,
/// Set number of columns
pub columns: Option,
/// Should we keep WAL enabled?
pub wal: bool,
}
impl DatabaseConfig {
/// Create new `DatabaseConfig` with default parameters and specified set of columns.
/// Note that cache sizes must be explicitly set.
pub fn with_columns(columns: Option) -> Self {
let mut config = Self::default();
config.columns = columns;
config
}
pub fn memory_budget(&self) -> usize {
self.memory_budget.unwrap_or(DB_DEFAULT_MEMORY_BUDGET_MB) * 1024 * 1024
}
pub fn memory_budget_per_col(&self) -> usize {
self.memory_budget() / self.columns.unwrap_or(1) as usize
}
}
impl Default for DatabaseConfig {
fn default() -> DatabaseConfig {
DatabaseConfig {
max_open_files: 512,
memory_budget: None,
compaction: CompactionProfile::default(),
columns: None,
wal: true,
}
}
}
/// Database iterator (for flushed data only)
// The compromise of holding only a virtual borrow vs. holding a lock on the
// inner DB (to prevent closing via restoration) may be re-evaluated in the future.
//
pub struct DatabaseIterator<'a> {
iter: InterleaveOrdered<::std::vec::IntoIter<(Box<[u8]>, Box<[u8]>)>, DBIterator>,
_marker: PhantomData<&'a Database>,
}
impl<'a> Iterator for DatabaseIterator<'a> {
type Item = (Box<[u8]>, Box<[u8]>);
fn next(&mut self) -> Option {
self.iter.next()
}
}
struct DBAndColumns {
db: DB,
cfs: Vec,
}
// get column family configuration from database config.
fn col_config(config: &DatabaseConfig, block_opts: &BlockBasedOptions) -> io::Result {
let mut opts = Options::new();
opts.set_parsed_options("level_compaction_dynamic_level_bytes=true").map_err(other_io_err)?;
opts.set_block_based_table_factory(block_opts);
opts.set_parsed_options(
&format!("block_based_table_factory={{{};{}}}",
"cache_index_and_filter_blocks=true",
"pin_l0_filter_and_index_blocks_in_cache=true")).map_err(other_io_err)?;
opts.optimize_level_style_compaction(config.memory_budget_per_col() as i32);
opts.set_target_file_size_base(config.compaction.initial_file_size);
opts.set_parsed_options("compression_per_level=").map_err(other_io_err)?;
Ok(opts)
}
/// Key-Value database.
pub struct Database {
db: RwLock