openethereum/util/src/journaldb.rs

512 lines
16 KiB
Rust
Raw Normal View History

2016-02-05 13:40:41 +01:00
// Copyright 2015, 2016 Ethcore (UK) Ltd.
// This file is part of Parity.
// Parity is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Parity is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Parity. If not, see <http://www.gnu.org/licenses/>.
2016-01-18 12:41:31 +01:00
//! Disk-backed HashDB implementation.
use common::*;
use rlp::*;
use hashdb::*;
2016-02-04 02:40:35 +01:00
use memorydb::*;
2016-03-06 21:57:55 +01:00
use rocksdb::{DB, Writable, WriteBatch, IteratorMode};
2016-01-31 17:01:36 +01:00
#[cfg(test)]
use std::env;
2016-01-18 12:41:31 +01:00
/// Implementation of the HashDB trait for a disk-backed database with a memory overlay
2016-03-06 21:57:55 +01:00
/// and latent-removal semantics.
2016-01-18 12:41:31 +01:00
///
/// Like OverlayDB, there is a memory overlay; `commit()` must be called in order to
/// write operations out to disk. Unlike OverlayDB, `remove()` operations do not take effect
/// immediately. Rather some age (based on a linear but arbitrary metric) must pass before
/// the removals actually take effect.
pub struct JournalDB {
2016-02-04 02:40:35 +01:00
overlay: MemoryDB,
2016-03-06 21:57:55 +01:00
backing: Arc<DB>,
counters: Arc<RwLock<HashMap<H256, i32>>>,
2016-01-18 12:41:31 +01:00
}
2016-02-04 21:33:30 +01:00
impl Clone for JournalDB {
    /// Produce a handle onto the same backing database and the same counters.
    ///
    /// The clone starts with a fresh, empty overlay: operations pending in `self`
    /// are not carried over.
    fn clone(&self) -> JournalDB {
        let shared_backing = self.backing.clone();
        let shared_counters = self.counters.clone();
        JournalDB {
            overlay: MemoryDB::new(),
            backing: shared_backing,
            counters: shared_counters,
        }
    }
}
2016-03-06 21:57:55 +01:00
/// Backing-database key (`"last"`) under which `commit()` stores the most recently retired era.
const LAST_ERA_KEY: [u8; 4] = [b'l', b'a', b's', b't'];
/// Backing-database key (`"jver"`) under which the journal format version is stored.
const VERSION_KEY: [u8; 4] = [b'j', b'v', b'e', b'r'];
2016-02-18 03:46:24 +01:00
2016-03-06 21:57:55 +01:00
/// Journal format version written to a fresh database and required of an existing one.
const DB_VERSION: u32 = 1;
2016-02-04 21:33:30 +01:00
2016-01-18 12:41:31 +01:00
impl JournalDB {
2016-03-06 21:57:55 +01:00
/// Create a new instance that takes ownership of the `backing` database.
///
/// The database is wrapped in an `Arc` and handed to `new_with_arc`.
pub fn new(backing: DB) -> JournalDB {
    JournalDB::new_with_arc(Arc::new(backing))
}
2016-03-06 21:57:55 +01:00
/// Create a new instance given a shared `backing` database.
pub fn new_with_arc(backing: Arc<DB>) -> JournalDB {
if backing.iterator(IteratorMode::Start).next().is_some() {
2016-02-05 01:49:06 +01:00
match backing.get(&VERSION_KEY).map(|d| d.map(|v| decode::<u32>(&v))) {
2016-03-06 21:57:55 +01:00
Ok(Some(DB_VERSION)) => {},
2016-02-05 01:49:06 +01:00
v => panic!("Incompatible DB version, expected {}, got {:?}", DB_VERSION, v)
}
} else {
2016-03-06 21:57:55 +01:00
backing.put(&VERSION_KEY, &encode(&DB_VERSION)).expect("Error writing version to database");
2016-02-05 01:49:06 +01:00
}
2016-03-06 21:57:55 +01:00
let counters = JournalDB::read_counters(&backing);
2016-01-21 23:33:52 +01:00
JournalDB {
2016-02-04 02:40:35 +01:00
overlay: MemoryDB::new(),
2016-03-06 21:57:55 +01:00
backing: backing,
counters: Arc::new(RwLock::new(counters)),
2016-01-21 23:33:52 +01:00
}
}
2016-01-18 12:41:31 +01:00
/// Create a new instance with an anonymous temporary database.
///
/// The database is opened in a randomly named directory below the system temporary
/// directory. Only compiled for tests.
#[cfg(test)]
pub fn new_temp() -> JournalDB {
    let mut path = env::temp_dir();
    path.push(H32::random().hex());
    let database = DB::open_default(path.to_str().unwrap()).unwrap();
    Self::new(database)
}
2016-02-05 01:49:06 +01:00
/// Check if this database has any commits
pub fn is_empty(&self) -> bool {
2016-03-06 21:57:55 +01:00
self.backing.get(&LAST_ERA_KEY).expect("Low level database error").is_none()
2016-02-05 01:49:06 +01:00
}
2016-01-18 12:41:31 +01:00
2016-03-06 21:57:55 +01:00
/// Derive an auxiliary backing-database key from `key` by appending the byte `index`
/// to the hash's bytes.
fn morph_key(key: &H256, index: u8) -> Bytes {
    let mut morphed = key.bytes().to_vec();
    morphed.push(index);
    morphed
}
2016-03-06 21:57:55 +01:00
// The next three are valid only as long as there is an insert operation of `key` in the journal.
2016-03-06 22:05:12 +01:00
fn set_already_in(batch: &WriteBatch, key: &H256) { batch.put(&Self::morph_key(key, 0), &[1u8]).expect("Low-level database error. Some issue with your hard disk?"); }
/// Queue, in `batch`, the removal of the "already in" marker for `key`.
///
/// Panics if the batch rejects the operation.
fn reset_already_in(batch: &WriteBatch, key: &H256) {
    let marker = Self::morph_key(key, 0);
    batch.delete(&marker).expect("Low-level database error. Some issue with your hard disk?");
}
2016-03-06 21:57:55 +01:00
/// Report whether the "already in" marker for `key` is present in `backing`.
///
/// Only what has been written to the database is visible here; operations still queued
/// in an unwritten batch are not. Panics if the database reports an error.
fn is_already_in(backing: &DB, key: &H256) -> bool {
    let marker = Self::morph_key(key, 0);
    let stored = backing.get(&marker).expect("Low-level database error. Some issue with your hard disk?");
    stored.is_some()
}
fn insert_keys(inserts: &Vec<(H256, Bytes)>, backing: &DB, counters: &mut HashMap<H256, i32>, batch: &WriteBatch) {
for &(ref h, ref d) in inserts {
if let Some(c) = counters.get_mut(h) {
// already counting. increment.
*c += 1;
continue;
}
2016-03-06 21:57:55 +01:00
// this is the first entry for this node in the journal.
if backing.get(&h.bytes()).expect("Low-level database error. Some issue with your hard disk?").is_some() {
// already in the backing DB. start counting, and remember it was already in.
Self::set_already_in(batch, &h);
counters.insert(h.clone(), 1);
continue;
}
2016-03-06 21:57:55 +01:00
// Gets removed when a key leaves the journal, so should never be set when we're placing a new key.
//Self::reset_already_in(&h);
assert!(!Self::is_already_in(backing, &h));
2016-03-06 22:05:12 +01:00
batch.put(&h.bytes(), d).expect("Low-level database error. Some issue with your hard disk?");
}
}
2016-03-06 21:57:55 +01:00
/// Rebuild reference counters for the inserts of one journal record while the journal
/// is being replayed at start-up.
///
/// For every hash in `inserts`:
/// - if a counter exists, it is incremented;
/// - otherwise a counter of 1 is created only when the hash carries the "already in"
///   marker in `backing`.
///
/// A hash without the marker gets no counter at all. This mirrors `insert_keys`, which
/// never stores a zero counter, and matters because `kill_keys` treats a counter that is
/// neither absent nor at least 1 as an invalid value and panics.
fn replay_keys(inserts: &[H256], backing: &DB, counters: &mut HashMap<H256, i32>) {
    for h in inserts {
        if let Some(c) = counters.get_mut(h) {
            // already counting. increment.
            *c += 1;
            continue;
        }

        // this is the first entry for this node in the journal.
        // it is only counted (starting at 1) if it was already in.
        if Self::is_already_in(backing, h) {
            counters.insert(h.clone(), 1);
        }
    }
}
/// Enact the removal of each hash in `deletes`, honouring outstanding references.
///
/// For every hash:
/// - counter greater than 1: the counter is decremented and nothing else happens;
/// - counter equal to 1: the counter is dropped and the "already in" marker is cleared;
///   the node itself stays in the backing database;
/// - counter equal to 0: the counter is dropped and the node is queued for deletion,
///   exactly as if no counter existed (`replay_keys` can create such entries);
/// - no counter: the node is queued for deletion from the backing database.
///
/// All writes are queued in `batch`.
///
/// # Panics
///
/// Panics on a negative counter or if the batch rejects an operation.
fn kill_keys(deletes: Vec<H256>, counters: &mut HashMap<H256, i32>, batch: &WriteBatch) {
    for h in deletes.into_iter() {
        // Copy the count out so the mutable borrow of `counters` ends before we remove from it.
        let mut n: Option<i32> = None;
        if let Some(c) = counters.get_mut(&h) {
            if *c > 1 {
                *c -= 1;
                continue;
            }
            n = Some(*c);
        }

        match n {
            Some(1) => {
                // Last additional reference gone; the original entry stays in the backing DB.
                counters.remove(&h);
                Self::reset_already_in(batch, &h);
            }
            Some(0) => {
                // No additional references were ever recorded: behave as if there were no
                // counter, and make sure the stale zero entry does not linger.
                counters.remove(&h);
                batch.delete(&h.bytes()).expect("Low-level database error. Some issue with your hard disk?");
            }
            None => {
                // The marker gets removed when moving from 1 to 0 additional refs, so it
                // should never be set when we arrive here.
                batch.delete(&h.bytes()).expect("Low-level database error. Some issue with your hard disk?");
            }
            Some(_) => panic!("Invalid value in counters: {:?}", n),
        }
    }
}
2016-01-18 12:41:31 +01:00
/// Commit all recent insert operations and historical removals from the old era
/// to the backing database.
2016-03-06 21:57:55 +01:00
pub fn commit(&mut self, now: u64, id: &H256, end: Option<(u64, H256)>) -> Result<u32, UtilError> {
2016-01-18 12:41:31 +01:00
// journal format:
// [era, 0] => [ id, [insert_0, ...], [remove_0, ...] ]
// [era, 1] => [ id, [insert_0, ...], [remove_0, ...] ]
// [era, n] => [ ... ]
2016-02-05 01:49:06 +01:00
// TODO: store reclaim_period.
2016-01-18 12:41:31 +01:00
2016-03-06 21:57:55 +01:00
// When we make a new commit, we make a journal of all blocks in the recent history and record
// all keys that were inserted and deleted. The journal is ordered by era; multiple commits can
// share the same era. This forms a data structure similar to a queue but whose items are tuples.
// By the time comes to remove a tuple from the queue (i.e. then the era passes from recent history
// into ancient history) then only one commit from the tuple is considered canonical. This commit
// is kept in the main backing database, whereas any others from the same era are reverted.
//
// It is possible that a key, properly available in the backing database be deleted and re-inserted
// in the recent history queue, yet have both operations in commits that are eventually non-canonical.
// To avoid the original, and still required, key from being deleted, we maintain a reference count
// which includes an original key, if any.
//
// The semantics of the `counter` are:
// insert key k:
// counter already contains k: count += 1
// counter doesn't contain k:
// backing db contains k: count = 1
// backing db doesn't contain k: insert into backing db, count = 0
// delete key k:
// counter contains k (count is asserted to be non-zero):
// count > 1: counter -= 1
// count == 1: remove counter
// count == 0: remove key from backing db
// counter doesn't contain k: remove key from backing db
2016-02-05 22:54:33 +01:00
//
2016-03-06 21:57:55 +01:00
// Practically, this means that for each commit block turning from recent to ancient we do the
// following:
// is_canonical:
// inserts: Ignored (left alone in the backing database).
// deletes: Enacted; however, recent history queue is checked for ongoing references. This is
// reduced as a preference to deletion from the backing database.
// !is_canonical:
// inserts: Reverted; however, recent history queue is checked for ongoing references. This is
// reduced as a preference to deletion from the backing database.
// deletes: Ignored (they were never inserted).
//
2016-01-18 12:41:31 +01:00
// record new commit's details.
2016-03-06 21:57:55 +01:00
let batch = WriteBatch::new();
let mut counters = self.counters.write().unwrap();
2016-01-18 12:41:31 +01:00
{
let mut index = 0usize;
let mut last;
2016-03-06 21:57:55 +01:00
while try!(self.backing.get({
let mut r = RlpStream::new_list(2);
r.append(&now);
r.append(&index);
last = r.drain();
&last
})).is_some() {
2016-01-18 12:41:31 +01:00
index += 1;
}
2016-03-06 21:57:55 +01:00
let drained = self.overlay.drain();
let removes: Vec<H256> = drained
.iter()
.filter_map(|(ref k, &(_, ref c))| if *c < 0 {Some(k.clone())} else {None}).cloned()
.collect();
let inserts: Vec<(H256, Bytes)> = drained
.into_iter()
.filter_map(|(k, (v, r))| if r > 0 { assert!(r == 1); Some((k, v)) } else { assert!(r >= -1); None })
.collect();
2016-01-18 12:41:31 +01:00
let mut r = RlpStream::new_list(3);
r.append(id);
2016-03-06 21:57:55 +01:00
// Process the new inserts.
// We use the inserts for three things. For each:
// - we place into the backing DB or increment the counter if already in;
// - we note in the backing db that it was already in;
// - we write the key into our journal for this block;
r.begin_list(inserts.len());
inserts.iter().foreach(|&(k, _)| {r.append(&k);});
2016-02-04 02:40:35 +01:00
r.append(&removes);
2016-03-06 21:57:55 +01:00
Self::insert_keys(&inserts, &self.backing, &mut counters, &batch);
2016-02-04 21:33:30 +01:00
try!(batch.put(&last, r.as_raw()));
2016-01-18 12:41:31 +01:00
}
// apply old commits' details
2016-01-18 13:30:01 +01:00
if let Some((end_era, canon_id)) = end {
let mut index = 0usize;
let mut last;
while let Some(rlp_data) = try!(self.backing.get({
2016-03-06 21:57:55 +01:00
let mut r = RlpStream::new_list(2);
2016-01-18 13:30:01 +01:00
r.append(&end_era);
r.append(&index);
2016-01-18 15:47:50 +01:00
last = r.drain();
2016-01-18 13:30:01 +01:00
&last
})) {
let rlp = Rlp::new(&rlp_data);
2016-03-06 21:57:55 +01:00
let inserts: Vec<H256> = rlp.val_at(1);
let deletes: Vec<H256> = rlp.val_at(2);
// Collect keys to be removed. These are removed keys for canonical block, inserted for non-canonical
2016-03-06 21:57:55 +01:00
Self::kill_keys(if canon_id == rlp.val_at(0) {deletes} else {inserts}, &mut counters, &batch);
2016-02-04 21:33:30 +01:00
try!(batch.delete(&last));
2016-01-18 13:30:01 +01:00
index += 1;
2016-01-18 12:41:31 +01:00
}
2016-03-06 21:57:55 +01:00
try!(batch.put(&LAST_ERA_KEY, &encode(&end_era)));
trace!("JournalDB: delete journal for time #{}.{}, (canon was {})", end_era, index, canon_id);
2016-01-18 12:41:31 +01:00
}
2016-02-04 21:33:30 +01:00
try!(self.backing.write(batch));
2016-03-06 21:57:55 +01:00
// trace!("JournalDB::commit() deleted {} nodes", deletes);
Ok(0)
2016-02-05 22:54:33 +01:00
}
2016-02-04 02:40:35 +01:00
/// Fetch the value stored for `key` directly from the backing database, bypassing the
/// overlay. Returns `None` when the key is absent.
///
/// Panics if the database reports an error.
fn payload(&self, key: &H256) -> Option<Bytes> {
    let stored = self.backing.get(&key.bytes()).expect("Low-level database error. Some issue with your hard disk?");
    match stored {
        Some(value) => Some(value.to_vec()),
        None => None,
    }
}
2016-02-04 21:33:30 +01:00
2016-03-06 21:57:55 +01:00
fn read_counters(db: &DB) -> HashMap<H256, i32> {
let mut counters = HashMap::new();
if let Some(val) = db.get(&LAST_ERA_KEY).expect("Low-level database error.") {
let mut era = decode::<u64>(&val) + 1;
2016-02-04 21:33:30 +01:00
loop {
let mut index = 0usize;
while let Some(rlp_data) = db.get({
2016-03-06 21:57:55 +01:00
let mut r = RlpStream::new_list(2);
2016-02-04 21:33:30 +01:00
r.append(&era);
r.append(&index);
&r.drain()
}).expect("Low-level database error.") {
let rlp = Rlp::new(&rlp_data);
2016-03-06 21:57:55 +01:00
let inserts: Vec<H256> = rlp.val_at(1);
Self::replay_keys(&inserts, db, &mut counters);
2016-02-04 21:33:30 +01:00
index += 1;
};
2016-03-06 21:57:55 +01:00
if index == 0 {
2016-02-04 21:33:30 +01:00
break;
}
2016-03-06 21:57:55 +01:00
era += 1;
2016-02-04 21:33:30 +01:00
}
}
2016-03-06 21:57:55 +01:00
trace!("Recovered {} counters", counters.len());
counters
2016-02-04 21:33:30 +01:00
}
2016-01-18 12:41:31 +01:00
}
impl HashDB for JournalDB {
2016-02-04 02:40:35 +01:00
fn keys(&self) -> HashMap<H256, i32> {
let mut ret: HashMap<H256, i32> = HashMap::new();
2016-03-06 21:57:55 +01:00
for (key, _) in self.backing.iterator(IteratorMode::Start) {
2016-02-04 02:40:35 +01:00
let h = H256::from_slice(key.deref());
ret.insert(h, 1);
}
for (key, refs) in self.overlay.keys().into_iter() {
let refs = *ret.get(&key).unwrap_or(&0) + refs;
ret.insert(key, refs);
}
ret
}
fn lookup(&self, key: &H256) -> Option<&[u8]> {
let k = self.overlay.raw(key);
match k {
Some(&(ref d, rc)) if rc > 0 => Some(d),
_ => {
if let Some(x) = self.payload(key) {
Some(&self.overlay.denote(key, x).0)
}
else {
None
}
}
}
}
fn exists(&self, key: &H256) -> bool {
self.lookup(key).is_some()
}
fn insert(&mut self, value: &[u8]) -> H256 {
2016-02-04 21:33:30 +01:00
self.overlay.insert(value)
2016-02-04 02:40:35 +01:00
}
fn emplace(&mut self, key: H256, value: Bytes) {
self.overlay.emplace(key, value);
}
fn kill(&mut self, key: &H256) {
2016-02-04 21:33:30 +01:00
self.overlay.kill(key);
}
2016-01-18 12:41:31 +01:00
}
2016-01-18 13:30:01 +01:00
#[cfg(test)]
mod tests {
use common::*;
use super::*;
use hashdb::*;
2016-03-06 21:57:55 +01:00
#[test]
fn insert_same_in_fork() {
    // history is 1
    let mut jdb = JournalDB::new_temp();

    // X enters the database in era 1.
    let x = jdb.insert(b"X");
    jdb.commit(1, &b"1".sha3(), None).unwrap();
    jdb.commit(2, &b"2".sha3(), None).unwrap();
    jdb.commit(3, &b"1002a".sha3(), Some((1, b"1".sha3()))).unwrap();
    jdb.commit(4, &b"1003a".sha3(), Some((2, b"2".sha3()))).unwrap();

    // The "b" fork removes X in era 3 and re-inserts it in era 4.
    jdb.remove(&x);
    jdb.commit(3, &b"1002b".sha3(), Some((1, b"1".sha3()))).unwrap();
    let x = jdb.insert(b"X");
    jdb.commit(4, &b"1003b".sha3(), Some((2, b"2".sha3()))).unwrap();

    // The "a" fork becomes canonical for eras 3 and 4; X must survive.
    jdb.commit(5, &b"1004a".sha3(), Some((3, b"1002a".sha3()))).unwrap();
    jdb.commit(6, &b"1005a".sha3(), Some((4, b"1003a".sha3()))).unwrap();

    assert!(jdb.exists(&x));
}
2016-01-18 13:30:01 +01:00
#[test]
fn long_history() {
    // history is 3
    let mut jdb = JournalDB::new_temp();

    let foo = jdb.insert(b"foo");
    jdb.commit(0, &b"0".sha3(), None).unwrap();
    assert!(jdb.exists(&foo));

    // The removal is journalled in era 1 and stays latent until that era is retired.
    jdb.remove(&foo);
    jdb.commit(1, &b"1".sha3(), None).unwrap();
    assert!(jdb.exists(&foo));
    jdb.commit(2, &b"2".sha3(), None).unwrap();
    assert!(jdb.exists(&foo));
    jdb.commit(3, &b"3".sha3(), Some((0, b"0".sha3()))).unwrap();
    assert!(jdb.exists(&foo));

    // Retiring era 1 finally enacts the removal.
    jdb.commit(4, &b"4".sha3(), Some((1, b"1".sha3()))).unwrap();
    assert!(!jdb.exists(&foo));
}
#[test]
fn complex() {
    // history is 1
    let mut jdb = JournalDB::new_temp();

    // Era 0: foo and bar are inserted.
    let foo = jdb.insert(b"foo");
    let bar = jdb.insert(b"bar");
    jdb.commit(0, &b"0".sha3(), None).unwrap();
    assert!(jdb.exists(&foo));
    assert!(jdb.exists(&bar));

    // Era 1: foo and bar are removed, baz is inserted. Nothing disappears yet.
    jdb.remove(&foo);
    jdb.remove(&bar);
    let baz = jdb.insert(b"baz");
    jdb.commit(1, &b"1".sha3(), Some((0, b"0".sha3()))).unwrap();
    assert!(jdb.exists(&foo));
    assert!(jdb.exists(&bar));
    assert!(jdb.exists(&baz));

    // Era 2: foo is re-inserted, baz is removed; era 1 is retired.
    let foo = jdb.insert(b"foo");
    jdb.remove(&baz);
    jdb.commit(2, &b"2".sha3(), Some((1, b"1".sha3()))).unwrap();
    assert!(jdb.exists(&foo));
    assert!(!jdb.exists(&bar));
    assert!(jdb.exists(&baz));

    // Era 3: foo is removed again; era 2 is retired.
    jdb.remove(&foo);
    jdb.commit(3, &b"3".sha3(), Some((2, b"2".sha3()))).unwrap();
    assert!(jdb.exists(&foo));
    assert!(!jdb.exists(&bar));
    assert!(!jdb.exists(&baz));

    // Era 4: retiring era 3 removes foo for good.
    jdb.commit(4, &b"4".sha3(), Some((3, b"3".sha3()))).unwrap();
    assert!(!jdb.exists(&foo));
    assert!(!jdb.exists(&bar));
    assert!(!jdb.exists(&baz));
}
#[test]
fn fork() {
    // history is 1
    let mut jdb = JournalDB::new_temp();

    let foo = jdb.insert(b"foo");
    let bar = jdb.insert(b"bar");
    jdb.commit(0, &b"0".sha3(), None).unwrap();
    assert!(jdb.exists(&foo));
    assert!(jdb.exists(&bar));

    // Fork "1a": removes foo, inserts baz.
    jdb.remove(&foo);
    let baz = jdb.insert(b"baz");
    jdb.commit(1, &b"1a".sha3(), Some((0, b"0".sha3()))).unwrap();

    // Fork "1b": removes bar.
    jdb.remove(&bar);
    jdb.commit(1, &b"1b".sha3(), Some((0, b"0".sha3()))).unwrap();

    // Both forks are still recent history, so everything is still there.
    assert!(jdb.exists(&foo));
    assert!(jdb.exists(&bar));
    assert!(jdb.exists(&baz));

    // "1b" becomes canonical: its removal of bar is enacted, and 1a's insert of baz is reverted.
    jdb.commit(2, &b"2b".sha3(), Some((1, b"1b".sha3()))).unwrap();
    assert!(jdb.exists(&foo));
    assert!(!jdb.exists(&baz));
    assert!(!jdb.exists(&bar));
}
2016-02-04 21:33:30 +01:00
#[test]
fn overwrite() {
    // history is 1
    let mut jdb = JournalDB::new_temp();

    let foo = jdb.insert(b"foo");
    jdb.commit(0, &b"0".sha3(), None).unwrap();
    assert!(jdb.exists(&foo));

    // Remove foo in era 1 ...
    jdb.remove(&foo);
    jdb.commit(1, &b"1".sha3(), Some((0, b"0".sha3()))).unwrap();

    // ... and put it straight back in era 2; it must remain available throughout.
    jdb.insert(b"foo");
    assert!(jdb.exists(&foo));
    jdb.commit(2, &b"2".sha3(), Some((1, b"1".sha3()))).unwrap();
    assert!(jdb.exists(&foo));
    jdb.commit(3, &b"2".sha3(), Some((0, b"2".sha3()))).unwrap();
    assert!(jdb.exists(&foo));
}
2016-02-05 22:54:33 +01:00
#[test]
fn fork_same_key() {
    // history is 1
    let mut jdb = JournalDB::new_temp();
    jdb.commit(0, &b"0".sha3(), None).unwrap();

    // Two competing commits in era 1 both insert the same node.
    let foo = jdb.insert(b"foo");
    jdb.commit(1, &b"1a".sha3(), Some((0, b"0".sha3()))).unwrap();
    jdb.insert(b"foo");
    jdb.commit(1, &b"1b".sha3(), Some((0, b"0".sha3()))).unwrap();
    assert!(jdb.exists(&foo));

    // "1a" becomes canonical; reverting 1b's insert must not take foo away.
    jdb.commit(2, &b"2a".sha3(), Some((1, b"1a".sha3()))).unwrap();
    assert!(jdb.exists(&foo));
}
2016-01-18 13:30:01 +01:00
}