Shared code cache (#2921)

* add a global code cache

* extract memory-limited lru cache to util

* use memory-limited code cache

* account for code cache size in mem_used
Authored by Robert Habermeier on 2016-10-28 16:04:44 +02:00; committed by Gav Wood
parent 956a059a06
commit 29ab4ecac1
9 changed files with 180 additions and 61 deletions
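Taken together, the changes below implement a read-through cache keyed by code hash: a state lookup first consults the new shared, byte-bounded code cache, falls back to the backing database on a miss, and publishes what it read so that other `State` instances built on the same `StateDB` can reuse it. A minimal sketch of that flow, using placeholder types (a `HashMap` and `u64` keys) rather than the real `StateDB`/`MemoryLruCache`/`H256` API:

```rust
use std::collections::HashMap;
use std::sync::Arc;

// Stand-in for the byte-bounded MemoryLruCache<H256, Arc<Vec<u8>>> added further down.
struct GlobalCodeCache {
    code: HashMap<u64, Arc<Vec<u8>>>,
}

impl GlobalCodeCache {
    fn get_or_load<F>(&mut self, code_hash: u64, load_from_db: F) -> Option<Arc<Vec<u8>>>
        where F: FnOnce() -> Option<Vec<u8>>
    {
        if let Some(code) = self.code.get(&code_hash) {
            return Some(code.clone());             // shared hit: no database read
        }
        let code = Arc::new(load_from_db()?);      // miss: read the code from the database
        self.code.insert(code_hash, code.clone()); // publish it for other readers
        Some(code)
    }
}

fn main() {
    let mut cache = GlobalCodeCache { code: HashMap::new() };
    let code = cache.get_or_load(0xdead_beef, || Some(vec![0x60, 0x00])).unwrap();
    // second call is served from the cache; the "database" closure returns nothing
    let again = cache.get_or_load(0xdead_beef, || None).unwrap();
    assert!(Arc::ptr_eq(&code, &again)); // same shared allocation, no copy
}
```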

Cargo.lock (generated)

@@ -298,7 +298,7 @@ dependencies = [
"hyper 0.9.4 (git+https://github.com/ethcore/hyper)",
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"lru-cache 0.0.7 (git+https://github.com/contain-rs/lru-cache)",
"lru-cache 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"num_cpus 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -555,6 +555,7 @@ dependencies = [
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"lru-cache 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"parking_lot 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.68 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -907,8 +908,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "lru-cache"
version = "0.0.7"
source = "git+https://github.com/contain-rs/lru-cache#13255e33c45ceb69a4b143f235a4322df5fb580e"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"linked-hash-map 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
@@ -1987,7 +1988,7 @@ dependencies = [
"checksum linked-hash-map 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bda158e0dabeb97ee8a401f4d17e479d6b891a14de0bba79d5cc2d4d325b5e48"
"checksum linked-hash-map 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6d262045c5b87c0861b3f004610afd0e2c851e2908d08b6c870cbb9d5f494ecd"
"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
"checksum lru-cache 0.0.7 (git+https://github.com/contain-rs/lru-cache)" = "<none>"
"checksum lru-cache 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "656fa4dfcb02bcf1063c592ba3ff6a5303ee1f2afe98c8a889e8b1a77c6dfdb7"
"checksum matches 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "15305656809ce5a4805b1ff2946892810992197ce1270ff79baded852187942e"
"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
"checksum mime 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a74cc2587bf97c49f3f5bab62860d6abf3902ca73b66b51d9b049fbdcd727bd2"


@@ -27,7 +27,6 @@ time = "0.1"
rand = "0.3"
byteorder = "0.5"
transient-hashmap = "0.1"
lru-cache = { git = "https://github.com/contain-rs/lru-cache" }
evmjit = { path = "../evmjit", optional = true }
clippy = { version = "0.0.96", optional = true}
ethash = { path = "../ethash" }
@@ -40,6 +39,7 @@ ethstore = { path = "../ethstore" }
ethkey = { path = "../ethkey" }
ethcore-ipc-nano = { path = "../ipc/nano" }
rlp = { path = "../util/rlp" }
lru-cache = "0.1.0"
ethcore-bloom-journal = { path = "../util/bloom" }
[dependencies.hyper]


@@ -15,20 +15,27 @@
// along with Parity. If not, see <http://www.gnu.org/licenses/>.
use std::sync::Arc;
use lru_cache::LruCache;
use util::{H256, Mutex};
use util::{H256, HeapSizeOf, Mutex};
use util::sha3::*;
use util::cache::MemoryLruCache;
use bit_set::BitSet;
use super::super::instructions;
const INITIAL_CAPACITY: usize = 32;
const DEFAULT_CACHE_SIZE: usize = 4 * 1024 * 1024;
// stub for a HeapSizeOf implementation.
struct Bits(Arc<BitSet>);
impl HeapSizeOf for Bits {
fn heap_size_of_children(&self) -> usize {
// dealing in bits here
self.0.capacity() * 8
}
}
/// Global cache for EVM interpreter
pub struct SharedCache {
jump_destinations: Mutex<LruCache<H256, Arc<BitSet>>>,
max_size: usize,
cur_size: Mutex<usize>,
jump_destinations: Mutex<MemoryLruCache<H256, Bits>>,
}
impl SharedCache {
@@ -36,9 +43,7 @@ impl SharedCache {
/// to cache.
pub fn new(max_size: usize) -> Self {
SharedCache {
jump_destinations: Mutex::new(LruCache::new(INITIAL_CAPACITY)),
max_size: max_size * 8, // dealing with bits here.
cur_size: Mutex::new(0),
jump_destinations: Mutex::new(MemoryLruCache::new(max_size)),
}
}
@@ -49,37 +54,11 @@ impl SharedCache {
}
if let Some(d) = self.jump_destinations.lock().get_mut(code_hash) {
return d.clone();
return d.0.clone();
}
let d = Self::find_jump_destinations(code);
{
let mut cur_size = self.cur_size.lock();
*cur_size += d.capacity();
let mut jump_dests = self.jump_destinations.lock();
let cap = jump_dests.capacity();
// grow the cache as necessary; it operates on amount of items
// but we're working based on memory usage.
if jump_dests.len() == cap && *cur_size < self.max_size {
jump_dests.set_capacity(cap * 2);
}
// account for any element displaced from the cache.
if let Some(lru) = jump_dests.insert(code_hash.clone(), d.clone()) {
*cur_size -= lru.capacity();
}
// remove elements until we are below the memory target.
while *cur_size > self.max_size {
match jump_dests.remove_lru() {
Some((_, v)) => *cur_size -= v.capacity(),
_ => break,
}
}
}
self.jump_destinations.lock().insert(code_hash.clone(), Bits(d.clone()));
d
}
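What the cache stores is the interpreter's jump-destination analysis for a piece of code; the `Bits` newtype exists because `MemoryLruCache` requires `V: HeapSizeOf`, and the orphan rule prevents implementing that foreign trait for `Arc<BitSet>` directly. The analysis itself is roughly the following (a sketch based on the EVM rules, returning a `Vec<usize>` instead of the `Arc<BitSet>` the real `find_jump_destinations` produces):

```rust
// Sketch of the analysis whose result is being cached (assumed from the EVM
// rules, not copied from this file): every JUMPDEST (0x5b) that is not inside
// the immediate data of a PUSH1..PUSH32 (0x60..0x7f) is a valid jump target.
fn find_jump_destinations(code: &[u8]) -> Vec<usize> {
    let mut dests = Vec::new();
    let mut pc = 0;
    while pc < code.len() {
        let op = code[pc];
        if op == 0x5b {
            dests.push(pc);                 // JUMPDEST
        } else if op >= 0x60 && op <= 0x7f {
            pc += (op - 0x5f) as usize;     // skip the PUSHn immediate bytes
        }
        pc += 1;
    }
    dests
}

fn main() {
    // PUSH1 0x5b, JUMPDEST: the 0x5b inside the PUSH data must not count.
    assert_eq!(find_jump_destinations(&[0x60, 0x5b, 0x5b]), vec![2]);
}
```

Caching this per code hash means the scan runs once per distinct contract rather than once per call.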


@@ -247,23 +247,34 @@ impl Account {
}
/// Provide a database to get `code_hash`. Should not be called if it is a contract without code.
pub fn cache_code(&mut self, db: &HashDB) -> bool {
pub fn cache_code(&mut self, db: &HashDB) -> Option<Arc<Bytes>> {
// TODO: fill out self.code_cache;
trace!("Account::cache_code: ic={}; self.code_hash={:?}, self.code_cache={}", self.is_cached(), self.code_hash, self.code_cache.pretty());
self.is_cached() ||
if self.is_cached() { return Some(self.code_cache.clone()) }
match db.get(&self.code_hash) {
Some(x) => {
self.code_size = Some(x.len());
self.code_cache = Arc::new(x.to_vec());
true
Some(self.code_cache.clone())
},
_ => {
warn!("Failed reverse get of {}", self.code_hash);
false
None
},
}
}
/// Provide code to cache. For correctness, should be the correct code for the
/// account.
pub fn cache_given_code(&mut self, code: Arc<Bytes>) {
trace!("Account::cache_given_code: ic={}; self.code_hash={:?}, self.code_cache={}", self.is_cached(), self.code_hash, self.code_cache.pretty());
self.code_size = Some(code.len());
self.code_cache = code;
}
/// Provide a database to get `code_size`. Should not be called if it is a contract without code.
pub fn cache_code_size(&mut self, db: &HashDB) -> bool {
// TODO: fill out self.code_cache;
@@ -476,7 +487,7 @@ mod tests {
};
let mut a = Account::from_rlp(&rlp);
assert!(a.cache_code(&db.immutable()));
assert!(a.cache_code(&db.immutable()).is_some());
let mut a = Account::from_rlp(&rlp);
assert_eq!(a.note_code(vec![0x55, 0x44, 0xffu8]), Ok(()));
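Returning `Option<Arc<Bytes>>` instead of `bool` lets the caller both detect success and hand the freshly loaded code on to the global cache, while `cache_given_code` accepts code found there. `Arc` keeps this cheap: every cache level shares one allocation. A small standalone illustration (not Parity code):

```rust
use std::sync::Arc;

type Bytes = Vec<u8>;

fn main() {
    // One allocation of contract code...
    let code: Arc<Bytes> = Arc::new(vec![0x60, 0x00, 0x56]); // PUSH1 0x00, JUMP
    // ...can sit in the account's local code_cache and in the global code
    // cache at once; cloning an Arc only bumps a reference count.
    let in_account_cache = Arc::clone(&code);
    let in_global_cache = Arc::clone(&code);
    assert!(Arc::ptr_eq(&in_account_cache, &in_global_cache));
    assert_eq!(Arc::strong_count(&code), 3);
}
```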


@@ -599,14 +599,30 @@ impl State {
pod_state::diff_pod(&state_pre.to_pod(), &pod_state_post)
}
fn update_account_cache(require: RequireCache, account: &mut Account, db: &HashDB) {
match require {
RequireCache::None => {},
RequireCache::Code => {
account.cache_code(db);
}
RequireCache::CodeSize => {
account.cache_code_size(db);
// load required account data from the databases.
fn update_account_cache(require: RequireCache, account: &mut Account, state_db: &StateDB, db: &HashDB) {
match (account.is_cached(), require) {
(true, _) | (false, RequireCache::None) => {}
(false, require) => {
// if there's already code in the global cache, always cache it
// locally.
let hash = account.code_hash();
match state_db.get_cached_code(&hash) {
Some(code) => account.cache_given_code(code),
None => match require {
RequireCache::None => {},
RequireCache::Code => {
if let Some(code) = account.cache_code(db) {
// propagate code loaded from the database to
// the global code cache.
state_db.cache_code(hash, code)
}
}
RequireCache::CodeSize => {
account.cache_code_size(db);
}
}
}
}
}
}
@@ -620,7 +636,7 @@ impl State {
if let Some(ref mut maybe_acc) = self.cache.borrow_mut().get_mut(a) {
if let Some(ref mut account) = maybe_acc.account {
let accountdb = self.factories.accountdb.readonly(self.db.as_hashdb(), account.address_hash(a));
Self::update_account_cache(require, account, accountdb.as_hashdb());
Self::update_account_cache(require, account, &self.db, accountdb.as_hashdb());
return f(Some(account));
}
return f(None);
@@ -629,7 +645,7 @@ impl State {
let result = self.db.get_cached(a, |mut acc| {
if let Some(ref mut account) = acc {
let accountdb = self.factories.accountdb.readonly(self.db.as_hashdb(), account.address_hash(a));
Self::update_account_cache(require, account, accountdb.as_hashdb());
Self::update_account_cache(require, account, &self.db, accountdb.as_hashdb());
}
f(acc.map(|a| &*a))
});
@@ -647,7 +663,7 @@ impl State {
};
if let Some(ref mut account) = maybe_acc.as_mut() {
let accountdb = self.factories.accountdb.readonly(self.db.as_hashdb(), account.address_hash(a));
Self::update_account_cache(require, account, accountdb.as_hashdb());
Self::update_account_cache(require, account, &self.db, accountdb.as_hashdb());
}
let r = f(maybe_acc.as_ref());
self.insert_cache(a, AccountEntry::new_clean(maybe_acc));
@@ -703,7 +719,7 @@ impl State {
if require_code {
let addr_hash = account.address_hash(a);
let accountdb = self.factories.accountdb.readonly(self.db.as_hashdb(), addr_hash);
account.cache_code(accountdb.as_hashdb());
Self::update_account_cache(RequireCache::Code, account, &self.db, accountdb.as_hashdb());
}
account
},
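The `(account.is_cached(), require)` tuple match in `update_account_cache` front-loads the two cases that need no work at all; only the remaining case touches the global code cache and, failing that, the database, publishing any code it loads back into the global cache. The dispatch shortcut, restated as a self-contained sketch (placeholder function, not the real `State` API):

```rust
#[derive(Clone, Copy)]
enum RequireCache { None, Code, CodeSize }

/// Returns true only when some cache or database lookup is actually needed.
fn needs_lookup(code_already_cached: bool, require: RequireCache) -> bool {
    match (code_already_cached, require) {
        // already cached locally, or the caller asked for nothing: no work
        (true, _) | (false, RequireCache::None) => false,
        // otherwise: consult the global code cache, then the database
        (false, _) => true,
    }
}

fn main() {
    assert!(!needs_lookup(true, RequireCache::Code));
    assert!(needs_lookup(false, RequireCache::CodeSize));
    assert!(!needs_lookup(false, RequireCache::None));
}
```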


@@ -16,6 +16,7 @@
use std::collections::{VecDeque, HashSet};
use lru_cache::LruCache;
use util::cache::MemoryLruCache;
use util::journaldb::JournalDB;
use util::hash::{H256};
use util::hashdb::HashDB;
@@ -33,12 +34,17 @@ pub const ACCOUNT_BLOOM_HASHCOUNT_KEY: &'static [u8] = b"account_hash_count";
const STATE_CACHE_BLOCKS: usize = 12;
// The percentage of supplied cache size to go to accounts.
const ACCOUNT_CACHE_RATIO: usize = 90;
/// Shared canonical state cache.
struct AccountCache {
/// DB Account cache. `None` indicates that the account is known to be missing.
// When changing the type of the values here, be sure to update `mem_used` and
// `new`.
accounts: LruCache<Address, Option<Account>>,
/// DB Code cache. Maps code hashes to shared bytes.
code: MemoryLruCache<H256, Arc<Vec<u8>>>,
/// Information on the modifications in recently committed blocks; specifically which addresses
/// changed in which block. Ordered by block number.
modifications: VecDeque<BlockChanges>,
@@ -111,12 +117,15 @@ impl StateDB {
// into the `AccountCache` structure as its own `LruCache<(Address, H256), H256>`.
pub fn new(db: Box<JournalDB>, cache_size: usize) -> StateDB {
let bloom = Self::load_bloom(db.backing());
let cache_items = cache_size / ::std::mem::size_of::<Option<Account>>();
let acc_cache_size = cache_size * ACCOUNT_CACHE_RATIO / 100;
let code_cache_size = cache_size - acc_cache_size;
let cache_items = acc_cache_size / ::std::mem::size_of::<Option<Account>>();
StateDB {
db: db,
account_cache: Arc::new(Mutex::new(AccountCache {
accounts: LruCache::new(cache_items),
code: MemoryLruCache::new(code_cache_size),
modifications: VecDeque::new(),
})),
local_cache: Vec::new(),
@@ -342,7 +351,12 @@ impl StateDB {
/// Heap size used.
pub fn mem_used(&self) -> usize {
// TODO: account for LRU-cache overhead; this is a close approximation.
self.db.mem_used() + self.account_cache.lock().accounts.len() * ::std::mem::size_of::<Option<Account>>()
self.db.mem_used() + {
let cache = self.account_cache.lock();
cache.code.current_size() +
cache.accounts.len() * ::std::mem::size_of::<Option<Account>>()
}
}
/// Returns underlying `JournalDB`.
@@ -362,6 +376,15 @@ impl StateDB {
})
}
/// Add a global code cache entry. This doesn't need to worry about canonicality because
/// it simply maps hashes to raw code and will always be correct in the absence of
/// hash collisions.
pub fn cache_code(&self, hash: H256, code: Arc<Vec<u8>>) {
let mut cache = self.account_cache.lock();
cache.code.insert(hash, code);
}
/// Get basic copy of the cached account. Does not include storage.
/// Returns 'None' if cache is disabled or if the account is not cached.
pub fn get_cached_account(&self, addr: &Address) -> Option<Option<Account>> {
@@ -372,6 +395,13 @@ impl StateDB {
cache.accounts.get_mut(addr).map(|a| a.as_ref().map(|a| a.clone_basic()))
}
/// Get cached code based on hash.
pub fn get_cached_code(&self, hash: &H256) -> Option<Arc<Vec<u8>>> {
let mut cache = self.account_cache.lock();
cache.code.get_mut(hash).map(|code| code.clone())
}
/// Get value from a cached account.
/// Returns 'None' if cache is disabled or if the account is not cached.
pub fn get_cached<F, U>(&self, a: &Address, f: F) -> Option<U>
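`StateDB::new` now carves the single `cache_size` budget into two pools: 90% for the account LRU, which is still sized by entry count, and the remaining 10% for the byte-bounded code cache, with `mem_used` reporting both. A worked example of that arithmetic (the 32 MiB budget and the per-entry size are illustrative, not Parity defaults):

```rust
fn main() {
    const ACCOUNT_CACHE_RATIO: usize = 90;

    let cache_size = 32 * 1024 * 1024;                            // hypothetical total budget, in bytes
    let acc_cache_size = cache_size * ACCOUNT_CACHE_RATIO / 100;  // 90% for accounts
    let code_cache_size = cache_size - acc_cache_size;            // 10% for the code cache

    // Stand-in for ::std::mem::size_of::<Option<Account>>(); the real value
    // depends on the Account struct's layout.
    let account_entry_size = 160;
    let cache_items = acc_cache_size / account_entry_size;

    assert_eq!(code_cache_size, 3_355_444); // ~3.2 MiB of the 32 MiB budget
    println!("{} account slots, {} bytes for code", cache_items, code_cache_size);
}
```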


@@ -36,6 +36,7 @@ ansi_term = "0.7"
tiny-keccak= "1.0"
ethcore-bloom-journal = { path = "bloom" }
regex = "0.1"
lru-cache = "0.1.0"
[features]
default = []

util/src/cache.rs (new file)

@@ -0,0 +1,79 @@
// Copyright 2015, 2016 Ethcore (UK) Ltd.
// This file is part of Parity.
// Parity is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Parity is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Parity. If not, see <http://www.gnu.org/licenses/>.
//! Lru-cache related utilities as quick-and-dirty wrappers around the lru-cache
//! crate.
// TODO: push changes upstream in a clean way.
use heapsize::HeapSizeOf;
use lru_cache::LruCache;
use std::hash::Hash;
const INITIAL_CAPACITY: usize = 4;
/// An LRU-cache which operates on memory used.
pub struct MemoryLruCache<K: Eq + Hash, V: HeapSizeOf> {
inner: LruCache<K, V>,
cur_size: usize,
max_size: usize,
}
impl<K: Eq + Hash, V: HeapSizeOf> MemoryLruCache<K, V> {
/// Create a new cache with a maximum size in bytes.
pub fn new(max_size: usize) -> Self {
MemoryLruCache {
inner: LruCache::new(INITIAL_CAPACITY),
max_size: max_size,
cur_size: 0,
}
}
/// Insert an item.
pub fn insert(&mut self, key: K, val: V) {
let cap = self.inner.capacity();
// grow the cache as necessary; it operates on amount of items
// but we're working based on memory usage.
if self.inner.len() == cap && self.cur_size < self.max_size {
self.inner.set_capacity(cap * 2);
}
self.cur_size += val.heap_size_of_children();
// account for any element displaced from the cache.
if let Some(lru) = self.inner.insert(key, val) {
self.cur_size -= lru.heap_size_of_children();
}
// remove elements until we are below the memory target.
while self.cur_size > self.max_size {
match self.inner.remove_lru() {
Some((_, v)) => self.cur_size -= v.heap_size_of_children(),
_ => break,
}
}
}
/// Get a reference to an item in the cache. It is a logic error for its
/// heap size to be altered while borrowed.
pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
self.inner.get_mut(key)
}
/// Currently-used size of values in bytes.
pub fn current_size(&self) -> usize {
self.cur_size
}
}
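A usage sketch for the extracted cache (the key type, byte budget, and `Vec<u8>` values are arbitrary, and the `extern crate` line assumes the crate is consumed as `ethcore_util`, the way other crates in this tree import it): eviction is driven by the heap size the values report through `HeapSizeOf`, not by entry count, so an insertion that pushes past the budget evicts least-recently-used entries until the cache fits again.

```rust
extern crate ethcore_util as util;

use util::cache::MemoryLruCache;

fn main() {
    // 1 KiB budget for the values' reported heap size.
    let mut cache: MemoryLruCache<u32, Vec<u8>> = MemoryLruCache::new(1024);

    cache.insert(1, vec![0u8; 600]);   // fits within the budget
    cache.insert(2, vec![0u8; 600]);   // pushes past 1024 bytes: key 1 is evicted

    assert!(cache.get_mut(&1).is_none());
    assert!(cache.get_mut(&2).is_some());
    assert!(cache.current_size() <= 1024);
}
```

Note that the underlying lru-cache crate only exposes `get_mut`, which both returns the value and marks the entry as most recently used; that is why even read-only lookups here go through a mutable reference.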


@@ -105,6 +105,7 @@ extern crate ansi_term;
extern crate tiny_keccak;
extern crate rlp;
extern crate regex;
extern crate lru_cache;
#[macro_use]
extern crate heapsize;
@@ -143,6 +144,7 @@ pub mod semantic_version;
pub mod log;
pub mod path;
pub mod snappy;
pub mod cache;
mod timer;
pub use common::*;