From 29ab4ecac1e78d90980a56f39116c46f40f9a473 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Fri, 28 Oct 2016 16:04:44 +0200 Subject: [PATCH] Shared code cache (#2921) * add a global code cache * extract memory-limited lru cache to util * use memory-limited code cache * account for code cache size in mem_used --- Cargo.lock | 9 +-- ethcore/Cargo.toml | 2 +- ethcore/src/evm/interpreter/shared_cache.rs | 53 +++++--------- ethcore/src/state/account.rs | 21 ++++-- ethcore/src/state/mod.rs | 40 +++++++---- ethcore/src/state_db.rs | 34 ++++++++- util/Cargo.toml | 1 + util/src/cache.rs | 79 +++++++++++++++++++++ util/src/lib.rs | 2 + 9 files changed, 180 insertions(+), 61 deletions(-) create mode 100644 util/src/cache.rs diff --git a/Cargo.lock b/Cargo.lock index f7c08953e..26fab3d49 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -298,7 +298,7 @@ dependencies = [ "hyper 0.9.4 (git+https://github.com/ethcore/hyper)", "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", - "lru-cache 0.0.7 (git+https://github.com/contain-rs/lru-cache)", + "lru-cache 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "num_cpus 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)", "rayon 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -555,6 +555,7 @@ dependencies = [ "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "lru-cache 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "parking_lot 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)", "regex 0.1.68 (registry+https://github.com/rust-lang/crates.io-index)", @@ -907,8 +908,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "lru-cache" -version = "0.0.7" -source = "git+https://github.com/contain-rs/lru-cache#13255e33c45ceb69a4b143f235a4322df5fb580e" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "linked-hash-map 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -1987,7 +1988,7 @@ dependencies = [ "checksum linked-hash-map 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bda158e0dabeb97ee8a401f4d17e479d6b891a14de0bba79d5cc2d4d325b5e48" "checksum linked-hash-map 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6d262045c5b87c0861b3f004610afd0e2c851e2908d08b6c870cbb9d5f494ecd" "checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054" -"checksum lru-cache 0.0.7 (git+https://github.com/contain-rs/lru-cache)" = "" +"checksum lru-cache 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "656fa4dfcb02bcf1063c592ba3ff6a5303ee1f2afe98c8a889e8b1a77c6dfdb7" "checksum matches 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "15305656809ce5a4805b1ff2946892810992197ce1270ff79baded852187942e" "checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" "checksum mime 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a74cc2587bf97c49f3f5bab62860d6abf3902ca73b66b51d9b049fbdcd727bd2" diff --git a/ethcore/Cargo.toml b/ethcore/Cargo.toml index d48e76e94..1f8413339 100644 --- a/ethcore/Cargo.toml +++ b/ethcore/Cargo.toml @@ -27,7 +27,6 @@ time = "0.1" rand = "0.3" byteorder = "0.5" transient-hashmap = "0.1" -lru-cache = { git = "https://github.com/contain-rs/lru-cache" } evmjit = { path = "../evmjit", optional = true } clippy = { version = "0.0.96", optional = true} ethash = { path = "../ethash" } @@ -40,6 +39,7 @@ ethstore = { path = "../ethstore" } ethkey = { path = "../ethkey" } ethcore-ipc-nano = { path = "../ipc/nano" } rlp = { path = "../util/rlp" } +lru-cache = "0.1.0" ethcore-bloom-journal = { path = "../util/bloom" } [dependencies.hyper] diff --git a/ethcore/src/evm/interpreter/shared_cache.rs b/ethcore/src/evm/interpreter/shared_cache.rs index dee557522..cacc4dde3 100644 --- a/ethcore/src/evm/interpreter/shared_cache.rs +++ b/ethcore/src/evm/interpreter/shared_cache.rs @@ -15,20 +15,27 @@ // along with Parity. If not, see . use std::sync::Arc; -use lru_cache::LruCache; -use util::{H256, Mutex}; +use util::{H256, HeapSizeOf, Mutex}; use util::sha3::*; +use util::cache::MemoryLruCache; use bit_set::BitSet; use super::super::instructions; -const INITIAL_CAPACITY: usize = 32; const DEFAULT_CACHE_SIZE: usize = 4 * 1024 * 1024; +// stub for a HeapSizeOf implementation. +struct Bits(Arc); + +impl HeapSizeOf for Bits { + fn heap_size_of_children(&self) -> usize { + // dealing in bits here + self.0.capacity() * 8 + } +} + /// Global cache for EVM interpreter pub struct SharedCache { - jump_destinations: Mutex>>, - max_size: usize, - cur_size: Mutex, + jump_destinations: Mutex>, } impl SharedCache { @@ -36,9 +43,7 @@ impl SharedCache { /// to cache. pub fn new(max_size: usize) -> Self { SharedCache { - jump_destinations: Mutex::new(LruCache::new(INITIAL_CAPACITY)), - max_size: max_size * 8, // dealing with bits here. - cur_size: Mutex::new(0), + jump_destinations: Mutex::new(MemoryLruCache::new(max_size)), } } @@ -49,37 +54,11 @@ impl SharedCache { } if let Some(d) = self.jump_destinations.lock().get_mut(code_hash) { - return d.clone(); + return d.0.clone(); } let d = Self::find_jump_destinations(code); - - { - let mut cur_size = self.cur_size.lock(); - *cur_size += d.capacity(); - - let mut jump_dests = self.jump_destinations.lock(); - let cap = jump_dests.capacity(); - - // grow the cache as necessary; it operates on amount of items - // but we're working based on memory usage. - if jump_dests.len() == cap && *cur_size < self.max_size { - jump_dests.set_capacity(cap * 2); - } - - // account for any element displaced from the cache. - if let Some(lru) = jump_dests.insert(code_hash.clone(), d.clone()) { - *cur_size -= lru.capacity(); - } - - // remove elements until we are below the memory target. - while *cur_size > self.max_size { - match jump_dests.remove_lru() { - Some((_, v)) => *cur_size -= v.capacity(), - _ => break, - } - } - } + self.jump_destinations.lock().insert(code_hash.clone(), Bits(d.clone())); d } diff --git a/ethcore/src/state/account.rs b/ethcore/src/state/account.rs index 2bd8a2d15..d8d281b17 100644 --- a/ethcore/src/state/account.rs +++ b/ethcore/src/state/account.rs @@ -247,23 +247,34 @@ impl Account { } /// Provide a database to get `code_hash`. Should not be called if it is a contract without code. - pub fn cache_code(&mut self, db: &HashDB) -> bool { + pub fn cache_code(&mut self, db: &HashDB) -> Option> { // TODO: fill out self.code_cache; trace!("Account::cache_code: ic={}; self.code_hash={:?}, self.code_cache={}", self.is_cached(), self.code_hash, self.code_cache.pretty()); - self.is_cached() || + + if self.is_cached() { return Some(self.code_cache.clone()) } + match db.get(&self.code_hash) { Some(x) => { self.code_size = Some(x.len()); self.code_cache = Arc::new(x.to_vec()); - true + Some(self.code_cache.clone()) }, _ => { warn!("Failed reverse get of {}", self.code_hash); - false + None }, } } + /// Provide code to cache. For correctness, should be the correct code for the + /// account. + pub fn cache_given_code(&mut self, code: Arc) { + trace!("Account::cache_given_code: ic={}; self.code_hash={:?}, self.code_cache={}", self.is_cached(), self.code_hash, self.code_cache.pretty()); + + self.code_size = Some(code.len()); + self.code_cache = code; + } + /// Provide a database to get `code_size`. Should not be called if it is a contract without code. pub fn cache_code_size(&mut self, db: &HashDB) -> bool { // TODO: fill out self.code_cache; @@ -476,7 +487,7 @@ mod tests { }; let mut a = Account::from_rlp(&rlp); - assert!(a.cache_code(&db.immutable())); + assert!(a.cache_code(&db.immutable()).is_some()); let mut a = Account::from_rlp(&rlp); assert_eq!(a.note_code(vec![0x55, 0x44, 0xffu8]), Ok(())); diff --git a/ethcore/src/state/mod.rs b/ethcore/src/state/mod.rs index bef20d257..7c0f43d97 100644 --- a/ethcore/src/state/mod.rs +++ b/ethcore/src/state/mod.rs @@ -599,14 +599,30 @@ impl State { pod_state::diff_pod(&state_pre.to_pod(), &pod_state_post) } - fn update_account_cache(require: RequireCache, account: &mut Account, db: &HashDB) { - match require { - RequireCache::None => {}, - RequireCache::Code => { - account.cache_code(db); - } - RequireCache::CodeSize => { - account.cache_code_size(db); + // load required account data from the databases. + fn update_account_cache(require: RequireCache, account: &mut Account, state_db: &StateDB, db: &HashDB) { + match (account.is_cached(), require) { + (true, _) | (false, RequireCache::None) => {} + (false, require) => { + // if there's already code in the global cache, always cache it + // locally. + let hash = account.code_hash(); + match state_db.get_cached_code(&hash) { + Some(code) => account.cache_given_code(code), + None => match require { + RequireCache::None => {}, + RequireCache::Code => { + if let Some(code) = account.cache_code(db) { + // propagate code loaded from the database to + // the global code cache. + state_db.cache_code(hash, code) + } + } + RequireCache::CodeSize => { + account.cache_code_size(db); + } + } + } } } } @@ -620,7 +636,7 @@ impl State { if let Some(ref mut maybe_acc) = self.cache.borrow_mut().get_mut(a) { if let Some(ref mut account) = maybe_acc.account { let accountdb = self.factories.accountdb.readonly(self.db.as_hashdb(), account.address_hash(a)); - Self::update_account_cache(require, account, accountdb.as_hashdb()); + Self::update_account_cache(require, account, &self.db, accountdb.as_hashdb()); return f(Some(account)); } return f(None); @@ -629,7 +645,7 @@ impl State { let result = self.db.get_cached(a, |mut acc| { if let Some(ref mut account) = acc { let accountdb = self.factories.accountdb.readonly(self.db.as_hashdb(), account.address_hash(a)); - Self::update_account_cache(require, account, accountdb.as_hashdb()); + Self::update_account_cache(require, account, &self.db, accountdb.as_hashdb()); } f(acc.map(|a| &*a)) }); @@ -647,7 +663,7 @@ impl State { }; if let Some(ref mut account) = maybe_acc.as_mut() { let accountdb = self.factories.accountdb.readonly(self.db.as_hashdb(), account.address_hash(a)); - Self::update_account_cache(require, account, accountdb.as_hashdb()); + Self::update_account_cache(require, account, &self.db, accountdb.as_hashdb()); } let r = f(maybe_acc.as_ref()); self.insert_cache(a, AccountEntry::new_clean(maybe_acc)); @@ -703,7 +719,7 @@ impl State { if require_code { let addr_hash = account.address_hash(a); let accountdb = self.factories.accountdb.readonly(self.db.as_hashdb(), addr_hash); - account.cache_code(accountdb.as_hashdb()); + Self::update_account_cache(RequireCache::Code, account, &self.db, accountdb.as_hashdb()); } account }, diff --git a/ethcore/src/state_db.rs b/ethcore/src/state_db.rs index dfa65ab1d..affc0b405 100644 --- a/ethcore/src/state_db.rs +++ b/ethcore/src/state_db.rs @@ -16,6 +16,7 @@ use std::collections::{VecDeque, HashSet}; use lru_cache::LruCache; +use util::cache::MemoryLruCache; use util::journaldb::JournalDB; use util::hash::{H256}; use util::hashdb::HashDB; @@ -33,12 +34,17 @@ pub const ACCOUNT_BLOOM_HASHCOUNT_KEY: &'static [u8] = b"account_hash_count"; const STATE_CACHE_BLOCKS: usize = 12; +// The percentage of supplied cache size to go to accounts. +const ACCOUNT_CACHE_RATIO: usize = 90; + /// Shared canonical state cache. struct AccountCache { /// DB Account cache. `None` indicates that account is known to be missing. // When changing the type of the values here, be sure to update `mem_used` and // `new`. accounts: LruCache>, + /// DB Code cache. Maps code hashes to shared bytes. + code: MemoryLruCache>>, /// Information on the modifications in recently committed blocks; specifically which addresses /// changed in which block. Ordered by block number. modifications: VecDeque, @@ -111,12 +117,15 @@ impl StateDB { // into the `AccountCache` structure as its own `LruCache<(Address, H256), H256>`. pub fn new(db: Box, cache_size: usize) -> StateDB { let bloom = Self::load_bloom(db.backing()); - let cache_items = cache_size / ::std::mem::size_of::>(); + let acc_cache_size = cache_size * ACCOUNT_CACHE_RATIO / 100; + let code_cache_size = cache_size - acc_cache_size; + let cache_items = acc_cache_size / ::std::mem::size_of::>(); StateDB { db: db, account_cache: Arc::new(Mutex::new(AccountCache { accounts: LruCache::new(cache_items), + code: MemoryLruCache::new(code_cache_size), modifications: VecDeque::new(), })), local_cache: Vec::new(), @@ -342,7 +351,12 @@ impl StateDB { /// Heap size used. pub fn mem_used(&self) -> usize { // TODO: account for LRU-cache overhead; this is a close approximation. - self.db.mem_used() + self.account_cache.lock().accounts.len() * ::std::mem::size_of::>() + self.db.mem_used() + { + let cache = self.account_cache.lock(); + + cache.code.current_size() + + cache.accounts.len() * ::std::mem::size_of::>() + } } /// Returns underlying `JournalDB`. @@ -362,6 +376,15 @@ impl StateDB { }) } + /// Add a global code cache entry. This doesn't need to worry about canonicality because + /// it simply maps hashes to raw code and will always be correct in the absence of + /// hash collisions. + pub fn cache_code(&self, hash: H256, code: Arc>) { + let mut cache = self.account_cache.lock(); + + cache.code.insert(hash, code); + } + /// Get basic copy of the cached account. Does not include storage. /// Returns 'None' if cache is disabled or if the account is not cached. pub fn get_cached_account(&self, addr: &Address) -> Option> { @@ -372,6 +395,13 @@ impl StateDB { cache.accounts.get_mut(addr).map(|a| a.as_ref().map(|a| a.clone_basic())) } + /// Get cached code based on hash. + pub fn get_cached_code(&self, hash: &H256) -> Option>> { + let mut cache = self.account_cache.lock(); + + cache.code.get_mut(hash).map(|code| code.clone()) + } + /// Get value from a cached account. /// Returns 'None' if cache is disabled or if the account is not cached. pub fn get_cached(&self, a: &Address, f: F) -> Option diff --git a/util/Cargo.toml b/util/Cargo.toml index 1b6939595..78cca92e0 100644 --- a/util/Cargo.toml +++ b/util/Cargo.toml @@ -36,6 +36,7 @@ ansi_term = "0.7" tiny-keccak= "1.0" ethcore-bloom-journal = { path = "bloom" } regex = "0.1" +lru-cache = "0.1.0" [features] default = [] diff --git a/util/src/cache.rs b/util/src/cache.rs new file mode 100644 index 000000000..2b2c50c8b --- /dev/null +++ b/util/src/cache.rs @@ -0,0 +1,79 @@ +// Copyright 2015, 2016 Ethcore (UK) Ltd. +// This file is part of Parity. + +// Parity is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Parity is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Parity. If not, see . + +//! Lru-cache related utilities as quick-and-dirty wrappers around the lru-cache +//! crate. +// TODO: push changes upstream in a clean way. + +use heapsize::HeapSizeOf; +use lru_cache::LruCache; + +use std::hash::Hash; + +const INITIAL_CAPACITY: usize = 4; + +/// An LRU-cache which operates on memory used. +pub struct MemoryLruCache { + inner: LruCache, + cur_size: usize, + max_size: usize, +} + +impl MemoryLruCache { + /// Create a new cache with a maximum size in bytes. + pub fn new(max_size: usize) -> Self { + MemoryLruCache { + inner: LruCache::new(INITIAL_CAPACITY), + max_size: max_size, + cur_size: 0, + } + } + + /// Insert an item. + pub fn insert(&mut self, key: K, val: V) { + let cap = self.inner.capacity(); + + // grow the cache as necessary; it operates on amount of items + // but we're working based on memory usage. + if self.inner.len() == cap && self.cur_size < self.max_size { + self.inner.set_capacity(cap * 2); + } + + // account for any element displaced from the cache. + if let Some(lru) = self.inner.insert(key, val) { + self.cur_size -= lru.heap_size_of_children(); + } + + // remove elements until we are below the memory target. + while self.cur_size > self.max_size { + match self.inner.remove_lru() { + Some((_, v)) => self.cur_size -= v.heap_size_of_children(), + _ => break, + } + } + } + + /// Get a reference to an item in the cache. It is a logic error for its + /// heap size to be altered while borrowed. + pub fn get_mut(&mut self, key: &K) -> Option<&mut V> { + self.inner.get_mut(key) + } + + /// Currently-used size of values in bytes. + pub fn current_size(&self) -> usize { + self.cur_size + } +} \ No newline at end of file diff --git a/util/src/lib.rs b/util/src/lib.rs index e362459a6..f5558bcfc 100644 --- a/util/src/lib.rs +++ b/util/src/lib.rs @@ -105,6 +105,7 @@ extern crate ansi_term; extern crate tiny_keccak; extern crate rlp; extern crate regex; +extern crate lru_cache; #[macro_use] extern crate heapsize; @@ -143,6 +144,7 @@ pub mod semantic_version; pub mod log; pub mod path; pub mod snappy; +pub mod cache; mod timer; pub use common::*;