Shared code cache (#2921)

* add a global code cache

* extract memory-limited lru cache to util

* use memory-limited code cache

* account for code cache size in mem_used
Authored by Robert Habermeier on 2016-10-28 16:04:44 +02:00; committed by Gav Wood
parent 956a059a06
commit 29ab4ecac1
9 changed files with 180 additions and 61 deletions
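Taken together, the changes below implement a read-through cache keyed by code hash: a state lookup first consults the new shared, byte-bounded code cache, falls back to the backing database on a miss, and publishes what it read so that other `State` instances built on the same `StateDB` can reuse it. A minimal sketch of that flow, using placeholder types (a `HashMap` and `u64` keys) rather than the real `StateDB`/`MemoryLruCache`/`H256` API:

```rust
use std::collections::HashMap;
use std::sync::Arc;

// Stand-in for the byte-bounded MemoryLruCache<H256, Arc<Vec<u8>>> added further down.
struct GlobalCodeCache {
    code: HashMap<u64, Arc<Vec<u8>>>,
}

impl GlobalCodeCache {
    fn get_or_load<F>(&mut self, code_hash: u64, load_from_db: F) -> Option<Arc<Vec<u8>>>
        where F: FnOnce() -> Option<Vec<u8>>
    {
        if let Some(code) = self.code.get(&code_hash) {
            return Some(code.clone());             // shared hit: no database read
        }
        let code = Arc::new(load_from_db()?);      // miss: read the code from the database
        self.code.insert(code_hash, code.clone()); // publish it for other readers
        Some(code)
    }
}

fn main() {
    let mut cache = GlobalCodeCache { code: HashMap::new() };
    let code = cache.get_or_load(0xdead_beef, || Some(vec![0x60, 0x00])).unwrap();
    // second call is served from the cache; the "database" closure returns nothing
    let again = cache.get_or_load(0xdead_beef, || None).unwrap();
    assert!(Arc::ptr_eq(&code, &again)); // same shared allocation, no copy
}
```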

Cargo.lock (generated)

@@ -298,7 +298,7 @@ dependencies = [
"hyper 0.9.4 (git+https://github.com/ethcore/hyper)",
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"lru-cache 0.0.7 (git+https://github.com/contain-rs/lru-cache)",
"lru-cache 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"num_cpus 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -555,6 +555,7 @@ dependencies = [
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"lru-cache 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"parking_lot 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.68 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -907,8 +908,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "lru-cache"
version = "0.0.7"
source = "git+https://github.com/contain-rs/lru-cache#13255e33c45ceb69a4b143f235a4322df5fb580e"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"linked-hash-map 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
@@ -1987,7 +1988,7 @@ dependencies = [
"checksum linked-hash-map 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bda158e0dabeb97ee8a401f4d17e479d6b891a14de0bba79d5cc2d4d325b5e48"
"checksum linked-hash-map 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6d262045c5b87c0861b3f004610afd0e2c851e2908d08b6c870cbb9d5f494ecd"
"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
"checksum lru-cache 0.0.7 (git+https://github.com/contain-rs/lru-cache)" = "<none>"
"checksum lru-cache 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "656fa4dfcb02bcf1063c592ba3ff6a5303ee1f2afe98c8a889e8b1a77c6dfdb7"
"checksum matches 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "15305656809ce5a4805b1ff2946892810992197ce1270ff79baded852187942e"
"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
"checksum mime 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a74cc2587bf97c49f3f5bab62860d6abf3902ca73b66b51d9b049fbdcd727bd2"


@@ -27,7 +27,6 @@ time = "0.1"
rand = "0.3"
byteorder = "0.5"
transient-hashmap = "0.1"
lru-cache = { git = "https://github.com/contain-rs/lru-cache" }
evmjit = { path = "../evmjit", optional = true }
clippy = { version = "0.0.96", optional = true}
ethash = { path = "../ethash" }
@@ -40,6 +39,7 @@ ethstore = { path = "../ethstore" }
ethkey = { path = "../ethkey" }
ethcore-ipc-nano = { path = "../ipc/nano" }
rlp = { path = "../util/rlp" }
lru-cache = "0.1.0"
ethcore-bloom-journal = { path = "../util/bloom" }
[dependencies.hyper]


@@ -15,20 +15,27 @@
// along with Parity. If not, see <http://www.gnu.org/licenses/>.
use std::sync::Arc;
use lru_cache::LruCache;
use util::{H256, Mutex};
use util::{H256, HeapSizeOf, Mutex};
use util::sha3::*;
use util::cache::MemoryLruCache;
use bit_set::BitSet;
use super::super::instructions;
const INITIAL_CAPACITY: usize = 32;
const DEFAULT_CACHE_SIZE: usize = 4 * 1024 * 1024;
// stub for a HeapSizeOf implementation.
struct Bits(Arc<BitSet>);
impl HeapSizeOf for Bits {
fn heap_size_of_children(&self) -> usize {
// dealing in bits here
self.0.capacity() * 8
}
}
/// Global cache for EVM interpreter
pub struct SharedCache {
jump_destinations: Mutex<LruCache<H256, Arc<BitSet>>>,
max_size: usize,
cur_size: Mutex<usize>,
jump_destinations: Mutex<MemoryLruCache<H256, Bits>>,
}
impl SharedCache {
@@ -36,9 +43,7 @@ impl SharedCache {
/// to cache.
pub fn new(max_size: usize) -> Self {
SharedCache {
jump_destinations: Mutex::new(LruCache::new(INITIAL_CAPACITY)),
max_size: max_size * 8, // dealing with bits here.
cur_size: Mutex::new(0),
jump_destinations: Mutex::new(MemoryLruCache::new(max_size)),
}
}
@@ -49,37 +54,11 @@ impl SharedCache {
}
if let Some(d) = self.jump_destinations.lock().get_mut(code_hash) {
return d.clone();
return d.0.clone();
}
let d = Self::find_jump_destinations(code);
{
let mut cur_size = self.cur_size.lock();
*cur_size += d.capacity();
let mut jump_dests = self.jump_destinations.lock();
let cap = jump_dests.capacity();
// grow the cache as necessary; it operates on amount of items
// but we're working based on memory usage.
if jump_dests.len() == cap && *cur_size < self.max_size {
jump_dests.set_capacity(cap * 2);
}
// account for any element displaced from the cache.
if let Some(lru) = jump_dests.insert(code_hash.clone(), d.clone()) {
*cur_size -= lru.capacity();
}
// remove elements until we are below the memory target.
while *cur_size > self.max_size {
match jump_dests.remove_lru() {
Some((_, v)) => *cur_size -= v.capacity(),
_ => break,
}
}
}
self.jump_destinations.lock().insert(code_hash.clone(), Bits(d.clone()));
d
}
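What the cache stores is the interpreter's jump-destination analysis for a piece of code; the `Bits` newtype exists because `MemoryLruCache` requires `V: HeapSizeOf`, and the orphan rule prevents implementing that foreign trait for `Arc<BitSet>` directly. The analysis itself is roughly the following (a sketch based on the EVM rules, returning a `Vec<usize>` instead of the `Arc<BitSet>` the real `find_jump_destinations` produces):

```rust
// Sketch of the analysis whose result is being cached (assumed from the EVM
// rules, not copied from this file): every JUMPDEST (0x5b) that is not inside
// the immediate data of a PUSH1..PUSH32 (0x60..0x7f) is a valid jump target.
fn find_jump_destinations(code: &[u8]) -> Vec<usize> {
    let mut dests = Vec::new();
    let mut pc = 0;
    while pc < code.len() {
        let op = code[pc];
        if op == 0x5b {
            dests.push(pc);                 // JUMPDEST
        } else if op >= 0x60 && op <= 0x7f {
            pc += (op - 0x5f) as usize;     // skip the PUSHn immediate bytes
        }
        pc += 1;
    }
    dests
}

fn main() {
    // PUSH1 0x5b, JUMPDEST: the 0x5b inside the PUSH data must not count.
    assert_eq!(find_jump_destinations(&[0x60, 0x5b, 0x5b]), vec![2]);
}
```

Caching this per code hash means the scan runs once per distinct contract rather than once per call.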


@@ -247,23 +247,34 @@ impl Account {
}
/// Provide a database to get `code_hash`. Should not be called if it is a contract without code.
pub fn cache_code(&mut self, db: &HashDB) -> bool {
pub fn cache_code(&mut self, db: &HashDB) -> Option<Arc<Bytes>> {
// TODO: fill out self.code_cache;
trace!("Account::cache_code: ic={}; self.code_hash={:?}, self.code_cache={}", self.is_cached(), self.code_hash, self.code_cache.pretty());
self.is_cached() ||
if self.is_cached() { return Some(self.code_cache.clone()) }
match db.get(&self.code_hash) {
Some(x) => {
self.code_size = Some(x.len());
self.code_cache = Arc::new(x.to_vec());
true
Some(self.code_cache.clone())
},
_ => {
warn!("Failed reverse get of {}", self.code_hash);
false
None
},
}
}
/// Provide code to cache. For correctness, should be the correct code for the
/// account.
pub fn cache_given_code(&mut self, code: Arc<Bytes>) {
trace!("Account::cache_given_code: ic={}; self.code_hash={:?}, self.code_cache={}", self.is_cached(), self.code_hash, self.code_cache.pretty());
self.code_size = Some(code.len());
self.code_cache = code;
}
/// Provide a database to get `code_size`. Should not be called if it is a contract without code.
pub fn cache_code_size(&mut self, db: &HashDB) -> bool {
// TODO: fill out self.code_cache;
@@ -476,7 +487,7 @@ mod tests {
};
let mut a = Account::from_rlp(&rlp);
assert!(a.cache_code(&db.immutable()));
assert!(a.cache_code(&db.immutable()).is_some());
let mut a = Account::from_rlp(&rlp);
assert_eq!(a.note_code(vec![0x55, 0x44, 0xffu8]), Ok(()));
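Returning `Option<Arc<Bytes>>` instead of `bool` lets the caller both detect success and hand the freshly loaded code on to the global cache, while `cache_given_code` accepts code found there. `Arc` keeps this cheap: every cache level shares one allocation. A small standalone illustration (not Parity code):

```rust
use std::sync::Arc;

type Bytes = Vec<u8>;

fn main() {
    // One allocation of contract code...
    let code: Arc<Bytes> = Arc::new(vec![0x60, 0x00, 0x56]); // PUSH1 0x00, JUMP
    // ...can sit in the account's local code_cache and in the global code
    // cache at once; cloning an Arc only bumps a reference count.
    let in_account_cache = Arc::clone(&code);
    let in_global_cache = Arc::clone(&code);
    assert!(Arc::ptr_eq(&in_account_cache, &in_global_cache));
    assert_eq!(Arc::strong_count(&code), 3);
}
```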


@@ -599,14 +599,30 @@ impl State {
pod_state::diff_pod(&state_pre.to_pod(), &pod_state_post)
}
fn update_account_cache(require: RequireCache, account: &mut Account, db: &HashDB) {
match require {
RequireCache::None => {},
RequireCache::Code => {
account.cache_code(db);
}
RequireCache::CodeSize => {
account.cache_code_size(db);
// load required account data from the databases.
fn update_account_cache(require: RequireCache, account: &mut Account, state_db: &StateDB, db: &HashDB) {
match (account.is_cached(), require) {
(true, _) | (false, RequireCache::None) => {}
(false, require) => {
// if there's already code in the global cache, always cache it
// locally.
let hash = account.code_hash();
match state_db.get_cached_code(&hash) {
Some(code) => account.cache_given_code(code),
None => match require {
RequireCache::None => {},
RequireCache::Code => {
if let Some(code) = account.cache_code(db) {
// propagate code loaded from the database to
// the global code cache.
state_db.cache_code(hash, code)
}
}
RequireCache::CodeSize => {
account.cache_code_size(db);
}
}
}
}
}
}
@@ -620,7 +636,7 @@ impl State {
if let Some(ref mut maybe_acc) = self.cache.borrow_mut().get_mut(a) {
if let Some(ref mut account) = maybe_acc.account {
let accountdb = self.factories.accountdb.readonly(self.db.as_hashdb(), account.address_hash(a));
Self::update_account_cache(require, account, accountdb.as_hashdb());
Self::update_account_cache(require, account, &self.db, accountdb.as_hashdb());
return f(Some(account));
}
return f(None);
@@ -629,7 +645,7 @@ impl State {
let result = self.db.get_cached(a, |mut acc| {
if let Some(ref mut account) = acc {
let accountdb = self.factories.accountdb.readonly(self.db.as_hashdb(), account.address_hash(a));
Self::update_account_cache(require, account, accountdb.as_hashdb());
Self::update_account_cache(require, account, &self.db, accountdb.as_hashdb());
}
f(acc.map(|a| &*a))
});
@@ -647,7 +663,7 @@ impl State {
};
if let Some(ref mut account) = maybe_acc.as_mut() {
let accountdb = self.factories.accountdb.readonly(self.db.as_hashdb(), account.address_hash(a));
Self::update_account_cache(require, account, accountdb.as_hashdb());
Self::update_account_cache(require, account, &self.db, accountdb.as_hashdb());
}
let r = f(maybe_acc.as_ref());
self.insert_cache(a, AccountEntry::new_clean(maybe_acc));
@@ -703,7 +719,7 @@ impl State {
if require_code {
let addr_hash = account.address_hash(a);
let accountdb = self.factories.accountdb.readonly(self.db.as_hashdb(), addr_hash);
account.cache_code(accountdb.as_hashdb());
Self::update_account_cache(RequireCache::Code, account, &self.db, accountdb.as_hashdb());
}
account
},
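The `(account.is_cached(), require)` tuple match in `update_account_cache` front-loads the two cases that need no work at all; only the remaining case touches the global code cache and, failing that, the database, publishing any code it loads back into the global cache. The dispatch shortcut, restated as a self-contained sketch (placeholder function, not the real `State` API):

```rust
#[derive(Clone, Copy)]
enum RequireCache { None, Code, CodeSize }

/// Returns true only when some cache or database lookup is actually needed.
fn needs_lookup(code_already_cached: bool, require: RequireCache) -> bool {
    match (code_already_cached, require) {
        // already cached locally, or the caller asked for nothing: no work
        (true, _) | (false, RequireCache::None) => false,
        // otherwise: consult the global code cache, then the database
        (false, _) => true,
    }
}

fn main() {
    assert!(!needs_lookup(true, RequireCache::Code));
    assert!(needs_lookup(false, RequireCache::CodeSize));
    assert!(!needs_lookup(false, RequireCache::None));
}
```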


@@ -16,6 +16,7 @@
use std::collections::{VecDeque, HashSet};
use lru_cache::LruCache;
use util::cache::MemoryLruCache;
use util::journaldb::JournalDB;
use util::hash::{H256};
use util::hashdb::HashDB;
@@ -33,12 +34,17 @@ pub const ACCOUNT_BLOOM_HASHCOUNT_KEY: &'static [u8] = b"account_hash_count";
const STATE_CACHE_BLOCKS: usize = 12;
// The percentage of supplied cache size to go to accounts.
const ACCOUNT_CACHE_RATIO: usize = 90;
/// Shared canonical state cache.
struct AccountCache {
/// DB Account cache. `None` indicates that the account is known to be missing.
// When changing the type of the values here, be sure to update `mem_used` and
// `new`.
accounts: LruCache<Address, Option<Account>>,
/// DB Code cache. Maps code hashes to shared bytes.
code: MemoryLruCache<H256, Arc<Vec<u8>>>,
/// Information on the modifications in recently committed blocks; specifically which addresses
/// changed in which block. Ordered by block number.
modifications: VecDeque<BlockChanges>,
@@ -111,12 +117,15 @@ impl StateDB {
// into the `AccountCache` structure as its own `LruCache<(Address, H256), H256>`.
pub fn new(db: Box<JournalDB>, cache_size: usize) -> StateDB {
let bloom = Self::load_bloom(db.backing());
let cache_items = cache_size / ::std::mem::size_of::<Option<Account>>();
let acc_cache_size = cache_size * ACCOUNT_CACHE_RATIO / 100;
let code_cache_size = cache_size - acc_cache_size;
let cache_items = acc_cache_size / ::std::mem::size_of::<Option<Account>>();
StateDB {
db: db,
account_cache: Arc::new(Mutex::new(AccountCache {
accounts: LruCache::new(cache_items),
code: MemoryLruCache::new(code_cache_size),
modifications: VecDeque::new(),
})),
local_cache: Vec::new(),
@@ -342,7 +351,12 @@ impl StateDB {
/// Heap size used.
pub fn mem_used(&self) -> usize {
// TODO: account for LRU-cache overhead; this is a close approximation.
self.db.mem_used() + self.account_cache.lock().accounts.len() * ::std::mem::size_of::<Option<Account>>()
self.db.mem_used() + {
let cache = self.account_cache.lock();
cache.code.current_size() +
cache.accounts.len() * ::std::mem::size_of::<Option<Account>>()
}
}
/// Returns underlying `JournalDB`.
@@ -362,6 +376,15 @@ impl StateDB {
})
}
/// Add a global code cache entry. This doesn't need to worry about canonicality because
/// it simply maps hashes to raw code and will always be correct in the absence of
/// hash collisions.
pub fn cache_code(&self, hash: H256, code: Arc<Vec<u8>>) {
let mut cache = self.account_cache.lock();
cache.code.insert(hash, code);
}
/// Get basic copy of the cached account. Does not include storage.
/// Returns 'None' if cache is disabled or if the account is not cached.
pub fn get_cached_account(&self, addr: &Address) -> Option<Option<Account>> {
@@ -372,6 +395,13 @@ impl StateDB {
cache.accounts.get_mut(addr).map(|a| a.as_ref().map(|a| a.clone_basic()))
}
/// Get cached code based on hash.
pub fn get_cached_code(&self, hash: &H256) -> Option<Arc<Vec<u8>>> {
let mut cache = self.account_cache.lock();
cache.code.get_mut(hash).map(|code| code.clone())
}
/// Get value from a cached account.
/// Returns 'None' if cache is disabled or if the account is not cached.
pub fn get_cached<F, U>(&self, a: &Address, f: F) -> Option<U>
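`StateDB::new` now carves the single `cache_size` budget into two pools: 90% for the account LRU, which is still sized by entry count, and the remaining 10% for the byte-bounded code cache, with `mem_used` reporting both. A worked example of that arithmetic (the 32 MiB budget and the per-entry size are illustrative, not Parity defaults):

```rust
fn main() {
    const ACCOUNT_CACHE_RATIO: usize = 90;

    let cache_size = 32 * 1024 * 1024;                            // hypothetical total budget, in bytes
    let acc_cache_size = cache_size * ACCOUNT_CACHE_RATIO / 100;  // 90% for accounts
    let code_cache_size = cache_size - acc_cache_size;            // 10% for the code cache

    // Stand-in for ::std::mem::size_of::<Option<Account>>(); the real value
    // depends on the Account struct's layout.
    let account_entry_size = 160;
    let cache_items = acc_cache_size / account_entry_size;

    assert_eq!(code_cache_size, 3_355_444); // ~3.2 MiB of the 32 MiB budget
    println!("{} account slots, {} bytes for code", cache_items, code_cache_size);
}
```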


@@ -36,6 +36,7 @@ ansi_term = "0.7"
tiny-keccak= "1.0"
ethcore-bloom-journal = { path = "bloom" }
regex = "0.1"
lru-cache = "0.1.0"
[features]
default = []

util/src/cache.rs (new file)

@@ -0,0 +1,79 @@
// Copyright 2015, 2016 Ethcore (UK) Ltd.
// This file is part of Parity.
// Parity is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Parity is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Parity. If not, see <http://www.gnu.org/licenses/>.
//! Lru-cache related utilities as quick-and-dirty wrappers around the lru-cache
//! crate.
// TODO: push changes upstream in a clean way.
use heapsize::HeapSizeOf;
use lru_cache::LruCache;
use std::hash::Hash;
const INITIAL_CAPACITY: usize = 4;
/// An LRU-cache which operates on memory used.
pub struct MemoryLruCache<K: Eq + Hash, V: HeapSizeOf> {
inner: LruCache<K, V>,
cur_size: usize,
max_size: usize,
}
impl<K: Eq + Hash, V: HeapSizeOf> MemoryLruCache<K, V> {
/// Create a new cache with a maximum size in bytes.
pub fn new(max_size: usize) -> Self {
MemoryLruCache {
inner: LruCache::new(INITIAL_CAPACITY),
max_size: max_size,
cur_size: 0,
}
}
/// Insert an item.
pub fn insert(&mut self, key: K, val: V) {
let cap = self.inner.capacity();
// grow the cache as necessary; it operates on amount of items
// but we're working based on memory usage.
if self.inner.len() == cap && self.cur_size < self.max_size {
self.inner.set_capacity(cap * 2);
}
self.cur_size += val.heap_size_of_children();
// account for any element displaced from the cache.
if let Some(lru) = self.inner.insert(key, val) {
self.cur_size -= lru.heap_size_of_children();
}
// remove elements until we are below the memory target.
while self.cur_size > self.max_size {
match self.inner.remove_lru() {
Some((_, v)) => self.cur_size -= v.heap_size_of_children(),
_ => break,
}
}
}
/// Get a reference to an item in the cache. It is a logic error for its
/// heap size to be altered while borrowed.
pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
self.inner.get_mut(key)
}
/// Currently-used size of values in bytes.
pub fn current_size(&self) -> usize {
self.cur_size
}
}
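A usage sketch for the extracted cache (the key type, byte budget, and `Vec<u8>` values are arbitrary, and the `extern crate` line assumes the crate is consumed as `ethcore_util`, the way other crates in this tree import it): eviction is driven by the heap size the values report through `HeapSizeOf`, not by entry count, so an insertion that pushes past the budget evicts least-recently-used entries until the cache fits again.

```rust
extern crate ethcore_util as util;

use util::cache::MemoryLruCache;

fn main() {
    // 1 KiB budget for the values' reported heap size.
    let mut cache: MemoryLruCache<u32, Vec<u8>> = MemoryLruCache::new(1024);

    cache.insert(1, vec![0u8; 600]);   // fits within the budget
    cache.insert(2, vec![0u8; 600]);   // pushes past 1024 bytes: key 1 is evicted

    assert!(cache.get_mut(&1).is_none());
    assert!(cache.get_mut(&2).is_some());
    assert!(cache.current_size() <= 1024);
}
```

Note that the underlying lru-cache crate only exposes `get_mut`, which both returns the value and marks the entry as most recently used; that is why even read-only lookups here go through a mutable reference.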


@@ -105,6 +105,7 @@ extern crate ansi_term;
extern crate tiny_keccak;
extern crate rlp;
extern crate regex;
extern crate lru_cache;
#[macro_use]
extern crate heapsize;
@@ -143,6 +144,7 @@ pub mod semantic_version;
pub mod log;
pub mod path;
pub mod snappy;
pub mod cache;
mod timer;
pub use common::*;