cache manager and clearing tracing cache (#1769)

* removed configure_cache method * generic cache_manager struct * fixed #1743, tracing caches are cleared * removed deadlocks in garbage_collect, implemented HeapSizeOf for traces * trace cache config * fixed carbage typo
2016-07-31 00:19:27 +02:00 · 2016-07-31 00:19:27 +02:00 · bcf8cd6dc0
commit bcf8cd6dc0
parent b29329c3c5
9 changed files with 193 additions and 92 deletions
--- a/ethcore/src/blockchain/blockchain.rs
+++ b/ethcore/src/blockchain/blockchain.rs
@ -16,7 +16,6 @@

 //! Blockchain database.

-use std::sync::atomic::{AtomicUsize, Ordering as AtomicOrder};
 use bloomchain as bc;
 use util::*;
 use header::*;
@ -32,6 +31,7 @@ use blockchain::update::ExtrasUpdate;
 use blockchain::{CacheSize, ImportRoute, Config};
 use db::{Writable, Readable, CacheUpdatePolicy};
 use client::{DB_COL_EXTRA, DB_COL_HEADERS, DB_COL_BODIES};
+use cache_manager::CacheManager;

 const LOG_BLOOMS_LEVELS: usize = 3;
 const LOG_BLOOMS_ELEMENTS_PER_INDEX: usize = 16;
@ -130,11 +130,6 @@ enum CacheID {
 	BlockReceipts(H256),
 }

-struct CacheManager {
-	cache_usage: VecDeque<HashSet<CacheID>>,
-	in_use: HashSet<CacheID>,
-}
-
 impl bc::group::BloomGroupDatabase for BlockChain {
 	fn blooms_at(&self, position: &bc::group::GroupPosition) -> Option<bc::group::BloomGroup> {
 		let position = LogGroupPosition::from(position.clone());
@ -148,8 +143,6 @@ impl bc::group::BloomGroupDatabase for BlockChain {
 /// **Does not do input data verification.**
 pub struct BlockChain {
 	// All locks must be captured in the order declared here.
-	pref_cache_size: AtomicUsize,
-	max_cache_size: AtomicUsize,
 	blooms_config: bc::Config,

 	best_block: RwLock<BestBlock>,
@ -167,7 +160,7 @@ pub struct BlockChain {

 	db: Arc<Database>,

-	cache_man: RwLock<CacheManager>,
+	cache_man: RwLock<CacheManager<CacheID>>,
 }

 impl BlockProvider for BlockChain {
@ -297,8 +290,6 @@ impl BlockProvider for BlockChain {
 	}
 }

-const COLLECTION_QUEUE_SIZE: usize = 8;
-
 pub struct AncestryIter<'a> {
 	current: H256,
 	chain: &'a BlockChain,
@ -320,12 +311,10 @@ impl<'a> Iterator for AncestryIter<'a> {
 impl BlockChain {
 	/// Create new instance of blockchain from given Genesis
 	pub fn new(config: Config, genesis: &[u8], db: Arc<Database>) -> BlockChain {
-		let mut cache_man = CacheManager{cache_usage: VecDeque::new(), in_use: HashSet::new()};
-		(0..COLLECTION_QUEUE_SIZE).foreach(|_| cache_man.cache_usage.push_back(HashSet::new()));
+		// 400 is the estimated average size (in bytes) of a cache entry
+		let cache_man = CacheManager::new(config.pref_cache_size, config.max_cache_size, 400);

 		let bc = BlockChain {
-			pref_cache_size: AtomicUsize::new(config.pref_cache_size),
-			max_cache_size: AtomicUsize::new(config.max_cache_size),
 			blooms_config: bc::Config {
 				levels: LOG_BLOOMS_LEVELS,
 				elements_per_index: LOG_BLOOMS_ELEMENTS_PER_INDEX,
@ -449,12 +438,6 @@ impl BlockChain {
 		None
 	}

-	/// Set the cache configuration.
-	pub fn configure_cache(&self, pref_cache_size: usize, max_cache_size: usize) {
-		self.pref_cache_size.store(pref_cache_size, AtomicOrder::Relaxed);
-		self.max_cache_size.store(max_cache_size, AtomicOrder::Relaxed);
-	}
-
 	/// Returns a tree route between `from` and `to`, which is a tuple of:
 	///
 	/// - a vector of hashes of all blocks, ordered from `from` to `to`.
@ -874,74 +857,40 @@ impl BlockChain {
 	/// Let the cache system know that a cacheable item has been used.
 	fn note_used(&self, id: CacheID) {
 		let mut cache_man = self.cache_man.write();
-		if !cache_man.cache_usage[0].contains(&id) {
-			cache_man.cache_usage[0].insert(id.clone());
-			if cache_man.in_use.contains(&id) {
-				if let Some(c) = cache_man.cache_usage.iter_mut().skip(1).find(|e|e.contains(&id)) {
-					c.remove(&id);
-				}
-			} else {
-				cache_man.in_use.insert(id);
-			}
-		}
+		cache_man.note_used(id);
 	}

 	/// Ticks our cache system and throws out any old data.
 	pub fn collect_garbage(&self) {
-		if self.cache_size().total() < self.pref_cache_size.load(AtomicOrder::Relaxed) {
-			// rotate cache
-			let mut cache_man = self.cache_man.write();
-			const AVERAGE_BYTES_PER_CACHE_ENTRY: usize = 400; //estimated
-			if cache_man.cache_usage[0].len() > self.pref_cache_size.load(AtomicOrder::Relaxed) / COLLECTION_QUEUE_SIZE / AVERAGE_BYTES_PER_CACHE_ENTRY {
-				trace!("Cache rotation, cache_size = {}", self.cache_size().total());
-				let cache = cache_man.cache_usage.pop_back().unwrap();
-				cache_man.cache_usage.push_front(cache);
-			}
-			return;
-		}
+		let mut cache_man = self.cache_man.write();
+		cache_man.collect_garbage(|| self.cache_size().total(), | ids | {
+			let mut block_headers = self.block_headers.write();
+			let mut block_bodies = self.block_bodies.write();
+			let mut block_details = self.block_details.write();
+			let mut block_hashes = self.block_hashes.write();
+			let mut transaction_addresses = self.transaction_addresses.write();
+			let mut blocks_blooms = self.blocks_blooms.write();
+			let mut block_receipts = self.block_receipts.write();

-		for i in 0..COLLECTION_QUEUE_SIZE {
-			{
-				trace!("Cache cleanup round started {}, cache_size = {}", i, self.cache_size().total());
-				let mut block_headers = self.block_headers.write();
-				let mut block_bodies = self.block_bodies.write();
-				let mut block_details = self.block_details.write();
-				let mut block_hashes = self.block_hashes.write();
-				let mut transaction_addresses = self.transaction_addresses.write();
-				let mut blocks_blooms = self.blocks_blooms.write();
-				let mut block_receipts = self.block_receipts.write();
-				let mut cache_man = self.cache_man.write();
-
-				for id in cache_man.cache_usage.pop_back().unwrap().into_iter() {
-					cache_man.in_use.remove(&id);
-					match id {
-						CacheID::BlockHeader(h) => { block_headers.remove(&h); },
-						CacheID::BlockBody(h) => { block_bodies.remove(&h); },
-						CacheID::BlockDetails(h) => { block_details.remove(&h); }
-						CacheID::BlockHashes(h) => { block_hashes.remove(&h); }
-						CacheID::TransactionAddresses(h) => { transaction_addresses.remove(&h); }
-						CacheID::BlocksBlooms(h) => { blocks_blooms.remove(&h); }
-						CacheID::BlockReceipts(h) => { block_receipts.remove(&h); }
-					}
+			for id in &ids {
+				match *id {
+					CacheID::BlockHeader(ref h) => { block_headers.remove(h); },
+					CacheID::BlockBody(ref h) => { block_bodies.remove(h); },
+					CacheID::BlockDetails(ref h) => { block_details.remove(h); }
+					CacheID::BlockHashes(ref h) => { block_hashes.remove(h); }
+					CacheID::TransactionAddresses(ref h) => { transaction_addresses.remove(h); }
+					CacheID::BlocksBlooms(ref h) => { blocks_blooms.remove(h); }
+					CacheID::BlockReceipts(ref h) => { block_receipts.remove(h); }
 				}
-				cache_man.cache_usage.push_front(HashSet::new());
-
-				// TODO: handle block_hashes properly.
-				block_hashes.clear();
-
-				block_headers.shrink_to_fit();
-				block_bodies.shrink_to_fit();
-				block_details.shrink_to_fit();
- 				block_hashes.shrink_to_fit();
- 				transaction_addresses.shrink_to_fit();
- 				blocks_blooms.shrink_to_fit();
- 				block_receipts.shrink_to_fit();
 			}
-			trace!("Cache cleanup round complete {}, cache_size = {}", i, self.cache_size().total());
-			if self.cache_size().total() < self.max_cache_size.load(AtomicOrder::Relaxed) { break; }
-		}
-
-		// TODO: m_lastCollection = chrono::system_clock::now();
+			block_headers.shrink_to_fit();
+			block_bodies.shrink_to_fit();
+			block_details.shrink_to_fit();
+			block_hashes.shrink_to_fit();
+			transaction_addresses.shrink_to_fit();
+			blocks_blooms.shrink_to_fit();
+			block_receipts.shrink_to_fit();
+		});
 	}

 	/// Create a block body from a block.
--- a/ethcore/src/cache_manager.rs
+++ b/ethcore/src/cache_manager.rs
@ -0,0 +1,69 @@
+// Copyright 2015, 2016 Ethcore (UK) Ltd.
+// This file is part of Parity.
+
+// Parity is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Parity is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Parity.  If not, see <http://www.gnu.org/licenses/>.
+
+use std::collections::{VecDeque, HashSet};
+use std::hash::Hash;
+
+const COLLECTION_QUEUE_SIZE: usize = 8;
+
+pub struct CacheManager<T> where T: Eq + Hash {
+	pref_cache_size: usize,
+	max_cache_size: usize,
+	bytes_per_cache_entry: usize,
+	cache_usage: VecDeque<HashSet<T>>
+}
+
+impl<T> CacheManager<T> where T: Eq + Hash {
+	pub fn new(pref_cache_size: usize, max_cache_size: usize, bytes_per_cache_entry: usize) -> Self {
+		CacheManager {
+			pref_cache_size: pref_cache_size,
+			max_cache_size: max_cache_size,
+			bytes_per_cache_entry: bytes_per_cache_entry,
+			cache_usage: (0..COLLECTION_QUEUE_SIZE).into_iter().map(|_| Default::default()).collect(),
+		}
+	}
+
+	pub fn note_used(&mut self, id: T) {
+		if !self.cache_usage[0].contains(&id) {
+			if let Some(c) = self.cache_usage.iter_mut().skip(1).find(|e| e.contains(&id)) {
+				c.remove(&id);
+			}
+			self.cache_usage[0].insert(id);
+		}
+	}
+
+	pub fn collect_garbage<C, F>(&mut self, current_size: C, mut notify_unused: F) where C: Fn() -> usize, F: FnMut(HashSet<T>) {
+		if current_size() < self.pref_cache_size {
+			self.rotate_cache_if_needed();
+			return;
+		}
+
+		for _ in 0..COLLECTION_QUEUE_SIZE {
+			notify_unused(self.cache_usage.pop_back().unwrap());
+			self.cache_usage.push_front(Default::default());
+			if current_size() < self.max_cache_size {
+				break;
+			}
+		}
+	}
+
+	fn rotate_cache_if_needed(&mut self) {
+		if self.cache_usage[0].len() * self.bytes_per_cache_entry > self.pref_cache_size / COLLECTION_QUEUE_SIZE {
+			let cache = self.cache_usage.pop_back().unwrap();
+			self.cache_usage.push_front(cache);
+		}
+	}
+}
--- a/ethcore/src/client/client.rs
+++ b/ethcore/src/client/client.rs
@ -549,6 +549,7 @@ impl Client {
 	pub fn tick(&self) {
 		self.chain.collect_garbage();
 		self.block_queue.collect_garbage();
+		self.tracedb.collect_garbage();

 		match self.mode {
 			Mode::Dark(timeout) => {
@ -582,11 +583,6 @@ impl Client {
 		}
 	}

-	/// Set up the cache behaviour.
-	pub fn configure_cache(&self, pref_cache_size: usize, max_cache_size: usize) {
-		self.chain.configure_cache(pref_cache_size, max_cache_size);
-	}
-
 	/// Look up the block number for the given block ID.
 	pub fn block_number(&self, id: BlockID) -> Option<BlockNumber> {
 		match id {
--- a/ethcore/src/lib.rs
+++ b/ethcore/src/lib.rs
@ -118,6 +118,7 @@ pub mod snapshot;
 pub mod action_params;
 #[macro_use] pub mod evm;

+mod cache_manager;
 mod blooms;
 mod db;
 mod common;
--- a/ethcore/src/trace/config.rs
+++ b/ethcore/src/trace/config.rs
@ -68,8 +68,10 @@ pub struct Config {
 	pub enabled: Switch,
 	/// Traces blooms configuration.
 	pub blooms: BloomConfig,
-	/// Database cache-size if not default
-	pub db_cache_size: Option<usize>,
+	/// Preferred cache-size.
+	pub pref_cache_size: usize,
+	/// Max cache-size.
+	pub max_cache_size: usize,
 }

 impl Default for Config {
@ -80,7 +82,8 @@ impl Default for Config {
 				levels: 3,
 				elements_per_index: 16,
 			},
-			db_cache_size: None,
+			pref_cache_size: 15 * 1024 * 1024,
+			max_cache_size: 20 * 1024 * 1024,
 		}
 	}
 }
--- a/ethcore/src/trace/db.rs
+++ b/ethcore/src/trace/db.rs
@ -20,14 +20,14 @@ use std::collections::HashMap;
 use std::sync::Arc;
 use bloomchain::{Number, Config as BloomConfig};
 use bloomchain::group::{BloomGroupDatabase, BloomGroupChain, GroupPosition, BloomGroup};
-use util::{H256, H264, Database, DBTransaction, RwLock};
+use util::{H256, H264, Database, DBTransaction, RwLock, HeapSizeOf};
 use header::BlockNumber;
 use trace::{LocalizedTrace, Config, Switch, Filter, Database as TraceDatabase, ImportRequest, DatabaseExtras, Error};
 use db::{Key, Writable, Readable, CacheUpdatePolicy};
 use blooms;
 use super::flat::{FlatTrace, FlatBlockTraces, FlatTransactionTraces};
 use client::DB_COL_TRACE;
-
+use cache_manager::CacheManager;

 const TRACE_DB_VER: &'static [u8] = b"1.0";

@ -62,6 +62,12 @@ impl From<GroupPosition> for TraceGroupPosition {
 	}
 }

+impl HeapSizeOf for TraceGroupPosition {
+	fn heap_size_of_children(&self) -> usize {
+		0
+	}
+}
+
 /// Helper data structure created cause [u8; 6] does not implement Deref to &[u8].
 pub struct TraceGroupKey([u8; 6]);

@ -88,11 +94,18 @@ impl Key<blooms::BloomGroup> for TraceGroupPosition {
 	}
 }

+#[derive(Debug, Hash, Eq, PartialEq)]
+enum CacheID {
+	Trace(H256),
+	Bloom(TraceGroupPosition),
+}
+
 /// Trace database.
 pub struct TraceDB<T> where T: DatabaseExtras {
 	// cache
 	traces: RwLock<HashMap<H256, FlatBlockTraces>>,
 	blooms: RwLock<HashMap<TraceGroupPosition, blooms::BloomGroup>>,
+	cache_manager: RwLock<CacheManager<CacheID>>,
 	// db
 	tracesdb: Arc<Database>,
 	// config,
@ -106,6 +119,7 @@ pub struct TraceDB<T> where T: DatabaseExtras {
 impl<T> BloomGroupDatabase for TraceDB<T> where T: DatabaseExtras {
 	fn blooms_at(&self, position: &GroupPosition) -> Option<BloomGroup> {
 		let position = TraceGroupPosition::from(position.clone());
+		self.note_used(CacheID::Bloom(position.clone()));
 		self.tracesdb.read_with_cache(DB_COL_TRACE, &self.blooms, &position).map(Into::into)
 	}
 }
@ -136,6 +150,7 @@ impl<T> TraceDB<T> where T: DatabaseExtras {
 		let db = TraceDB {
 			traces: RwLock::new(HashMap::new()),
 			blooms: RwLock::new(HashMap::new()),
+			cache_manager: RwLock::new(CacheManager::new(config.pref_cache_size, config.max_cache_size, 10 * 1024)),
 			tracesdb: tracesdb,
 			bloom_config: config.blooms,
 			enabled: enabled,
@ -145,8 +160,39 @@ impl<T> TraceDB<T> where T: DatabaseExtras {
 		Ok(db)
 	}

+	fn cache_size(&self) -> usize {
+		let traces = self.traces.read().heap_size_of_children();
+		let blooms = self.blooms.read().heap_size_of_children();
+		traces + blooms
+	}
+
+	/// Let the cache system know that a cacheable item has been used.
+	fn note_used(&self, id: CacheID) {
+		let mut cache_manager = self.cache_manager.write();
+		cache_manager.note_used(id);
+	}
+
+	/// Ticks our cache system and throws out any old data.
+	pub fn collect_garbage(&self) {
+		let mut cache_manager = self.cache_manager.write();
+		cache_manager.collect_garbage(|| self.cache_size(), | ids | {
+			let mut traces = self.traces.write();
+			let mut blooms = self.blooms.write();
+
+			for id in &ids {
+				match *id {
+					CacheID::Trace(ref h) => { traces.remove(h); },
+					CacheID::Bloom(ref h) => { blooms.remove(h); },
+				}
+			}
+			traces.shrink_to_fit();
+			blooms.shrink_to_fit();
+		});
+	}
+
 	/// Returns traces for block with hash.
 	fn traces(&self, block_hash: &H256) -> Option<FlatBlockTraces> {
+		self.note_used(CacheID::Trace(block_hash.clone()));
 		self.tracesdb.read_with_cache(DB_COL_TRACE, &self.traces, block_hash)
 	}

@ -221,6 +267,7 @@ impl<T> TraceDatabase for TraceDB<T> where T: DatabaseExtras {
 			let mut traces = self.traces.write();
 			// it's important to use overwrite here,
 			// cause this value might be queried by hash later
+			self.note_used(CacheID::Trace(request.block_hash.clone()));
 			batch.write_with_cache(DB_COL_TRACE, traces.deref_mut(), request.block_hash, request.traces, CacheUpdatePolicy::Overwrite);
 		}

@ -247,6 +294,9 @@ impl<T> TraceDatabase for TraceDB<T> where T: DatabaseExtras {
 				.collect::<HashMap<TraceGroupPosition, blooms::BloomGroup>>();

 			let mut blooms = self.blooms.write();
+			for key in blooms_to_insert.keys() {
+				self.note_used(CacheID::Bloom(key.clone()));
+			}
 			batch.extend_with_cache(DB_COL_TRACE, blooms.deref_mut(), blooms_to_insert, CacheUpdatePolicy::Remove);
 		}
 	}
--- a/ethcore/src/types/trace_types/flat.rs
+++ b/ethcore/src/types/trace_types/flat.rs
@ -20,6 +20,7 @@ use std::collections::VecDeque;
 use std::mem;
 use ipc::binary::BinaryConvertError;
 use util::rlp::*;
+use util::HeapSizeOf;
 use basic_types::LogBloom;
 use super::trace::{Action, Res};

@ -47,6 +48,12 @@ impl FlatTrace {
 	}
 }

+impl HeapSizeOf for FlatTrace {
+	fn heap_size_of_children(&self) -> usize {
+		self.trace_address.heap_size_of_children()
+	}
+}
+
 impl Encodable for FlatTrace {
 	fn rlp_append(&self, s: &mut RlpStream) {
 		s.begin_list(4);
@ -82,6 +89,12 @@ impl From<Vec<FlatTrace>> for FlatTransactionTraces {
 	}
 }

+impl HeapSizeOf for FlatTransactionTraces {
+	fn heap_size_of_children(&self) -> usize {
+		self.0.heap_size_of_children()
+	}
+}
+
 impl FlatTransactionTraces {
 	/// Returns bloom of all traces in the collection.
 	pub fn bloom(&self) -> LogBloom {
@ -111,6 +124,12 @@ impl Into<Vec<FlatTrace>> for FlatTransactionTraces {
 #[derive(Debug, PartialEq, Clone)]
 pub struct FlatBlockTraces(Vec<FlatTransactionTraces>);

+impl HeapSizeOf for FlatBlockTraces {
+	fn heap_size_of_children(&self) -> usize {
+		self.0.heap_size_of_children()
+	}
+}
+
 impl From<Vec<FlatTransactionTraces>> for FlatBlockTraces {
 	fn from(v: Vec<FlatTransactionTraces>) -> Self {
 		FlatBlockTraces(v)
--- a/parity/cache.rs
+++ b/parity/cache.rs
@ -20,6 +20,7 @@ const MIN_BC_CACHE_MB: u32 = 4;
 const MIN_DB_CACHE_MB: u32 = 2;
 const MIN_BLOCK_QUEUE_SIZE_LIMIT_MB: u32 = 16;
 const DEFAULT_BLOCK_QUEUE_SIZE_LIMIT_MB: u32 = 50;
+const DEFAULT_TRACE_CACHE_SIZE: u32 = 20;

 /// Configuration for application cache sizes.
 /// All	values are represented in MB.
@ -34,6 +35,8 @@ pub struct CacheConfig {
 	blockchain: u32,
 	/// Size of transaction queue cache.
 	queue: u32,
+	/// Size of traces cache.
+	traces: u32,
 }

 impl Default for CacheConfig {
@ -49,6 +52,7 @@ impl CacheConfig {
 			db: total * 7 / 8,
 			blockchain: total / 8,
 			queue: DEFAULT_BLOCK_QUEUE_SIZE_LIMIT_MB,
+			traces: DEFAULT_TRACE_CACHE_SIZE,
 		}
 	}

@ -58,6 +62,7 @@ impl CacheConfig {
 			db: db,
 			blockchain: blockchain,
 			queue: queue,
+			traces: DEFAULT_TRACE_CACHE_SIZE,
 		}
 	}

@ -80,6 +85,11 @@ impl CacheConfig {
 	pub fn blockchain(&self) -> u32 {
 		max(self.blockchain, MIN_BC_CACHE_MB)
 	}
+
+	/// Size of the traces cache.
+	pub fn traces(&self) -> u32 {
+		self.traces
+	}
 }

 #[cfg(test)]
--- a/parity/helpers.rs
+++ b/parity/helpers.rs
@ -212,6 +212,10 @@ pub fn to_client_config(
 	client_config.db_cache_size = Some(cache_config.db_state_cache_size() as usize);
 	// db queue cache size, in bytes
 	client_config.queue.max_mem_use = cache_config.queue() as usize * mb;
+	// in bytes
+	client_config.tracing.max_cache_size = cache_config.traces() as usize * mb;
+	// in bytes
+	client_config.tracing.pref_cache_size = cache_config.traces() as usize * 3 / 4 * mb;

 	client_config.mode = mode;
 	client_config.tracing.enabled = tracing;