Multithreaded snapshot creation (#9239)

* Add Progress to Snapshot Secondary chunks creation

* Use half of CPUs to multithread snapshot creation

* Use env var to define number of threads

* info to debug logs

* Add Snapshot threads as CLI option

* Randomize chunks per thread

* Remove randomness, add debugging

* Add warning

* Add tracing

* Use parity-common fix seek branch

* Fix log

* Fix tests

* Fix tests

* PR Grumbles

* PR Grumble II

* Update Cargo.lock

* PR Grumbles

* Default snapshot threads to half number of CPUs

* Fix default snapshot threads // min 1
This commit is contained in:
Nicolas Gotchac 2018-09-13 12:58:49 +02:00 committed by 5chdn
parent 552b381b37
commit 73e10cdbe0
No known key found for this signature in database
GPG Key ID: 1A40871B597F5F80
12 changed files with 452 additions and 336 deletions

619
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1153,7 +1153,8 @@ impl Client {
},
};
snapshot::take_snapshot(&*self.engine, &self.chain.read(), start_hash, db.as_hashdb(), writer, p)?;
let processing_threads = self.config.snapshot.processing_threads;
snapshot::take_snapshot(&*self.engine, &self.chain.read(), start_hash, db.as_hashdb(), writer, p, processing_threads)?;
Ok(())
}

View File

@ -19,6 +19,7 @@ use std::fmt::{Display, Formatter, Error as FmtError};
use verification::{VerifierType, QueueConfig};
use journaldb;
use snapshot::SnapshotConfiguration;
pub use std::time::Duration;
pub use blockchain::Config as BlockChainConfig;
@ -120,6 +121,8 @@ pub struct ClientConfig {
pub check_seal: bool,
/// Maximal number of transactions queued for verification in a separate thread.
pub transaction_verification_queue_size: usize,
/// Snapshot configuration
pub snapshot: SnapshotConfiguration,
}
impl Default for ClientConfig {
@ -144,6 +147,7 @@ impl Default for ClientConfig {
history_mem: 32 * mb,
check_seal: true,
transaction_verification_queue_size: 8192,
snapshot: Default::default(),
}
}
}

View File

@ -30,7 +30,7 @@ use machine::EthereumMachine;
use ids::BlockId;
use header::Header;
use receipt::Receipt;
use snapshot::{Error, ManifestData};
use snapshot::{Error, ManifestData, Progress};
use itertools::{Position, Itertools};
use rlp::{RlpStream, Rlp};
@ -59,6 +59,7 @@ impl SnapshotComponents for PoaSnapshot {
chain: &BlockChain,
block_at: H256,
sink: &mut ChunkSink,
_progress: &Progress,
preferred_size: usize,
) -> Result<(), Error> {
let number = chain.block_number(&block_at)

View File

@ -22,7 +22,7 @@ use std::sync::Arc;
use blockchain::{BlockChain, BlockChainDB};
use engines::EthEngine;
use snapshot::{Error, ManifestData};
use snapshot::{Error, ManifestData, Progress};
use ethereum_types::H256;
@ -49,6 +49,7 @@ pub trait SnapshotComponents: Send {
chain: &BlockChain,
block_at: H256,
chunk_sink: &mut ChunkSink,
progress: &Progress,
preferred_size: usize,
) -> Result<(), Error>;

View File

@ -28,7 +28,7 @@ use std::sync::Arc;
use blockchain::{BlockChain, BlockChainDB, BlockProvider};
use engines::EthEngine;
use snapshot::{Error, ManifestData};
use snapshot::{Error, ManifestData, Progress};
use snapshot::block::AbridgedBlock;
use ethereum_types::H256;
use kvdb::KeyValueDB;
@ -65,6 +65,7 @@ impl SnapshotComponents for PowSnapshot {
chain: &BlockChain,
block_at: H256,
chunk_sink: &mut ChunkSink,
progress: &Progress,
preferred_size: usize,
) -> Result<(), Error> {
PowWorker {
@ -72,6 +73,7 @@ impl SnapshotComponents for PowSnapshot {
rlps: VecDeque::new(),
current_hash: block_at,
writer: chunk_sink,
progress: progress,
preferred_size: preferred_size,
}.chunk_all(self.blocks)
}
@ -96,6 +98,7 @@ struct PowWorker<'a> {
rlps: VecDeque<Bytes>,
current_hash: H256,
writer: &'a mut ChunkSink<'a>,
progress: &'a Progress,
preferred_size: usize,
}
@ -138,6 +141,7 @@ impl<'a> PowWorker<'a> {
last = self.current_hash;
self.current_hash = block.header_view().parent_hash();
self.progress.blocks.fetch_add(1, Ordering::SeqCst);
}
if loaded_size != 0 {

View File

@ -20,6 +20,7 @@
//! https://wiki.parity.io/Warp-Sync-Snapshot-Format
use std::collections::{HashMap, HashSet};
use std::cmp;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use hash::{keccak, KECCAK_NULL_RLP, KECCAK_EMPTY};
@ -43,6 +44,7 @@ use trie::{Trie, TrieMut};
use ethtrie::{TrieDB, TrieDBMut};
use rlp::{RlpStream, Rlp};
use bloom_journal::Bloom;
use num_cpus;
use self::io::SnapshotWriter;
@ -88,6 +90,28 @@ const MAX_CHUNK_SIZE: usize = PREFERRED_CHUNK_SIZE / 4 * 5;
const MIN_SUPPORTED_STATE_CHUNK_VERSION: u64 = 1;
// current state chunk version.
const STATE_CHUNK_VERSION: u64 = 2;
/// number of snapshot subparts, must be a power of 2 in [1; 256]
const SNAPSHOT_SUBPARTS: usize = 16;
/// Maximum number of snapshot subparts (must be a multiple of `SNAPSHOT_SUBPARTS`)
const MAX_SNAPSHOT_SUBPARTS: usize = 256;
/// Configuration for the Snapshot service
#[derive(Debug, Clone, PartialEq)]
pub struct SnapshotConfiguration {
/// If `true`, no periodic snapshots will be created
pub no_periodic: bool,
/// Number of threads for creating snapshots
pub processing_threads: usize,
}
impl Default for SnapshotConfiguration {
fn default() -> Self {
SnapshotConfiguration {
no_periodic: false,
processing_threads: ::std::cmp::max(1, num_cpus::get() / 2),
}
}
}
/// A progress indicator for snapshots.
#[derive(Debug, Default)]
@ -130,7 +154,8 @@ pub fn take_snapshot<W: SnapshotWriter + Send>(
block_at: H256,
state_db: &HashDB<KeccakHasher>,
writer: W,
p: &Progress
p: &Progress,
processing_threads: usize,
) -> Result<(), Error> {
let start_header = chain.block_header_data(&block_at)
.ok_or(Error::InvalidStartingBlock(BlockId::Hash(block_at)))?;
@ -142,17 +167,45 @@ pub fn take_snapshot<W: SnapshotWriter + Send>(
let writer = Mutex::new(writer);
let chunker = engine.snapshot_components().ok_or(Error::SnapshotsUnsupported)?;
let snapshot_version = chunker.current_version();
let (state_hashes, block_hashes) = scope(|scope| {
let (state_hashes, block_hashes) = scope(|scope| -> Result<(Vec<H256>, Vec<H256>), Error> {
let writer = &writer;
let block_guard = scope.spawn(move || chunk_secondary(chunker, chain, block_at, writer, p));
let state_res = chunk_state(state_db, &state_root, writer, p);
state_res.and_then(|state_hashes| {
block_guard.join().map(|block_hashes| (state_hashes, block_hashes))
})
// The number of threads must be between 1 and SNAPSHOT_SUBPARTS
assert!(processing_threads >= 1, "Cannot use less than 1 threads for creating snapshots");
let num_threads: usize = cmp::min(processing_threads, SNAPSHOT_SUBPARTS);
info!(target: "snapshot", "Using {} threads for Snapshot creation.", num_threads);
let mut state_guards = Vec::with_capacity(num_threads as usize);
for thread_idx in 0..num_threads {
let state_guard = scope.spawn(move || -> Result<Vec<H256>, Error> {
let mut chunk_hashes = Vec::new();
for part in (thread_idx..SNAPSHOT_SUBPARTS).step_by(num_threads) {
debug!(target: "snapshot", "Chunking part {} in thread {}", part, thread_idx);
let mut hashes = chunk_state(state_db, &state_root, writer, p, Some(part))?;
chunk_hashes.append(&mut hashes);
}
Ok(chunk_hashes)
});
state_guards.push(state_guard);
}
let block_hashes = block_guard.join()?;
let mut state_hashes = Vec::new();
for guard in state_guards {
let part_state_hashes = guard.join()?;
state_hashes.extend(part_state_hashes);
}
debug!(target: "snapshot", "Took a snapshot of {} accounts", p.accounts.load(Ordering::SeqCst));
Ok((state_hashes, block_hashes))
})?;
info!("produced {} state chunks and {} block chunks.", state_hashes.len(), block_hashes.len());
info!(target: "snapshot", "produced {} state chunks and {} block chunks.", state_hashes.len(), block_hashes.len());
let manifest_data = ManifestData {
version: snapshot_version,
@ -200,6 +253,7 @@ pub fn chunk_secondary<'a>(mut chunker: Box<SnapshotComponents>, chain: &'a Bloc
chain,
start_hash,
&mut chunk_sink,
progress,
PREFERRED_CHUNK_SIZE,
)?;
}
@ -263,10 +317,12 @@ impl<'a> StateChunker<'a> {
/// Walk the given state database starting from the given root,
/// creating chunks and writing them out.
/// `part` is a number between 0 and 15, which describe which part of
/// the tree should be chunked.
///
/// Returns a list of hashes of chunks created, or any error it may
/// have encountered.
pub fn chunk_state<'a>(db: &HashDB<KeccakHasher>, root: &H256, writer: &Mutex<SnapshotWriter + 'a>, progress: &'a Progress) -> Result<Vec<H256>, Error> {
pub fn chunk_state<'a>(db: &HashDB<KeccakHasher>, root: &H256, writer: &Mutex<SnapshotWriter + 'a>, progress: &'a Progress, part: Option<usize>) -> Result<Vec<H256>, Error> {
let account_trie = TrieDB::new(db, &root)?;
let mut chunker = StateChunker {
@ -281,11 +337,33 @@ pub fn chunk_state<'a>(db: &HashDB<KeccakHasher>, root: &H256, writer: &Mutex<Sn
let mut used_code = HashSet::new();
// account_key here is the address' hash.
for item in account_trie.iter()? {
let mut account_iter = account_trie.iter()?;
let mut seek_to = None;
if let Some(part) = part {
assert!(part < 16, "Wrong chunk state part number (must be <16) in snapshot creation.");
let part_offset = MAX_SNAPSHOT_SUBPARTS / SNAPSHOT_SUBPARTS;
let mut seek_from = vec![0; 32];
seek_from[0] = (part * part_offset) as u8;
account_iter.seek(&seek_from)?;
// Set the upper-bound, except for the last part
if part < SNAPSHOT_SUBPARTS - 1 {
seek_to = Some(((part + 1) * part_offset) as u8)
}
}
for item in account_iter {
let (account_key, account_data) = item?;
let account = ::rlp::decode(&*account_data)?;
let account_key_hash = H256::from_slice(&account_key);
if seek_to.map_or(false, |seek_to| account_key[0] >= seek_to) {
break;
}
let account = ::rlp::decode(&*account_data)?;
let account_db = AccountDB::from_hash(db, account_key_hash);
let fat_rlps = account::to_fat_rlps(&account_key_hash, &account, &account_db, &mut used_code, PREFERRED_CHUNK_SIZE - chunker.chunk_size(), PREFERRED_CHUNK_SIZE)?;

View File

@ -22,7 +22,7 @@ use hash::{KECCAK_NULL_RLP, keccak};
use basic_account::BasicAccount;
use snapshot::account;
use snapshot::{chunk_state, Error as SnapshotError, Progress, StateRebuilder};
use snapshot::{chunk_state, Error as SnapshotError, Progress, StateRebuilder, SNAPSHOT_SUBPARTS};
use snapshot::io::{PackedReader, PackedWriter, SnapshotReader, SnapshotWriter};
use super::helpers::{compare_dbs, StateProducer};
@ -53,7 +53,11 @@ fn snap_and_restore() {
let state_root = producer.state_root();
let writer = Mutex::new(PackedWriter::new(&snap_file).unwrap());
let state_hashes = chunk_state(&old_db, &state_root, &writer, &Progress::default()).unwrap();
let mut state_hashes = Vec::new();
for part in 0..SNAPSHOT_SUBPARTS {
let mut hashes = chunk_state(&old_db, &state_root, &writer, &Progress::default(), Some(part)).unwrap();
state_hashes.append(&mut hashes);
}
writer.into_inner().finish(::snapshot::ManifestData {
version: 2,
@ -164,7 +168,7 @@ fn checks_flag() {
let state_root = producer.state_root();
let writer = Mutex::new(PackedWriter::new(&snap_file).unwrap());
let state_hashes = chunk_state(&old_db, &state_root, &writer, &Progress::default()).unwrap();
let state_hashes = chunk_state(&old_db, &state_root, &writer, &Progress::default(), None).unwrap();
writer.into_inner().finish(::snapshot::ManifestData {
version: 2,

View File

@ -865,6 +865,10 @@ usage! {
"--no-periodic-snapshot",
"Disable automated snapshots which usually occur once every 10000 blocks.",
ARG arg_snapshot_threads: (Option<usize>) = None, or |c: &Config| c.snapshots.as_ref()?.processing_threads,
"--snapshot-threads=[NUM]",
"Enables multiple threads for snapshots creation.",
["Whisper Options"]
FLAG flag_whisper: (bool) = false, or |c: &Config| c.whisper.as_ref()?.enabled,
"--whisper",
@ -1345,6 +1349,7 @@ struct Footprint {
#[serde(deny_unknown_fields)]
struct Snapshots {
disable_periodic: Option<bool>,
processing_threads: Option<usize>,
}
#[derive(Default, Debug, PartialEq, Deserialize)]
@ -1771,6 +1776,7 @@ mod tests {
arg_export_state_at: "latest".into(),
arg_snapshot_at: "latest".into(),
flag_no_periodic_snapshot: false,
arg_snapshot_threads: None,
// -- Whisper options.
flag_whisper: false,
@ -2021,6 +2027,7 @@ mod tests {
}),
snapshots: Some(Snapshots {
disable_periodic: Some(true),
processing_threads: None,
}),
misc: Some(Misc {
logging: Some("own_tx=trace".into()),

View File

@ -30,8 +30,10 @@ use sync::{NetworkConfiguration, validate_node_url, self};
use ethcore::ethstore::ethkey::{Secret, Public};
use ethcore::client::{VMType};
use ethcore::miner::{stratum, MinerOptions};
use ethcore::snapshot::SnapshotConfiguration;
use ethcore::verification::queue::VerifierSettings;
use miner::pool;
use num_cpus;
use rpc::{IpcConfiguration, HttpConfiguration, WsConfiguration};
use parity_rpc::NetworkSettings;
@ -125,6 +127,7 @@ impl Configuration {
let update_policy = self.update_policy()?;
let logger_config = self.logger_config();
let ws_conf = self.ws_config()?;
let snapshot_conf = self.snapshot_config()?;
let http_conf = self.http_config()?;
let ipc_conf = self.ipc_config()?;
let net_conf = self.net_config()?;
@ -298,6 +301,7 @@ impl Configuration {
file_path: self.args.arg_snapshot_file.clone(),
kind: snapshot::Kind::Take,
block_at: to_block_id(&self.args.arg_snapshot_at)?,
snapshot_conf: snapshot_conf,
};
Cmd::Snapshot(snapshot_cmd)
} else if self.args.cmd_restore {
@ -314,6 +318,7 @@ impl Configuration {
file_path: self.args.arg_restore_file.clone(),
kind: snapshot::Kind::Restore,
block_at: to_block_id("latest")?, // unimportant.
snapshot_conf: snapshot_conf,
};
Cmd::Snapshot(restore_cmd)
} else if self.args.cmd_export_hardcoded_sync {
@ -349,6 +354,7 @@ impl Configuration {
gas_price_percentile: self.args.arg_gas_price_percentile,
poll_lifetime: self.args.arg_poll_lifetime,
ws_conf: ws_conf,
snapshot_conf: snapshot_conf,
http_conf: http_conf,
ipc_conf: ipc_conf,
net_conf: net_conf,
@ -374,7 +380,6 @@ impl Configuration {
private_tx_enabled,
name: self.args.arg_identity,
custom_bootnodes: self.args.arg_bootnodes.is_some(),
no_periodic_snapshot: self.args.flag_no_periodic_snapshot,
check_seal: !self.args.flag_no_seal_check,
download_old_blocks: !self.args.flag_no_ancient_blocks,
verifier_settings: verifier_settings,
@ -890,6 +895,18 @@ impl Configuration {
Ok((provider_conf, encryptor_conf, self.args.flag_private_enabled))
}
fn snapshot_config(&self) -> Result<SnapshotConfiguration, String> {
let conf = SnapshotConfiguration {
no_periodic: self.args.flag_no_periodic_snapshot,
processing_threads: match self.args.arg_snapshot_threads {
Some(threads) if threads > 0 => threads,
_ => ::std::cmp::max(1, num_cpus::get() / 2),
},
};
Ok(conf)
}
fn network_settings(&self) -> Result<NetworkSettings, String> {
let http_conf = self.http_config()?;
let net_addresses = self.net_addresses()?;
@ -1398,7 +1415,7 @@ mod tests {
name: "".into(),
custom_bootnodes: false,
fat_db: Default::default(),
no_periodic_snapshot: false,
snapshot_conf: Default::default(),
stratum: None,
check_seal: true,
download_old_blocks: true,

View File

@ -25,7 +25,7 @@ use ethcore::account_provider::{AccountProvider, AccountProviderSettings};
use ethcore::client::{BlockId, CallContract, Client, Mode, DatabaseCompactionProfile, VMType, BlockChainClient, BlockInfo};
use ethcore::ethstore::ethkey;
use ethcore::miner::{stratum, Miner, MinerService, MinerOptions};
use ethcore::snapshot;
use ethcore::snapshot::{self, SnapshotConfiguration};
use ethcore::spec::{SpecParams, OptimizeFor};
use ethcore::verification::queue::VerifierSettings;
use ethcore_logger::{Config as LogConfig, RotatingLogger};
@ -119,7 +119,7 @@ pub struct RunCmd {
pub name: String,
pub custom_bootnodes: bool,
pub stratum: Option<stratum::Options>,
pub no_periodic_snapshot: bool,
pub snapshot_conf: SnapshotConfiguration,
pub check_seal: bool,
pub download_old_blocks: bool,
pub verifier_settings: VerifierSettings,
@ -531,6 +531,7 @@ fn execute_impl<Cr, Rr>(cmd: RunCmd, logger: Arc<RotatingLogger>, on_client_rq:
client_config.queue.verifier_settings = cmd.verifier_settings;
client_config.transaction_verification_queue_size = ::std::cmp::max(2048, txpool_size / 4);
client_config.snapshot = cmd.snapshot_conf.clone();
// set up bootnodes
let mut net_conf = cmd.net_conf;
@ -778,7 +779,7 @@ fn execute_impl<Cr, Rr>(cmd: RunCmd, logger: Arc<RotatingLogger>, on_client_rq:
});
// the watcher must be kept alive.
let watcher = match cmd.no_periodic_snapshot {
let watcher = match cmd.snapshot_conf.no_periodic {
true => None,
false => {
let sync = sync_provider.clone();

View File

@ -22,7 +22,7 @@ use std::sync::Arc;
use hash::keccak;
use ethcore::account_provider::AccountProvider;
use ethcore::snapshot::{Progress, RestorationStatus, SnapshotService as SS};
use ethcore::snapshot::{Progress, RestorationStatus, SnapshotConfiguration, SnapshotService as SS};
use ethcore::snapshot::io::{SnapshotReader, PackedReader, PackedWriter};
use ethcore::snapshot::service::Service as SnapshotService;
use ethcore::client::{Mode, DatabaseCompactionProfile, VMType};
@ -62,6 +62,7 @@ pub struct SnapshotCommand {
pub file_path: Option<String>,
pub kind: Kind,
pub block_at: BlockId,
pub snapshot_conf: SnapshotConfiguration,
}
// helper for reading chunks from arbitrary reader and feeding them into the
@ -165,7 +166,7 @@ impl SnapshotCommand {
execute_upgrades(&self.dirs.base, &db_dirs, algorithm, &self.compaction)?;
// prepare client config
let client_config = to_client_config(
let mut client_config = to_client_config(
&self.cache_config,
spec.name.to_lowercase(),
Mode::Active,
@ -180,6 +181,8 @@ impl SnapshotCommand {
true,
);
client_config.snapshot = self.snapshot_conf;
let restoration_db_handler = db::restoration_db_handler(&client_path, &client_config);
let client_db = restoration_db_handler.open(&client_path)
.map_err(|e| format!("Failed to open database {:?}", e))?;