diff --git a/ethcore/blockchain/src/best_block.rs b/ethcore/blockchain/src/best_block.rs index 20f247391..cddb79835 100644 --- a/ethcore/blockchain/src/best_block.rs +++ b/ethcore/blockchain/src/best_block.rs @@ -24,7 +24,8 @@ use common_types::header::Header; /// For GHOST fork-choice rule it would typically describe the block with highest /// combined difficulty (usually the block with the highest block number). /// -/// Sometimes refered as 'latest block'. +/// Sometimes referred as 'latest block'. +#[derive(Debug)] pub struct BestBlock { /// Best block decoded header. pub header: Header, @@ -35,7 +36,7 @@ pub struct BestBlock { } /// Best ancient block info. If the blockchain has a gap this keeps track of where it starts. -#[derive(Default)] +#[derive(Debug, Default)] pub struct BestAncientBlock { /// Best block hash. pub hash: H256, diff --git a/ethcore/blockchain/src/blockchain.rs b/ethcore/blockchain/src/blockchain.rs index dbe18f253..f1350f957 100644 --- a/ethcore/blockchain/src/blockchain.rs +++ b/ethcore/blockchain/src/blockchain.rs @@ -652,10 +652,7 @@ impl BlockChain { // and write them if let (Some(hash), Some(number)) = (best_ancient, best_ancient_number) { let mut best_ancient_block = bc.best_ancient_block.write(); - *best_ancient_block = Some(BestAncientBlock { - hash: hash, - number: number, - }); + *best_ancient_block = Some(BestAncientBlock { hash, number }); } } diff --git a/ethcore/db/src/keys.rs b/ethcore/db/src/keys.rs index ceab94211..d7db42bf6 100644 --- a/ethcore/db/src/keys.rs +++ b/ethcore/db/src/keys.rs @@ -205,7 +205,7 @@ pub struct TransactionAddress { } /// Contains all block receipts. -#[derive(Clone, RlpEncodableWrapper, RlpDecodableWrapper, MallocSizeOf)] +#[derive(Debug, Clone, RlpEncodableWrapper, RlpDecodableWrapper, MallocSizeOf)] pub struct BlockReceipts { /// Block receipts pub receipts: Vec, @@ -214,9 +214,7 @@ pub struct BlockReceipts { impl BlockReceipts { /// Create new block receipts wrapper. pub fn new(receipts: Vec) -> Self { - BlockReceipts { - receipts: receipts - } + BlockReceipts { receipts } } } diff --git a/ethcore/src/snapshot/consensus/authority.rs b/ethcore/src/snapshot/consensus/authority.rs index 4ff812bdb..4f65d4290 100644 --- a/ethcore/src/snapshot/consensus/authority.rs +++ b/ethcore/src/snapshot/consensus/authority.rs @@ -348,7 +348,7 @@ impl Rebuilder for ChunkRebuilder { Ok(()) } - fn finalize(&mut self, _engine: &dyn Engine) -> Result<(), ::error::Error> { + fn finalize(&mut self) -> Result<(), ::error::Error> { if !self.had_genesis { return Err(Error::WrongChunkFormat("No genesis transition included.".into()).into()); } @@ -358,6 +358,7 @@ impl Rebuilder for ChunkRebuilder { None => return Err(Error::WrongChunkFormat("Warp target block not included.".into()).into()), }; + trace!(target: "snapshot", "rebuilder, finalize: verifying {} unverified first blocks", self.unverified_firsts.len()); // verify the first entries of chunks we couldn't before. // we store all last verifiers, but not all firsts. // match each unverified first epoch with a last epoch verifier. diff --git a/ethcore/src/snapshot/consensus/mod.rs b/ethcore/src/snapshot/consensus/mod.rs index 4262248b1..d6f317538 100644 --- a/ethcore/src/snapshot/consensus/mod.rs +++ b/ethcore/src/snapshot/consensus/mod.rs @@ -92,5 +92,5 @@ pub trait Rebuilder: Send { /// /// This should apply the necessary "glue" between chunks, /// and verify against the restored state. - fn finalize(&mut self, engine: &dyn Engine) -> Result<(), ::error::Error>; + fn finalize(&mut self) -> Result<(), ::error::Error>; } diff --git a/ethcore/src/snapshot/consensus/work.rs b/ethcore/src/snapshot/consensus/work.rs index 4bb0c758a..eda200d66 100644 --- a/ethcore/src/snapshot/consensus/work.rs +++ b/ethcore/src/snapshot/consensus/work.rs @@ -208,15 +208,15 @@ impl PowRebuilder { /// Create a new PowRebuilder. fn new(chain: BlockChain, db: Arc, manifest: &ManifestData, snapshot_blocks: u64) -> Result { Ok(PowRebuilder { - chain: chain, - db: db, + chain, + db, rng: OsRng::new().map_err(|e| format!("{}", e))?, disconnected: Vec::new(), best_number: manifest.block_number, best_hash: manifest.block_hash, best_root: manifest.state_root, fed_blocks: 0, - snapshot_blocks: snapshot_blocks, + snapshot_blocks, }) } } @@ -298,9 +298,9 @@ impl Rebuilder for PowRebuilder { } /// Glue together any disconnected chunks and check that the chain is complete. - fn finalize(&mut self, _: &dyn Engine) -> Result<(), ::error::Error> { + fn finalize(&mut self) -> Result<(), ::error::Error> { let mut batch = self.db.transaction(); - + trace!(target: "snapshot", "rebuilder, finalize: inserting {} disconnected chunks", self.disconnected.len()); for (first_num, first_hash) in self.disconnected.drain(..) { let parent_num = first_num - 1; diff --git a/ethcore/src/snapshot/error.rs b/ethcore/src/snapshot/error.rs index 8381bd4cb..68742e2e1 100644 --- a/ethcore/src/snapshot/error.rs +++ b/ethcore/src/snapshot/error.rs @@ -68,8 +68,8 @@ pub enum Error { BadEpochProof(u64), /// Wrong chunk format. WrongChunkFormat(String), - /// Unlinked ancient block chain - UnlinkedAncientBlockChain, + /// Unlinked ancient block chain; includes the parent hash where linkage failed + UnlinkedAncientBlockChain(H256), } impl error::Error for Error { @@ -108,7 +108,7 @@ impl fmt::Display for Error { Error::SnapshotAborted => write!(f, "Snapshot was aborted."), Error::BadEpochProof(i) => write!(f, "Bad epoch proof for transition to epoch {}", i), Error::WrongChunkFormat(ref msg) => write!(f, "Wrong chunk format: {}", msg), - Error::UnlinkedAncientBlockChain => write!(f, "Unlinked ancient blocks chain"), + Error::UnlinkedAncientBlockChain(parent_hash) => write!(f, "Unlinked ancient blocks chain at parent_hash={:#x}", parent_hash), } } } diff --git a/ethcore/src/snapshot/service.rs b/ethcore/src/snapshot/service.rs index 3e914035e..5e1efe138 100644 --- a/ethcore/src/snapshot/service.rs +++ b/ethcore/src/snapshot/service.rs @@ -43,6 +43,7 @@ use bytes::Bytes; use journaldb::Algorithm; use kvdb::DBTransaction; use snappy; +use snapshot::error::Error::UnlinkedAncientBlockChain; /// Helper for removing directories in case of error. struct Guard(bool, PathBuf); @@ -110,17 +111,17 @@ impl Restoration { let secondary = components.rebuilder(chain, raw_db.clone(), &manifest)?; - let root = manifest.state_root.clone(); + let final_state_root = manifest.state_root.clone(); Ok(Restoration { - manifest: manifest, + manifest, state_chunks_left: state_chunks, block_chunks_left: block_chunks, state: StateRebuilder::new(raw_db.key_value().clone(), params.pruning), - secondary: secondary, + secondary, writer: params.writer, snappy_buffer: Vec::new(), - final_state_root: root, + final_state_root, guard: params.guard, db: raw_db, }) @@ -170,7 +171,7 @@ impl Restoration { } // finish up restoration. - fn finalize(mut self, engine: &dyn Engine) -> Result<(), Error> { + fn finalize(mut self) -> Result<(), Error> { use trie::TrieError; if !self.is_done() { return Ok(()) } @@ -186,13 +187,14 @@ impl Restoration { self.state.finalize(self.manifest.block_number, self.manifest.block_hash)?; // connect out-of-order chunks and verify chain integrity. - self.secondary.finalize(engine)?; + self.secondary.finalize()?; if let Some(writer) = self.writer { writer.finish(self.manifest)?; } self.guard.disarm(); + trace!(target: "snapshot", "restoration finalised correctly"); Ok(()) } @@ -337,16 +339,6 @@ impl Service { dir } - // replace one the client's database with our own. - fn replace_client_db(&self) -> Result<(), Error> { - let migrated_blocks = self.migrate_blocks()?; - info!(target: "snapshot", "Migrated {} ancient blocks", migrated_blocks); - - let rest_db = self.restoration_db(); - self.client.restore_db(&*rest_db.to_string_lossy())?; - Ok(()) - } - // Migrate the blocks in the current DB into the new chain fn migrate_blocks(&self) -> Result { // Count the number of migrated blocks @@ -361,11 +353,27 @@ impl Service { // The old database looks like this: // [genesis, best_ancient_block] ... [first_block, best_block] - // If we are fully synced neither `best_ancient_block` nor `first_block` is set, and we can assume that the whole range from [genesis, best_block] is imported. - // The new database only contains the tip of the chain ([first_block, best_block]), + // If we are fully synced neither `best_ancient_block` nor `first_block` is set, and we can + // assume that the whole range from [genesis, best_block] is imported. + // The new database only contains the tip of the chain ([new_first_block, new_best_block]), // so the useful set of blocks is defined as: // [0 ... min(new.first_block, best_ancient_block or best_block)] + // + // If, for whatever reason, the old db does not have ancient blocks (i.e. + // `best_ancient_block` is `None` AND a non-zero `first_block`), such that the old db looks + // like [old_first_block..old_best_block] (which may or may not partially overlap with + // [new_first_block..new_best_block]) we do the conservative thing and do not migrate the + // old blocks. let find_range = || -> Option<(H256, H256)> { + // In theory, if the current best_block is > new first_block (i.e. ranges overlap) + // we could salvage them but what if there's been a re-org at the boundary and the two + // chains do not match anymore? We'd have to check the existing blocks carefully. + if cur_chain_info.ancient_block_number.is_none() && cur_chain_info.first_block_number.unwrap_or(0) > 0 { + info!(target: "blockchain", "blocks in the current DB do not stretch back to genesis; can't salvage them into the new DB. In current DB, first block: #{:?}/{:?}, best block: #{:?}/{:?}", + cur_chain_info.first_block_number, cur_chain_info.first_block_hash, + cur_chain_info.best_block_number, cur_chain_info.best_block_hash); + return None; + } let next_available_from = next_chain_info.first_block_number?; let cur_available_to = cur_chain_info.ancient_block_number.unwrap_or(cur_chain_info.best_block_number); @@ -375,10 +383,11 @@ impl Service { return None; } - trace!(target: "snapshot", "Trying to import ancient blocks until {}", highest_block_num); + trace!(target: "snapshot", "Trying to import ancient blocks until {}. First block in new chain=#{}, first block in old chain=#{:?}, best block in old chain=#{}", + highest_block_num, next_available_from, cur_chain_info.first_block_number, cur_chain_info.best_block_number); // Here we start from the highest block number and go backward to 0, - // thus starting at `highest_block_num` and targetting `0`. + // thus starting at `highest_block_num` and targeting `0`. let target_hash = self.client.block_hash(BlockId::Number(0))?; let start_hash = self.client.block_hash(BlockId::Number(highest_block_num))?; @@ -398,7 +407,10 @@ impl Service { return Ok(count); } - let block = self.client.block(BlockId::Hash(parent_hash)).ok_or(::snapshot::error::Error::UnlinkedAncientBlockChain)?; + let block = self.client.block(BlockId::Hash(parent_hash)).ok_or_else(|| { + error!(target: "snapshot", "migrate_blocks: did not find block from parent_hash={:#x} (start_hash={:#x})", parent_hash, start_hash); + UnlinkedAncientBlockChain(parent_hash) + })?; parent_hash = block.parent_hash(); let block_number = block.number(); @@ -412,7 +424,14 @@ impl Service { next_chain.insert_unordered_block(&mut batch, block, block_receipts, Some(parent_total_difficulty), false, true); count += 1; }, - _ => break, + _ => { + // We couldn't reach the targeted hash + error!(target: "snapshot", "migrate_blocks: failed to find receipts and parent total difficulty; cannot reach the target_hash ({:#x}). Block #{}, parent_hash={:#x}, parent_total_difficulty={:?}, start_hash={:#x}, ancient_block_number={:?}, best_block_number={:?}", + target_hash, block_number, parent_hash, parent_total_difficulty, + start_hash, cur_chain_info.ancient_block_number, cur_chain_info.best_block_number, + ); + return Err(UnlinkedAncientBlockChain(parent_hash).into()); + }, } // Writing changes to DB and logging every now and then @@ -433,11 +452,6 @@ impl Service { next_chain.commit(); next_db.key_value().flush().expect("DB flush failed."); - // We couldn't reach the targeted hash - if parent_hash != target_hash { - return Err(::snapshot::error::Error::UnlinkedAncientBlockChain.into()); - } - // Update best ancient block in the Next Chain next_chain.update_best_ancient_block(&start_hash); Ok(count) @@ -549,6 +563,8 @@ impl Service { *self.status.lock() = RestorationStatus::Initializing { chunks_done: 0, + state_chunks: manifest.state_hashes.len() as u32, + block_chunks: manifest.block_hashes.len() as u32, }; fs::create_dir_all(&rest_dir)?; @@ -563,7 +579,7 @@ impl Service { manifest: manifest.clone(), pruning: self.pruning, db: self.restoration_db_handler.open(&rest_db)?, - writer: writer, + writer, genesis: &self.genesis_block, guard: Guard::new(rest_db), engine: &*self.engine, @@ -654,15 +670,20 @@ impl Service { // lead to deadlock. fn finalize_restoration(&self, rest: &mut Option) -> Result<(), Error> { trace!(target: "snapshot", "finalizing restoration"); + *self.status.lock() = RestorationStatus::Finalizing; let recover = rest.as_ref().map_or(false, |rest| rest.writer.is_some()); // destroy the restoration before replacing databases and snapshot. rest.take() - .map(|r| r.finalize(&*self.engine)) + .map(|r| r.finalize()) .unwrap_or(Ok(()))?; - self.replace_client_db()?; + let migrated_blocks = self.migrate_blocks()?; + info!(target: "snapshot", "Migrated {} ancient blocks", migrated_blocks); + + // replace the Client's database with the new one (restart the Client). + self.client.restore_db(&*self.restoration_db().to_string_lossy())?; if recover { let mut reader = self.reader.write(); @@ -690,14 +711,20 @@ impl Service { /// Feed a chunk of either kind (block or state). no-op if no restoration or status is wrong. fn feed_chunk(&self, hash: H256, chunk: &[u8], is_state: bool) { // TODO: be able to process block chunks and state chunks at same time? - let mut restoration = self.restoration.lock(); - match self.feed_chunk_with_restoration(&mut restoration, hash, chunk, is_state) { + let r = { + let mut restoration = self.restoration.lock(); + self.feed_chunk_with_restoration(&mut restoration, hash, chunk, is_state) + }; + match r { Ok(()) | Err(Error::Snapshot(SnapshotError::RestorationAborted)) => (), Err(e) => { + // TODO: after this we're sometimes deadlocked warn!("Encountered error during snapshot restoration: {}", e); - *self.restoration.lock() = None; - *self.status.lock() = RestorationStatus::Failed; + self.abort_restore(); + if let Some(mut status) = self.status.try_lock_for(std::time::Duration::from_millis(10)) { + *status = RestorationStatus::Failed; + } let _ = fs::remove_dir_all(self.restoration_dir()); } } @@ -707,8 +734,8 @@ impl Service { fn feed_chunk_with_restoration(&self, restoration: &mut Option, hash: H256, chunk: &[u8], is_state: bool) -> Result<(), Error> { let (result, db) = { match self.status() { - RestorationStatus::Inactive | RestorationStatus::Failed => { - trace!(target: "snapshot", "Tried to restore chunk {:x} while inactive or failed", hash); + RestorationStatus::Inactive | RestorationStatus::Failed | RestorationStatus::Finalizing => { + trace!(target: "snapshot", "Tried to restore chunk {:x} while inactive, failed or finalizing", hash); return Ok(()); }, RestorationStatus::Ongoing { .. } | RestorationStatus::Initializing { .. } => { @@ -803,7 +830,7 @@ impl SnapshotService for Service { let mut cur_status = self.status.lock(); match *cur_status { - RestorationStatus::Initializing { ref mut chunks_done } => { + RestorationStatus::Initializing { ref mut chunks_done, .. } => { *chunks_done = self.state_chunks.load(Ordering::SeqCst) as u32 + self.block_chunks.load(Ordering::SeqCst) as u32; } diff --git a/ethcore/src/snapshot/tests/helpers.rs b/ethcore/src/snapshot/tests/helpers.rs index 1873d05b7..a6e516b1b 100644 --- a/ethcore/src/snapshot/tests/helpers.rs +++ b/ethcore/src/snapshot/tests/helpers.rs @@ -187,5 +187,5 @@ pub fn restore( trace!(target: "snapshot", "finalizing"); state.finalize(manifest.block_number, manifest.block_hash)?; - secondary.finalize(engine) + secondary.finalize() } diff --git a/ethcore/src/snapshot/tests/proof_of_work.rs b/ethcore/src/snapshot/tests/proof_of_work.rs index fb714e667..4aa444229 100644 --- a/ethcore/src/snapshot/tests/proof_of_work.rs +++ b/ethcore/src/snapshot/tests/proof_of_work.rs @@ -93,7 +93,7 @@ fn chunk_and_restore(amount: u64) { rebuilder.feed(&chunk, engine.as_ref(), &flag).unwrap(); } - rebuilder.finalize(engine.as_ref()).unwrap(); + rebuilder.finalize().unwrap(); drop(rebuilder); // and test it. diff --git a/ethcore/sync/src/chain/handler.rs b/ethcore/sync/src/chain/handler.rs index afd0b4ff2..ada5058dc 100644 --- a/ethcore/sync/src/chain/handler.rs +++ b/ethcore/sync/src/chain/handler.rs @@ -256,7 +256,7 @@ impl SyncHandler { return Err(DownloaderImportError::Invalid); } match io.chain().block_status(BlockId::Hash(hash.clone())) { - BlockStatus::InChain => { + BlockStatus::InChain => { trace!(target: "sync", "New block hash already in chain {:?}", hash); }, BlockStatus::Queued => { @@ -529,10 +529,14 @@ impl SyncHandler { sync.snapshot.clear(); return Ok(()); }, - RestorationStatus::Initializing { .. } => { + RestorationStatus::Initializing { .. } => { trace!(target: "warp", "{}: Snapshot restoration is initializing", peer_id); return Ok(()); } + RestorationStatus::Finalizing => { + trace!(target: "warp", "{}: Snapshot finalizing restoration", peer_id); + return Ok(()); + } RestorationStatus::Ongoing { .. } => { trace!(target: "sync", "{}: Snapshot restoration is ongoing", peer_id); }, diff --git a/ethcore/sync/src/chain/mod.rs b/ethcore/sync/src/chain/mod.rs index 0c2b03d1b..fb655308d 100644 --- a/ethcore/sync/src/chain/mod.rs +++ b/ethcore/sync/src/chain/mod.rs @@ -1210,7 +1210,7 @@ impl ChainSync { RestorationStatus::Inactive | RestorationStatus::Failed => { self.set_state(SyncState::SnapshotWaiting); }, - RestorationStatus::Initializing { .. } | RestorationStatus::Ongoing { .. } => (), + RestorationStatus::Initializing { .. } | RestorationStatus::Ongoing { .. } | RestorationStatus::Finalizing => (), }, SyncState::SnapshotWaiting => { match io.snapshot_service().status() { @@ -1221,6 +1221,9 @@ impl ChainSync { RestorationStatus::Initializing { .. } => { trace!(target:"sync", "Snapshot restoration is initializing"); }, + RestorationStatus::Finalizing { .. } => { + trace!(target:"sync", "Snapshot finalizing restoration"); + }, RestorationStatus::Ongoing { state_chunks_done, block_chunks_done, .. } => { if !self.snapshot.is_complete() && self.snapshot.done_chunks() - (state_chunks_done + block_chunks_done) as usize <= MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD { trace!(target:"sync", "Resuming snapshot sync"); diff --git a/ethcore/types/src/restoration_status.rs b/ethcore/types/src/restoration_status.rs index b36ec7ef4..f02aa118c 100644 --- a/ethcore/types/src/restoration_status.rs +++ b/ethcore/types/src/restoration_status.rs @@ -23,6 +23,10 @@ pub enum RestorationStatus { Inactive, /// Restoration is initializing Initializing { + /// Total number of state chunks. + state_chunks: u32, + /// Total number of block chunks. + block_chunks: u32, /// Number of chunks done/imported chunks_done: u32, }, @@ -37,6 +41,7 @@ pub enum RestorationStatus { /// Number of block chunks completed. block_chunks_done: u32, }, + Finalizing, /// Failed restoration. Failed, } diff --git a/parity/informant.rs b/parity/informant.rs index 285557976..14400d58f 100644 --- a/parity/informant.rs +++ b/parity/informant.rs @@ -319,9 +319,16 @@ impl Informant { RestorationStatus::Ongoing { state_chunks, block_chunks, state_chunks_done, block_chunks_done } => { format!("Syncing snapshot {}/{}", state_chunks_done + block_chunks_done, state_chunks + block_chunks) }, - RestorationStatus::Initializing { chunks_done } => { - format!("Snapshot initializing ({} chunks restored)", chunks_done) + RestorationStatus::Initializing { chunks_done, state_chunks, block_chunks } => { + let total_chunks = state_chunks + block_chunks; + // Note that the percentage here can be slightly misleading when + // they have chunks already on disk: we'll import the local + // chunks first and then download the rest. + format!("Snapshot initializing ({}/{} chunks restored, {:.0}%)", chunks_done, total_chunks, (chunks_done as f32 / total_chunks as f32) * 100.0) }, + RestorationStatus::Finalizing => { + format!("Snapshot finalization under way") + } _ => String::new(), } ) diff --git a/parity/snapshot.rs b/parity/snapshot.rs index 269965c33..c1d2a77e3 100644 --- a/parity/snapshot.rs +++ b/parity/snapshot.rs @@ -123,6 +123,7 @@ fn restore_using(snapshot: Arc, reader: &R, match snapshot.status() { RestorationStatus::Ongoing { .. } => Err("Snapshot file is incomplete and missing chunks.".into()), RestorationStatus::Initializing { .. } => Err("Snapshot restoration is still initializing.".into()), + RestorationStatus::Finalizing => Err("Snapshot restoration is still finalizing.".into()), RestorationStatus::Failed => Err("Snapshot restoration failed.".into()), RestorationStatus::Inactive => { info!("Restoration complete.");