Resumable warp-sync / Seed downloaded snapshots (#8544)
* Start dividing sync chain : first supplier method * WIP - updated chain sync supplier * Finish refactoring the Chain Sync Supplier * Create Chain Sync Requester * Add Propagator for Chain Sync * Add the Chain Sync Handler * Move tests from mod -> handler * Move tests to propagator * Refactor SyncRequester arguments * Refactoring peer fork header handler * Fix wrong highest block number in snapshot sync * Small refactor... * Resume warp-sync downloaded chunks * Add comments * Refactoring the previous chunks import * Fix tests * Address PR grumbles * Fix not seeding current snapshot * Address PR Grumbles * Address PR grumble * Retry failed CI job * Update SnapshotService readiness check Fix restoration locking issue for previous chunks restoration * Fix tests * Fix tests * Fix test * Early abort importing previous chunks * PR Grumbles * Update Gitlab CI config * SyncState back to Waiting when Manifest peers disconnect * Move fix * Better fix * Revert GitLab CI changes * Fix Warning * Refactor resuming snapshots * Fix string construction * Revert "Refactor resuming snapshots" This reverts commit 75fd4b553a38e4a49dc5d6a878c70e830ff382eb. * Update informant log * Fix string construction * Refactor resuming snapshots * Fix informant * PR Grumbles * Update informant message : show chunks done * PR Grumbles * Fix * Fix Warning * PR Grumbles
This commit is contained in:
committed by
Afri Schoedon
parent
6ecc855c34
commit
cdbcfaa7de
@@ -17,8 +17,8 @@
|
||||
//! Snapshot network service implementation.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::io::ErrorKind;
|
||||
use std::fs;
|
||||
use std::io::{self, Read, ErrorKind};
|
||||
use std::fs::{self, File};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||
@@ -30,6 +30,7 @@ use blockchain::BlockChain;
|
||||
use client::{Client, ChainInfo, ClientIoMessage};
|
||||
use engines::EthEngine;
|
||||
use error::Error;
|
||||
use hash::keccak;
|
||||
use ids::BlockId;
|
||||
|
||||
use io::IoChannel;
|
||||
@@ -270,8 +271,8 @@ impl Service {
|
||||
}
|
||||
}
|
||||
|
||||
// delete the temporary restoration dir if it does exist.
|
||||
if let Err(e) = fs::remove_dir_all(service.restoration_dir()) {
|
||||
// delete the temporary restoration DB dir if it does exist.
|
||||
if let Err(e) = fs::remove_dir_all(service.restoration_db()) {
|
||||
if e.kind() != ErrorKind::NotFound {
|
||||
return Err(e.into())
|
||||
}
|
||||
@@ -325,6 +326,13 @@ impl Service {
|
||||
dir
|
||||
}
|
||||
|
||||
// previous snapshot chunks path.
|
||||
fn prev_chunks_dir(&self) -> PathBuf {
|
||||
let mut dir = self.snapshot_root.clone();
|
||||
dir.push("prev_chunks");
|
||||
dir
|
||||
}
|
||||
|
||||
// replace one the client's database with our own.
|
||||
fn replace_client_db(&self) -> Result<(), Error> {
|
||||
let our_db = self.restoration_db();
|
||||
@@ -406,10 +414,27 @@ impl Service {
|
||||
/// Initialize the restoration synchronously.
|
||||
/// The recover flag indicates whether to recover the restored snapshot.
|
||||
pub fn init_restore(&self, manifest: ManifestData, recover: bool) -> Result<(), Error> {
|
||||
let rest_dir = self.restoration_dir();
|
||||
|
||||
let mut res = self.restoration.lock();
|
||||
|
||||
let rest_dir = self.restoration_dir();
|
||||
let rest_db = self.restoration_db();
|
||||
let recovery_temp = self.temp_recovery_dir();
|
||||
let prev_chunks = self.prev_chunks_dir();
|
||||
|
||||
// delete and restore the restoration dir.
|
||||
if let Err(e) = fs::remove_dir_all(&prev_chunks) {
|
||||
match e.kind() {
|
||||
ErrorKind::NotFound => {},
|
||||
_ => return Err(e.into()),
|
||||
}
|
||||
}
|
||||
|
||||
// Move the previous recovery temp directory
|
||||
// to `prev_chunks` to be able to restart restoring
|
||||
// with previously downloaded blocks
|
||||
// This step is optional, so don't fail on error
|
||||
fs::rename(&recovery_temp, &prev_chunks).ok();
|
||||
|
||||
self.state_chunks.store(0, Ordering::SeqCst);
|
||||
self.block_chunks.store(0, Ordering::SeqCst);
|
||||
|
||||
@@ -424,29 +449,38 @@ impl Service {
|
||||
}
|
||||
}
|
||||
|
||||
*self.status.lock() = RestorationStatus::Initializing {
|
||||
chunks_done: 0,
|
||||
};
|
||||
|
||||
fs::create_dir_all(&rest_dir)?;
|
||||
|
||||
// make new restoration.
|
||||
let writer = match recover {
|
||||
true => Some(LooseWriter::new(self.temp_recovery_dir())?),
|
||||
true => Some(LooseWriter::new(recovery_temp)?),
|
||||
false => None
|
||||
};
|
||||
|
||||
let params = RestorationParams {
|
||||
manifest: manifest,
|
||||
manifest: manifest.clone(),
|
||||
pruning: self.pruning,
|
||||
db: self.restoration_db_handler.open(&self.restoration_db())?,
|
||||
db: self.restoration_db_handler.open(&rest_db)?,
|
||||
writer: writer,
|
||||
genesis: &self.genesis_block,
|
||||
guard: Guard::new(rest_dir),
|
||||
guard: Guard::new(rest_db),
|
||||
engine: &*self.engine,
|
||||
};
|
||||
|
||||
let state_chunks = params.manifest.state_hashes.len();
|
||||
let block_chunks = params.manifest.block_hashes.len();
|
||||
let state_chunks = manifest.state_hashes.len();
|
||||
let block_chunks = manifest.block_hashes.len();
|
||||
|
||||
*res = Some(Restoration::new(params)?);
|
||||
|
||||
self.restoring_snapshot.store(true, Ordering::SeqCst);
|
||||
|
||||
// Import previous chunks, continue if it fails
|
||||
self.import_prev_chunks(&mut res, manifest).ok();
|
||||
|
||||
*self.status.lock() = RestorationStatus::Ongoing {
|
||||
state_chunks: state_chunks as u32,
|
||||
block_chunks: block_chunks as u32,
|
||||
@@ -454,10 +488,65 @@ impl Service {
|
||||
block_chunks_done: self.block_chunks.load(Ordering::SeqCst) as u32,
|
||||
};
|
||||
|
||||
self.restoring_snapshot.store(true, Ordering::SeqCst);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Import the previous chunks into the current restoration
|
||||
fn import_prev_chunks(&self, restoration: &mut Option<Restoration>, manifest: ManifestData) -> Result<(), Error> {
|
||||
let prev_chunks = self.prev_chunks_dir();
|
||||
|
||||
// Restore previous snapshot chunks
|
||||
let files = fs::read_dir(prev_chunks.as_path())?;
|
||||
let mut num_temp_chunks = 0;
|
||||
|
||||
for prev_chunk_file in files {
|
||||
// Don't go over all the files if the restoration has been aborted
|
||||
if !self.restoring_snapshot.load(Ordering::SeqCst) {
|
||||
trace!(target:"snapshot", "Aborting importing previous chunks");
|
||||
return Ok(());
|
||||
}
|
||||
// Import the chunk, don't fail and continue if one fails
|
||||
match self.import_prev_chunk(restoration, &manifest, prev_chunk_file) {
|
||||
Ok(true) => num_temp_chunks += 1,
|
||||
Err(e) => trace!(target: "snapshot", "Error importing chunk: {:?}", e),
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
trace!(target:"snapshot", "Imported {} previous chunks", num_temp_chunks);
|
||||
|
||||
// Remove the prev temp directory
|
||||
fs::remove_dir_all(&prev_chunks)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Import a previous chunk at the given path. Returns whether the block was imported or not
|
||||
fn import_prev_chunk(&self, restoration: &mut Option<Restoration>, manifest: &ManifestData, file: io::Result<fs::DirEntry>) -> Result<bool, Error> {
|
||||
let file = file?;
|
||||
let path = file.path();
|
||||
|
||||
let mut file = File::open(path.clone())?;
|
||||
let mut buffer = Vec::new();
|
||||
file.read_to_end(&mut buffer)?;
|
||||
|
||||
let hash = keccak(&buffer);
|
||||
|
||||
let is_state = if manifest.block_hashes.contains(&hash) {
|
||||
false
|
||||
} else if manifest.state_hashes.contains(&hash) {
|
||||
true
|
||||
} else {
|
||||
return Ok(false);
|
||||
};
|
||||
|
||||
self.feed_chunk_with_restoration(restoration, hash, &buffer, is_state)?;
|
||||
|
||||
trace!(target: "snapshot", "Fed chunk {:?}", hash);
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
// finalize the restoration. this accepts an already-locked
|
||||
// restoration as an argument -- so acquiring it again _will_
|
||||
// lead to deadlock.
|
||||
@@ -499,12 +588,19 @@ impl Service {
|
||||
/// Feed a chunk of either kind. no-op if no restoration or status is wrong.
|
||||
fn feed_chunk(&self, hash: H256, chunk: &[u8], is_state: bool) -> Result<(), Error> {
|
||||
// TODO: be able to process block chunks and state chunks at same time?
|
||||
let (result, db) = {
|
||||
let mut restoration = self.restoration.lock();
|
||||
let mut restoration = self.restoration.lock();
|
||||
self.feed_chunk_with_restoration(&mut restoration, hash, chunk, is_state)
|
||||
}
|
||||
|
||||
/// Feed a chunk with the Restoration
|
||||
fn feed_chunk_with_restoration(&self, restoration: &mut Option<Restoration>, hash: H256, chunk: &[u8], is_state: bool) -> Result<(), Error> {
|
||||
let (result, db) = {
|
||||
match self.status() {
|
||||
RestorationStatus::Inactive | RestorationStatus::Failed => return Ok(()),
|
||||
RestorationStatus::Ongoing { .. } => {
|
||||
RestorationStatus::Inactive | RestorationStatus::Failed => {
|
||||
trace!(target: "snapshot", "Tried to restore chunk {:x} while inactive or failed", hash);
|
||||
return Ok(());
|
||||
},
|
||||
RestorationStatus::Ongoing { .. } | RestorationStatus::Initializing { .. } => {
|
||||
let (res, db) = {
|
||||
let rest = match *restoration {
|
||||
Some(ref mut r) => r,
|
||||
@@ -583,11 +679,41 @@ impl SnapshotService for Service {
|
||||
self.reader.read().as_ref().and_then(|r| r.chunk(hash).ok())
|
||||
}
|
||||
|
||||
fn completed_chunks(&self) -> Option<Vec<H256>> {
|
||||
let restoration = self.restoration.lock();
|
||||
|
||||
match *restoration {
|
||||
Some(ref restoration) => {
|
||||
let completed_chunks = restoration.manifest.block_hashes
|
||||
.iter()
|
||||
.filter(|h| !restoration.block_chunks_left.contains(h))
|
||||
.chain(
|
||||
restoration.manifest.state_hashes
|
||||
.iter()
|
||||
.filter(|h| !restoration.state_chunks_left.contains(h))
|
||||
)
|
||||
.map(|h| *h)
|
||||
.collect();
|
||||
|
||||
Some(completed_chunks)
|
||||
},
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn status(&self) -> RestorationStatus {
|
||||
let mut cur_status = self.status.lock();
|
||||
if let RestorationStatus::Ongoing { ref mut state_chunks_done, ref mut block_chunks_done, .. } = *cur_status {
|
||||
*state_chunks_done = self.state_chunks.load(Ordering::SeqCst) as u32;
|
||||
*block_chunks_done = self.block_chunks.load(Ordering::SeqCst) as u32;
|
||||
|
||||
match *cur_status {
|
||||
RestorationStatus::Initializing { ref mut chunks_done } => {
|
||||
*chunks_done = self.state_chunks.load(Ordering::SeqCst) as u32 +
|
||||
self.block_chunks.load(Ordering::SeqCst) as u32;
|
||||
}
|
||||
RestorationStatus::Ongoing { ref mut state_chunks_done, ref mut block_chunks_done, .. } => {
|
||||
*state_chunks_done = self.state_chunks.load(Ordering::SeqCst) as u32;
|
||||
*block_chunks_done = self.block_chunks.load(Ordering::SeqCst) as u32;
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
|
||||
cur_status.clone()
|
||||
@@ -600,6 +726,7 @@ impl SnapshotService for Service {
|
||||
}
|
||||
|
||||
fn abort_restore(&self) {
|
||||
trace!(target: "snapshot", "Aborting restore");
|
||||
self.restoring_snapshot.store(false, Ordering::SeqCst);
|
||||
*self.restoration.lock() = None;
|
||||
*self.status.lock() = RestorationStatus::Inactive;
|
||||
|
||||
@@ -130,12 +130,16 @@ fn guards_delete_folders() {
|
||||
service.init_restore(manifest.clone(), true).unwrap();
|
||||
assert!(path.exists());
|
||||
|
||||
// The `db` folder should have been deleted,
|
||||
// while the `temp` one kept
|
||||
service.abort_restore();
|
||||
assert!(!path.exists());
|
||||
assert!(!path.join("db").exists());
|
||||
assert!(path.join("temp").exists());
|
||||
|
||||
service.init_restore(manifest.clone(), true).unwrap();
|
||||
assert!(path.exists());
|
||||
|
||||
drop(service);
|
||||
assert!(!path.exists());
|
||||
assert!(!path.join("db").exists());
|
||||
assert!(path.join("temp").exists());
|
||||
}
|
||||
|
||||
@@ -30,6 +30,9 @@ pub trait SnapshotService : Sync + Send {
|
||||
/// `None` indicates warp sync isn't supported by the consensus engine.
|
||||
fn supported_versions(&self) -> Option<(u64, u64)>;
|
||||
|
||||
/// Returns a list of the completed chunks
|
||||
fn completed_chunks(&self) -> Option<Vec<H256>>;
|
||||
|
||||
/// Get raw chunk for a given hash.
|
||||
fn chunk(&self, hash: H256) -> Option<Bytes>;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user