Local snapshot restore (#2058)

* restore from local snapshot

* update status with chunks done

* rework local restore trigger
This commit is contained in:
Robert Habermeier 2016-09-11 14:05:59 +02:00 committed by Arkadiy Paronyan
parent fd4361e284
commit dcfd7eab6d
5 changed files with 130 additions and 86 deletions

View File

@ -186,7 +186,7 @@ impl IoHandler<ClientIoMessage> for ClientIoHandler {
ClientIoMessage::BlockVerified => { self.client.import_verified_blocks(); } ClientIoMessage::BlockVerified => { self.client.import_verified_blocks(); }
ClientIoMessage::NewTransactions(ref transactions) => { self.client.import_queued_transactions(transactions); } ClientIoMessage::NewTransactions(ref transactions) => { self.client.import_queued_transactions(transactions); }
ClientIoMessage::BeginRestoration(ref manifest) => { ClientIoMessage::BeginRestoration(ref manifest) => {
if let Err(e) = self.snapshot.init_restore(manifest.clone()) { if let Err(e) = self.snapshot.init_restore(manifest.clone(), true) {
warn!("Failed to initialize snapshot restoration: {}", e); warn!("Failed to initialize snapshot restoration: {}", e);
} }
} }

View File

@ -35,7 +35,7 @@ use service::ClientIoMessage;
use io::IoChannel; use io::IoChannel;
use util::{Bytes, H256, Mutex, RwLock, UtilError}; use util::{Bytes, H256, Mutex, RwLock, RwLockReadGuard, UtilError};
use util::journaldb::Algorithm; use util::journaldb::Algorithm;
use util::kvdb::{Database, DatabaseConfig}; use util::kvdb::{Database, DatabaseConfig};
use util::snappy; use util::snappy;
@ -70,7 +70,7 @@ struct Restoration {
block_chunks_left: HashSet<H256>, block_chunks_left: HashSet<H256>,
state: StateRebuilder, state: StateRebuilder,
blocks: BlockRebuilder, blocks: BlockRebuilder,
writer: LooseWriter, writer: Option<LooseWriter>,
snappy_buffer: Bytes, snappy_buffer: Bytes,
final_state_root: H256, final_state_root: H256,
guard: Guard, guard: Guard,
@ -80,8 +80,8 @@ struct RestorationParams<'a> {
manifest: ManifestData, // manifest to base restoration on. manifest: ManifestData, // manifest to base restoration on.
pruning: Algorithm, // pruning algorithm for the database. pruning: Algorithm, // pruning algorithm for the database.
db_path: PathBuf, // database path db_path: PathBuf, // database path
db_config: &'a DatabaseConfig, db_config: &'a DatabaseConfig, // configuration for the database.
writer: LooseWriter, // writer for recovered snapshot. writer: Option<LooseWriter>, // writer for recovered snapshot.
genesis: &'a [u8], // genesis block of the chain. genesis: &'a [u8], // genesis block of the chain.
guard: Guard, // guard for the restoration directory. guard: Guard, // guard for the restoration directory.
} }
@ -120,7 +120,10 @@ impl Restoration {
let len = try!(snappy::decompress_into(chunk, &mut self.snappy_buffer)); let len = try!(snappy::decompress_into(chunk, &mut self.snappy_buffer));
try!(self.state.feed(&self.snappy_buffer[..len])); try!(self.state.feed(&self.snappy_buffer[..len]));
try!(self.writer.write_state_chunk(hash, chunk));
if let Some(ref mut writer) = self.writer.as_mut() {
try!(writer.write_state_chunk(hash, chunk));
}
} }
Ok(()) Ok(())
@ -132,7 +135,9 @@ impl Restoration {
let len = try!(snappy::decompress_into(chunk, &mut self.snappy_buffer)); let len = try!(snappy::decompress_into(chunk, &mut self.snappy_buffer));
try!(self.blocks.feed(&self.snappy_buffer[..len], engine)); try!(self.blocks.feed(&self.snappy_buffer[..len], engine));
try!(self.writer.write_block_chunk(hash, chunk)); if let Some(ref mut writer) = self.writer.as_mut() {
try!(writer.write_block_chunk(hash, chunk));
}
} }
Ok(()) Ok(())
@ -157,7 +162,9 @@ impl Restoration {
// connect out-of-order chunks. // connect out-of-order chunks.
self.blocks.glue_chunks(); self.blocks.glue_chunks();
try!(self.writer.finish(self.manifest)); if let Some(writer) = self.writer {
try!(writer.finish(self.manifest));
}
self.guard.disarm(); self.guard.disarm();
Ok(()) Ok(())
@ -300,6 +307,11 @@ impl Service {
Ok(()) Ok(())
} }
/// Get a reference to the snapshot reader.
pub fn reader(&self) -> RwLockReadGuard<Option<LooseReader>> {
self.reader.read()
}
/// Tick the snapshot service. This will log any active snapshot /// Tick the snapshot service. This will log any active snapshot
/// being taken. /// being taken.
pub fn tick(&self) { pub fn tick(&self) {
@ -351,11 +363,15 @@ impl Service {
} }
/// Initialize the restoration synchronously. /// Initialize the restoration synchronously.
pub fn init_restore(&self, manifest: ManifestData) -> Result<(), Error> { /// The recover flag indicates whether to recover the restored snapshot.
pub fn init_restore(&self, manifest: ManifestData, recover: bool) -> Result<(), Error> {
let rest_dir = self.restoration_dir(); let rest_dir = self.restoration_dir();
let mut res = self.restoration.lock(); let mut res = self.restoration.lock();
self.state_chunks.store(0, Ordering::SeqCst);
self.block_chunks.store(0, Ordering::SeqCst);
// tear down existing restoration. // tear down existing restoration.
*res = None; *res = None;
@ -370,7 +386,10 @@ impl Service {
try!(fs::create_dir_all(&rest_dir)); try!(fs::create_dir_all(&rest_dir));
// make new restoration. // make new restoration.
let writer = try!(LooseWriter::new(self.temp_recovery_dir())); let writer = match recover {
true => Some(try!(LooseWriter::new(self.temp_recovery_dir()))),
false => None
};
let params = RestorationParams { let params = RestorationParams {
manifest: manifest, manifest: manifest,
@ -385,8 +404,8 @@ impl Service {
*res = Some(try!(Restoration::new(params))); *res = Some(try!(Restoration::new(params)));
*self.status.lock() = RestorationStatus::Ongoing { *self.status.lock() = RestorationStatus::Ongoing {
state_chunks_done: self.state_chunks.load(Ordering::Relaxed) as u32, state_chunks_done: self.state_chunks.load(Ordering::SeqCst) as u32,
block_chunks_done: self.block_chunks.load(Ordering::Relaxed) as u32, block_chunks_done: self.block_chunks.load(Ordering::SeqCst) as u32,
}; };
Ok(()) Ok(())
} }
@ -397,35 +416,35 @@ impl Service {
fn finalize_restoration(&self, rest: &mut Option<Restoration>) -> Result<(), Error> { fn finalize_restoration(&self, rest: &mut Option<Restoration>) -> Result<(), Error> {
trace!(target: "snapshot", "finalizing restoration"); trace!(target: "snapshot", "finalizing restoration");
self.state_chunks.store(0, Ordering::SeqCst); let recover = rest.as_ref().map_or(false, |rest| rest.writer.is_some());
self.block_chunks.store(0, Ordering::SeqCst);
// destroy the restoration before replacing databases and snapshot. // destroy the restoration before replacing databases and snapshot.
try!(rest.take().map(Restoration::finalize).unwrap_or(Ok(()))); try!(rest.take().map(Restoration::finalize).unwrap_or(Ok(())));
try!(self.replace_client_db()); try!(self.replace_client_db());
let mut reader = self.reader.write(); if recover {
*reader = None; // destroy the old reader if it existed. let mut reader = self.reader.write();
*reader = None; // destroy the old reader if it existed.
let snapshot_dir = self.snapshot_dir(); let snapshot_dir = self.snapshot_dir();
trace!(target: "snapshot", "removing old snapshot dir at {}", snapshot_dir.to_string_lossy()); trace!(target: "snapshot", "removing old snapshot dir at {}", snapshot_dir.to_string_lossy());
if let Err(e) = fs::remove_dir_all(&snapshot_dir) { if let Err(e) = fs::remove_dir_all(&snapshot_dir) {
match e.kind() { match e.kind() {
ErrorKind::NotFound => {} ErrorKind::NotFound => {}
_ => return Err(e.into()), _ => return Err(e.into()),
}
} }
try!(fs::create_dir(&snapshot_dir));
trace!(target: "snapshot", "copying restored snapshot files over");
try!(fs::rename(self.temp_recovery_dir(), &snapshot_dir));
*reader = Some(try!(LooseReader::new(snapshot_dir)));
} }
try!(fs::create_dir(&snapshot_dir));
trace!(target: "snapshot", "copying restored snapshot files over");
try!(fs::rename(self.temp_recovery_dir(), &snapshot_dir));
let _ = fs::remove_dir_all(self.restoration_dir()); let _ = fs::remove_dir_all(self.restoration_dir());
*reader = Some(try!(LooseReader::new(snapshot_dir)));
*self.status.lock() = RestorationStatus::Inactive; *self.status.lock() = RestorationStatus::Inactive;
Ok(()) Ok(())
@ -506,7 +525,13 @@ impl SnapshotService for Service {
} }
fn status(&self) -> RestorationStatus { fn status(&self) -> RestorationStatus {
*self.status.lock() let mut cur_status = self.status.lock();
if let RestorationStatus::Ongoing { ref mut state_chunks_done, ref mut block_chunks_done } = *cur_status {
*state_chunks_done = self.state_chunks.load(Ordering::SeqCst) as u32;
*block_chunks_done = self.block_chunks.load(Ordering::SeqCst) as u32;
}
cur_status.clone()
} }
fn begin_restore(&self, manifest: ManifestData) { fn begin_restore(&self, manifest: ManifestData) {

View File

@ -33,7 +33,7 @@ Usage:
parity export [ <file> ] [options] parity export [ <file> ] [options]
parity signer new-token [options] parity signer new-token [options]
parity snapshot <file> [options] parity snapshot <file> [options]
parity restore <file> [options] parity restore [ <file> ] [options]
Operating Options: Operating Options:
--mode MODE Set the operating mode. MODE can be one of: --mode MODE Set the operating mode. MODE can be one of:

View File

@ -21,8 +21,9 @@ use std::path::{Path, PathBuf};
use std::sync::Arc; use std::sync::Arc;
use ethcore_logger::{setup_log, Config as LogConfig}; use ethcore_logger::{setup_log, Config as LogConfig};
use ethcore::snapshot::{Progress, RestorationStatus, SnapshotService}; use ethcore::snapshot::{Progress, RestorationStatus, SnapshotService as SS};
use ethcore::snapshot::io::{SnapshotReader, PackedReader, PackedWriter}; use ethcore::snapshot::io::{SnapshotReader, PackedReader, PackedWriter};
use ethcore::snapshot::service::Service as SnapshotService;
use ethcore::service::ClientService; use ethcore::service::ClientService;
use ethcore::client::{Mode, DatabaseCompactionProfile, Switch, VMType}; use ethcore::client::{Mode, DatabaseCompactionProfile, Switch, VMType};
use ethcore::miner::Miner; use ethcore::miner::Miner;
@ -62,6 +63,60 @@ pub struct SnapshotCommand {
pub block_at: BlockID, pub block_at: BlockID,
} }
// helper for reading chunks from arbitrary reader and feeding them into the
// service.
fn restore_using<R: SnapshotReader>(snapshot: Arc<SnapshotService>, reader: &R, recover: bool) -> Result<(), String> {
let manifest = reader.manifest();
info!("Restoring to block #{} (0x{:?})", manifest.block_number, manifest.block_hash);
try!(snapshot.init_restore(manifest.clone(), recover).map_err(|e| {
format!("Failed to begin restoration: {}", e)
}));
let (num_state, num_blocks) = (manifest.state_hashes.len(), manifest.block_hashes.len());
let informant_handle = snapshot.clone();
::std::thread::spawn(move || {
while let RestorationStatus::Ongoing { state_chunks_done, block_chunks_done } = informant_handle.status() {
info!("Processed {}/{} state chunks and {}/{} block chunks.",
state_chunks_done, num_state, block_chunks_done, num_blocks);
::std::thread::sleep(Duration::from_secs(5));
}
});
info!("Restoring state");
for &state_hash in &manifest.state_hashes {
if snapshot.status() == RestorationStatus::Failed {
return Err("Restoration failed".into());
}
let chunk = try!(reader.chunk(state_hash)
.map_err(|e| format!("Encountered error while reading chunk {:?}: {}", state_hash, e)));
snapshot.feed_state_chunk(state_hash, &chunk);
}
info!("Restoring blocks");
for &block_hash in &manifest.block_hashes {
if snapshot.status() == RestorationStatus::Failed {
return Err("Restoration failed".into());
}
let chunk = try!(reader.chunk(block_hash)
.map_err(|e| format!("Encountered error while reading chunk {:?}: {}", block_hash, e)));
snapshot.feed_block_chunk(block_hash, &chunk);
}
match snapshot.status() {
RestorationStatus::Ongoing { .. } => Err("Snapshot file is incomplete and missing chunks.".into()),
RestorationStatus::Failed => Err("Snapshot restoration failed.".into()),
RestorationStatus::Inactive => {
info!("Restoration complete.");
Ok(())
}
}
}
impl SnapshotCommand { impl SnapshotCommand {
// shared portion of snapshot commands: start the client service // shared portion of snapshot commands: start the client service
fn start_service(self) -> Result<(ClientService, Arc<PanicHandler>), String> { fn start_service(self) -> Result<(ClientService, Arc<PanicHandler>), String> {
@ -106,69 +161,35 @@ impl SnapshotCommand {
/// restore from a snapshot /// restore from a snapshot
pub fn restore(self) -> Result<(), String> { pub fn restore(self) -> Result<(), String> {
let file = try!(self.file_path.clone().ok_or("No file path provided.".to_owned())); let file = self.file_path.clone();
let (service, _panic_handler) = try!(self.start_service()); let (service, _panic_handler) = try!(self.start_service());
warn!("Snapshot restoration is experimental and the format may be subject to change."); warn!("Snapshot restoration is experimental and the format may be subject to change.");
warn!("On encountering an unexpected error, please ensure that you have a recent snapshot."); warn!("On encountering an unexpected error, please ensure that you have a recent snapshot.");
let snapshot = service.snapshot_service(); let snapshot = service.snapshot_service();
let reader = PackedReader::new(Path::new(&file))
.map_err(|e| format!("Couldn't open snapshot file: {}", e))
.and_then(|x| x.ok_or("Snapshot file has invalid format.".into()));
let reader = try!(reader); if let Some(file) = file {
let manifest = reader.manifest(); info!("Attempting to restore from snapshot at '{}'", file);
// drop the client so we don't restore while it has open DB handles. let reader = PackedReader::new(Path::new(&file))
drop(service); .map_err(|e| format!("Couldn't open snapshot file: {}", e))
.and_then(|x| x.ok_or("Snapshot file has invalid format.".into()));
try!(snapshot.init_restore(manifest.clone()).map_err(|e| { let reader = try!(reader);
format!("Failed to begin restoration: {}", e) try!(restore_using(snapshot, &reader, true));
})); } else {
info!("Attempting to restore from local snapshot.");
let (num_state, num_blocks) = (manifest.state_hashes.len(), manifest.block_hashes.len()); // attempting restoration with recovery will lead to deadlock
// as we currently hold a read lock on the service's reader.
let informant_handle = snapshot.clone(); match *snapshot.reader() {
::std::thread::spawn(move || { Some(ref reader) => try!(restore_using(snapshot.clone(), reader, false)),
while let RestorationStatus::Ongoing { state_chunks_done, block_chunks_done } = informant_handle.status() { None => return Err("No local snapshot found.".into()),
info!("Processed {}/{} state chunks and {}/{} block chunks.",
state_chunks_done, num_state, block_chunks_done, num_blocks);
::std::thread::sleep(Duration::from_secs(5));
}
});
info!("Restoring state");
for &state_hash in &manifest.state_hashes {
if snapshot.status() == RestorationStatus::Failed {
return Err("Restoration failed".into());
}
let chunk = try!(reader.chunk(state_hash)
.map_err(|e| format!("Encountered error while reading chunk {:?}: {}", state_hash, e)));
snapshot.feed_state_chunk(state_hash, &chunk);
}
info!("Restoring blocks");
for &block_hash in &manifest.block_hashes {
if snapshot.status() == RestorationStatus::Failed {
return Err("Restoration failed".into());
}
let chunk = try!(reader.chunk(block_hash)
.map_err(|e| format!("Encountered error while reading chunk {:?}: {}", block_hash, e)));
snapshot.feed_block_chunk(block_hash, &chunk);
}
match snapshot.status() {
RestorationStatus::Ongoing { .. } => Err("Snapshot file is incomplete and missing chunks.".into()),
RestorationStatus::Failed => Err("Snapshot restoration failed.".into()),
RestorationStatus::Inactive => {
info!("Restoration complete.");
Ok(())
} }
} }
Ok(())
} }
/// Take a snapshot from the head of the chain. /// Take a snapshot from the head of the chain.

View File

@ -458,8 +458,6 @@ impl Database {
let mut backup_db = PathBuf::from(&self.path); let mut backup_db = PathBuf::from(&self.path);
backup_db.pop(); backup_db.pop();
backup_db.push("backup_db"); backup_db.push("backup_db");
println!("Path at {:?}", self.path);
println!("Backup at {:?}", backup_db);
let existed = match fs::rename(&self.path, &backup_db) { let existed = match fs::rename(&self.path, &backup_db) {
Ok(_) => true, Ok(_) => true,