Resumable warp-sync / Seed downloaded snapshots (#8544)

* Start dividing sync chain : first supplier method

* WIP - updated chain sync supplier

* Finish refactoring the Chain Sync Supplier

* Create Chain Sync Requester

* Add Propagator for Chain Sync

* Add the Chain Sync Handler

* Move tests from mod -> handler

* Move tests to propagator

* Refactor SyncRequester arguments

* Refactoring peer fork header handler

* Fix wrong highest block number in snapshot sync

* Small refactor...

* Resume warp-sync downloaded chunks

* Add comments

* Refactoring the previous chunks import

* Fix tests

* Address PR grumbles

* Fix not seeding current snapshot

* Address PR Grumbles

* Address PR grumble

* Retry failed CI job

* Update SnapshotService readiness check
Fix restoration locking issue for previous chunks restoration

* Fix tests

* Fix tests

* Fix test

* Early abort importing previous chunks

* PR Grumbles

* Update Gitlab CI config

* SyncState back to Waiting when Manifest peers disconnect

* Move fix

* Better fix

* Revert GitLab CI changes

* Fix Warning

* Refactor resuming snapshots

* Fix string construction

* Revert "Refactor resuming snapshots"

This reverts commit 75fd4b553a38e4a49dc5d6a878c70e830ff382eb.

* Update informant log

* Fix string construction

* Refactor resuming snapshots

* Fix informant

* PR Grumbles

* Update informant message : show chunks done

* PR Grumbles

* Fix

* Fix Warning

* PR Grumbles
This commit is contained in:
Nicolas Gotchac
2018-05-16 22:01:55 +02:00
committed by Afri Schoedon
parent 6ecc855c34
commit cdbcfaa7de
12 changed files with 362 additions and 103 deletions

View File

@@ -100,14 +100,27 @@ impl SyncHandler {
}
/// Called by peer when it is disconnecting
pub fn on_peer_aborting(sync: &mut ChainSync, io: &mut SyncIo, peer: PeerId) {
trace!(target: "sync", "== Disconnecting {}: {}", peer, io.peer_info(peer));
sync.handshaking_peers.remove(&peer);
if sync.peers.contains_key(&peer) {
debug!(target: "sync", "Disconnected {}", peer);
sync.clear_peer_download(peer);
sync.peers.remove(&peer);
sync.active_peers.remove(&peer);
pub fn on_peer_aborting(sync: &mut ChainSync, io: &mut SyncIo, peer_id: PeerId) {
trace!(target: "sync", "== Disconnecting {}: {}", peer_id, io.peer_info(peer_id));
sync.handshaking_peers.remove(&peer_id);
if sync.peers.contains_key(&peer_id) {
debug!(target: "sync", "Disconnected {}", peer_id);
sync.clear_peer_download(peer_id);
sync.peers.remove(&peer_id);
sync.active_peers.remove(&peer_id);
if sync.state == SyncState::SnapshotManifest {
// Check if we are asking other peers for
// the snapshot manifest as well.
// If not, return to initial state
let still_asking_manifest = sync.peers.iter()
.filter(|&(id, p)| sync.active_peers.contains(id) && p.asking == PeerAsking::SnapshotManifest)
.next().is_none();
if still_asking_manifest {
sync.state = ChainSync::get_init_state(sync.warp_sync, io.chain());
}
}
sync.continue_sync(io);
}
}
@@ -320,6 +333,10 @@ impl SyncHandler {
}
fn on_peer_confirmed(sync: &mut ChainSync, io: &mut SyncIo, peer_id: PeerId) {
{
let peer = sync.peers.get_mut(&peer_id).expect("Is only called when peer is present in peers");
peer.confirmation = ForkConfirmation::Confirmed;
}
sync.sync_peer(io, peer_id, false);
}
@@ -344,8 +361,8 @@ impl SyncHandler {
}
trace!(target: "sync", "{}: Confirmed peer", peer_id);
peer.confirmation = ForkConfirmation::Confirmed;
if !io.chain_overlay().read().contains_key(&fork_number) {
trace!(target: "sync", "Inserting (fork) block {} header", fork_number);
io.chain_overlay().write().insert(fork_number, header.to_vec());
}
}
@@ -560,6 +577,10 @@ impl SyncHandler {
sync.continue_sync(io);
return Ok(());
},
RestorationStatus::Initializing { .. } => {
trace!(target: "warp", "{}: Snapshot restoration is initializing", peer_id);
return Ok(());
}
RestorationStatus::Ongoing { .. } => {
trace!(target: "sync", "{}: Snapshot restoration is ongoing", peer_id);
},
@@ -659,11 +680,16 @@ impl SyncHandler {
// Let the current sync round complete first.
sync.active_peers.insert(peer_id.clone());
debug!(target: "sync", "Connected {}:{}", peer_id, io.peer_info(peer_id));
if let Some((fork_block, _)) = sync.fork_block {
SyncRequester::request_fork_header(sync, io, peer_id, fork_block);
} else {
SyncHandler::on_peer_confirmed(sync, io, peer_id);
match sync.fork_block {
Some((fork_block, _)) => {
SyncRequester::request_fork_header(sync, io, peer_id, fork_block);
},
_ => {
SyncHandler::on_peer_confirmed(sync, io, peer_id);
}
}
Ok(())
}

View File

@@ -245,9 +245,12 @@ pub struct SyncStatus {
impl SyncStatus {
/// Indicates if snapshot download is in progress
pub fn is_snapshot_syncing(&self) -> bool {
self.state == SyncState::SnapshotManifest
|| self.state == SyncState::SnapshotData
|| self.state == SyncState::SnapshotWaiting
match self.state {
SyncState::SnapshotManifest |
SyncState::SnapshotData |
SyncState::SnapshotWaiting => true,
_ => false,
}
}
/// Returns max no of peers to display in informants
@@ -643,7 +646,7 @@ impl ChainSync {
}
}
/// Resume downloading
/// Resume downloading
fn continue_sync(&mut self, io: &mut SyncIo) {
// Collect active peers that can sync
let confirmed_peers: Vec<(PeerId, u8)> = self.peers.iter().filter_map(|(peer_id, peer)|
@@ -751,26 +754,45 @@ impl ChainSync {
}
}
if let Some(request) = self.old_blocks.as_mut().and_then(|d| d.request_blocks(io, num_active_peers)) {
SyncRequester::request_blocks(self, io, peer_id, request, BlockSet::OldBlocks);
return;
// Only ask for old blocks if the peer has a higher difficulty
if force || higher_difficulty {
if let Some(request) = self.old_blocks.as_mut().and_then(|d| d.request_blocks(io, num_active_peers)) {
SyncRequester::request_blocks(self, io, peer_id, request, BlockSet::OldBlocks);
return;
}
} else {
trace!(target: "sync", "peer {} is not suitable for asking old blocks", peer_id);
self.deactivate_peer(io, peer_id);
}
},
SyncState::SnapshotData => {
if let RestorationStatus::Ongoing { state_chunks_done, block_chunks_done, .. } = io.snapshot_service().status() {
if self.snapshot.done_chunks() - (state_chunks_done + block_chunks_done) as usize > MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD {
trace!(target: "sync", "Snapshot queue full, pausing sync");
self.state = SyncState::SnapshotWaiting;
match io.snapshot_service().status() {
RestorationStatus::Ongoing { state_chunks_done, block_chunks_done, .. } => {
// Initialize the snapshot if not already done
self.snapshot.initialize(io.snapshot_service());
if self.snapshot.done_chunks() - (state_chunks_done + block_chunks_done) as usize > MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD {
trace!(target: "sync", "Snapshot queue full, pausing sync");
self.state = SyncState::SnapshotWaiting;
return;
}
},
RestorationStatus::Initializing { .. } => {
trace!(target: "warp", "Snapshot is stil initializing.");
return;
}
},
_ => {
return;
},
}
if peer_snapshot_hash.is_some() && peer_snapshot_hash == self.snapshot.snapshot_hash() {
self.clear_peer_download(peer_id);
SyncRequester::request_snapshot_data(self, io, peer_id);
}
},
SyncState::SnapshotManifest | //already downloading from other peer
SyncState::Waiting | SyncState::SnapshotWaiting => ()
SyncState::Waiting |
SyncState::SnapshotWaiting => ()
}
} else {
trace!(target: "sync", "Skipping peer {}, force={}, td={:?}, our td={}, state={:?}", peer_id, force, peer_difficulty, syncing_difficulty, self.state);
@@ -861,10 +883,7 @@ impl ChainSync {
packet.append(&chain.best_block_hash);
packet.append(&chain.genesis_hash);
if warp_protocol {
let manifest = match self.old_blocks.is_some() {
true => None,
false => io.snapshot_service().manifest(),
};
let manifest = io.snapshot_service().manifest();
let block_number = manifest.as_ref().map_or(0, |m| m.block_number);
let manifest_hash = manifest.map_or(H256::new(), |m| keccak(m.into_rlp()));
packet.append(&manifest_hash);
@@ -908,29 +927,36 @@ impl ChainSync {
}
fn check_resume(&mut self, io: &mut SyncIo) {
if self.state == SyncState::Waiting && !io.chain().queue_info().is_full() {
self.state = SyncState::Blocks;
self.continue_sync(io);
} else if self.state == SyncState::SnapshotWaiting {
match io.snapshot_service().status() {
RestorationStatus::Inactive => {
trace!(target:"sync", "Snapshot restoration is complete");
self.restart(io);
},
RestorationStatus::Ongoing { state_chunks_done, block_chunks_done, .. } => {
if !self.snapshot.is_complete() && self.snapshot.done_chunks() - (state_chunks_done + block_chunks_done) as usize <= MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD {
trace!(target:"sync", "Resuming snapshot sync");
self.state = SyncState::SnapshotData;
match self.state {
SyncState::Waiting if !io.chain().queue_info().is_full() => {
self.state = SyncState::Blocks;
self.continue_sync(io);
},
SyncState::SnapshotWaiting => {
match io.snapshot_service().status() {
RestorationStatus::Inactive => {
trace!(target:"sync", "Snapshot restoration is complete");
self.restart(io);
},
RestorationStatus::Initializing { .. } => {
trace!(target:"sync", "Snapshot restoration is initializing");
},
RestorationStatus::Ongoing { state_chunks_done, block_chunks_done, .. } => {
if !self.snapshot.is_complete() && self.snapshot.done_chunks() - (state_chunks_done + block_chunks_done) as usize <= MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD {
trace!(target:"sync", "Resuming snapshot sync");
self.state = SyncState::SnapshotData;
self.continue_sync(io);
}
},
RestorationStatus::Failed => {
trace!(target: "sync", "Snapshot restoration aborted");
self.state = SyncState::WaitingPeers;
self.snapshot.clear();
self.continue_sync(io);
}
},
RestorationStatus::Failed => {
trace!(target: "sync", "Snapshot restoration aborted");
self.state = SyncState::WaitingPeers;
self.snapshot.clear();
self.continue_sync(io);
},
}
},
}
},
_ => (),
}
}

View File

@@ -120,8 +120,9 @@ impl SyncSupplier {
None => return Ok(Some((BLOCK_HEADERS_PACKET, RlpStream::new_list(0)))) //no such header, return nothing
}
} else {
trace!(target: "sync", "{} -> GetBlockHeaders (number: {}, max: {}, skip: {}, reverse:{})", peer_id, r.val_at::<BlockNumber>(0)?, max_headers, skip, reverse);
r.val_at(0)?
let number = r.val_at::<BlockNumber>(0)?;
trace!(target: "sync", "{} -> GetBlockHeaders (number: {}, max: {}, skip: {}, reverse:{})", peer_id, number, max_headers, skip, reverse);
number
};
let mut number = if reverse {
@@ -135,7 +136,10 @@ impl SyncSupplier {
let inc = (skip + 1) as BlockNumber;
let overlay = io.chain_overlay().read();
while number <= last && count < max_count {
// We are checking the `overlay` as well since it's where the ForkBlock
// header is cached : so peers can confirm we are on the right fork,
// even if we are not synced until the fork block
while (number <= last || overlay.contains_key(&number)) && count < max_count {
if let Some(hdr) = overlay.get(&number) {
trace!(target: "sync", "{}: Returning cached fork header", peer_id);
data.extend_from_slice(hdr);
@@ -152,8 +156,7 @@ impl SyncSupplier {
break;
}
number -= inc;
}
else {
} else {
number += inc;
}
}
@@ -237,20 +240,20 @@ impl SyncSupplier {
/// Respond to GetSnapshotManifest request
fn return_snapshot_manifest(io: &SyncIo, r: &Rlp, peer_id: PeerId) -> RlpResponseResult {
let count = r.item_count().unwrap_or(0);
trace!(target: "sync", "{} -> GetSnapshotManifest", peer_id);
trace!(target: "warp", "{} -> GetSnapshotManifest", peer_id);
if count != 0 {
debug!(target: "sync", "Invalid GetSnapshotManifest request, ignoring.");
debug!(target: "warp", "Invalid GetSnapshotManifest request, ignoring.");
return Ok(None);
}
let rlp = match io.snapshot_service().manifest() {
Some(manifest) => {
trace!(target: "sync", "{} <- SnapshotManifest", peer_id);
trace!(target: "warp", "{} <- SnapshotManifest", peer_id);
let mut rlp = RlpStream::new_list(1);
rlp.append_raw(&manifest.into_rlp(), 1);
rlp
},
None => {
trace!(target: "sync", "{}: No manifest to return", peer_id);
trace!(target: "warp", "{}: No snapshot manifest to return", peer_id);
RlpStream::new_list(0)
}
};
@@ -260,15 +263,16 @@ impl SyncSupplier {
/// Respond to GetSnapshotData request
fn return_snapshot_data(io: &SyncIo, r: &Rlp, peer_id: PeerId) -> RlpResponseResult {
let hash: H256 = r.val_at(0)?;
trace!(target: "sync", "{} -> GetSnapshotData {:?}", peer_id, hash);
trace!(target: "warp", "{} -> GetSnapshotData {:?}", peer_id, hash);
let rlp = match io.snapshot_service().chunk(hash) {
Some(data) => {
let mut rlp = RlpStream::new_list(1);
trace!(target: "sync", "{} <- SnapshotData", peer_id);
trace!(target: "warp", "{} <- SnapshotData", peer_id);
rlp.append(&data);
rlp
},
None => {
trace!(target: "warp", "{}: No snapshot data to return", peer_id);
RlpStream::new_list(0)
}
};

View File

@@ -14,10 +14,13 @@
// You should have received a copy of the GNU General Public License
// along with Parity. If not, see <http://www.gnu.org/licenses/>.
use hash::keccak;
use ethcore::snapshot::{ManifestData, SnapshotService};
use ethereum_types::H256;
use hash::keccak;
use rand::{thread_rng, Rng};
use std::collections::HashSet;
use ethcore::snapshot::ManifestData;
use std::iter::FromIterator;
#[derive(PartialEq, Eq, Debug)]
pub enum ChunkType {
@@ -32,6 +35,7 @@ pub struct Snapshot {
completed_chunks: HashSet<H256>,
snapshot_hash: Option<H256>,
bad_hashes: HashSet<H256>,
initialized: bool,
}
impl Snapshot {
@@ -44,9 +48,29 @@ impl Snapshot {
completed_chunks: HashSet::new(),
snapshot_hash: None,
bad_hashes: HashSet::new(),
initialized: false,
}
}
/// Sync the Snapshot completed chunks with the Snapshot Service
pub fn initialize(&mut self, snapshot_service: &SnapshotService) {
if self.initialized {
return;
}
if let Some(completed_chunks) = snapshot_service.completed_chunks() {
self.completed_chunks = HashSet::from_iter(completed_chunks);
}
trace!(
target: "snapshot",
"Snapshot is now initialized with {} completed chunks.",
self.completed_chunks.len(),
);
self.initialized = true;
}
/// Clear everything.
pub fn clear(&mut self) {
self.pending_state_chunks.clear();
@@ -54,6 +78,7 @@ impl Snapshot {
self.downloading_chunks.clear();
self.completed_chunks.clear();
self.snapshot_hash = None;
self.initialized = false;
}
/// Check if currently downloading a snapshot.
@@ -89,18 +114,35 @@ impl Snapshot {
Err(())
}
/// Find a chunk to download
/// Find a random chunk to download
pub fn needed_chunk(&mut self) -> Option<H256> {
// check state chunks first
let chunk = self.pending_state_chunks.iter()
.chain(self.pending_block_chunks.iter())
.find(|&h| !self.downloading_chunks.contains(h) && !self.completed_chunks.contains(h))
.cloned();
// Find all random chunks: first blocks, then state
let needed_chunks = {
let chunk_filter = |h| !self.downloading_chunks.contains(h) && !self.completed_chunks.contains(h);
let needed_block_chunks = self.pending_block_chunks.iter()
.filter(|&h| chunk_filter(h))
.map(|h| *h)
.collect::<Vec<H256>>();
// If no block chunks to download, get the state chunks
if needed_block_chunks.len() == 0 {
self.pending_state_chunks.iter()
.filter(|&h| chunk_filter(h))
.map(|h| *h)
.collect::<Vec<H256>>()
} else {
needed_block_chunks
}
};
// Get a random chunk
let chunk = thread_rng().choose(&needed_chunks);
if let Some(hash) = chunk {
self.downloading_chunks.insert(hash.clone());
}
chunk
chunk.map(|h| *h)
}
pub fn clear_chunk_download(&mut self, hash: &H256) {
@@ -185,8 +227,15 @@ mod test {
let requested: Vec<H256> = (0..40).map(|_| snapshot.needed_chunk().unwrap()).collect();
assert!(snapshot.needed_chunk().is_none());
assert_eq!(&requested[0..20], &manifest.state_hashes[..]);
assert_eq!(&requested[20..40], &manifest.block_hashes[..]);
let requested_all_block_chunks = manifest.block_hashes.iter()
.all(|h| requested.iter().any(|rh| rh == h));
assert!(requested_all_block_chunks);
let requested_all_state_chunks = manifest.state_hashes.iter()
.all(|h| requested.iter().any(|rh| rh == h));
assert!(requested_all_state_chunks);
assert_eq!(snapshot.downloading_chunks.len(), 40);
assert_eq!(snapshot.validate_chunk(&state_chunks[4]), Ok(ChunkType::State(manifest.state_hashes[4].clone())));

View File

@@ -80,6 +80,10 @@ impl SnapshotService for TestSnapshotService {
Some((1, 2))
}
fn completed_chunks(&self) -> Option<Vec<H256>> {
Some(vec![])
}
fn chunk(&self, hash: H256) -> Option<Bytes> {
self.chunks.get(&hash).cloned()
}