Resumable warp-sync / Seed downloaded snapshots (#8544)
* Start dividing sync chain : first supplier method
* WIP - updated chain sync supplier
* Finish refactoring the Chain Sync Supplier
* Create Chain Sync Requester
* Add Propagator for Chain Sync
* Add the Chain Sync Handler
* Move tests from mod -> handler
* Move tests to propagator
* Refactor SyncRequester arguments
* Refactoring peer fork header handler
* Fix wrong highest block number in snapshot sync
* Small refactor...
* Resume warp-sync downloaded chunks
* Add comments
* Refactoring the previous chunks import
* Fix tests
* Address PR grumbles
* Fix not seeding current snapshot
* Address PR Grumbles
* Address PR grumble
* Retry failed CI job
* Update SnapshotService readiness check
  Fix restoration locking issue for previous chunks restoration
* Fix tests
* Fix tests
* Fix test
* Early abort importing previous chunks
* PR Grumbles
* Update Gitlab CI config
* SyncState back to Waiting when Manifest peers disconnect
* Move fix
* Better fix
* Revert GitLab CI changes
* Fix Warning
* Refactor resuming snapshots
* Fix string construction
* Revert "Refactor resuming snapshots"
  This reverts commit 75fd4b553a38e4a49dc5d6a878c70e830ff382eb.
* Update informant log
* Fix string construction
* Refactor resuming snapshots
* Fix informant
* PR Grumbles
* Update informant message : show chunks done
* PR Grumbles
* Fix
* Fix Warning
* PR Grumbles
This commit is contained in:
committed by
Afri Schoedon
parent
6ecc855c34
commit
cdbcfaa7de
@@ -100,14 +100,27 @@ impl SyncHandler {
|
||||
}
|
||||
|
||||
/// Called by peer when it is disconnecting
|
||||
pub fn on_peer_aborting(sync: &mut ChainSync, io: &mut SyncIo, peer: PeerId) {
|
||||
trace!(target: "sync", "== Disconnecting {}: {}", peer, io.peer_info(peer));
|
||||
sync.handshaking_peers.remove(&peer);
|
||||
if sync.peers.contains_key(&peer) {
|
||||
debug!(target: "sync", "Disconnected {}", peer);
|
||||
sync.clear_peer_download(peer);
|
||||
sync.peers.remove(&peer);
|
||||
sync.active_peers.remove(&peer);
|
||||
pub fn on_peer_aborting(sync: &mut ChainSync, io: &mut SyncIo, peer_id: PeerId) {
|
||||
trace!(target: "sync", "== Disconnecting {}: {}", peer_id, io.peer_info(peer_id));
|
||||
sync.handshaking_peers.remove(&peer_id);
|
||||
if sync.peers.contains_key(&peer_id) {
|
||||
debug!(target: "sync", "Disconnected {}", peer_id);
|
||||
sync.clear_peer_download(peer_id);
|
||||
sync.peers.remove(&peer_id);
|
||||
sync.active_peers.remove(&peer_id);
|
||||
|
||||
if sync.state == SyncState::SnapshotManifest {
|
||||
// Check if we are asking other peers for
|
||||
// the snapshot manifest as well.
|
||||
// If not, return to initial state
|
||||
let still_asking_manifest = sync.peers.iter()
|
||||
.filter(|&(id, p)| sync.active_peers.contains(id) && p.asking == PeerAsking::SnapshotManifest)
|
||||
.next().is_none();
|
||||
|
||||
if still_asking_manifest {
|
||||
sync.state = ChainSync::get_init_state(sync.warp_sync, io.chain());
|
||||
}
|
||||
}
|
||||
sync.continue_sync(io);
|
||||
}
|
||||
}
|
||||
@@ -320,6 +333,10 @@ impl SyncHandler {
|
||||
}
|
||||
|
||||
fn on_peer_confirmed(sync: &mut ChainSync, io: &mut SyncIo, peer_id: PeerId) {
|
||||
{
|
||||
let peer = sync.peers.get_mut(&peer_id).expect("Is only called when peer is present in peers");
|
||||
peer.confirmation = ForkConfirmation::Confirmed;
|
||||
}
|
||||
sync.sync_peer(io, peer_id, false);
|
||||
}
|
||||
|
||||
@@ -344,8 +361,8 @@ impl SyncHandler {
|
||||
}
|
||||
|
||||
trace!(target: "sync", "{}: Confirmed peer", peer_id);
|
||||
peer.confirmation = ForkConfirmation::Confirmed;
|
||||
if !io.chain_overlay().read().contains_key(&fork_number) {
|
||||
trace!(target: "sync", "Inserting (fork) block {} header", fork_number);
|
||||
io.chain_overlay().write().insert(fork_number, header.to_vec());
|
||||
}
|
||||
}
|
||||
@@ -560,6 +577,10 @@ impl SyncHandler {
|
||||
sync.continue_sync(io);
|
||||
return Ok(());
|
||||
},
|
||||
RestorationStatus::Initializing { .. } => {
|
||||
trace!(target: "warp", "{}: Snapshot restoration is initializing", peer_id);
|
||||
return Ok(());
|
||||
}
|
||||
RestorationStatus::Ongoing { .. } => {
|
||||
trace!(target: "sync", "{}: Snapshot restoration is ongoing", peer_id);
|
||||
},
|
||||
@@ -659,11 +680,16 @@ impl SyncHandler {
|
||||
// Let the current sync round complete first.
|
||||
sync.active_peers.insert(peer_id.clone());
|
||||
debug!(target: "sync", "Connected {}:{}", peer_id, io.peer_info(peer_id));
|
||||
if let Some((fork_block, _)) = sync.fork_block {
|
||||
SyncRequester::request_fork_header(sync, io, peer_id, fork_block);
|
||||
} else {
|
||||
SyncHandler::on_peer_confirmed(sync, io, peer_id);
|
||||
|
||||
match sync.fork_block {
|
||||
Some((fork_block, _)) => {
|
||||
SyncRequester::request_fork_header(sync, io, peer_id, fork_block);
|
||||
},
|
||||
_ => {
|
||||
SyncHandler::on_peer_confirmed(sync, io, peer_id);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -245,9 +245,12 @@ pub struct SyncStatus {
|
||||
impl SyncStatus {
|
||||
/// Indicates if snapshot download is in progress
|
||||
pub fn is_snapshot_syncing(&self) -> bool {
|
||||
self.state == SyncState::SnapshotManifest
|
||||
|| self.state == SyncState::SnapshotData
|
||||
|| self.state == SyncState::SnapshotWaiting
|
||||
match self.state {
|
||||
SyncState::SnapshotManifest |
|
||||
SyncState::SnapshotData |
|
||||
SyncState::SnapshotWaiting => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns max no of peers to display in informants
|
||||
@@ -643,7 +646,7 @@ impl ChainSync {
|
||||
}
|
||||
}
|
||||
|
||||
/// Resume downloading
|
||||
/// Resume downloading
|
||||
fn continue_sync(&mut self, io: &mut SyncIo) {
|
||||
// Collect active peers that can sync
|
||||
let confirmed_peers: Vec<(PeerId, u8)> = self.peers.iter().filter_map(|(peer_id, peer)|
|
||||
@@ -751,26 +754,45 @@ impl ChainSync {
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(request) = self.old_blocks.as_mut().and_then(|d| d.request_blocks(io, num_active_peers)) {
|
||||
SyncRequester::request_blocks(self, io, peer_id, request, BlockSet::OldBlocks);
|
||||
return;
|
||||
// Only ask for old blocks if the peer has a higher difficulty
|
||||
if force || higher_difficulty {
|
||||
if let Some(request) = self.old_blocks.as_mut().and_then(|d| d.request_blocks(io, num_active_peers)) {
|
||||
SyncRequester::request_blocks(self, io, peer_id, request, BlockSet::OldBlocks);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
trace!(target: "sync", "peer {} is not suitable for asking old blocks", peer_id);
|
||||
self.deactivate_peer(io, peer_id);
|
||||
}
|
||||
},
|
||||
SyncState::SnapshotData => {
|
||||
if let RestorationStatus::Ongoing { state_chunks_done, block_chunks_done, .. } = io.snapshot_service().status() {
|
||||
if self.snapshot.done_chunks() - (state_chunks_done + block_chunks_done) as usize > MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD {
|
||||
trace!(target: "sync", "Snapshot queue full, pausing sync");
|
||||
self.state = SyncState::SnapshotWaiting;
|
||||
match io.snapshot_service().status() {
|
||||
RestorationStatus::Ongoing { state_chunks_done, block_chunks_done, .. } => {
|
||||
// Initialize the snapshot if not already done
|
||||
self.snapshot.initialize(io.snapshot_service());
|
||||
if self.snapshot.done_chunks() - (state_chunks_done + block_chunks_done) as usize > MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD {
|
||||
trace!(target: "sync", "Snapshot queue full, pausing sync");
|
||||
self.state = SyncState::SnapshotWaiting;
|
||||
return;
|
||||
}
|
||||
},
|
||||
RestorationStatus::Initializing { .. } => {
|
||||
trace!(target: "warp", "Snapshot is stil initializing.");
|
||||
return;
|
||||
}
|
||||
},
|
||||
_ => {
|
||||
return;
|
||||
},
|
||||
}
|
||||
|
||||
if peer_snapshot_hash.is_some() && peer_snapshot_hash == self.snapshot.snapshot_hash() {
|
||||
self.clear_peer_download(peer_id);
|
||||
SyncRequester::request_snapshot_data(self, io, peer_id);
|
||||
}
|
||||
},
|
||||
SyncState::SnapshotManifest | //already downloading from other peer
|
||||
SyncState::Waiting | SyncState::SnapshotWaiting => ()
|
||||
SyncState::Waiting |
|
||||
SyncState::SnapshotWaiting => ()
|
||||
}
|
||||
} else {
|
||||
trace!(target: "sync", "Skipping peer {}, force={}, td={:?}, our td={}, state={:?}", peer_id, force, peer_difficulty, syncing_difficulty, self.state);
|
||||
@@ -861,10 +883,7 @@ impl ChainSync {
|
||||
packet.append(&chain.best_block_hash);
|
||||
packet.append(&chain.genesis_hash);
|
||||
if warp_protocol {
|
||||
let manifest = match self.old_blocks.is_some() {
|
||||
true => None,
|
||||
false => io.snapshot_service().manifest(),
|
||||
};
|
||||
let manifest = io.snapshot_service().manifest();
|
||||
let block_number = manifest.as_ref().map_or(0, |m| m.block_number);
|
||||
let manifest_hash = manifest.map_or(H256::new(), |m| keccak(m.into_rlp()));
|
||||
packet.append(&manifest_hash);
|
||||
@@ -908,29 +927,36 @@ impl ChainSync {
|
||||
}
|
||||
|
||||
fn check_resume(&mut self, io: &mut SyncIo) {
|
||||
if self.state == SyncState::Waiting && !io.chain().queue_info().is_full() {
|
||||
self.state = SyncState::Blocks;
|
||||
self.continue_sync(io);
|
||||
} else if self.state == SyncState::SnapshotWaiting {
|
||||
match io.snapshot_service().status() {
|
||||
RestorationStatus::Inactive => {
|
||||
trace!(target:"sync", "Snapshot restoration is complete");
|
||||
self.restart(io);
|
||||
},
|
||||
RestorationStatus::Ongoing { state_chunks_done, block_chunks_done, .. } => {
|
||||
if !self.snapshot.is_complete() && self.snapshot.done_chunks() - (state_chunks_done + block_chunks_done) as usize <= MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD {
|
||||
trace!(target:"sync", "Resuming snapshot sync");
|
||||
self.state = SyncState::SnapshotData;
|
||||
match self.state {
|
||||
SyncState::Waiting if !io.chain().queue_info().is_full() => {
|
||||
self.state = SyncState::Blocks;
|
||||
self.continue_sync(io);
|
||||
},
|
||||
SyncState::SnapshotWaiting => {
|
||||
match io.snapshot_service().status() {
|
||||
RestorationStatus::Inactive => {
|
||||
trace!(target:"sync", "Snapshot restoration is complete");
|
||||
self.restart(io);
|
||||
},
|
||||
RestorationStatus::Initializing { .. } => {
|
||||
trace!(target:"sync", "Snapshot restoration is initializing");
|
||||
},
|
||||
RestorationStatus::Ongoing { state_chunks_done, block_chunks_done, .. } => {
|
||||
if !self.snapshot.is_complete() && self.snapshot.done_chunks() - (state_chunks_done + block_chunks_done) as usize <= MAX_SNAPSHOT_CHUNKS_DOWNLOAD_AHEAD {
|
||||
trace!(target:"sync", "Resuming snapshot sync");
|
||||
self.state = SyncState::SnapshotData;
|
||||
self.continue_sync(io);
|
||||
}
|
||||
},
|
||||
RestorationStatus::Failed => {
|
||||
trace!(target: "sync", "Snapshot restoration aborted");
|
||||
self.state = SyncState::WaitingPeers;
|
||||
self.snapshot.clear();
|
||||
self.continue_sync(io);
|
||||
}
|
||||
},
|
||||
RestorationStatus::Failed => {
|
||||
trace!(target: "sync", "Snapshot restoration aborted");
|
||||
self.state = SyncState::WaitingPeers;
|
||||
self.snapshot.clear();
|
||||
self.continue_sync(io);
|
||||
},
|
||||
}
|
||||
},
|
||||
}
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -120,8 +120,9 @@ impl SyncSupplier {
|
||||
None => return Ok(Some((BLOCK_HEADERS_PACKET, RlpStream::new_list(0)))) //no such header, return nothing
|
||||
}
|
||||
} else {
|
||||
trace!(target: "sync", "{} -> GetBlockHeaders (number: {}, max: {}, skip: {}, reverse:{})", peer_id, r.val_at::<BlockNumber>(0)?, max_headers, skip, reverse);
|
||||
r.val_at(0)?
|
||||
let number = r.val_at::<BlockNumber>(0)?;
|
||||
trace!(target: "sync", "{} -> GetBlockHeaders (number: {}, max: {}, skip: {}, reverse:{})", peer_id, number, max_headers, skip, reverse);
|
||||
number
|
||||
};
|
||||
|
||||
let mut number = if reverse {
|
||||
@@ -135,7 +136,10 @@ impl SyncSupplier {
|
||||
let inc = (skip + 1) as BlockNumber;
|
||||
let overlay = io.chain_overlay().read();
|
||||
|
||||
while number <= last && count < max_count {
|
||||
// We are checking the `overlay` as well since it's where the ForkBlock
|
||||
// header is cached : so peers can confirm we are on the right fork,
|
||||
// even if we are not synced until the fork block
|
||||
while (number <= last || overlay.contains_key(&number)) && count < max_count {
|
||||
if let Some(hdr) = overlay.get(&number) {
|
||||
trace!(target: "sync", "{}: Returning cached fork header", peer_id);
|
||||
data.extend_from_slice(hdr);
|
||||
@@ -152,8 +156,7 @@ impl SyncSupplier {
|
||||
break;
|
||||
}
|
||||
number -= inc;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
number += inc;
|
||||
}
|
||||
}
|
||||
@@ -237,20 +240,20 @@ impl SyncSupplier {
|
||||
/// Respond to GetSnapshotManifest request
|
||||
fn return_snapshot_manifest(io: &SyncIo, r: &Rlp, peer_id: PeerId) -> RlpResponseResult {
|
||||
let count = r.item_count().unwrap_or(0);
|
||||
trace!(target: "sync", "{} -> GetSnapshotManifest", peer_id);
|
||||
trace!(target: "warp", "{} -> GetSnapshotManifest", peer_id);
|
||||
if count != 0 {
|
||||
debug!(target: "sync", "Invalid GetSnapshotManifest request, ignoring.");
|
||||
debug!(target: "warp", "Invalid GetSnapshotManifest request, ignoring.");
|
||||
return Ok(None);
|
||||
}
|
||||
let rlp = match io.snapshot_service().manifest() {
|
||||
Some(manifest) => {
|
||||
trace!(target: "sync", "{} <- SnapshotManifest", peer_id);
|
||||
trace!(target: "warp", "{} <- SnapshotManifest", peer_id);
|
||||
let mut rlp = RlpStream::new_list(1);
|
||||
rlp.append_raw(&manifest.into_rlp(), 1);
|
||||
rlp
|
||||
},
|
||||
None => {
|
||||
trace!(target: "sync", "{}: No manifest to return", peer_id);
|
||||
trace!(target: "warp", "{}: No snapshot manifest to return", peer_id);
|
||||
RlpStream::new_list(0)
|
||||
}
|
||||
};
|
||||
@@ -260,15 +263,16 @@ impl SyncSupplier {
|
||||
/// Respond to GetSnapshotData request
|
||||
fn return_snapshot_data(io: &SyncIo, r: &Rlp, peer_id: PeerId) -> RlpResponseResult {
|
||||
let hash: H256 = r.val_at(0)?;
|
||||
trace!(target: "sync", "{} -> GetSnapshotData {:?}", peer_id, hash);
|
||||
trace!(target: "warp", "{} -> GetSnapshotData {:?}", peer_id, hash);
|
||||
let rlp = match io.snapshot_service().chunk(hash) {
|
||||
Some(data) => {
|
||||
let mut rlp = RlpStream::new_list(1);
|
||||
trace!(target: "sync", "{} <- SnapshotData", peer_id);
|
||||
trace!(target: "warp", "{} <- SnapshotData", peer_id);
|
||||
rlp.append(&data);
|
||||
rlp
|
||||
},
|
||||
None => {
|
||||
trace!(target: "warp", "{}: No snapshot data to return", peer_id);
|
||||
RlpStream::new_list(0)
|
||||
}
|
||||
};
|
||||
|
||||
@@ -14,10 +14,13 @@
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with Parity. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
use hash::keccak;
|
||||
use ethcore::snapshot::{ManifestData, SnapshotService};
|
||||
use ethereum_types::H256;
|
||||
use hash::keccak;
|
||||
use rand::{thread_rng, Rng};
|
||||
|
||||
use std::collections::HashSet;
|
||||
use ethcore::snapshot::ManifestData;
|
||||
use std::iter::FromIterator;
|
||||
|
||||
#[derive(PartialEq, Eq, Debug)]
|
||||
pub enum ChunkType {
|
||||
@@ -32,6 +35,7 @@ pub struct Snapshot {
|
||||
completed_chunks: HashSet<H256>,
|
||||
snapshot_hash: Option<H256>,
|
||||
bad_hashes: HashSet<H256>,
|
||||
initialized: bool,
|
||||
}
|
||||
|
||||
impl Snapshot {
|
||||
@@ -44,9 +48,29 @@ impl Snapshot {
|
||||
completed_chunks: HashSet::new(),
|
||||
snapshot_hash: None,
|
||||
bad_hashes: HashSet::new(),
|
||||
initialized: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Sync the Snapshot completed chunks with the Snapshot Service
|
||||
pub fn initialize(&mut self, snapshot_service: &SnapshotService) {
|
||||
if self.initialized {
|
||||
return;
|
||||
}
|
||||
|
||||
if let Some(completed_chunks) = snapshot_service.completed_chunks() {
|
||||
self.completed_chunks = HashSet::from_iter(completed_chunks);
|
||||
}
|
||||
|
||||
trace!(
|
||||
target: "snapshot",
|
||||
"Snapshot is now initialized with {} completed chunks.",
|
||||
self.completed_chunks.len(),
|
||||
);
|
||||
|
||||
self.initialized = true;
|
||||
}
|
||||
|
||||
/// Clear everything.
|
||||
pub fn clear(&mut self) {
|
||||
self.pending_state_chunks.clear();
|
||||
@@ -54,6 +78,7 @@ impl Snapshot {
|
||||
self.downloading_chunks.clear();
|
||||
self.completed_chunks.clear();
|
||||
self.snapshot_hash = None;
|
||||
self.initialized = false;
|
||||
}
|
||||
|
||||
/// Check if currently downloading a snapshot.
|
||||
@@ -89,18 +114,35 @@ impl Snapshot {
|
||||
Err(())
|
||||
}
|
||||
|
||||
/// Find a chunk to download
|
||||
/// Find a random chunk to download
|
||||
pub fn needed_chunk(&mut self) -> Option<H256> {
|
||||
// check state chunks first
|
||||
let chunk = self.pending_state_chunks.iter()
|
||||
.chain(self.pending_block_chunks.iter())
|
||||
.find(|&h| !self.downloading_chunks.contains(h) && !self.completed_chunks.contains(h))
|
||||
.cloned();
|
||||
// Find all random chunks: first blocks, then state
|
||||
let needed_chunks = {
|
||||
let chunk_filter = |h| !self.downloading_chunks.contains(h) && !self.completed_chunks.contains(h);
|
||||
|
||||
let needed_block_chunks = self.pending_block_chunks.iter()
|
||||
.filter(|&h| chunk_filter(h))
|
||||
.map(|h| *h)
|
||||
.collect::<Vec<H256>>();
|
||||
|
||||
// If no block chunks to download, get the state chunks
|
||||
if needed_block_chunks.len() == 0 {
|
||||
self.pending_state_chunks.iter()
|
||||
.filter(|&h| chunk_filter(h))
|
||||
.map(|h| *h)
|
||||
.collect::<Vec<H256>>()
|
||||
} else {
|
||||
needed_block_chunks
|
||||
}
|
||||
};
|
||||
|
||||
// Get a random chunk
|
||||
let chunk = thread_rng().choose(&needed_chunks);
|
||||
|
||||
if let Some(hash) = chunk {
|
||||
self.downloading_chunks.insert(hash.clone());
|
||||
}
|
||||
chunk
|
||||
chunk.map(|h| *h)
|
||||
}
|
||||
|
||||
pub fn clear_chunk_download(&mut self, hash: &H256) {
|
||||
@@ -185,8 +227,15 @@ mod test {
|
||||
|
||||
let requested: Vec<H256> = (0..40).map(|_| snapshot.needed_chunk().unwrap()).collect();
|
||||
assert!(snapshot.needed_chunk().is_none());
|
||||
assert_eq!(&requested[0..20], &manifest.state_hashes[..]);
|
||||
assert_eq!(&requested[20..40], &manifest.block_hashes[..]);
|
||||
|
||||
let requested_all_block_chunks = manifest.block_hashes.iter()
|
||||
.all(|h| requested.iter().any(|rh| rh == h));
|
||||
assert!(requested_all_block_chunks);
|
||||
|
||||
let requested_all_state_chunks = manifest.state_hashes.iter()
|
||||
.all(|h| requested.iter().any(|rh| rh == h));
|
||||
assert!(requested_all_state_chunks);
|
||||
|
||||
assert_eq!(snapshot.downloading_chunks.len(), 40);
|
||||
|
||||
assert_eq!(snapshot.validate_chunk(&state_chunks[4]), Ok(ChunkType::State(manifest.state_hashes[4].clone())));
|
||||
|
||||
@@ -80,6 +80,10 @@ impl SnapshotService for TestSnapshotService {
|
||||
Some((1, 2))
|
||||
}
|
||||
|
||||
fn completed_chunks(&self) -> Option<Vec<H256>> {
|
||||
Some(vec![])
|
||||
}
|
||||
|
||||
fn chunk(&self, hash: H256) -> Option<Bytes> {
|
||||
self.chunks.get(&hash).cloned()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user