// Copyright 2015-2020 Parity Technologies (UK) Ltd.
// This file is part of Parity Ethereum.

// Parity Ethereum is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Parity Ethereum is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Parity Ethereum. If not, see <http://www.gnu.org/licenses/>.

use std::time::Instant;
use std::{mem, cmp};

use crate::{
	snapshot_sync::ChunkType,
	sync_io::SyncIo,
	api::{ETH_PROTOCOL, WARP_SYNC_PROTOCOL_ID},
	block_sync::{BlockDownloaderImportError as DownloaderImportError, DownloadAction},
	chain::{
		sync_packet::{
			PacketInfo,
			SyncPacket::{
				self, BlockBodiesPacket, BlockHeadersPacket, NewBlockHashesPacket, NewBlockPacket,
				PrivateStatePacket, PrivateTransactionPacket, ReceiptsPacket, SignedPrivateTransactionPacket,
				SnapshotDataPacket, SnapshotManifestPacket, StatusPacket,
			}
		},
		BlockSet, ChainSync, ForkConfirmation, PacketDecodeError, PeerAsking, PeerInfo, SyncRequester,
		SyncState, ETH_PROTOCOL_VERSION_63, ETH_PROTOCOL_VERSION_64, MAX_NEW_BLOCK_AGE, MAX_NEW_HASHES,
		PAR_PROTOCOL_VERSION_1, PAR_PROTOCOL_VERSION_3, PAR_PROTOCOL_VERSION_4,
	}
};

use bytes::Bytes;
use enum_primitive::FromPrimitive;
use ethereum_types::{H256, U256};
use keccak_hash::keccak;
use network::PeerId;
use network::client_version::ClientVersion;
use log::{debug, trace, error, warn};
use rlp::Rlp;
use common_types::{
	BlockNumber,
	block_status::BlockStatus,
	ids::BlockId,
	errors::{EthcoreError, ImportError, BlockError},
	verification::Unverified,
	snapshot::{ManifestData, RestorationStatus},
};

/// The Chain Sync Handler: handles responses from peers
pub struct SyncHandler;

impl SyncHandler {
	/// Handle incoming packet from peer
	pub fn on_packet(sync: &mut ChainSync, io: &mut dyn SyncIo, peer: PeerId, packet_id: u8, data: &[u8]) {
		let rlp = Rlp::new(data);
		if let Some(packet_id) = SyncPacket::from_u8(packet_id) {
			let result = match packet_id {
				StatusPacket => SyncHandler::on_peer_status(sync, io, peer, &rlp),
				BlockHeadersPacket => SyncHandler::on_peer_block_headers(sync, io, peer, &rlp),
				BlockBodiesPacket => SyncHandler::on_peer_block_bodies(sync, io, peer, &rlp),
				ReceiptsPacket => SyncHandler::on_peer_block_receipts(sync, io, peer, &rlp),
				NewBlockPacket => SyncHandler::on_peer_new_block(sync, io, peer, &rlp),
				NewBlockHashesPacket => SyncHandler::on_peer_new_hashes(sync, io, peer, &rlp),
				SnapshotManifestPacket => SyncHandler::on_snapshot_manifest(sync, io, peer, &rlp),
				SnapshotDataPacket => SyncHandler::on_snapshot_data(sync, io, peer, &rlp),
				PrivateTransactionPacket => SyncHandler::on_private_transaction(sync, io, peer, &rlp),
				SignedPrivateTransactionPacket => SyncHandler::on_signed_private_transaction(sync, io, peer, &rlp),
				PrivateStatePacket => SyncHandler::on_private_state_data(sync, io, peer, &rlp),
				_ => {
					trace!(target: "sync", "{}: Unknown packet {}", peer, packet_id.id());
					Ok(())
				}
			};
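
			// An invalid response disables and deactivates the peer, a useless (but
			// well-formed) one only deactivates it; on success the same peer is asked
			// for the next piece of work first.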
			match result {
				Err(DownloaderImportError::Invalid) => {
					trace!(target: "sync", "{} -> Invalid packet {}", peer, packet_id.id());
					io.disable_peer(peer);
					sync.deactivate_peer(io, peer);
				},
				Err(DownloaderImportError::Useless) => {
					sync.deactivate_peer(io, peer);
				},
				Ok(()) => {
					// give a task to the same peer first
					sync.sync_peer(io, peer, false);
				},
			}
		} else {
			trace!(target: "sync", "{}: Unknown packet {}", peer, packet_id);
		}
	}

	/// Called when peer sends us new consensus packet
	pub fn on_consensus_packet(io: &mut dyn SyncIo, peer_id: PeerId, r: &Rlp) {
		trace!(target: "sync", "Received consensus packet from {:?}", peer_id);
		io.chain().queue_consensus_message(r.as_raw().to_vec());
	}

	/// Called by peer when it is disconnecting
	pub fn on_peer_aborting(sync: &mut ChainSync, io: &mut dyn SyncIo, peer_id: PeerId) {
		trace!(target: "sync", "== Disconnecting {}: {}", peer_id, io.peer_version(peer_id));
		sync.handshaking_peers.remove(&peer_id);
		if sync.peers.contains_key(&peer_id) {
			debug!(target: "sync", "Disconnected {}", peer_id);
			sync.clear_peer_download(peer_id);
			sync.peers.remove(&peer_id);
			sync.active_peers.remove(&peer_id);
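
			// If we were in the middle of fetching a snapshot manifest, losing this peer
			// may leave nobody to answer the request.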
			if sync.state == SyncState::SnapshotManifest {
				// Check if we are asking other peers for a snapshot manifest as well. If not,
				// set our state to initial state (`Idle` or `WaitingPeers`).
				let still_seeking_manifest = sync.peers.iter()
					.filter(|&(id, p)| sync.active_peers.contains(id) && p.asking == PeerAsking::SnapshotManifest)
					.next().is_some();

				if !still_seeking_manifest {
					warn!(target: "snapshot_sync", "The peer we were downloading a snapshot from ({}) went away. Retrying.", peer_id);
					sync.state = ChainSync::get_init_state(sync.warp_sync, io.chain());
				}
			}
			sync.continue_sync(io);
		}
	}

	/// Called when a new peer is connected
	pub fn on_peer_connected(sync: &mut ChainSync, io: &mut dyn SyncIo, peer: PeerId) {
		trace!(target: "sync", "== Connected {}: {}", peer, io.peer_version(peer));
		if let Err(e) = sync.send_status(io, peer) {
			debug!(target: "sync", "Error sending status request: {:?}", e);
			io.disconnect_peer(peer);
		} else {
			sync.handshaking_peers.insert(peer, Instant::now());
		}
	}

	/// Called by peer once it has a new block
	pub fn on_peer_new_block(sync: &mut ChainSync, io: &mut dyn SyncIo, peer_id: PeerId, r: &Rlp) -> Result<(), DownloaderImportError> {
		if !sync.peers.get(&peer_id).map_or(false, |p| p.can_sync()) {
			trace!(target: "sync", "Ignoring new block from unconfirmed peer {}", peer_id);
			return Ok(());
		}
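		// A NewBlock packet carries `[block, total difficulty]`; bump the peer's recorded
		// total difficulty if this one is higher.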
		let difficulty: U256 = r.val_at(1)?;
		if let Some(ref mut peer) = sync.peers.get_mut(&peer_id) {
			if peer.difficulty.map_or(true, |pd| difficulty > pd) {
				peer.difficulty = Some(difficulty);
			}
		}
		let block = Unverified::from_rlp(r.at(0)?.as_raw().to_vec())?;
		let hash = block.header.hash();
		let number = block.header.number();
		trace!(target: "sync", "{} -> NewBlock ({})", peer_id, hash);
		if number > sync.highest_block.unwrap_or(0) {
			sync.highest_block = Some(number);
		}
		let mut unknown = false;

		if let Some(ref mut peer) = sync.peers.get_mut(&peer_id) {
			peer.latest_hash = hash;
		}

		let last_imported_number = sync.new_blocks.last_imported_block_number();
		if last_imported_number > number && last_imported_number - number > MAX_NEW_BLOCK_AGE {
			trace!(target: "sync", "Ignored ancient new block {:?}", hash);
			return Err(DownloaderImportError::Invalid);
		}
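		// Try to import the block right away; "already known" outcomes are benign, and
		// only a block that fails validation penalises the peer.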
		match io.chain().import_block(block) {
			Err(EthcoreError::Import(ImportError::AlreadyInChain)) => {
				trace!(target: "sync", "New block already in chain {:?}", hash);
			},
			Err(EthcoreError::Import(ImportError::AlreadyQueued)) => {
				trace!(target: "sync", "New block already queued {:?}", hash);
			},
			Ok(_) => {
				// abort current download of the same block
				sync.complete_sync(io);
				sync.new_blocks.mark_as_known(&hash, number);
				trace!(target: "sync", "New block queued {:?} ({})", hash, number);
			},
			Err(EthcoreError::Block(BlockError::UnknownParent(p))) => {
				unknown = true;
				trace!(target: "sync", "New block with unknown parent ({:?}) {:?}", p, hash);
			},
			Err(e) => {
				debug!(target: "sync", "Bad new block {:?} : {:?}", hash, e);
				return Err(DownloaderImportError::Invalid);
			}
		};
		if unknown {
			if sync.state != SyncState::Idle {
				trace!(target: "sync", "NewBlock ignored while seeking");
			} else {
				trace!(target: "sync", "New unknown block {:?}", hash);
				//TODO: handle too many unknown blocks
				sync.sync_peer(io, peer_id, true);
			}
		}
		Ok(())
	}

	/// Handles `NewHashes` packet. Initiates headers download for any unknown hashes.
	pub fn on_peer_new_hashes(sync: &mut ChainSync, io: &mut dyn SyncIo, peer_id: PeerId, r: &Rlp) -> Result<(), DownloaderImportError> {
		if !sync.peers.get(&peer_id).map_or(false, |p| p.can_sync()) {
			trace!(target: "sync", "Ignoring new hashes from unconfirmed peer {}", peer_id);
			return Ok(());
		}
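		// The payload is a list of `(hash, number)` pairs announcing blocks the peer has.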
		let hashes: Vec<_> = r.iter().take(MAX_NEW_HASHES).map(|item| (item.val_at::<H256>(0), item.val_at::<BlockNumber>(1))).collect();
		if let Some(ref mut peer) = sync.peers.get_mut(&peer_id) {
			// Peer has new blocks with unknown difficulty
			peer.difficulty = None;
			if let Some(&(Ok(ref h), _)) = hashes.last() {
				peer.latest_hash = h.clone();
			}
		}
		if sync.state != SyncState::Idle {
			trace!(target: "sync", "Ignoring new hashes since we're already downloading.");
			let max = r.iter().take(MAX_NEW_HASHES).map(|item| item.val_at::<BlockNumber>(1).unwrap_or(0)).fold(0u64, cmp::max);
			if max > sync.highest_block.unwrap_or(0) {
				sync.highest_block = Some(max);
			}
			return Ok(());
		}
		trace!(target: "sync", "{} -> NewHashes ({} entries)", peer_id, r.item_count()?);
		let mut max_height: BlockNumber = 0;
		let mut new_hashes = Vec::new();
		let last_imported_number = sync.new_blocks.last_imported_block_number();
		for (rh, rn) in hashes {
			let hash = rh?;
			let number = rn?;
			if number > sync.highest_block.unwrap_or(0) {
				sync.highest_block = Some(number);
			}
			if sync.new_blocks.is_downloading(&hash) {
				continue;
			}
			if last_imported_number > number && last_imported_number - number > MAX_NEW_BLOCK_AGE {
				trace!(target: "sync", "Ignored ancient new block hash {:?}", hash);
				return Err(DownloaderImportError::Invalid);
			}
			match io.chain().block_status(BlockId::Hash(hash.clone())) {
				BlockStatus::InChain => {
					trace!(target: "sync", "New block hash already in chain {:?}", hash);
				},
				BlockStatus::Queued => {
					trace!(target: "sync", "New hash block already queued {:?}", hash);
				},
				BlockStatus::Unknown => {
					new_hashes.push(hash.clone());
					if number > max_height {
						trace!(target: "sync", "New unknown block hash {:?}", hash);
						if let Some(ref mut peer) = sync.peers.get_mut(&peer_id) {
							peer.latest_hash = hash.clone();
						}
						max_height = number;
					}
				},
				BlockStatus::Bad => {
					debug!(target: "sync", "Bad new block hash {:?}", hash);
					return Err(DownloaderImportError::Invalid);
				}
			}
		};
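		// Unknown hashes were announced: reset the new-blocks downloader to fetch them.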
		if max_height != 0 {
			trace!(target: "sync", "Downloading blocks for new hashes");
			sync.new_blocks.reset_to(new_hashes);
			sync.state = SyncState::NewBlocks;
			sync.sync_peer(io, peer_id, true);
		}
		Ok(())
	}

	/// Called by peer once it has new block bodies
	fn on_peer_block_bodies(sync: &mut ChainSync, io: &mut dyn SyncIo, peer_id: PeerId, r: &Rlp) -> Result<(), DownloaderImportError> {
		sync.clear_peer_download(peer_id);
		let block_set = sync.peers.get(&peer_id)
			.and_then(|p| p.block_set)
			.unwrap_or(BlockSet::NewBlocks);
		let allowed = sync.peers.get(&peer_id).map(|p| p.is_allowed()).unwrap_or(false);

		if !sync.reset_peer_asking(peer_id, PeerAsking::BlockBodies) || !allowed {
			trace!(target: "sync", "{}: Ignored unexpected bodies", peer_id);
			return Ok(());
		}
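		// Take the hashes we asked this peer for; the downloader matches the returned
		// bodies against this list.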
		let expected_blocks = match sync.peers.get_mut(&peer_id) {
			Some(peer) => mem::replace(&mut peer.asking_blocks, Vec::new()),
			None => {
				trace!(target: "sync", "{}: Ignored unexpected bodies (peer not found)", peer_id);
				return Ok(());
			}
		};
		let item_count = r.item_count()?;
		trace!(target: "sync", "{} -> BlockBodies ({} entries), set = {:?}", peer_id, item_count, block_set);
		if item_count == 0 {
			Err(DownloaderImportError::Useless)
		} else if sync.state == SyncState::Waiting {
			trace!(target: "sync", "Ignored block bodies while waiting");
			Ok(())
		} else {
			{
				let downloader = match block_set {
					BlockSet::NewBlocks => &mut sync.new_blocks,
					BlockSet::OldBlocks => match sync.old_blocks {
						None => {
							trace!(target: "sync", "Ignored block bodies while block download is inactive");
							return Ok(());
						},
						Some(ref mut blocks) => blocks,
					}
				};
				downloader.import_bodies(r, expected_blocks.as_slice())?;
			}
			sync.collect_blocks(io, block_set);
			Ok(())
		}
	}
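
	/// Handles the header returned for the fork-check request: confirms the peer if it
	/// matches the configured fork block, marks the chain as too short if it is missing,
	/// and treats a mismatch as an invalid response.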
	fn on_peer_fork_header(sync: &mut ChainSync, io: &mut dyn SyncIo, peer_id: PeerId, r: &Rlp) -> Result<(), DownloaderImportError> {
		{
			let peer = sync.peers.get_mut(&peer_id).expect("Is only called when peer is present in peers");
			peer.asking = PeerAsking::Nothing;
			let item_count = r.item_count()?;
			let (fork_number, fork_hash) = sync.fork_block.expect("ForkHeader request is sent only when fork_block is Some; qed").clone();

			if item_count == 0 || item_count != 1 {
				trace!(target: "sync", "{}: Chain is too short to confirm the block", peer_id);
				peer.confirmation = ForkConfirmation::TooShort;

			} else {
				let header = r.at(0)?.as_raw();
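				// The peer is confirmed only if the returned header hashes to the
				// configured fork block hash.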
				if keccak(&header) != fork_hash {
					trace!(target: "sync", "{}: Fork mismatch", peer_id);
					return Err(DownloaderImportError::Invalid);
				}

				trace!(target: "sync", "{}: Confirmed peer", peer_id);
				peer.confirmation = ForkConfirmation::Confirmed;

				if !io.chain_overlay().read().contains_key(&fork_number) {
					trace!(target: "sync", "Inserting (fork) block {} header", fork_number);
					io.chain_overlay().write().insert(fork_number, header.to_vec());
				}
			}
		}

		return Ok(());
	}

	/// Called by peer once it has new block headers during sync
	fn on_peer_block_headers(sync: &mut ChainSync, io: &mut dyn SyncIo, peer_id: PeerId, r: &Rlp) -> Result<(), DownloaderImportError> {
		let is_fork_header_request = match sync.peers.get(&peer_id) {
			Some(peer) if peer.asking == PeerAsking::ForkHeader => true,
			_ => false,
		};

		if is_fork_header_request {
			return SyncHandler::on_peer_fork_header(sync, io, peer_id, r);
		}

		sync.clear_peer_download(peer_id);
		let expected_hash = sync.peers.get(&peer_id).and_then(|p| p.asking_hash);
		let allowed = sync.peers.get(&peer_id).map(|p| p.is_allowed()).unwrap_or(false);
		let block_set = sync.peers.get(&peer_id).and_then(|p| p.block_set).unwrap_or(BlockSet::NewBlocks);

		if !sync.reset_peer_asking(peer_id, PeerAsking::BlockHeaders) {
			trace!(target: "sync", "{}: Ignored unexpected headers", peer_id);
			return Ok(());
		}
		let expected_hash = match expected_hash {
			Some(hash) => hash,
			None => {
				trace!(target: "sync", "{}: Ignored unexpected headers (expected_hash is None)", peer_id);
				return Ok(());
			}
		};
		if !allowed {
			trace!(target: "sync", "{}: Ignored unexpected headers (peer not allowed)", peer_id);
			return Ok(());
		}

		let item_count = r.item_count()?;
		trace!(target: "sync", "{} -> BlockHeaders ({} entries), state = {:?}, set = {:?}", peer_id, item_count, sync.state, block_set);
		if (sync.state == SyncState::Idle || sync.state == SyncState::WaitingPeers) && sync.old_blocks.is_none() {
			trace!(target: "sync", "Ignored unexpected block headers");
			return Ok(());
		}
		if sync.state == SyncState::Waiting {
			trace!(target: "sync", "Ignored block headers while waiting");
			return Ok(());
		}
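
		// Hand the headers to the downloader for this block set; a `Reset` result means
		// this set's download should be restarted.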
		let result = {
			let downloader = match block_set {
				BlockSet::NewBlocks => &mut sync.new_blocks,
				BlockSet::OldBlocks => {
					match sync.old_blocks {
						None => {
							trace!(target: "sync", "Ignored block headers while block download is inactive");
							return Ok(());
						},
						Some(ref mut blocks) => blocks,
					}
				}
			};
			downloader.import_headers(io, r, expected_hash)?
		};

		if result == DownloadAction::Reset {
			sync.reset_downloads(block_set);
		}

		sync.collect_blocks(io, block_set);
		Ok(())
	}

	/// Called by peer once it has new block receipts
	fn on_peer_block_receipts(sync: &mut ChainSync, io: &mut dyn SyncIo, peer_id: PeerId, r: &Rlp) -> Result<(), DownloaderImportError> {
		sync.clear_peer_download(peer_id);
		let block_set = sync.peers.get(&peer_id).and_then(|p| p.block_set).unwrap_or(BlockSet::NewBlocks);
		let allowed = sync.peers.get(&peer_id).map(|p| p.is_allowed()).unwrap_or(false);
		if !sync.reset_peer_asking(peer_id, PeerAsking::BlockReceipts) || !allowed {
			trace!(target: "sync", "{}: Ignored unexpected receipts", peer_id);
			return Ok(());
		}
		let expected_blocks = match sync.peers.get_mut(&peer_id) {
			Some(peer) => mem::replace(&mut peer.asking_blocks, Vec::new()),
			None => {
				trace!(target: "sync", "{}: Ignored unexpected receipts (peer not found)", peer_id);
				return Ok(());
			}
		};
		let item_count = r.item_count()?;
		trace!(target: "sync", "{} -> BlockReceipts ({} entries)", peer_id, item_count);
		if item_count == 0 {
			Err(DownloaderImportError::Useless)
		} else if sync.state == SyncState::Waiting {
			trace!(target: "sync", "Ignored block receipts while waiting");
			Ok(())
		} else {
			{
				let downloader = match block_set {
					BlockSet::NewBlocks => &mut sync.new_blocks,
					BlockSet::OldBlocks => match sync.old_blocks {
						None => {
							trace!(target: "sync", "Ignored block receipts while block download is inactive");
							return Ok(());
						},
						Some(ref mut blocks) => blocks,
					}
				};
				downloader.import_receipts(r, expected_blocks.as_slice())?;
			}
			sync.collect_blocks(io, block_set);
			Ok(())
		}
	}

	/// Called when snapshot manifest is downloaded from a peer.
	fn on_snapshot_manifest(sync: &mut ChainSync, io: &mut dyn SyncIo, peer_id: PeerId, r: &Rlp) -> Result<(), DownloaderImportError> {
		if !sync.peers.get(&peer_id).map_or(false, |p| p.can_sync()) {
			trace!(target: "snapshot_sync", "Ignoring snapshot manifest from unconfirmed peer {}", peer_id);
			return Ok(());
		}
|
|
|
|
sync.clear_peer_download(peer_id);
|
|
|
|
if !sync.reset_peer_asking(peer_id, PeerAsking::SnapshotManifest) || sync.state != SyncState::SnapshotManifest {
trace!(target: "snapshot_sync", "{}: Ignored unexpected/expired manifest", peer_id);
|
2018-05-09 12:05:34 +02:00
|
|
|
return Ok(());
|
|
|
|
}
|
|
|
|
|
|
|
|
let manifest_rlp = r.at(0)?;
|
2018-07-19 12:46:33 +02:00
|
|
|
let manifest = ManifestData::from_rlp(manifest_rlp.as_raw())?;
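
	// Only begin a restoration for manifest versions that fall inside the range the local
	// snapshot service reports as supported.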
	let is_supported_version = io.snapshot_service().supported_versions()
		.map_or(false, |(l, h)| manifest.version >= l && manifest.version <= h);

	if !is_supported_version {
warn!(target: "snapshot_sync", "{}: Snapshot manifest version not supported: {}", peer_id, manifest.version);
|
2018-07-19 12:46:33 +02:00
|
|
|
return Err(DownloaderImportError::Invalid);
|
2018-05-09 12:05:34 +02:00
|
|
|
}
|
|
|
|
sync.snapshot.reset_to(&manifest, &keccak(manifest_rlp.as_raw()));
debug!(target: "snapshot_sync", "{}: Peer sent a snapshot manifest we can use. Block number #{}, block chunks: {}, state chunks: {}",
|
|
|
|
peer_id, manifest.block_number, manifest.block_hashes.len(), manifest.state_hashes.len());
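	// Hand the manifest to the snapshot service to start the restoration, then switch to
	// `SnapshotData` so chunk requests start going out to peers.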
	io.snapshot_service().begin_restore(manifest);
	sync.state = SyncState::SnapshotData;

	Ok(())
}

/// Called when snapshot data is downloaded from a peer.
fn on_snapshot_data(sync: &mut ChainSync, io: &mut dyn SyncIo, peer_id: PeerId, r: &Rlp) -> Result<(), DownloaderImportError> {
	if !sync.peers.get(&peer_id).map_or(false, |p| p.can_sync()) {
trace!(target: "snapshot_sync", "Ignoring snapshot data from unconfirmed peer {}", peer_id);
|
2018-05-09 12:05:34 +02:00
|
|
|
return Ok(());
|
|
|
|
}
|
|
|
|
sync.clear_peer_download(peer_id);
|
|
|
|
if !sync.reset_peer_asking(peer_id, PeerAsking::SnapshotData) || (sync.state != SyncState::SnapshotData && sync.state != SyncState::SnapshotWaiting) {
trace!(target: "snapshot_sync", "{}: Ignored unexpected snapshot data", peer_id);
|
2018-05-09 12:05:34 +02:00
|
|
|
return Ok(());
|
|
|
|
}
|
|
|
|
|
|
|
|
// check service status
|
|
|
|
let status = io.snapshot_service().status();
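	// Chunks are only fed to the service while a restoration is `Ongoing`; in every other state
	// the chunk is dropped, and a `Failed` restoration additionally marks the manifest hash as bad.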
	match status {
		RestorationStatus::Inactive | RestorationStatus::Failed => {
trace!(target: "snapshot_sync", "{}: Snapshot restoration status: {:?}", peer_id, status);
|
2018-05-09 12:05:34 +02:00
|
|
|
sync.state = SyncState::WaitingPeers;
|
|
|
|
|
|
|
|
// only note bad if restoration failed.
|
|
|
|
if let (Some(hash), RestorationStatus::Failed) = (sync.snapshot.snapshot_hash(), status) {
debug!(target: "snapshot_sync", "Marking snapshot manifest hash {} as bad", hash);
|
2018-05-09 12:05:34 +02:00
|
|
|
sync.snapshot.note_bad(hash);
|
|
|
|
}
|
|
|
|
|
|
|
|
sync.snapshot.clear();
|
|
|
|
return Ok(());
|
|
|
|
},
|
2019-07-01 14:41:45 +02:00
|
|
|
RestorationStatus::Initializing { .. } => {
trace!(target: "snapshot_sync", "{}: Snapshot restoration is initializing. Can't accept data right now.", peer_id);
|
2018-05-16 22:01:55 +02:00
|
|
|
return Ok(());
|
|
|
|
}
|
2019-07-01 14:41:45 +02:00
|
|
|
RestorationStatus::Finalizing => {
trace!(target: "snapshot_sync", "{}: Snapshot finalizing restoration. Can't accept data right now.", peer_id);
|
2019-07-01 14:41:45 +02:00
|
|
|
return Ok(());
|
|
|
|
}
|
2018-05-09 12:05:34 +02:00
|
|
|
RestorationStatus::Ongoing { .. } => {
trace!(target: "snapshot_sync", "{}: Snapshot restoration is ongoing", peer_id);
|
2018-05-09 12:05:34 +02:00
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
let snapshot_data: Bytes = r.val_at(0)?;
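	// Classify the chunk by its hash against the outstanding block/state chunk hashes from the
	// manifest; anything that does not match an expected chunk is treated as bad data below.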
	match sync.snapshot.validate_chunk(&snapshot_data) {
		Ok(ChunkType::Block(hash)) => {
trace!(target: "snapshot_sync", "{}: Processing block chunk", peer_id);
|
2018-05-09 12:05:34 +02:00
|
|
|
io.snapshot_service().restore_block_chunk(hash, snapshot_data);
|
|
|
|
}
|
|
|
|
Ok(ChunkType::State(hash)) => {
trace!(target: "snapshot_sync", "{}: Processing state chunk", peer_id);
|
2018-05-09 12:05:34 +02:00
|
|
|
io.snapshot_service().restore_state_chunk(hash, snapshot_data);
|
|
|
|
}
|
|
|
|
Err(()) => {
trace!(target: "snapshot_sync", "{}: Got bad snapshot chunk", peer_id);
|
2018-05-09 12:05:34 +02:00
|
|
|
io.disconnect_peer(peer_id);
|
|
|
|
return Ok(());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if sync.snapshot.is_complete() {
|
|
|
|
// wait for snapshot restoration process to complete
|
|
|
|
sync.state = SyncState::SnapshotWaiting;
|
|
|
|
}
|
2018-07-19 12:46:33 +02:00
|
|
|
|
2018-05-09 12:05:34 +02:00
|
|
|
Ok(())
|
|
|
|
}

/// Called by peer to report status
fn on_peer_status(sync: &mut ChainSync, io: &mut dyn SyncIo, peer_id: PeerId, r: &Rlp) -> Result<(), DownloaderImportError> {
	let mut r = r.iter();
	sync.handshaking_peers.remove(&peer_id);
	let protocol_version: u8 = r.next().ok_or(rlp::DecoderError::RlpIsTooShort)?.as_val()?;
	let eth_protocol_version = io.protocol_version(&ETH_PROTOCOL, peer_id);
	let warp_protocol_version = io.protocol_version(&WARP_SYNC_PROTOCOL_ID, peer_id);
	let warp_protocol = warp_protocol_version != 0;
	let private_tx_protocol = warp_protocol_version >= PAR_PROTOCOL_VERSION_3.0;
	let network_id = r.next().ok_or(rlp::DecoderError::RlpIsTooShort)?.as_val()?;
	let difficulty = Some(r.next().ok_or(rlp::DecoderError::RlpIsTooShort)?.as_val()?);
	let latest_hash = r.next().ok_or(rlp::DecoderError::RlpIsTooShort)?.as_val()?;
	let genesis = r.next().ok_or(rlp::DecoderError::RlpIsTooShort)?.as_val()?;
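	// eth/64 peers append a fork id (EIP-2124) to the status packet; check it against our fork
	// filter and keep any mismatch around so the peer can be rejected after logging below.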
	let forkid_validation_error = {
		if eth_protocol_version >= ETH_PROTOCOL_VERSION_64.0 {
			let fork_id = r.next().ok_or(rlp::DecoderError::RlpIsTooShort)?.as_val()?;
			sync.fork_filter.is_compatible(io.chain(), fork_id).err().map(|e| (fork_id, e))
		} else {
			None
		}
	};
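	// The warp protocol extends the status with the peer's best snapshot hash and number, and
	// par/3+ additionally advertises whether private transactions are enabled.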
	let snapshot_hash = if warp_protocol { Some(r.next().ok_or(rlp::DecoderError::RlpIsTooShort)?.as_val()?) } else { None };
	let snapshot_number = if warp_protocol { Some(r.next().ok_or(rlp::DecoderError::RlpIsTooShort)?.as_val()?) } else { None };
	let private_tx_enabled = if private_tx_protocol { r.next().and_then(|v| v.as_val().ok()).unwrap_or(false) } else { false };
	let peer = PeerInfo {
		protocol_version,
		network_id,
		difficulty,
		latest_hash,
		genesis,
		asking: PeerAsking::Nothing,
		asking_blocks: Vec::new(),
		asking_hash: None,
		asking_private_state: None,
		ask_time: Instant::now(),
		last_sent_transactions: Default::default(),
		last_sent_private_transactions: Default::default(),
		expired: false,
		confirmation: if sync.fork_block.is_none() { ForkConfirmation::Confirmed } else { ForkConfirmation::Unconfirmed },
		asking_snapshot_data: None,
		snapshot_hash,
		snapshot_number,
		block_set: None,
		private_tx_enabled,
		client_version: ClientVersion::from(io.peer_version(peer_id)),
	};

	trace!(target: "sync", "New peer {} (\
		protocol: {}, \
		network: {:?}, \
		difficulty: {:?}, \
		latest:{}, \
		genesis:{}, \
		snapshot:{:?}, \
		private_tx_enabled:{}, \
		client_version: {})",
		peer_id,
		peer.protocol_version,
		peer.network_id,
		peer.difficulty,
		peer.latest_hash,
		peer.genesis,
		peer.snapshot_number,
		peer.private_tx_enabled,
		peer.client_version,
	);
	if io.is_expired() {
		trace!(target: "sync", "Status packet from expired session {}:{}", peer_id, io.peer_version(peer_id));
		return Ok(());
	}

	if sync.peers.contains_key(&peer_id) {
		debug!(target: "sync", "Unexpected status packet from {}:{}", peer_id, io.peer_version(peer_id));
		return Ok(());
	}
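
	// Sanity-check the handshake: the peer must share our genesis hash and network id and, for
	// eth/64 peers, present a compatible fork id; otherwise it is reported as invalid and dropped.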
	let chain_info = io.chain().chain_info();
	if peer.genesis != chain_info.genesis_hash {
		trace!(target: "sync", "Peer {} genesis hash mismatch (ours: {}, theirs: {})", peer_id, chain_info.genesis_hash, peer.genesis);
		return Err(DownloaderImportError::Invalid);
	}
	if peer.network_id != sync.network_id {
		trace!(target: "sync", "Peer {} network id mismatch (ours: {}, theirs: {})", peer_id, sync.network_id, peer.network_id);
		return Err(DownloaderImportError::Invalid);
	}
	if let Some((fork_id, reason)) = forkid_validation_error {
		trace!(target: "sync", "Peer {} incompatible fork id (fork id: {:#x}/{}, error: {:?})", peer_id, fork_id.hash.0, fork_id.next, reason);
		return Err(DownloaderImportError::Invalid);
	}
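
	// Accepted version ranges: par/1..=par/4 when the peer speaks the warp protocol, eth/63..=eth/64
	// otherwise. The leading `if false` only exists so each range check reads as a uniform `||` clause.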
	if false
		|| (warp_protocol && (peer.protocol_version < PAR_PROTOCOL_VERSION_1.0 || peer.protocol_version > PAR_PROTOCOL_VERSION_4.0))
		|| (!warp_protocol && (peer.protocol_version < ETH_PROTOCOL_VERSION_63.0 || peer.protocol_version > ETH_PROTOCOL_VERSION_64.0))
	{
		trace!(target: "sync", "Peer {} unsupported eth protocol ({})", peer_id, peer.protocol_version);
		return Err(DownloaderImportError::Invalid);
	}

	if sync.sync_start_time.is_none() {
		sync.sync_start_time = Some(Instant::now());
	}

	sync.peers.insert(peer_id.clone(), peer);
	// Don't activate peer immediately when searching for common block.
	// Let the current sync round complete first.
	sync.active_peers.insert(peer_id.clone());
	debug!(target: "sync", "Connected {}:{}", peer_id, io.peer_version(peer_id));

	if let Some((fork_block, _)) = sync.fork_block {
		SyncRequester::request_fork_header(sync, io, peer_id, fork_block);
	}

	Ok(())
}
|
|
|
|
|
|
|
|
	/// Called when peer sends us new transactions
	pub fn on_peer_transactions(sync: &ChainSync, io: &mut dyn SyncIo, peer_id: PeerId, r: &Rlp) -> Result<(), PacketDecodeError> {
		// Accept transactions only when fully synced
		if !io.is_chain_queue_empty() || (sync.state != SyncState::Idle && sync.state != SyncState::NewBlocks) {
			trace!(target: "sync", "{} Ignoring transactions while syncing", peer_id);
			return Ok(());
		}
		if !sync.peers.get(&peer_id).map_or(false, |p| p.can_sync()) {
			trace!(target: "sync", "{} Ignoring transactions from unconfirmed/unknown peer", peer_id);
			return Ok(());
		}

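		// The packet is an RLP list of opaque transaction payloads; collect the raw
		// bytes and hand them to the client queue for validation and import.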
		let item_count = r.item_count()?;
		trace!(target: "sync", "{:02} -> Transactions ({} entries)", peer_id, item_count);
		let mut transactions = Vec::with_capacity(item_count);
		for i in 0 .. item_count {
			let rlp = r.at(i)?;
			let tx = rlp.as_raw().to_vec();
			transactions.push(tx);
		}
		io.chain().queue_transactions(transactions, peer_id);
		Ok(())
	}

	/// Called when peer sends us signed private transaction packet
	fn on_signed_private_transaction(sync: &mut ChainSync, _io: &mut dyn SyncIo, peer_id: PeerId, r: &Rlp) -> Result<(), DownloaderImportError> {
		if !sync.peers.get(&peer_id).map_or(false, |p| p.can_sync()) {
			trace!(target: "sync", "{} Ignoring packet from unconfirmed/unknown peer", peer_id);
			return Ok(());
		}
		let private_handler = match sync.private_tx_handler {
			Some(ref handler) => handler,
			None => {
				trace!(target: "sync", "{} Ignoring private tx packet from peer", peer_id);
				return Ok(());
			}
		};
		trace!(target: "privatetx", "Received signed private transaction packet from {:?}", peer_id);
		match private_handler.import_signed_private_transaction(r.as_raw()) {
			Ok(transaction_hash) => {
				// Don't send the packet back
				if let Some(ref mut peer) = sync.peers.get_mut(&peer_id) {
					peer.last_sent_private_transactions.insert(transaction_hash);
				}
			},
			Err(e) => {
				trace!(target: "privatetx", "Ignoring the message, error queueing: {}", e);
			}
		}
		Ok(())
	}

	/// Called when peer sends us new private transaction packet
	fn on_private_transaction(sync: &mut ChainSync, _io: &mut dyn SyncIo, peer_id: PeerId, r: &Rlp) -> Result<(), DownloaderImportError> {
		if !sync.peers.get(&peer_id).map_or(false, |p| p.can_sync()) {
			trace!(target: "sync", "{} Ignoring packet from unconfirmed/unknown peer", peer_id);
			return Ok(());
		}
		let private_handler = match sync.private_tx_handler {
			Some(ref handler) => handler,
			None => {
				trace!(target: "sync", "{} Ignoring private tx packet from peer", peer_id);
				return Ok(());
			}
		};
		trace!(target: "privatetx", "Received private transaction packet from {:?}", peer_id);
		match private_handler.import_private_transaction(r.as_raw()) {
			Ok(transaction_hash) => {
				// Don't send the packet back
				if let Some(ref mut peer) = sync.peers.get_mut(&peer_id) {
					peer.last_sent_private_transactions.insert(transaction_hash);
				}
			},
			Err(e) => {
				trace!(target: "privatetx", "Ignoring the message, error queueing: {}", e);
			}
		}
		Ok(())
	}

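	/// Called when peer sends us private state data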
	fn on_private_state_data(sync: &mut ChainSync, io: &mut dyn SyncIo, peer_id: PeerId, r: &Rlp) -> Result<(), DownloaderImportError> {
		if !sync.peers.get(&peer_id).map_or(false, |p| p.can_sync()) {
			trace!(target: "sync", "{} Ignoring packet from unconfirmed/unknown peer", peer_id);
			return Ok(());
		}
		if !sync.reset_peer_asking(peer_id, PeerAsking::PrivateState) {
			trace!(target: "sync", "{}: Ignored unexpected private state data", peer_id);
			return Ok(());
		}
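		// `asking_private_state` holds the hash we requested from this peer; data
		// carrying any other hash is ignored below.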
		let requested_hash = sync.peers.get(&peer_id).and_then(|p| p.asking_private_state);
		let requested_hash = match requested_hash {
			Some(hash) => hash,
			None => {
				debug!(target: "sync", "{}: Ignored unexpected private state (requested_hash is None)", peer_id);
				return Ok(());
			}
		};
		let private_handler = match sync.private_tx_handler {
			Some(ref handler) => handler,
			None => {
				trace!(target: "sync", "{} Ignoring private tx packet from peer", peer_id);
				return Ok(());
			}
		};
		trace!(target: "privatetx", "Received private state data packet from {:?}", peer_id);
		let private_state_data: Bytes = r.val_at(0)?;
		match io.private_state() {
			Some(db) => {
				// Check hash of the received data before submitting it to DB
				let received_hash = db.state_hash(&private_state_data).unwrap_or_default();
				if received_hash != requested_hash {
					trace!(target: "sync", "{} Ignoring private state data with unexpected hash from peer", peer_id);
					return Ok(());
				}
				match db.save_state(&private_state_data) {
					Ok(hash) => {
						if let Err(err) = private_handler.private_state_synced(&hash) {
							trace!(target: "privatetx", "Ignoring received private state message, error queueing: {}", err);
						}
					}
					Err(e) => {
						error!(target: "privatetx", "Cannot save received private state {:?}", e);
					}
				}
			}
			None => {
				trace!(target: "sync", "{} Ignoring private tx packet from peer", peer_id);
			}
		};
		Ok(())
	}
}

#[cfg(test)]
mod tests {
	use std::collections::VecDeque;

	use super::{
		super::tests::{dummy_sync_with_peer, get_dummy_block, get_dummy_blocks, get_dummy_hashes},
		SyncHandler
	};

	use crate::tests::{helpers::TestIo, snapshot::TestSnapshotService};

	use client_traits::ChainInfo;
	use ethcore::test_helpers::{EachBlockWith, TestBlockChainClient};
	use parking_lot::RwLock;
	use rlp::Rlp;

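	// The tests below drive the SyncHandler entry points against the in-memory
	// TestBlockChainClient / TestIo test doubles.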
	#[test]
	fn handles_peer_new_hashes() {
		let mut client = TestBlockChainClient::new();
		client.add_blocks(10, EachBlockWith::Uncle);
		let queue = RwLock::new(VecDeque::new());
		let mut sync = dummy_sync_with_peer(client.block_hash_delta_minus(5), &client);
		let ss = TestSnapshotService::new();
		let mut io = TestIo::new(&mut client, &ss, &queue, None, None);

		let hashes_data = get_dummy_hashes();
		let hashes_rlp = Rlp::new(&hashes_data);

		let result = SyncHandler::on_peer_new_hashes(&mut sync, &mut io, 0, &hashes_rlp);

		assert!(result.is_ok());
	}

	#[test]
	fn handles_peer_new_block_malformed() {
		let mut client = TestBlockChainClient::new();
		client.add_blocks(10, EachBlockWith::Uncle);

		let block_data = get_dummy_block(11, client.chain_info().best_block_hash);

		let queue = RwLock::new(VecDeque::new());
		let mut sync = dummy_sync_with_peer(client.block_hash_delta_minus(5), &client);
		//sync.have_common_block = true;
		let ss = TestSnapshotService::new();
		let mut io = TestIo::new(&mut client, &ss, &queue, None, None);

		let block = Rlp::new(&block_data);

		let result = SyncHandler::on_peer_new_block(&mut sync, &mut io, 0, &block);

		assert!(result.is_err());
	}

	#[test]
	fn handles_peer_new_block() {
		let mut client = TestBlockChainClient::new();
		client.add_blocks(10, EachBlockWith::Uncle);

		let block_data = get_dummy_blocks(11, client.chain_info().best_block_hash);

		let queue = RwLock::new(VecDeque::new());
		let mut sync = dummy_sync_with_peer(client.block_hash_delta_minus(5), &client);
		let ss = TestSnapshotService::new();
		let mut io = TestIo::new(&mut client, &ss, &queue, None, None);

		let block = Rlp::new(&block_data);

		SyncHandler::on_peer_new_block(&mut sync, &mut io, 0, &block).expect("result to be ok");
	}

	#[test]
	fn handles_peer_new_block_empty() {
		let mut client = TestBlockChainClient::new();
		client.add_blocks(10, EachBlockWith::Uncle);
		let queue = RwLock::new(VecDeque::new());
		let mut sync = dummy_sync_with_peer(client.block_hash_delta_minus(5), &client);
		let ss = TestSnapshotService::new();
		let mut io = TestIo::new(&mut client, &ss, &queue, None, None);

		let empty_data = vec![];
		let block = Rlp::new(&empty_data);

		let result = SyncHandler::on_peer_new_block(&mut sync, &mut io, 0, &block);

		assert!(result.is_err());
	}

	#[test]
	fn handles_peer_new_hashes_empty() {
		let mut client = TestBlockChainClient::new();
		client.add_blocks(10, EachBlockWith::Uncle);
		let queue = RwLock::new(VecDeque::new());
		let mut sync = dummy_sync_with_peer(client.block_hash_delta_minus(5), &client);
		let ss = TestSnapshotService::new();
		let mut io = TestIo::new(&mut client, &ss, &queue, None, None);

		let empty_hashes_data = vec![];
		let hashes_rlp = Rlp::new(&empty_hashes_data);

		let result = SyncHandler::on_peer_new_hashes(&mut sync, &mut io, 0, &hashes_rlp);

		assert!(result.is_ok());
	}
}