openethereum/ethcore/sync/src/light_sync/mod.rs

// Copyright 2015-2019 Parity Technologies (UK) Ltd.
// This file is part of Parity Ethereum.

// Parity Ethereum is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Parity Ethereum is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Parity Ethereum. If not, see <http://www.gnu.org/licenses/>.

//! Light client synchronization.
//!
//! This will synchronize the header chain using PIP messages.
//! Dataflow is largely one-directional as headers are pushed into
//! the light client queue for import. Where possible, they are batched
//! in groups.
//!
//! This is written assuming that the client and sync service are running
//! in the same binary, unlike a full node, which might communicate via IPC.
//!
//! Sync strategy:
//! - Find a common ancestor with peers.
//! - Split the chain up into subchains, which are downloaded in parallel
//!   from various peers in rounds.
//! - When within a certain distance of the head of the chain, aggressively
//!   download all announced blocks.
//! - On a bad block or response, punish the peer and reset.
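//!
//! Rough state flow (a sketch derived from the code below; resets back to
//! `AncestorSearch` also happen on bad imports or aborted rounds):
//!
//! ```text
//! Idle --(peers appear)--> AncestorSearch --(ancestor found)--> Rounds
//!   ^                                                             |
//!   +-------------(target reached / no peers remain)--------------+
//! ```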

use std::{
    collections::{HashMap, HashSet},
    mem,
    ops::Deref,
    sync::Arc,
    time::{Duration, Instant},
};

use ethereum_types::{H256, U256};
use light::{
    client::{AsLightClient, LightChainClient},
    net::{
        Announcement, BasicContext, Capabilities, Error as NetError, EventContext, Handler,
        PeerStatus, ReqId, Status,
    },
    request::{self, CompleteHeadersRequest as HeadersRequest},
};
use network::PeerId;
use parking_lot::{Mutex, RwLock};
use rand::{OsRng, Rng};
use types::encoded;

use self::sync_round::{AbortReason, ResponseContext, SyncRound};

mod response;
mod sync_round;

#[cfg(test)]
mod tests;

// Base value for the header request timeout.
const REQ_TIMEOUT_BASE: Duration = Duration::from_secs(7);
// Additional value for each requested header.
// If we request N headers, then the timeout will be:
// REQ_TIMEOUT_BASE + N * REQ_TIMEOUT_PER_HEADER
const REQ_TIMEOUT_PER_HEADER: Duration = Duration::from_millis(10);
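
// Worked example: an ancestor-search batch requests up to 64 headers (see
// `BATCH_SIZE` in `dispatch_request` below), so such a request would time
// out after REQ_TIMEOUT_BASE + 64 * REQ_TIMEOUT_PER_HEADER = 7s + 640ms = 7.64s.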

/// Peer chain info.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ChainInfo {
    head_td: U256,
    head_hash: H256,
    head_num: u64,
}

impl PartialOrd for ChainInfo {
    fn partial_cmp(&self, other: &Self) -> Option<::std::cmp::Ordering> {
        self.head_td.partial_cmp(&other.head_td)
    }
}

impl Ord for ChainInfo {
    fn cmp(&self, other: &Self) -> ::std::cmp::Ordering {
        self.head_td.cmp(&other.head_td)
    }
}
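
// Illustrative sketch (an addition, not part of the original test suite):
// ordering is by `head_td` alone, so taking the `max` of peers' `ChainInfo`
// selects the highest advertised total difficulty even if its head number
// is lower.
#[cfg(test)]
mod chain_info_ordering {
    use super::ChainInfo;
    use ethereum_types::{H256, U256};

    #[test]
    fn max_selects_highest_total_difficulty() {
        let low = ChainInfo {
            head_td: U256::from(100),
            head_hash: H256::default(),
            head_num: 10,
        };
        let high = ChainInfo {
            head_td: U256::from(200),
            head_hash: H256::default(),
            head_num: 5,
        };
        assert!(high > low);
        assert_eq!(::std::cmp::max(Some(low), Some(high.clone())), Some(high));
    }
}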

struct Peer {
    status: ChainInfo,
}

impl Peer {
    // Create a new peer.
    fn new(chain_info: ChainInfo) -> Self {
        Peer { status: chain_info }
    }
}

// Search for a common ancestor with the best chain.
#[derive(Debug)]
enum AncestorSearch {
    Queued(u64),                          // queued to search for blocks starting from here.
    Awaiting(ReqId, u64, HeadersRequest), // awaiting response for this request.
    Prehistoric,                          // prehistoric block found. TODO: start to roll back CHTs.
    FoundCommon(u64, H256),               // common block found.
    Genesis,                              // common ancestor is the genesis.
}

impl AncestorSearch {
    fn begin(best_num: u64) -> Self {
        match best_num {
            0 => AncestorSearch::Genesis,
            _ => AncestorSearch::Queued(best_num),
        }
    }

    fn process_response<L>(self, ctx: &dyn ResponseContext, client: &L) -> AncestorSearch
    where
        L: AsLightClient,
    {
        let client = client.as_light_client();
        let first_num = client.chain_info().first_block_number.unwrap_or(0);
        match self {
            AncestorSearch::Awaiting(id, start, req) => {
                if &id == ctx.req_id() {
                    match response::verify(ctx.data(), &req) {
                        Ok(headers) => {
                            for header in &headers {
                                if client.is_known(&header.hash()) {
                                    debug!(target: "sync", "Found common ancestor with best chain");
                                    return AncestorSearch::FoundCommon(
                                        header.number(),
                                        header.hash(),
                                    );
                                }

                                if header.number() < first_num {
                                    debug!(target: "sync", "Prehistoric common ancestor with best chain.");
                                    return AncestorSearch::Prehistoric;
                                }
                            }

                            let probe = start - headers.len() as u64;
                            if probe == 0 {
                                AncestorSearch::Genesis
                            } else {
                                AncestorSearch::Queued(probe)
                            }
                        }
                        Err(e) => {
                            trace!(target: "sync", "Bad headers response from {}: {}", ctx.responder(), e);

                            ctx.punish_responder();
                            AncestorSearch::Queued(start)
                        }
                    }
                } else {
                    AncestorSearch::Awaiting(id, start, req)
                }
            }
            other => other,
        }
    }
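
    // Worked example of the search walk (assuming well-formed responses): with
    // a local best block of 1000, `begin` yields `Queued(1000)`; the first
    // batch requests headers 1000 down to 937 in reverse. If none are known
    // locally, `probe = 1000 - 64 = 936` and the search resumes from
    // `Queued(936)`, until it hits a known header (`FoundCommon`), a header
    // below `first_num` (`Prehistoric`), or block 0 (`Genesis`).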

    fn requests_abandoned(self, req_ids: &[ReqId]) -> AncestorSearch {
        match self {
            AncestorSearch::Awaiting(id, start, req) => {
                if req_ids.iter().any(|x| x == &id) {
                    AncestorSearch::Queued(start)
                } else {
                    AncestorSearch::Awaiting(id, start, req)
                }
            }
            other => other,
        }
    }

    fn dispatch_request<F>(self, mut dispatcher: F) -> AncestorSearch
    where
        F: FnMut(HeadersRequest) -> Option<ReqId>,
    {
        const BATCH_SIZE: u64 = 64;

        match self {
            AncestorSearch::Queued(start) => {
                let batch_size = ::std::cmp::min(start, BATCH_SIZE);
                trace!(target: "sync", "Requesting {} reverse headers from {} to find common ancestor",
                    batch_size, start);

                let req = HeadersRequest {
                    start: start.into(),
                    max: batch_size,
                    skip: 0,
                    reverse: true,
                };

                match dispatcher(req.clone()) {
                    Some(req_id) => AncestorSearch::Awaiting(req_id, start, req),
                    None => AncestorSearch::Queued(start),
                }
            }
            other => other,
        }
    }
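
    // Dispatcher contract, illustrated: a `Some(req_id)` return moves the
    // search to `Awaiting`; `None` (no capable peer, no credits, ...) leaves
    // it `Queued` to retry on the next `maintain_sync` pass. Near the genesis
    // the batch shrinks, e.g. `start = 30` requests `min(30, 64) = 30` headers.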
}

// Synchronization state machine.
#[derive(Debug)]
enum SyncState {
    // Idle (waiting for peers) or at chain head.
    Idle,
    // Searching for a common ancestor with the best chain.
    // The queue should be cleared during this phase.
    AncestorSearch(AncestorSearch),
    // Doing sync rounds.
    Rounds(SyncRound),
}

/// A wrapper around `SyncState` that makes sure the given
/// reference to `is_idle` is kept up to date on every state change.
#[derive(Debug)]
struct SyncStateWrapper {
    state: SyncState,
}

impl SyncStateWrapper {
    /// Create a new wrapper for `SyncState::Idle`.
    pub fn idle() -> Self {
        SyncStateWrapper {
            state: SyncState::Idle,
        }
    }

    /// Set the new state's value, making sure `is_idle` gets updated.
    pub fn set(&mut self, state: SyncState, is_idle_handle: &mut bool) {
        *is_idle_handle = match state {
            SyncState::Idle => true,
            _ => false,
        };
        self.state = state;
    }

    /// Returns the internal state's value.
    pub fn into_inner(self) -> SyncState {
        self.state
    }
}

impl Deref for SyncStateWrapper {
    type Target = SyncState;

    fn deref(&self) -> &SyncState {
        &self.state
    }
}
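
// Usage sketch (mirrors how `LightSync::set_state` below drives the wrapper;
// the `is_idle` flag duplicates the state so `is_major_importing` can read it
// without taking the state lock):
//
//     let mut is_idle = true;
//     let mut state = SyncStateWrapper::idle();
//     state.set(SyncState::AncestorSearch(AncestorSearch::begin(100)), &mut is_idle);
//     assert!(!is_idle);
//     state.set(SyncState::Idle, &mut is_idle);
//     assert!(is_idle);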

struct ResponseCtx<'a> {
    peer: PeerId,
    req_id: ReqId,
    ctx: &'a dyn BasicContext,
    data: &'a [encoded::Header],
}

impl<'a> ResponseContext for ResponseCtx<'a> {
    fn responder(&self) -> PeerId {
        self.peer
    }

    fn req_id(&self) -> &ReqId {
        &self.req_id
    }

    fn data(&self) -> &[encoded::Header] {
        self.data
    }

    fn punish_responder(&self) {
        self.ctx.disable_peer(self.peer)
    }
}

/// Light client synchronization manager. See module docs for more details.
pub struct LightSync<L: AsLightClient> {
    start_block_number: u64,
    best_seen: Mutex<Option<ChainInfo>>, // best seen block on the network.
    peers: RwLock<HashMap<PeerId, Mutex<Peer>>>, // peers which are relevant to synchronization.
    pending_reqs: Mutex<HashMap<ReqId, PendingReq>>, // requests made from this handler.
    client: Arc<L>,
    rng: Mutex<OsRng>,
    state: Mutex<SyncStateWrapper>,
    // We duplicate this state tracking to avoid deadlocks in `is_major_importing`.
    is_idle: Mutex<bool>,
}

#[derive(Debug, Clone)]
struct PendingReq {
    started: Instant,
    timeout: Duration,
}

impl<L: AsLightClient + Send + Sync> Handler for LightSync<L> {
    fn on_connect(
        &self,
        ctx: &dyn EventContext,
        status: &Status,
        capabilities: &Capabilities,
    ) -> PeerStatus {
        use std::cmp;

        if capabilities.serve_headers {
            let chain_info = ChainInfo {
                head_td: status.head_td,
                head_hash: status.head_hash,
                head_num: status.head_num,
            };

            {
                let mut best = self.best_seen.lock();
                *best = cmp::max(best.clone(), Some(chain_info.clone()));
            }

            self.peers
                .write()
                .insert(ctx.peer(), Mutex::new(Peer::new(chain_info)));
            self.maintain_sync(ctx.as_basic());

            PeerStatus::Kept
        } else {
            PeerStatus::Unkept
        }
    }

    fn on_disconnect(&self, ctx: &dyn EventContext, unfulfilled: &[ReqId]) {
        let peer_id = ctx.peer();

        let peer = match self.peers.write().remove(&peer_id).map(|p| p.into_inner()) {
            Some(peer) => peer,
            None => return,
        };

        trace!(target: "sync", "peer {} disconnecting", peer_id);

        let new_best = {
            let mut best = self.best_seen.lock();

            if best.as_ref().map_or(false, |b| b == &peer.status) {
                // search for the next-best block.
                let next_best: Option<ChainInfo> = self
                    .peers
                    .read()
                    .values()
                    .map(|p| p.lock().status.clone())
                    .map(Some)
                    .fold(None, ::std::cmp::max);

                *best = next_best;
            }

            best.clone()
        };

        {
            let mut pending_reqs = self.pending_reqs.lock();
            for unfulfilled in unfulfilled {
                pending_reqs.remove(unfulfilled);
            }
        }

        if new_best.is_none() {
            debug!(target: "sync", "No peers remain. Reverting to idle");
            self.set_state(&mut self.state.lock(), SyncState::Idle);
        } else {
            let mut state = self.state.lock();

            let next_state = match mem::replace(&mut *state, SyncStateWrapper::idle()).into_inner()
            {
                SyncState::Idle => SyncState::Idle,
                SyncState::AncestorSearch(search) => {
                    SyncState::AncestorSearch(search.requests_abandoned(unfulfilled))
                }
                SyncState::Rounds(round) => {
                    SyncState::Rounds(round.requests_abandoned(unfulfilled))
                }
            };
            self.set_state(&mut state, next_state);
        }

        self.maintain_sync(ctx.as_basic());
    }

    fn on_announcement(&self, ctx: &dyn EventContext, announcement: &Announcement) {
        let (last_td, chain_info) = {
            let peers = self.peers.read();
            match peers.get(&ctx.peer()) {
                None => return,
                Some(peer) => {
                    let mut peer = peer.lock();
                    let last_td = peer.status.head_td;
                    peer.status = ChainInfo {
                        head_td: announcement.head_td,
                        head_hash: announcement.head_hash,
                        head_num: announcement.head_num,
                    };
                    (last_td, peer.status.clone())
                }
            }
        };

        trace!(target: "sync", "Announcement from peer {}: new chain head {:?}, reorg depth {}",
            ctx.peer(), (announcement.head_hash, announcement.head_num), announcement.reorg_depth);

        if last_td > announcement.head_td {
            trace!(target: "sync", "Peer {} moved backwards.", ctx.peer());
            self.peers.write().remove(&ctx.peer());
            ctx.disconnect_peer(ctx.peer());
            return;
        }

        {
            let mut best = self.best_seen.lock();
            *best = ::std::cmp::max(best.clone(), Some(chain_info));
        }

        self.maintain_sync(ctx.as_basic());
    }

    fn on_responses(&self, ctx: &dyn EventContext, req_id: ReqId, responses: &[request::Response]) {
        let peer = ctx.peer();
        if !self.peers.read().contains_key(&peer) {
            return;
        }

        if self.pending_reqs.lock().remove(&req_id).is_none() {
            return;
        }

        let headers = match responses.get(0) {
            Some(&request::Response::Headers(ref response)) => &response.headers[..],
            Some(_) => {
                trace!("Disabling peer {} for wrong response type.", peer);
                ctx.disable_peer(peer);
                &[]
            }
            None => &[],
        };

        {
            let mut state = self.state.lock();

            let ctx = ResponseCtx {
                peer: ctx.peer(),
                req_id,
                ctx: ctx.as_basic(),
                data: headers,
            };

            let next_state = match mem::replace(&mut *state, SyncStateWrapper::idle()).into_inner()
            {
                SyncState::Idle => SyncState::Idle,
                SyncState::AncestorSearch(search) => {
                    SyncState::AncestorSearch(search.process_response(&ctx, &*self.client))
                }
                SyncState::Rounds(round) => SyncState::Rounds(round.process_response(&ctx)),
            };
            self.set_state(&mut state, next_state);
        }

        self.maintain_sync(ctx.as_basic());
    }

    fn tick(&self, ctx: &dyn BasicContext) {
        self.maintain_sync(ctx);
    }
}

// private helpers
impl<L: AsLightClient> LightSync<L> {
    /// Sets the `LightSync`'s state, making sure `is_idle` is updated along with it.
    fn set_state(&self, state: &mut SyncStateWrapper, next_state: SyncState) {
        state.set(next_state, &mut self.is_idle.lock());
    }

    // Begins a search for the common ancestor between our best block and the
    // best chain. Does not lock the state; a mutable reference to it is passed in.
    fn begin_search(&self, state: &mut SyncStateWrapper) {
        if self.best_seen.lock().is_none() {
            // no peers.
            self.set_state(state, SyncState::Idle);
            return;
        }

        self.client.as_light_client().flush_queue();
        let chain_info = self.client.as_light_client().chain_info();

        trace!(target: "sync", "Beginning search for common ancestor from {:?}",
            (chain_info.best_block_number, chain_info.best_block_hash));
        let next_state =
            SyncState::AncestorSearch(AncestorSearch::begin(chain_info.best_block_number));
        self.set_state(state, next_state);
    }

    // handles request dispatch, block import, state machine transitions, and timeouts.
    fn maintain_sync(&self, ctx: &dyn BasicContext) {
        use ethcore::error::{
            Error as EthcoreError, ErrorKind as EthcoreErrorKind, ImportErrorKind,
        };

        const DRAIN_AMOUNT: usize = 128;

        let client = self.client.as_light_client();
        let chain_info = client.chain_info();

        let mut state = self.state.lock();
        debug!(target: "sync", "Maintaining sync ({:?})", **state);

        // drain any pending blocks into the queue.
        {
            let mut sink = Vec::with_capacity(DRAIN_AMOUNT);

            'a: loop {
                if client.queue_info().is_full() {
                    break;
                }

                let next_state =
                    match mem::replace(&mut *state, SyncStateWrapper::idle()).into_inner() {
                        SyncState::Rounds(round) => {
                            SyncState::Rounds(round.drain(&mut sink, Some(DRAIN_AMOUNT)))
                        }
                        other => other,
                    };
                self.set_state(&mut state, next_state);

                if sink.is_empty() {
                    break;
                }
                trace!(target: "sync", "Drained {} headers to import", sink.len());

                for header in sink.drain(..) {
                    match client.queue_header(header) {
                        Ok(_) => {}
                        Err(EthcoreError(
                            EthcoreErrorKind::Import(ImportErrorKind::AlreadyInChain),
                            _,
                        )) => {
                            trace!(target: "sync", "Block already in chain. Continuing.");
                        }
                        Err(EthcoreError(
                            EthcoreErrorKind::Import(ImportErrorKind::AlreadyQueued),
                            _,
                        )) => {
                            trace!(target: "sync", "Block already queued. Continuing.");
                        }
                        Err(e) => {
                            debug!(target: "sync", "Found bad header ({:?}). Reset to search state.", e);

                            self.begin_search(&mut state);
                            break 'a;
                        }
                    }
                }
            }
        }

        // handle state transitions.
        {
            let best_td = chain_info.pending_total_difficulty;
            let sync_target = match *self.best_seen.lock() {
                Some(ref target) if target.head_td > best_td => (target.head_num, target.head_hash),
                ref other => {
                    let network_score = other.as_ref().map(|target| target.head_td);
                    trace!(target: "sync", "No target to sync to. Network score: {:?}, Local score: {:?}",
                        network_score, best_td);
                    self.set_state(&mut state, SyncState::Idle);
                    return;
                }
            };

            match mem::replace(&mut *state, SyncStateWrapper::idle()).into_inner() {
                SyncState::Rounds(SyncRound::Abort(reason, remaining)) => {
                    if !remaining.is_empty() {
                        self.set_state(
                            &mut state,
                            SyncState::Rounds(SyncRound::Abort(reason, remaining)),
                        );
                        return;
                    }

                    match reason {
                        AbortReason::BadScaffold(bad_peers) => {
                            debug!(target: "sync", "Disabling peers responsible for bad scaffold");
                            for peer in bad_peers {
                                ctx.disable_peer(peer);
                            }
                        }
                        AbortReason::NoResponses => {}
                        AbortReason::TargetReached => {
                            debug!(target: "sync", "Sync target reached. Going idle");
                            self.set_state(&mut state, SyncState::Idle);
                            return;
                        }
                    }

                    debug!(target: "sync", "Beginning search after aborted sync round");
                    self.begin_search(&mut state);
                }
                SyncState::AncestorSearch(AncestorSearch::FoundCommon(num, hash)) => {
                    // begin rounds from the common ancestor toward the sync target.
                    self.set_state(
                        &mut state,
                        SyncState::Rounds(SyncRound::begin((num, hash), sync_target)),
                    );
                }
                SyncState::AncestorSearch(AncestorSearch::Genesis) => {
                    // likewise, but starting from the genesis block.
                    let g_hash = chain_info.genesis_hash;
                    self.set_state(
                        &mut state,
                        SyncState::Rounds(SyncRound::begin((0, g_hash), sync_target)),
                    );
                }
                SyncState::Idle => self.begin_search(&mut state),
                other => self.set_state(&mut state, other), // restore displaced state.
            }
        }

        // handle request timeouts.
        {
            let mut pending_reqs = self.pending_reqs.lock();
            let mut unfulfilled = Vec::new();
            for (req_id, info) in pending_reqs.iter() {
                if info.started.elapsed() >= info.timeout {
                    debug!(target: "sync", "{} timed out", req_id);
                    unfulfilled.push(req_id.clone());
                }
            }

            if !unfulfilled.is_empty() {
                for unfulfilled in unfulfilled.iter() {
                    pending_reqs.remove(unfulfilled);
                }
                drop(pending_reqs);

                let next_state =
                    match mem::replace(&mut *state, SyncStateWrapper::idle()).into_inner() {
                        SyncState::Idle => SyncState::Idle,
                        SyncState::AncestorSearch(search) => {
                            SyncState::AncestorSearch(search.requests_abandoned(&unfulfilled))
                        }
                        SyncState::Rounds(round) => {
                            SyncState::Rounds(round.requests_abandoned(&unfulfilled))
                        }
                    };
                self.set_state(&mut state, next_state);
            }
        }

        // allow dispatching of requests.
        {
            let peers = self.peers.read();
            let mut peer_ids: Vec<_> = peers
                .iter()
                .filter_map(|(id, p)| {
                    if p.lock().status.head_td > chain_info.pending_total_difficulty {
                        Some(*id)
                    } else {
                        None
                    }
                })
                .collect();

            let mut rng = self.rng.lock();
            let mut requested_from = HashSet::new();

            // Naive request dispatcher: give the request to any peer which says
            // it will give us responses, but only one request per peer per
            // state transition.
            let dispatcher = move |req: HeadersRequest| {
                rng.shuffle(&mut peer_ids);

                let request = {
                    let mut builder = request::Builder::default();
                    builder.push(request::Request::Headers(request::IncompleteHeadersRequest {
                        start: req.start.into(),
                        skip: req.skip,
                        max: req.max,
                        reverse: req.reverse,
                    })).expect("request provided fully complete with no unresolved back-references; qed");
                    builder.build()
                };

                for peer in &peer_ids {
                    if requested_from.contains(peer) {
                        continue;
                    }

                    match ctx.request_from(*peer, request.clone()) {
                        Ok(id) => {
                            assert!(
                                req.max <= u32::max_value() as u64,
                                "requesting more than 2^32 headers at a time would overflow"
                            );
                            let timeout =
                                REQ_TIMEOUT_BASE + REQ_TIMEOUT_PER_HEADER * req.max as u32;
                            self.pending_reqs.lock().insert(
                                id.clone(),
                                PendingReq {
                                    started: Instant::now(),
                                    timeout,
                                },
                            );
                            requested_from.insert(peer.clone());

                            return Some(id);
                        }
                        Err(NetError::NoCredits) => {}
                        Err(e) => {
                            trace!(target: "sync", "Error requesting headers from viable peer: {}", e)
                        }
                    }
                }

                None
            };
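
            // Design note: shuffling peer ids on every dispatch spreads header
            // requests across capable peers rather than always hitting the same
            // one, and the `requested_from` set caps it at one in-flight request
            // per peer per `maintain_sync` pass.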

            let next_state = match mem::replace(&mut *state, SyncStateWrapper::idle()).into_inner()
            {
                SyncState::Rounds(round) => SyncState::Rounds(round.dispatch_requests(dispatcher)),
                SyncState::AncestorSearch(search) => {
                    SyncState::AncestorSearch(search.dispatch_request(dispatcher))
                }
                other => other,
            };
            self.set_state(&mut state, next_state);
        }
    }
}

// public API
impl<L: AsLightClient> LightSync<L> {
    /// Create a new instance of `LightSync`.
    ///
    /// This won't do anything until registered as a handler
    /// so it can act on events.
    pub fn new(client: Arc<L>) -> Result<Self, ::std::io::Error> {
        Ok(LightSync {
            start_block_number: client.as_light_client().chain_info().best_block_number,
            best_seen: Mutex::new(None),
            peers: RwLock::new(HashMap::new()),
            pending_reqs: Mutex::new(HashMap::new()),
            client,
            rng: Mutex::new(OsRng::new()?),
            state: Mutex::new(SyncStateWrapper::idle()),
            is_idle: Mutex::new(true),
        })
    }
}

/// Trait for erasing the type of a light sync object and exposing read-only methods.
pub trait SyncInfo {
    /// Get the highest block advertised on the network.
    fn highest_block(&self) -> Option<u64>;

    /// Get the block number at the time of sync start.
    fn start_block(&self) -> u64;

    /// Whether major sync is underway.
    fn is_major_importing(&self) -> bool;
}
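
// Usage sketch (hypothetical caller; `print_status` is illustrative and not
// part of this crate) showing how the type-erased trait might be consumed:
//
//     fn print_status<S: SyncInfo>(sync: &S) {
//         if sync.is_major_importing() {
//             println!("syncing: started at #{}, highest seen {:?}",
//                 sync.start_block(), sync.highest_block());
//         }
//     }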

impl<L: AsLightClient> SyncInfo for LightSync<L> {
    fn highest_block(&self) -> Option<u64> {
        self.best_seen.lock().as_ref().map(|x| x.head_num)
    }

    fn start_block(&self) -> u64 {
        self.start_block_number
    }

    fn is_major_importing(&self) -> bool {
        // Queues holding up to this many headers still count as empty.
        const EMPTY_QUEUE: usize = 3;

        let queue_info = self.client.as_light_client().queue_info();
        let is_verifying =
            queue_info.unverified_queue_size + queue_info.verified_queue_size > EMPTY_QUEUE;
        // Read the duplicated `is_idle` flag rather than the state itself;
        // taking the state lock here could deadlock with `maintain_sync`.
        let is_syncing = !*self.is_idle.lock();

        is_verifying || is_syncing
    }
}