From 1b6ebe1a6d3425612b4483f34c5a8e5610ce5039 Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Mon, 5 Dec 2016 18:18:56 +0100 Subject: [PATCH] possible fix for queue drop deadlock (#3702) * possible fix for #3686 * queue: simplify conclusion, don't block on joining * queue: park verifiers with timeout to prevent race * more robust verification loop * queue: re-introduce wait for verifier joining --- ethcore/src/verification/queue/mod.rs | 201 +++++++++++--------------- 1 file changed, 84 insertions(+), 117 deletions(-) diff --git a/ethcore/src/verification/queue/mod.rs b/ethcore/src/verification/queue/mod.rs index 686a1d093..14df0819c 100644 --- a/ethcore/src/verification/queue/mod.rs +++ b/ethcore/src/verification/queue/mod.rs @@ -17,7 +17,7 @@ //! A queue of blocks. Sits between network or other I/O and the `BlockChain`. //! Sorts them ready for blockchain insertion. -use std::thread::{JoinHandle, self}; +use std::thread::{self, JoinHandle}; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering as AtomicOrdering}; use std::sync::{Condvar as SCondvar, Mutex as SMutex}; use util::*; @@ -64,35 +64,11 @@ impl Default for Config { } } -struct VerifierHandle { - deleting: Arc, - sleep: Arc, - thread: JoinHandle<()>, -} - -impl VerifierHandle { - // signal to the verifier thread that it should sleep. - fn sleep(&self) { - self.sleep.store(true, AtomicOrdering::SeqCst); - } - - // signal to the verifier thread that it should wake up. - fn wake_up(&self) { - self.sleep.store(false, AtomicOrdering::SeqCst); - self.thread.thread().unpark(); - } - - // signal to the verifier thread that it should conclude its - // operations. - fn conclude(&self) { - self.wake_up(); - self.deleting.store(true, AtomicOrdering::Release); - } - - // join the verifier thread. - fn join(self) { - self.thread.join().expect("Verifier thread panicked"); - } +// pool states +enum State { + // all threads with id < inner value are to work. + Work(usize), + Exit, } /// An item which is in the process of being verified. @@ -131,7 +107,6 @@ pub struct VerificationQueue { engine: Arc, more_to_verify: Arc, verification: Arc>, - verifiers: Mutex<(Vec, usize)>, deleting: Arc, ready_signal: Arc, empty: Arc, @@ -139,6 +114,8 @@ pub struct VerificationQueue { ticks_since_adjustment: AtomicUsize, max_queue_size: usize, max_mem_use: usize, + verifier_handles: Vec>, + state: Arc<(Mutex, Condvar)>, } struct QueueSignal { @@ -224,40 +201,39 @@ impl VerificationQueue { let max_verifiers = min(::num_cpus::get(), MAX_VERIFIERS); let default_amount = max(::num_cpus::get(), 3) - 2; - let mut verifiers = Vec::with_capacity(max_verifiers); + let state = Arc::new((Mutex::new(State::Work(default_amount)), Condvar::new())); + let mut verifier_handles = Vec::with_capacity(max_verifiers); debug!(target: "verification", "Allocating {} verifiers, {} initially active", max_verifiers, default_amount); for i in 0..max_verifiers { debug!(target: "verification", "Adding verification thread #{}", i); - let deleting = deleting.clone(); let panic_handler = panic_handler.clone(); let verification = verification.clone(); let engine = engine.clone(); let wait = more_to_verify.clone(); let ready = ready_signal.clone(); let empty = empty.clone(); + let state = state.clone(); - // enable only the first few verifiers. - let sleep = if i < default_amount { - Arc::new(AtomicBool::new(false)) - } else { - Arc::new(AtomicBool::new(true)) - }; - - verifiers.push(VerifierHandle { - deleting: deleting.clone(), - sleep: sleep.clone(), - thread: thread::Builder::new() - .name(format!("Verifier #{}", i)) - .spawn(move || { - panic_handler.catch_panic(move || { - VerificationQueue::verify(verification, engine, wait, ready, deleting, empty, sleep) - }).unwrap() - }) - .expect("Failed to create verifier thread.") - }); + let handle = thread::Builder::new() + .name(format!("Verifier #{}", i)) + .spawn(move || { + panic_handler.catch_panic(move || { + VerificationQueue::verify( + verification, + engine, + wait, + ready, + empty, + state, + i, + ) + }).unwrap() + }) + .expect("Failed to create verifier thread."); + verifier_handles.push(handle); } VerificationQueue { @@ -266,13 +242,14 @@ impl VerificationQueue { ready_signal: ready_signal, more_to_verify: more_to_verify, verification: verification, - verifiers: Mutex::new((verifiers, default_amount)), deleting: deleting, processing: RwLock::new(HashSet::new()), empty: empty, ticks_since_adjustment: AtomicUsize::new(0), max_queue_size: max(config.max_queue_size, MIN_QUEUE_LIMIT), max_mem_use: max(config.max_mem_use, MIN_MEM_LIMIT), + verifier_handles: verifier_handles, + state: state, } } @@ -281,23 +258,30 @@ impl VerificationQueue { engine: Arc, wait: Arc, ready: Arc, - deleting: Arc, empty: Arc, - sleep: Arc, + state: Arc<(Mutex, Condvar)>, + id: usize, ) { - while !deleting.load(AtomicOrdering::Acquire) { + loop { + // check current state. { - while sleep.load(AtomicOrdering::SeqCst) { - trace!(target: "verification", "Verifier sleeping"); - ::std::thread::park(); - trace!(target: "verification", "Verifier waking up"); + let mut cur_state = state.0.lock(); + while let State::Work(x) = *cur_state { + // sleep until this thread is required. + if id < x { break } - if deleting.load(AtomicOrdering::Acquire) { - return; - } + debug!(target: "verification", "verifier {} sleeping", id); + state.1.wait(&mut cur_state); + debug!(target: "verification", "verifier {} waking up", id); + } + + if let State::Exit = *cur_state { + debug!(target: "verification", "verifier {} exiting", id); + break; } } + // wait for work if empty. { let mut more_to_verify = verification.more_to_verify.lock().unwrap(); @@ -305,15 +289,22 @@ impl VerificationQueue { empty.notify_all(); } - while verification.unverified.lock().is_empty() && !deleting.load(AtomicOrdering::Acquire) { + while verification.unverified.lock().is_empty() { + if let State::Exit = *state.0.lock() { + debug!(target: "verification", "verifier {} exiting", id); + return; + } + more_to_verify = wait.wait(more_to_verify).unwrap(); } - if deleting.load(AtomicOrdering::Acquire) { + if let State::Exit = *state.0.lock() { + debug!(target: "verification", "verifier {} exiting", id); return; } } + // do work. let item = { // acquire these locks before getting the item to verify. let mut unverified = verification.unverified.lock(); @@ -568,6 +559,14 @@ impl VerificationQueue { } } + /// Get the current number of working verifiers. + pub fn num_verifiers(&self) -> usize { + match *self.state.0.lock() { + State::Work(x) => x, + State::Exit => panic!("state only set to exit on drop; queue live now; qed"), + } + } + /// Optimise memory footprint of the heap fields, and adjust the number of threads /// to better suit the workload. pub fn collect_garbage(&self) { @@ -604,7 +603,7 @@ impl VerificationQueue { return; } - let current = self.verifiers.lock().1; + let current = self.num_verifiers(); let diff = (v_len - u_len).abs(); let total = v_len + u_len; @@ -626,27 +625,14 @@ impl VerificationQueue { // possible, never going over the amount of initially allocated threads // or below 1. fn scale_verifiers(&self, target: usize) { - let mut verifiers = self.verifiers.lock(); - let &mut (ref mut verifiers, ref mut verifier_count) = &mut *verifiers; - - let target = min(verifiers.len(), target); + let current = self.num_verifiers(); + let target = min(self.verifier_handles.len(), target); let target = max(1, target); - debug!(target: "verification", "Scaling from {} to {} verifiers", verifier_count, target); + debug!(target: "verification", "Scaling from {} to {} verifiers", current, target); - // scaling up - for i in *verifier_count..target { - debug!(target: "verification", "Waking up verifier {}", i); - verifiers[i].wake_up(); - } - - // scaling down. - for i in target..*verifier_count { - debug!(target: "verification", "Putting verifier {} to sleep", i); - verifiers[i].sleep(); - } - - *verifier_count = target; + *self.state.0.lock() = State::Work(target); + self.state.1.notify_all(); } } @@ -660,22 +646,18 @@ impl Drop for VerificationQueue { fn drop(&mut self) { trace!(target: "shutdown", "[VerificationQueue] Closing..."); self.clear(); - self.deleting.store(true, AtomicOrdering::Release); + self.deleting.store(true, AtomicOrdering::SeqCst); - let mut verifiers = self.verifiers.get_mut(); - let mut verifiers = &mut verifiers.0; - - // first pass to signal conclusion. must be done before - // notify or deadlock possible. - for handle in verifiers.iter() { - handle.conclude(); - } + // set exit state; should be done before `more_to_verify` notification. + *self.state.0.lock() = State::Exit; + self.state.1.notify_all(); + // wake up all threads waiting for more work. self.more_to_verify.notify_all(); - // second pass to join. - for handle in verifiers.drain(..) { - handle.join(); + // wait for all verifier threads to join. + for thread in self.verifier_handles.drain(..) { + thread.join().expect("Propagating verifier thread panic on shutdown"); } trace!(target: "shutdown", "[VerificationQueue] Closed."); @@ -687,7 +669,7 @@ mod tests { use util::*; use io::*; use spec::*; - use super::{BlockQueue, Config}; + use super::{BlockQueue, Config, State}; use super::kind::blocks::Unverified; use tests::helpers::*; use error::*; @@ -784,11 +766,11 @@ mod tests { let queue = get_test_queue(); queue.scale_verifiers(MAX_VERIFIERS + 1); - assert!(queue.verifiers.lock().1 < MAX_VERIFIERS + 1); + assert!(queue.num_verifiers() < MAX_VERIFIERS + 1); queue.scale_verifiers(0); - assert!(queue.verifiers.lock().1 == 1); + assert!(queue.num_verifiers() == 1); } #[test] @@ -797,14 +779,7 @@ mod tests { // put all the verifiers to sleep to ensure // the test isn't timing sensitive. - let num_verifiers = { - let verifiers = queue.verifiers.lock(); - for i in 0..verifiers.1 { - verifiers.0[i].sleep(); - } - - verifiers.1 - }; + *queue.state.0.lock() = State::Work(0); for block in get_good_dummy_block_seq(5000) { queue.import(Unverified::new(block)).expect("Block good by definition; qed"); @@ -812,20 +787,12 @@ mod tests { // almost all unverified == bump verifier count. queue.collect_garbage(); - assert_eq!(queue.verifiers.lock().1, num_verifiers + 1); - - // wake them up again and verify everything. - { - let verifiers = queue.verifiers.lock(); - for i in 0..verifiers.1 { - verifiers.0[i].wake_up(); - } - } + assert_eq!(queue.num_verifiers(), 1); queue.flush(); // nothing to verify == use minimum number of verifiers. queue.collect_garbage(); - assert_eq!(queue.verifiers.lock().1, 1); + assert_eq!(queue.num_verifiers(), 1); } }