From 1110e79e5e65f392397feb7012fad6ac31dc5abf Mon Sep 17 00:00:00 2001 From: nolash Date: Sat, 24 Apr 2021 08:03:49 +0200 Subject: [PATCH] Add health check for signer, redis, shutdown on missing signer --- apps/cic-eth/cic_eth/admin/ctrl.py | 8 ++++++- apps/cic-eth/cic_eth/error.py | 16 +++++++++----- apps/cic-eth/cic_eth/eth/account.py | 9 +++++++- apps/cic-eth/cic_eth/eth/erc20.py | 8 ++++++- .../cic_eth/runnable/daemons/tasker.py | 20 ++++++++++++----- apps/cic-eth/cic_eth/task.py | 22 ++++++++++++------- apps/cic-eth/config/cic.ini | 2 +- apps/cic-eth/config/docker/cic.ini | 2 +- apps/cic-eth/requirements.txt | 2 +- 9 files changed, 64 insertions(+), 25 deletions(-) diff --git a/apps/cic-eth/cic_eth/admin/ctrl.py b/apps/cic-eth/cic_eth/admin/ctrl.py index 0b55ffc5..af6dd647 100644 --- a/apps/cic-eth/cic_eth/admin/ctrl.py +++ b/apps/cic-eth/cic_eth/admin/ctrl.py @@ -2,7 +2,7 @@ import datetime import logging -# third-party imports +# external imports import celery from chainlib.eth.constant import ZERO_ADDRESS from chainlib.chain import ChainSpec @@ -145,3 +145,9 @@ def check_lock(chained_input, chain_spec_dict, lock_flags, address=None): session.flush() session.close() return chained_input + + +@celery_app.task() +def shutdown(message): + logg.critical('shutdown called: {}'.format(message)) + celery_app.control.shutdown() #broadcast('shutdown') diff --git a/apps/cic-eth/cic_eth/error.py b/apps/cic-eth/cic_eth/error.py index 17053158..781fed85 100644 --- a/apps/cic-eth/cic_eth/error.py +++ b/apps/cic-eth/cic_eth/error.py @@ -48,6 +48,8 @@ class RoleMissingError(Exception): pass + + class IntegrityError(Exception): """Exception raised to signal irregularities with deduplication and ordering of tasks @@ -62,15 +64,19 @@ class LockedError(Exception): pass -class SignerError(Exception): +class SeppukuError(Exception): + """Exception base class for all errors that should cause system shutdown + + """ + + +class SignerError(SeppukuError): """Exception raised when signer is unavailable or generates an error """ pass -class EthError(Exception): - """Exception raised when unspecified error from evm node is encountered - +class RoleAgencyError(SeppukuError): + """Exception raise when a role cannot perform its function. This is a critical exception """ - pass diff --git a/apps/cic-eth/cic_eth/eth/account.py b/apps/cic-eth/cic_eth/eth/account.py index e35760ce..8b521dfa 100644 --- a/apps/cic-eth/cic_eth/eth/account.py +++ b/apps/cic-eth/cic_eth/eth/account.py @@ -70,11 +70,18 @@ def create(self, password, chain_spec_dict): a = None conn = RPCConnection.connect(chain_spec, 'signer') o = new_account() - a = conn.do(o) + try: + a = conn.do(o) + except ConnectionError as e: + raise SignerError(e) + except FileNotFoundError as e: + raise SignerError(e) conn.disconnect() + # TODO: It seems infeasible that a can be None in any case, verify if a == None: raise SignerError('create account') + logg.debug('created account {}'.format(a)) # Initialize nonce provider record for account diff --git a/apps/cic-eth/cic_eth/eth/erc20.py b/apps/cic-eth/cic_eth/eth/erc20.py index 5bde32b9..bc2d593f 100644 --- a/apps/cic-eth/cic_eth/eth/erc20.py +++ b/apps/cic-eth/cic_eth/eth/erc20.py @@ -108,7 +108,13 @@ def transfer(self, tokens, holder_address, receiver_address, value, chain_spec_d nonce_oracle = CustodialTaskNonceOracle(holder_address, self.request.root_id, session=session) gas_oracle = self.create_gas_oracle(rpc, MaxGasOracle.gas) c = ERC20(chain_spec, signer=rpc_signer, gas_oracle=gas_oracle, nonce_oracle=nonce_oracle) - (tx_hash_hex, tx_signed_raw_hex) = c.transfer(t['address'], holder_address, receiver_address, value, tx_format=TxFormat.RLP_SIGNED) + try: + (tx_hash_hex, tx_signed_raw_hex) = c.transfer(t['address'], holder_address, receiver_address, value, tx_format=TxFormat.RLP_SIGNED) + except FileNotFoundError as e: + raise SignerError(e) + except ConnectionError as e: + raise SignerError(e) + rpc_signer.disconnect() rpc.disconnect() diff --git a/apps/cic-eth/cic_eth/runnable/daemons/tasker.py b/apps/cic-eth/cic_eth/runnable/daemons/tasker.py index 51886920..62a90080 100644 --- a/apps/cic-eth/cic_eth/runnable/daemons/tasker.py +++ b/apps/cic-eth/cic_eth/runnable/daemons/tasker.py @@ -11,8 +11,14 @@ import websocket # external imports import celery import confini -from chainlib.connection import RPCConnection -from chainlib.eth.connection import EthUnixSignerConnection +from chainlib.connection import ( + RPCConnection, + ConnType, + ) +from chainlib.eth.connection import ( + EthUnixSignerConnection, + EthHTTPSignerConnection, + ) from chainlib.chain import ChainSpec from chainqueue.db.models.otx import Otx from cic_eth_registry.error import UnknownContractError @@ -143,8 +149,10 @@ else: }) chain_spec = ChainSpec.from_chain_str(config.get('CIC_CHAIN_SPEC')) +RPCConnection.register_constructor(ConnType.UNIX, EthUnixSignerConnection, 'signer') +RPCConnection.register_constructor(ConnType.HTTP, EthHTTPSignerConnection, 'signer') +RPCConnection.register_constructor(ConnType.HTTP_SSL, EthHTTPSignerConnection, 'signer') RPCConnection.register_location(config.get('ETH_PROVIDER'), chain_spec, 'default') -#RPCConnection.register_location(config.get('SIGNER_SOCKET_PATH'), chain_spec, 'signer', constructor=EthUnixSignerConnection) RPCConnection.register_location(config.get('SIGNER_SOCKET_PATH'), chain_spec, 'signer') Otx.tracing = config.true('TASKS_TRACE_QUEUE_STATUS') @@ -152,7 +160,7 @@ Otx.tracing = config.true('TASKS_TRACE_QUEUE_STATUS') #import cic_eth.checks.gas #if not cic_eth.checks.gas.health(config=config): # raise RuntimeError() -liveness.linux.load(health_modules, rundir=config.get('CIC_RUN_DIR'), config=config) +liveness.linux.load(health_modules, rundir=config.get('CIC_RUN_DIR'), config=config, unit='cic-eth-tasker') def main(): argv = ['worker'] @@ -198,9 +206,9 @@ def main(): BaseTask.default_token_address = registry.by_name(BaseTask.default_token_symbol) logg.info('default token set to {} {}'.format(BaseTask.default_token_symbol, BaseTask.default_token_address)) - liveness.linux.set() + liveness.linux.set(rundir=config.get('CIC_RUN_DIR')) current_app.worker_main(argv) - liveness.linux.reset() + liveness.linux.reset(rundir=config.get('CIC_RUN_DIR')) @celery.signals.eventlet_pool_postshutdown.connect diff --git a/apps/cic-eth/cic_eth/task.py b/apps/cic-eth/cic_eth/task.py index 4c4a7f78..24802185 100644 --- a/apps/cic-eth/cic_eth/task.py +++ b/apps/cic-eth/cic_eth/task.py @@ -12,10 +12,7 @@ from chainlib.eth.nonce import RPCNonceOracle from chainlib.eth.gas import RPCGasOracle # local imports -from cic_eth.error import ( - SignerError, - EthError, - ) +from cic_eth.error import SeppukuError from cic_eth.db.models.base import SessionBase logg = logging.getLogger(__name__) @@ -40,6 +37,18 @@ class BaseTask(celery.Task): logg.debug('task {} root uuid {}'.format(self.__class__.__name__, self.request.root_id)) return + + def on_failure(self, exc, task_id, args, kwargs, einfo): + if isinstance(exc, SeppukuError): + logg.critical(einfo) + msg = 'received critical exception {}, calling shutdown'.format(str(exc)) + s = celery.signature( + 'cic_eth.admin.ctrl.shutdown', + [msg], + queue=self.request.delivery_info.get('routing_key'), + ) + s.apply_async() + class CriticalTask(BaseTask): retry_jitter = True @@ -69,7 +78,6 @@ class CriticalSQLAlchemyAndWeb3Task(CriticalTask): sqlalchemy.exc.TimeoutError, requests.exceptions.ConnectionError, sqlalchemy.exc.ResourceClosedError, - EthError, ) safe_gas_threshold_amount = 2000000000 * 60000 * 3 safe_gas_refill_amount = safe_gas_threshold_amount * 5 @@ -80,13 +88,11 @@ class CriticalSQLAlchemyAndSignerTask(CriticalTask): sqlalchemy.exc.DatabaseError, sqlalchemy.exc.TimeoutError, sqlalchemy.exc.ResourceClosedError, - SignerError, ) class CriticalWeb3AndSignerTask(CriticalTask): autoretry_for = ( requests.exceptions.ConnectionError, - SignerError, ) safe_gas_threshold_amount = 2000000000 * 60000 * 3 safe_gas_refill_amount = safe_gas_threshold_amount * 5 @@ -100,4 +106,4 @@ def hello(self): @celery_app.task() def check_health(self): - celery.app.control.shutdown() + pass diff --git a/apps/cic-eth/config/cic.ini b/apps/cic-eth/config/cic.ini index bce15e14..d3840fac 100644 --- a/apps/cic-eth/config/cic.ini +++ b/apps/cic-eth/config/cic.ini @@ -4,5 +4,5 @@ chain_spec = evm:bloxberg:8996 tx_retry_delay = trust_address = default_token_symbol = GFT -health_modules = cic_eth.check.db,cic_eth.check.signer,cic_eth.check.gas +health_modules = cic_eth.check.db,cic_eth.check.redis,cic_eth.check.signer,cic_eth.check.gas run_dir = /run diff --git a/apps/cic-eth/config/docker/cic.ini b/apps/cic-eth/config/docker/cic.ini index e4b9e0bd..4fdefca0 100644 --- a/apps/cic-eth/config/docker/cic.ini +++ b/apps/cic-eth/config/docker/cic.ini @@ -4,5 +4,5 @@ chain_spec = evm:bloxberg:8996 trust_address = 0xEb3907eCad74a0013c259D5874AE7f22DcBcC95C tx_retry_delay = 20 default_token_symbol = GFT -health_modules = cic_eth.check.db,cic_eth.check.signer,cic_eth.check.gas +health_modules = cic_eth.check.db,cic_eth.check.redis,cic_eth.check.signer,cic_eth.check.gas run_dir = /run diff --git a/apps/cic-eth/requirements.txt b/apps/cic-eth/requirements.txt index 44d8b272..8d8cd68b 100644 --- a/apps/cic-eth/requirements.txt +++ b/apps/cic-eth/requirements.txt @@ -1,4 +1,4 @@ -cic-base==0.1.2b1 +cic-base==0.1.2b2 celery==4.4.7 crypto-dev-signer~=0.4.14b3 confini~=0.3.6rc3