Add remaining health checks and shutdown on critical errors

This commit is contained in:
Louis Holbrook 2021-04-24 17:53:45 +00:00
parent cd0e702e3a
commit f50da54274
20 changed files with 261 additions and 42 deletions

View File

@ -2,7 +2,7 @@
import datetime
import logging
# third-party imports
# external imports
import celery
from chainlib.eth.constant import ZERO_ADDRESS
from chainlib.chain import ChainSpec
@ -145,3 +145,9 @@ def check_lock(chained_input, chain_spec_dict, lock_flags, address=None):
session.flush()
session.close()
return chained_input
@celery_app.task()
def shutdown(message):
    """Log a critical message and issue a shutdown through the celery control interface.

    :param message: Reason for the shutdown, included in the log line
    :type message: str (presumably; it is only passed to str.format here)
    """
    reason = 'shutdown called: {}'.format(message)
    logg.critical(reason)
    # Equivalent of broadcasting 'shutdown' on the control channel
    controller = celery_app.control
    controller.shutdown()

View File

@ -0,0 +1,19 @@
# standard imports
import logging
# external imports
import celery
# local imports
from cic_eth.task import BaseTask
celery_app = celery.current_app
logg = logging.getLogger()
@celery_app.task(bind=True, base=BaseTask)
def default_token(self):
    """Report the default token currently set on the task.

    :returns: Dict with the keys ``symbol`` and ``address``
    :rtype: dict
    """
    token = dict(
        symbol=self.default_token_symbol,
        address=self.default_token_address,
    )
    return token

View File

@ -0,0 +1,18 @@
# external imports
import redis
import os
def health(*args, **kwargs):
    """Check that the redis server accepts a write.

    Stores the pid of the current process under the key given by the
    ``unit`` keyword argument.

    :param config: (keyword) Object whose ``get`` method returns the values for REDIS_HOST, REDIS_PORT and REDIS_DB
    :param unit: (keyword) Key to write the pid to
    :raises KeyError: If the ``config`` or ``unit`` keyword argument is missing
    :returns: True if the write succeeded, False on a redis connection or response error
    :rtype: bool
    """
    r = redis.Redis(
        host=kwargs['config'].get('REDIS_HOST'),
        port=kwargs['config'].get('REDIS_PORT'),
        db=kwargs['config'].get('REDIS_DB'),
    )
    try:
        r.set(kwargs['unit'], os.getpid())
    # Use the public exception module; redis.connection only re-exports these names
    except (redis.exceptions.ConnectionError, redis.exceptions.ResponseError):
        return False
    return True

View File

@ -48,6 +48,8 @@ class RoleMissingError(Exception):
pass
class IntegrityError(Exception):
"""Exception raised to signal irregularities with deduplication and ordering of tasks
@ -62,15 +64,19 @@ class LockedError(Exception):
pass
class SignerError(Exception):
class SeppukuError(Exception):
"""Exception base class for all errors that should cause system shutdown
"""
class SignerError(SeppukuError):
"""Exception raised when signer is unavailable or generates an error
"""
pass
class EthError(Exception):
"""Exception raised when unspecified error from evm node is encountered
class RoleAgencyError(SeppukuError):
    """Exception raised when a role cannot perform its function. This is a critical exception
    """
    pass

View File

@ -4,10 +4,10 @@ import logging
# external imports
import celery
from erc20_single_shot_faucet import SingleShotFaucet as Faucet
from chainlib.eth.constant import ZERO_ADDRESS
from hexathon import (
strip_0x,
)
from chainlib.eth.constant import ZERO_ADDRESS
from chainlib.connection import RPCConnection
from chainlib.eth.sign import (
new_account,
@ -19,6 +19,7 @@ from chainlib.eth.tx import (
unpack,
)
from chainlib.chain import ChainSpec
from chainlib.error import JSONRPCException
from eth_accounts_index import AccountRegistry
from sarafu_faucet import MinterFaucet as Faucet
from chainqueue.db.models.tx import TxCache
@ -70,11 +71,18 @@ def create(self, password, chain_spec_dict):
a = None
conn = RPCConnection.connect(chain_spec, 'signer')
o = new_account()
a = conn.do(o)
try:
a = conn.do(o)
except ConnectionError as e:
raise SignerError(e)
except FileNotFoundError as e:
raise SignerError(e)
conn.disconnect()
# TODO: It seems infeasible that a can be None in any case, verify
if a == None:
raise SignerError('create account')
logg.debug('created account {}'.format(a))
# Initialize nonce provider record for account
@ -219,21 +227,22 @@ def have(self, account, chain_spec_dict):
"""
chain_spec = ChainSpec.from_dict(chain_spec_dict)
o = sign_message(account, '0x2a')
try:
conn = RPCConnection.connect(chain_spec, 'signer')
except Exception as e:
logg.debug('cannot sign with {}: {}'.format(account, e))
return None
conn = RPCConnection.connect(chain_spec, 'signer')
try:
conn.do(o)
conn.disconnect()
return account
except Exception as e:
except ConnectionError as e:
raise SignerError(e)
except FileNotFoundError as e:
raise SignerError(e)
except JSONRPCException as e:
logg.debug('cannot sign with {}: {}'.format(account, e))
conn.disconnect()
return None
conn.disconnect()
return account
@celery_app.task(bind=True, base=CriticalSQLAlchemyTask)
def set_role(self, tag, address, chain_spec_dict):

View File

@ -108,7 +108,13 @@ def transfer(self, tokens, holder_address, receiver_address, value, chain_spec_d
nonce_oracle = CustodialTaskNonceOracle(holder_address, self.request.root_id, session=session)
gas_oracle = self.create_gas_oracle(rpc, MaxGasOracle.gas)
c = ERC20(chain_spec, signer=rpc_signer, gas_oracle=gas_oracle, nonce_oracle=nonce_oracle)
(tx_hash_hex, tx_signed_raw_hex) = c.transfer(t['address'], holder_address, receiver_address, value, tx_format=TxFormat.RLP_SIGNED)
try:
(tx_hash_hex, tx_signed_raw_hex) = c.transfer(t['address'], holder_address, receiver_address, value, tx_format=TxFormat.RLP_SIGNED)
except FileNotFoundError as e:
raise SignerError(e)
except ConnectionError as e:
raise SignerError(e)
rpc_signer.disconnect()
rpc.disconnect()
@ -171,7 +177,12 @@ def approve(self, tokens, holder_address, spender_address, value, chain_spec_dic
nonce_oracle = CustodialTaskNonceOracle(holder_address, self.request.root_id, session=session)
gas_oracle = self.create_gas_oracle(rpc, MaxGasOracle.gas)
c = ERC20(chain_spec, signer=rpc_signer, gas_oracle=gas_oracle, nonce_oracle=nonce_oracle)
(tx_hash_hex, tx_signed_raw_hex) = c.approve(t['address'], holder_address, spender_address, value, tx_format=TxFormat.RLP_SIGNED)
try:
(tx_hash_hex, tx_signed_raw_hex) = c.approve(t['address'], holder_address, spender_address, value, tx_format=TxFormat.RLP_SIGNED)
except FileNotFoundError as e:
raise SignerError(e)
except ConnectionError as e:
raise SignerError(e)
rpc_signer.disconnect()
rpc.disconnect()

View File

@ -328,7 +328,12 @@ def refill_gas(self, recipient_address, chain_spec_dict):
# build and add transaction
logg.debug('tx send gas amount {} from provider {} to {}'.format(refill_amount, gas_provider, recipient_address))
(tx_hash_hex, tx_signed_raw_hex) = c.create(gas_provider, recipient_address, refill_amount, tx_format=TxFormat.RLP_SIGNED)
try:
(tx_hash_hex, tx_signed_raw_hex) = c.create(gas_provider, recipient_address, refill_amount, tx_format=TxFormat.RLP_SIGNED)
except ConnectionError as e:
raise SignerError(e)
except FileNotFoundError as e:
raise SignerError(e)
logg.debug('adding queue refill gas tx {}'.format(tx_hash_hex))
cache_task = 'cic_eth.eth.gas.cache_gas_data'
register_tx(tx_hash_hex, tx_signed_raw_hex, chain_spec, queue, cache_task=cache_task, session=session)
@ -404,7 +409,12 @@ def resend_with_higher_gas(self, txold_hash_hex, chain_spec_dict, gas=None, defa
c = TxFactory(chain_spec, signer=rpc_signer, gas_oracle=gas_oracle)
logg.debug('change gas price from old {} to new {} for tx {}'.format(tx['gasPrice'], new_gas_price, tx))
tx['gasPrice'] = new_gas_price
(tx_hash_hex, tx_signed_raw_hex) = c.build_raw(tx)
try:
(tx_hash_hex, tx_signed_raw_hex) = c.build_raw(tx)
except ConnectionError as e:
raise SignerError(e)
except FileNotFoundError as e:
raise SignerError(e)
queue_create(
chain_spec,
tx['nonce'],

View File

@ -11,8 +11,14 @@ import websocket
# external imports
import celery
import confini
from chainlib.connection import RPCConnection
from chainlib.eth.connection import EthUnixSignerConnection
from chainlib.connection import (
RPCConnection,
ConnType,
)
from chainlib.eth.connection import (
EthUnixSignerConnection,
EthHTTPSignerConnection,
)
from chainlib.chain import ChainSpec
from chainqueue.db.models.otx import Otx
from cic_eth_registry.error import UnknownContractError
@ -143,8 +149,10 @@ else:
})
chain_spec = ChainSpec.from_chain_str(config.get('CIC_CHAIN_SPEC'))
RPCConnection.register_constructor(ConnType.UNIX, EthUnixSignerConnection, 'signer')
RPCConnection.register_constructor(ConnType.HTTP, EthHTTPSignerConnection, 'signer')
RPCConnection.register_constructor(ConnType.HTTP_SSL, EthHTTPSignerConnection, 'signer')
RPCConnection.register_location(config.get('ETH_PROVIDER'), chain_spec, 'default')
#RPCConnection.register_location(config.get('SIGNER_SOCKET_PATH'), chain_spec, 'signer', constructor=EthUnixSignerConnection)
RPCConnection.register_location(config.get('SIGNER_SOCKET_PATH'), chain_spec, 'signer')
Otx.tracing = config.true('TASKS_TRACE_QUEUE_STATUS')
@ -152,7 +160,7 @@ Otx.tracing = config.true('TASKS_TRACE_QUEUE_STATUS')
#import cic_eth.checks.gas
#if not cic_eth.checks.gas.health(config=config):
# raise RuntimeError()
liveness.linux.load(health_modules, rundir=config.get('CIC_RUN_DIR'), config=config)
liveness.linux.load(health_modules, rundir=config.get('CIC_RUN_DIR'), config=config, unit='cic-eth-tasker')
def main():
argv = ['worker']
@ -195,11 +203,12 @@ def main():
BaseTask.default_token_symbol = config.get('CIC_DEFAULT_TOKEN_SYMBOL')
BaseTask.default_token_address = registry.by_name(BaseTask.default_token_symbol)
BaseTask.run_dir = config.get('CIC_RUN_DIR')
logg.info('default token set to {} {}'.format(BaseTask.default_token_symbol, BaseTask.default_token_address))
liveness.linux.set()
liveness.linux.set(rundir=config.get('CIC_RUN_DIR'))
current_app.worker_main(argv)
liveness.linux.reset()
liveness.linux.reset(rundir=config.get('CIC_RUN_DIR'))
@celery.signals.eventlet_pool_postshutdown.connect

View File

@ -0,0 +1,65 @@
#!python3
# SPDX-License-Identifier: GPL-3.0-or-later
# standard imports
import logging
import argparse
import os
# external imports
import confini
import celery
# local imports
from cic_eth.api import Api
logging.basicConfig(level=logging.WARNING)
logg = logging.getLogger()

default_format = 'terminal'
default_config_dir = os.environ.get('CONFINI_DIR', '/usr/local/etc/cic')

argparser = argparse.ArgumentParser()
argparser.add_argument('-i', '--chain-spec', dest='i', type=str, help='chain spec')
argparser.add_argument('-c', type=str, default=default_config_dir, help='config root to use')
argparser.add_argument('-q', type=str, default='cic-eth', help='celery queue to submit transaction tasks to')
argparser.add_argument('--env-prefix', default=os.environ.get('CONFINI_ENV_PREFIX'), dest='env_prefix', type=str, help='environment prefix for variables to overwrite configuration')
argparser.add_argument('-v', action='store_true', help='Be verbose')
argparser.add_argument('-vv', help='be more verbose', action='store_true')
args = argparser.parse_args()

# Test the more verbose flag first, so that it wins when both flags are given
if args.vv:
    logging.getLogger().setLevel(logging.DEBUG)
elif args.v:
    logging.getLogger().setLevel(logging.INFO)

# The config directory is created if it does not exist yet
config_dir = os.path.join(args.c)
os.makedirs(config_dir, 0o777, True)
config = confini.Config(config_dir, args.env_prefix)
config.process()
# The chain spec from the command line is applied on top of the processed config
args_override = {
    'CIC_CHAIN_SPEC': getattr(args, 'i'),
}
config.dict_override(args_override, 'cli args')
config.censor('PASSWORD', 'DATABASE')
config.censor('PASSWORD', 'SSL')
logg.debug('config loaded from {}:\n{}'.format(config_dir, config))

celery_app = celery.Celery(broker=config.get('CELERY_BROKER_URL'), backend=config.get('CELERY_RESULT_URL'))

queue = args.q

api = Api(config.get('CIC_CHAIN_SPEC'), queue=queue)
def main():
    """Request the default token through the api and print its symbol and address."""
    # Wait for the task result before printing
    info = api.default_token().get()
    print('Default token symbol: {}'.format(info['symbol']))
    print('Default token address: {}'.format(info['address']))
if __name__ == '__main__':
main()

View File

@ -10,15 +10,13 @@ import sqlalchemy
from chainlib.eth.constant import ZERO_ADDRESS
from chainlib.eth.nonce import RPCNonceOracle
from chainlib.eth.gas import RPCGasOracle
import liveness.linux
# local imports
from cic_eth.error import (
SignerError,
EthError,
)
from cic_eth.error import SeppukuError
from cic_eth.db.models.base import SessionBase
logg = logging.getLogger(__name__)
logg = logging.getLogger().getChild(__name__)
celery_app = celery.current_app
@ -31,6 +29,7 @@ class BaseTask(celery.Task):
create_gas_oracle = RPCGasOracle
default_token_address = None
default_token_symbol = None
run_dir = '/run'
def create_session(self):
return BaseTask.session_func()
@ -40,6 +39,19 @@ class BaseTask(celery.Task):
logg.debug('task {} root uuid {}'.format(self.__class__.__name__, self.request.root_id))
return
def on_failure(self, exc, task_id, args, kwargs, einfo):
    """Dispatch the shutdown task when a task fails with a critical exception.

    Failures that are not instances of SeppukuError are ignored by this handler.

    :param exc: Exception that caused the task to fail
    :param task_id: Id of the failed task (unused here)
    :param args: Positional arguments of the failed task (unused here)
    :param kwargs: Keyword arguments of the failed task (unused here)
    :param einfo: Exception info, written to the log at critical level
    """
    if not isinstance(exc, SeppukuError):
        return

    # Reset the liveness state for the run dir before requesting shutdown
    liveness.linux.reset(rundir=self.run_dir)
    logg.critical(einfo)

    msg = 'received critical exception {}, calling shutdown'.format(str(exc))
    # The routing key from the failed task's delivery info is used as the queue name
    routing_key = self.request.delivery_info.get('routing_key')
    shutdown_signature = celery.signature(
        'cic_eth.admin.ctrl.shutdown',
        [msg],
        queue=routing_key,
    )
    shutdown_signature.apply_async()
class CriticalTask(BaseTask):
retry_jitter = True
@ -69,7 +81,6 @@ class CriticalSQLAlchemyAndWeb3Task(CriticalTask):
sqlalchemy.exc.TimeoutError,
requests.exceptions.ConnectionError,
sqlalchemy.exc.ResourceClosedError,
EthError,
)
safe_gas_threshold_amount = 2000000000 * 60000 * 3
safe_gas_refill_amount = safe_gas_threshold_amount * 5
@ -80,13 +91,11 @@ class CriticalSQLAlchemyAndSignerTask(CriticalTask):
sqlalchemy.exc.DatabaseError,
sqlalchemy.exc.TimeoutError,
sqlalchemy.exc.ResourceClosedError,
SignerError,
)
class CriticalWeb3AndSignerTask(CriticalTask):
autoretry_for = (
requests.exceptions.ConnectionError,
SignerError,
)
safe_gas_threshold_amount = 2000000000 * 60000 * 3
safe_gas_refill_amount = safe_gas_threshold_amount * 5
@ -100,4 +109,4 @@ def hello(self):
@celery_app.task()
def check_health(self):
celery.app.control.shutdown()
pass

View File

@ -4,5 +4,5 @@ chain_spec = evm:bloxberg:8996
tx_retry_delay =
trust_address =
default_token_symbol = GFT
health_modules = cic_eth.check.db,cic_eth.check.signer,cic_eth.check.gas
health_modules = cic_eth.check.db,cic_eth.check.redis,cic_eth.check.signer,cic_eth.check.gas
run_dir = /run

View File

@ -4,5 +4,5 @@ chain_spec = evm:bloxberg:8996
trust_address = 0xEb3907eCad74a0013c259D5874AE7f22DcBcC95C
tx_retry_delay = 20
default_token_symbol = GFT
health_modules = cic_eth.check.db,cic_eth.check.signer,cic_eth.check.gas
health_modules = cic_eth.check.db,cic_eth.check.redis,cic_eth.check.signer,cic_eth.check.gas
run_dir = /run

View File

@ -1,4 +1,3 @@
[eth]
provider = http://localhost:63545
health_modules = cic_eth.check.db,cic_eth.check.gas
gas_gifter_minimum_balance = 10000000000000000000000

View File

@ -1,4 +1,3 @@
[eth]
provider = http://localhost:8545
gas_gifter_minimum_balance = 10000000000000000000000
health_modules = cic_eth.check.db,cic_eth.check.gas

View File

@ -1,4 +1,4 @@
cic-base==0.1.2b1
cic-base==0.1.2b2
celery==4.4.7
crypto-dev-signer~=0.4.14b3
confini~=0.3.6rc3

View File

@ -3,8 +3,12 @@ import os
import sys
import logging
# external imports
from chainlib.eth.erc20 import ERC20
# local imports
from cic_eth.api import Api
from cic_eth.task import BaseTask
script_dir = os.path.dirname(os.path.realpath(__file__))
root_dir = os.path.dirname(script_dir)
@ -28,3 +32,26 @@ def api(
):
chain_str = str(default_chain_spec)
return Api(chain_str, queue=None, callback_param='foo')
@pytest.fixture(scope='function')
def foo_token_symbol(
        default_chain_spec,
        foo_token,
        eth_rpc,
        contract_roles,
        ):
    """Symbol of the foo token, queried from the token contract over rpc."""
    erc20 = ERC20(default_chain_spec)
    # The query is built with the contract deployer role as sender
    query = erc20.symbol(
        foo_token,
        sender_address=contract_roles['CONTRACT_DEPLOYER'],
    )
    response = eth_rpc.do(query)
    return erc20.parse_symbol(response)
@pytest.fixture(scope='function')
def default_token(foo_token, foo_token_symbol):
    """Set the foo token as the default token on the task base class.

    The values are set on the class itself and are not restored afterwards.
    """
    BaseTask.default_token_address = foo_token
    BaseTask.default_token_symbol = foo_token_symbol

View File

@ -34,6 +34,7 @@ def celery_includes():
'cic_eth.admin.ctrl',
'cic_eth.admin.nonce',
'cic_eth.admin.debug',
'cic_eth.admin.token',
'cic_eth.eth.account',
'cic_eth.callbacks.noop',
'cic_eth.callbacks.http',

View File

@ -0,0 +1,21 @@
# external imports
import celery
def test_default_token(
        default_token,
        celery_session_worker,
        foo_token,
        foo_token_symbol,
        ):
    """The default_token task returns the address and symbol set up by the default_token fixture."""
    task_signature = celery.signature(
        'cic_eth.admin.token.default_token',
        [],
        queue=None,
    )
    result = task_signature.apply_async().get()

    assert result['address'] == foo_token
    assert result['symbol'] == foo_token_symbol

View File

@ -42,6 +42,7 @@ def load(check_strs, namespace=default_namespace, rundir='/run', *args, **kwargs
def set(error=0, namespace=default_namespace, rundir='/run'):
logg.info('liveness SET error {} for namespace {}'.format(error, namespace))
app_rundir = os.path.join(rundir, namespace)
f = open(os.path.join(app_rundir, 'error'), 'w')
f.write(str(error))
@ -49,6 +50,13 @@ def set(error=0, namespace=default_namespace, rundir='/run'):
def reset(namespace=default_namespace, rundir='/run'):
    """Remove the liveness state files of a namespace.

    Deletes the ``pid`` and ``error`` files in the namespace folder under
    the run dir. A file that does not exist is silently skipped, so the
    function can safely be called more than once.

    :param namespace: Name of the folder under rundir holding the state files
    :type namespace: str
    :param rundir: Base directory for the state files
    :type rundir: str
    """
    logg.info('liveness RESET for namespace {}'.format(namespace))
    app_rundir = os.path.join(rundir, namespace)
    for filename in ('pid', 'error'):
        # Each file is attempted on its own, so a missing pid file does not keep the error file
        try:
            os.unlink(os.path.join(app_rundir, filename))
        except FileNotFoundError:
            pass

View File

@ -240,6 +240,8 @@ services:
DATABASE_DRIVER: ${DATABASE_DRIVER:-psycopg2}
DATABASE_DEBUG: ${DATABASE_DEBUG:-0}
DATABASE_POOL_SIZE: 0
REDIS_PORT: 6379
REDIS_HOST: redis
PGPASSWORD: ${DATABASE_PASSWORD:-tralala}
CIC_CHAIN_SPEC: ${CIC_CHAIN_SPEC:-evm:bloxberg:8996}
BANCOR_DIR: ${BANCOR_DIR:-/usr/local/share/cic/bancor}