Add redis health check, reset liveness on critical basetask failure

This commit is contained in:
nolash 2021-04-24 08:12:07 +02:00
parent 1110e79e5e
commit 542ec89e3c
Signed by untrusted user who does not match committer: lash
GPG Key ID: 21D2E7BB88C2A746
3 changed files with 15 additions and 3 deletions

View File

@ -204,6 +204,7 @@ def main():
BaseTask.default_token_symbol = config.get('CIC_DEFAULT_TOKEN_SYMBOL')
BaseTask.default_token_address = registry.by_name(BaseTask.default_token_symbol)
BaseTask.run_dir = config.get('CIC_RUN_DIR')
logg.info('default token set to {} {}'.format(BaseTask.default_token_symbol, BaseTask.default_token_address))
liveness.linux.set(rundir=config.get('CIC_RUN_DIR'))

View File

@ -10,12 +10,13 @@ import sqlalchemy
from chainlib.eth.constant import ZERO_ADDRESS
from chainlib.eth.nonce import RPCNonceOracle
from chainlib.eth.gas import RPCGasOracle
import liveness.linux
# local imports
from cic_eth.error import SeppukuError
from cic_eth.db.models.base import SessionBase
logg = logging.getLogger(__name__)
logg = logging.getLogger().getChild(__name__)
celery_app = celery.current_app
@ -28,6 +29,7 @@ class BaseTask(celery.Task):
create_gas_oracle = RPCGasOracle
default_token_address = None
default_token_symbol = None
run_dir = '/run'
def create_session(self):
return BaseTask.session_func()
@ -40,6 +42,7 @@ class BaseTask(celery.Task):
def on_failure(self, exc, task_id, args, kwargs, einfo):
if isinstance(exc, SeppukuError):
liveness.linux.reset(rundir=self.run_dir)
logg.critical(einfo)
msg = 'received critical exception {}, calling shutdown'.format(str(exc))
s = celery.signature(

View File

@ -42,6 +42,7 @@ def load(check_strs, namespace=default_namespace, rundir='/run', *args, **kwargs
def set(error=0, namespace=default_namespace, rundir='/run'):
logg.info('liveness SET error {} for namespace {}'.format(error, namespace))
app_rundir = os.path.join(rundir, namespace)
f = open(os.path.join(app_rundir, 'error'), 'w')
f.write(str(error))
@ -49,6 +50,13 @@ def set(error=0, namespace=default_namespace, rundir='/run'):
def reset(namespace=default_namespace, rundir='/run'):
    """Clear the liveness state files for a namespace.

    Removes the ``pid`` and ``error`` files found under
    ``<rundir>/<namespace>``. A file that is already absent is ignored, so
    the call is safe to repeat and safe to make before any state was written.

    :param namespace: Name of the sub-directory of ``rundir`` holding the state files.
    :param rundir: Base run directory.
    """
    logg.info('liveness RESET for namespace {}'.format(namespace))
    app_rundir = os.path.join(rundir, namespace)
    # Each file is removed independently: a missing 'pid' file must not
    # prevent the 'error' file from being cleaned up (and vice versa).
    for filename in ('pid', 'error'):
        try:
            os.unlink(os.path.join(app_rundir, filename))
        except FileNotFoundError:
            # Nothing to remove; the state is already reset for this file.
            pass