From 49fe2ac0fd59182c966489907b7ad85edc1b54ac Mon Sep 17 00:00:00 2001 From: lash Date: Sun, 27 Feb 2022 11:48:15 +0000 Subject: [PATCH] Add override args, add individual include list modification options --- Makefile | 8 ++ eth_monitor/filters/base.py | 5 +- eth_monitor/filters/out.py | 4 + eth_monitor/rules.py | 73 ++++++++++++------ eth_monitor/runnable/sync.py | 141 +++++++++++++++++++++++++--------- man/eth-monitor.custom.groff | 36 ++++++--- man/eth-monitor.head.groff | 10 +-- man/eth-monitor.overrides | 13 ++++ man/eth-monitor.seealso.groff | 0 9 files changed, 209 insertions(+), 81 deletions(-) create mode 100644 Makefile create mode 100644 man/eth-monitor.overrides create mode 100644 man/eth-monitor.seealso.groff diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..245caa1 --- /dev/null +++ b/Makefile @@ -0,0 +1,8 @@ +PREFIX ?= /usr/local +BUILD_DIR = build/$(PREFIX)/share/man + +man: + mkdir -vp $(BUILD_DIR) + chainlib-man.py -b 0xbf -v -n eth-monitor -d $(BUILD_DIR)/ man + +.PHONY: man diff --git a/eth_monitor/filters/base.py b/eth_monitor/filters/base.py index a0c6a4b..983f04f 100644 --- a/eth_monitor/filters/base.py +++ b/eth_monitor/filters/base.py @@ -34,6 +34,5 @@ class RuledFilter: if self.rules_filter != None: if not self.rules_filter.apply_rules(tx): logg.debug('rule match failed for tx {}'.format(tx.hash)) - return - logg.debug('applying filter {}'.format(self)) - self.ruled_filter(conn, block, tx, db_session=db_session) + return False + return True diff --git a/eth_monitor/filters/out.py b/eth_monitor/filters/out.py index 1a3436b..ece506a 100644 --- a/eth_monitor/filters/out.py +++ b/eth_monitor/filters/out.py @@ -25,6 +25,10 @@ class OutFilter(RuledFilter): def filter(self, conn, block, tx, db_session=None): + r = super(OutFilter, self).filter(conn, block, tx, db_session=db_session) + if r == False: + return True + s = None for renderer in self.renderers: diff --git a/eth_monitor/rules.py b/eth_monitor/rules.py index f91b13f..92db54d 100644 --- a/eth_monitor/rules.py +++ b/eth_monitor/rules.py @@ -1,5 +1,6 @@ # standard imports import logging +import uuid # external imports from chainlib.eth.address import is_same_address @@ -7,6 +8,40 @@ from chainlib.eth.address import is_same_address logg = logging.getLogger() +class RuleSimple: + + def __init__(self, outputs, inputs, executables, description=None): + self.description = description + if self.description == None: + self.description = str(uuid.uuid4()) + self.outputs = outputs + self.inputs = inputs + self.executables = executables + + + def check(self, sender, recipient, tx_hash): + for rule in self.outputs: + if rule != None and is_same_address(sender, rule): + logg.debug('tx {} rule INCLUDE match in SENDER {}'.format(tx_hash, sender)) + return True + for rule in self.inputs: + if rule != None and is_same_address(recipient, rule): + logg.debug('tx {} rule INCLUDE match in RECIPIENT {}'.format(tx_hash, recipient)) + return True + for rule in self.executables: + if rule != None and is_same_address(recipient, rule): + logg.debug('tx {} rule INCLUDE match in ExECUTABLE {}'.format(tx_hash, recipient)) + return True + + + def __str__(self): + return 'Simple ' + self.description + ' outputs {} inputs {} execs {}'.format( + self.outputs, + self.inputs, + self.executables, + ) + + class AddressRules: def __init__(self, include_by_default=False): @@ -15,14 +50,14 @@ class AddressRules: self.include_by_default = include_by_default - def exclude(self, sender=None, recipient=None, executable=None): - self.excludes.append((sender, recipient, executable,)) - logg.info('cache filter added EXCLUDE rule sender {} recipient {} executable {}'.format(sender, recipient, executable)) + def exclude(self, rule): + self.excludes.append(rule) + logg.info('cache filter added EXCLUDE rule {}'.format(rule)) - - def include(self, sender=None, recipient=None, executable=None): - self.includes.append((sender, recipient, executable,)) - logg.info('cache filter added INCLUDE rule sender {} recipient {} executable {}'.format(sender, recipient, executable)) + + def include(self, rule): + self.includes.append(rule) + logg.info('cache filter added EXCLUDE rule {}'.format(rule)) def apply_rules(self, tx): @@ -33,23 +68,15 @@ class AddressRules: v = self.include_by_default for rule in self.includes: - if rule[0] != None and is_same_address(sender, rule[0]): - logg.debug('tx {} rule INCLUDE match in SENDER {}'.format(tx_hash, sender)) - v = True - elif rule[1] != None and is_same_address(recipient, rule[1]): - logg.debug('tx {} rule INCLUDE match in RECIPIENT {}'.format(tx_hash, recipient)) - v = True - elif rule[2] != None and is_same_address(recipient, rule[2]): - logg.debug('tx {} rule INCLUDE match in ExECUTABLE {}'.format(tx_hash, recipient)) + if rule.check(sender, recipient, tx_hash): v = True + logg.info('match in includes rule: {}'.format(rule)) + break + for rule in self.excludes: - if rule[0] != None and is_same_address(sender, rule[0]): - logg.debug('tx {} rule INCLUDE match in SENDER {}'.format(tx_hash, sender)) - v = False - elif rule[1] != None and is_same_address(recipient, rule[1]): - logg.debug('tx {} rule INCLUDE match in ExECUTABLE {}'.format(tx_hash, recipient)) - v = False - elif rule[2] != None and is_same_address(recipient, rule[2]): - logg.debug('tx {} rule INCLUDE match in ExECUTABLE {}'.format(tx_hash, recipient)) + if rule.check(sender, recipient, tx_hash): v = False + logg.info('match in excludes rule: {}'.format(rule)) + break + return v diff --git a/eth_monitor/runnable/sync.py b/eth_monitor/runnable/sync.py index 099a8fd..637448a 100644 --- a/eth_monitor/runnable/sync.py +++ b/eth_monitor/runnable/sync.py @@ -23,15 +23,16 @@ from chainsyncer.filter import NoopFilter # local imports from eth_monitor.chain import EthChainInterface from eth_monitor.filters.cache import Filter as CacheFilter -from eth_monitor.rules import AddressRules +from eth_monitor.rules import ( + AddressRules, + RuleSimple, + ) from eth_monitor.filters import RuledFilter from eth_monitor.filters.out import OutFilter from eth_monitor.store.file import FileStore logging.basicConfig(level=logging.WARNING) logg = logging.getLogger() -#logging.getLogger('leveldir.hex').setLevel(level=logging.DEBUG) -#logging.getLogger('leveldir.numeric').setLevel(level=logging.DEBUG) default_eth_provider = os.environ.get('RPC_PROVIDER') if default_eth_provider == None: @@ -47,27 +48,50 @@ argparser.add_argument('-p', '--provider', dest='p', default=default_eth_provide argparser.add_argument('-c', type=str, help='config file') argparser.add_argument('-i', '--chain-spec', dest='i', type=str, help='Chain specification string') argparser.add_argument('--offset', type=int, default=0, help='Start sync on this block') -#argparser.add_argument('--until', type=int, default=0, help='Start sync on this block') +argparser.add_argument('--until', type=int, default=0, help='Terminate sync on this block') argparser.add_argument('--head', action='store_true', help='Start at current block height (overrides --offset, assumes --keep-alive)') argparser.add_argument('--seq', action='store_true', help='Use sequential rpc ids') argparser.add_argument('--skip-history', action='store_true', dest='skip_history', help='Skip history sync') +argparser.add_argument('--keep-alive', action='store_true', dest='keep_alive', help='Continue to sync head after history sync complete') +argparser.add_argument('--input', default=[], action='append', type=str, help='Add input (recipient) addresses to includes list') +argparser.add_argument('--output', default=[], action='append', type=str, help='Add output (sender) addresses to includes list') +argparser.add_argument('--exec', default=[], action='append', type=str, help='Add exec (contract) addresses to includes list') +argparser.add_argument('--address', default=[], action='append', type=str, help='Add addresses as input, output and exec to includes list') +argparser.add_argument('--x-input', default=[], action='append', type=str, dest='xinput', help='Add input (recipient) addresses to excludes list') +argparser.add_argument('--x-output', default=[], action='append', type=str, dest='xoutput', help='Add output (sender) addresses to excludes list') +argparser.add_argument('--x-exec', default=[], action='append', type=str, dest='xexec', help='Add exec (contract) addresses to excludes list') +argparser.add_argument('--x-address', default=[], action='append', type=str, dest='xaddress', help='Add addresses as input, output and exec to excludes list') argparser.add_argument('--includes-file', type=str, dest='includes_file', help='Load include rules from file') argparser.add_argument('--include-default', dest='include_default', action='store_true', help='Include all transactions by default') argparser.add_argument('--store-tx-data', dest='store_tx_data', action='store_true', help='Include all transaction data objects by default') argparser.add_argument('--store-block-data', dest='store_block_data', action='store_true', help='Include all block data objects by default') -argparser.add_argument('--excludes-file', type=str, dest='excludes_file', help='Load exclude rules from file') +argparser.add_argument('--address-file', type=str, dest='excludes_file', help='Load exclude rules from file') argparser.add_argument('--renderer', type=str, action='append', default=[], help='Python modules to dynamically load for rendering of transaction output') argparser.add_argument('--filter', type=str, action='append', help='Add python module filter path') argparser.add_argument('--cache-dir', dest='cache_dir', type=str, help='Directory to store tx data') argparser.add_argument('--single', action='store_true', help='Execute a single sync, regardless of previous states') argparser.add_argument('-v', action='store_true', help='Be verbose') argparser.add_argument('-vv', action='store_true', help='Be more verbose') +argparser.add_argument('-vvv', action='store_true', help='Be incredibly verbose') args = argparser.parse_args(sys.argv[1:]) -if args.vv: +if args.vvv: logg.setLevel(logging.DEBUG) -elif args.v: - logg.setLevel(logging.INFO) +else: + logging.getLogger('chainlib.connection').setLevel(logging.WARNING) + logging.getLogger('chainlib.eth.tx').setLevel(logging.WARNING) + logging.getLogger('chainsyncer.driver.history').setLevel(logging.WARNING) + logging.getLogger('chainsyncer.driver.head').setLevel(logging.WARNING) + logging.getLogger('chainsyncer.backend.file').setLevel(logging.WARNING) + logging.getLogger('chainsyncer.backend.sql').setLevel(logging.WARNING) + logging.getLogger('chainsyncer.filter').setLevel(logging.WARNING) + logging.getLogger('leveldir.hex').setLevel(level=logging.DEBUG) + logging.getLogger('leveldir.numeric').setLevel(level=logging.DEBUG) + + if args.vv: + logg.setLevel(logging.DEBUG) + elif args.v: + logg.setLevel(logging.INFO) config_dir = args.c config = confini.Config(base_config_dir, os.environ.get('CONFINI_ENV_PREFIX'), override_dirs=args.c) @@ -89,10 +113,10 @@ else: block_offset = args.offset block_limit = 0 -#if args.until > 0: -# if not args.head and args.until <= block_offset: -# raise ValueError('sync termination block number must be later than offset ({} >= {})'.format(block_offset, args.until)) -# block_limit = args.until +if args.until > 0: + if not args.head and args.until <= block_offset: + raise ValueError('sync termination block number must be later than offset ({} >= {})'.format(block_offset, args.until)) + block_limit = args.until logg.debug('config loaded:\n{}'.format(config)) @@ -111,9 +135,34 @@ if os.environ.get('RPC_AUTHENTICATION') == 'basic': rpc = EthHTTPConnection(args.p) -def setup_address_rules(includes_file=None, excludes_file=None, include_default=False, include_block_default=False): +def setup_address_arg_rules(rules, args): + include_inputs = args.input + include_outputs = args.output + include_exec = args.exec + exclude_inputs = args.xinput + exclude_outputs = args.xoutput + exclude_exec = args.xexec - rules = AddressRules(include_by_default=include_default) + for address in args.address: + include_inputs.append(address) + include_outputs.append(address) + include_exec.append(address) + + for address in args.xaddress: + exclude_inputs.append(address) + exclude_outputs.append(address) + exclude_exec.append(address) + + includes = RuleSimple(include_outputs, include_inputs, include_exec) + rules.include(includes) + + excludes = RuleSimple(exclude_outputs, exclude_inputs, exclude_exec) + rules.exclude(excludes) + + return rules + + +def setup_address_file_rules(rules, includes_file=None, excludes_file=None, include_default=False, include_block_default=False): if includes_file != None: f = open(includes_file, 'r') @@ -123,20 +172,32 @@ def setup_address_rules(includes_file=None, excludes_file=None, include_default= if r == '': break r = r.rstrip() - v = r.split(",") + v = r.split("\t") - sender = None - recipient = None - executable = None + sender = [] + recipient = [] + executable = [] - if v[0] != '': - sender = v[0] - if v[1] != '': - recipient = v[1] - if v[2] != '': - executable = v[2] + try: + if v[0] != '': + sender = v[0].split(',') + except IndexError: + pass - rules.include(sender=sender, recipient=recipient, executable=executable) + try: + if v[1] != '': + recipient = v[1].split(',') + except IndexError: + pass + + try: + if v[2] != '': + executable = v[2].split(',') + except IndexError: + pass + + rule = RuleSimple(sender, recipient, executable) + rules.include(rule) if excludes_file != None: f = open(includes_file, 'r') @@ -146,20 +207,21 @@ def setup_address_rules(includes_file=None, excludes_file=None, include_default= if r == '': break r = r.rstrip() - v = r.split(",") + v = r.split("\t") sender = None recipient = None executable = None if v[0] != '': - sender = v[0] + sender = v[0].strip(',') if v[1] != '': - recipient = v[1] + recipient = v[1].strip(',') if v[2] != '': - executable = v[2] + executable = v[2].strip(',') - rules.exclude(sender=sender, recipient=recipient, executable=executable) + rule = RuleSimple(sender, recipient, executable) + rules.exclude(rule) return rules @@ -222,6 +284,8 @@ def setup_backend_head(chain_spec, block_offset, block_limit, state_dir, callbac def main(): + global block_limit + o = block_latest() r = rpc.do(o) block_offset = int(strip_0x(r), 16) + 1 @@ -229,14 +293,19 @@ def main(): if block_offset == -1: block_offset = block_latest -# elif not config.true('_KEEP_ALIVE'): -# if block_limit == 0: -# block_limit = block_latest -# - address_rules = setup_address_rules( + elif not config.true('_KEEP_ALIVE'): + if block_limit == 0: + block_limit = block_latest + + address_rules = AddressRules(include_by_default=args.include_default) + address_rules = setup_address_file_rules( + address_rules, includes_file=args.includes_file, excludes_file=args.excludes_file, - include_default=bool(args.include_default), + ) + address_rules = setup_address_arg_rules( + address_rules, + args, ) setup_filter( diff --git a/man/eth-monitor.custom.groff b/man/eth-monitor.custom.groff index 43c353f..3c1803a 100644 --- a/man/eth-monitor.custom.groff +++ b/man/eth-monitor.custom.groff @@ -1,3 +1,27 @@ +.SH MATCHING ADDRESSES +By default, addresses to match against transactions need to be explicitly specified. This behavior can be reversed with the \fB--include-default\fP option. Addresses to match are defined using the \fB--input\fP, \fB--output\fP and \fB--exec\fP options. Addresses specified multiple times will be deduplicated. +.P +Inclusion rules may also be loaded from file by specifying the \fB--includes-file\fP and \fB--excludes-file\fP options. Each file must specify the outputs, inputs and exec addresses as comma separated lists respectively, separated by tabs. + + +.SH SYNCING +When a sync is initiated, the state of this sync is persisted. This way, previous syncs that did not complete for some reason will be resumed where they left off. +.P +A special sync type \fB--head\fP starts syncing at the current head of the chain, and continue to sync until interrupted. When resuming sync, a new sync range between the current block head and the block height at which the previous \fB--head\fP sync left off will automatically be created. +.P +Syncs can be forced to (re)run for ranges regardless of previous state by using the \fB--single\fP option. However, there is no protection in place from preventing code filters from being executed again on the same transaction when this is done. See \fBDEFINING FILTERS\fP below. + + +.SH CACHE +.P +When syncing, the hash of a block and transaction matching the address criteria will be stored in the cache. The hashes can be used for future data lookups. +.P +If \fB--store-block-data\fP and/or \fB--store-tx-data\fP is set, a copy of the block and/or transaction data will also be stored, respectively. + + +.SH RENDERING + + .SH DEFINING FILTERS A python module used for filter must fulfill two conditions: @@ -10,18 +34,6 @@ A python module used for filter must fulfill two conditions: Filters will strictly be executed in the order which they are defined on the command line. -.SH SYNCING -When a sync is initiated, the state of this sync is persisted. This way, previous syncs that did not complete for some reason will be resumed where they left off. -.P -A special sync type \fB--head\fP starts syncing at the current head of the chain, and continue to sync until interrupted. When resuming sync, a new sync range between the current block head and the block height at which the previous \fB--head\fP sync left off will automatically be created. -.P -Syncs can be forced to (re)run for ranges regardless of previous state by using the \fB--single\fP option. However, there is no protection in place from preventing code filters from being executed again on the same transaction when this is done. - - -.SH RENDERING - - - .SH FURTHER READING Refer to the \fBchainsyncer\fP chapter n \fIinfo chaintool\fP for in-depth information on the subjects of syncing and filtering. diff --git a/man/eth-monitor.head.groff b/man/eth-monitor.head.groff index 18fe6ab..b47eac1 100644 --- a/man/eth-monitor.head.groff +++ b/man/eth-monitor.head.groff @@ -23,16 +23,12 @@ The \fBeth-monitor\fP has fulfills three distinct but related functions: 3. Arbitrary code executions using a transaction (and its block) as input. .P -Using an EVM RPC endpoint, the \fBeth-monitor\fP tool will retrieve blocks within a given range and provides arbitrary processing of each transaction. Sync behavior is controlled using the \fB--offset\fP, \fB--until\fP and \fB--head\fP options. \fBSee EXAMPLES\fP on sync example usage. +Using an EVM RPC endpoint, the \fBeth-monitor\fP tool will retrieve blocks within a given range and provides arbitrary processing of each transaction. .P -Processing is done by python modules implementing a filter interface. Filter modules are specified by the \fB--filter\fP option, which can be defined multiple times. See \fBDEFINING FILTERS\fP for details. +A collection of options is provided to control the behavior of which block ranges to sync, which criteria to use for display and cache, and what code to execute for matching transactions. Details on each topic can be found in the \fBSYNCING\fP, \fBMATCHING ADDRESSES\fP and \fBDEFINING FILTERS\fP sections below, respectively. .P -Each chain spec persists its own sync state. E.g. if a historical sync between blocks 100 and 200 was executed against chain \fIevm:foo:42:bar\fP and halted at block 150, then next execution will resume at this block height. The state of which filters were executed for the last transaction processed is also kept. The \fB--single\fP option can be used to override this behavior. See \fBSYNCING\fP below for more details. +Example executions of the tool can be found in the \fBEXAMPLES\fP section. .P -.P -By default, no transactions are matched, and input and output addresses to match against transactions need to be explicitly specified. This behavior can be reversed with the \fB--include-default\fP option. Addresses to match are defined using the \fB--input\fP and \fB--output\fP options, and/or by file using the \fB--includes-file\fP and \fB--excludes-file\fP options. Addresses specified multiple times will be deduplicated. -.P -Every transaction matched will cause the a copy of that transaction and its block to be stored to the cache. In some cases, it may be desirable to store all blocks and/or transactions, and only use address matches for display and/or filters. \fB--store-tx-data\fP and \fB--store-block-data\fP provides this function. .SS OPTIONS diff --git a/man/eth-monitor.overrides b/man/eth-monitor.overrides new file mode 100644 index 0000000..da0de72 --- /dev/null +++ b/man/eth-monitor.overrides @@ -0,0 +1,13 @@ +input Add an address of interest to inputs (recipients) array. Complements \fB--address-file\fP. --input address +output Add an address of interest to outputs (sender) array. Complements \fB--address-file\fP. --output address +exec Add an address of interest to executable address array. Complements \fB--address-file\fP. --exec address +address Add an address of interest to match any role. Complements \fB--address-file\fP. --address address +xinput Add an address of disinterest to inputs (recipients) array. --x-input address +xoutput Add an address of disinterest to outputs (sender) array. --x-output address +xexec Add an address of disinterest to executable address array. --x-exec address +xaddress Add an address of interest to match any role. --x-address address +includedefault Match all addresses by default. Addresses may be excluded using --excludes-file. If this is set, --input, --output, --exec and --includes-file will have no effect. --include-default +includesfile Load address include matching rules from file. See \fBMATCHING ADDRESSES\fP. --includes-file file +excludesfile Load address exclude matching rules from file. See \fBMATCHING ADDRESSES\fP. --excludes-file file +storetx Store transaction data in cache for matching transactions. Requires \fB--cache-dir\fP. --store-tx-data +storeblock Store block data in cache for matching transactions. Requires \fB--cache-dir\fP. --store-block-data diff --git a/man/eth-monitor.seealso.groff b/man/eth-monitor.seealso.groff new file mode 100644 index 0000000..e69de29