cic-internal-integration/apps/data-seeding/create_import_users.py

284 lines
7.2 KiB
Python
Raw Permalink Normal View History

2021-02-21 16:41:37 +01:00
#!/usr/bin/python
# standard imports
import json
import datetime
import random
import logging
import os
import hashlib
import argparse
import random
# external imports
import celery
from faker import Faker
2021-06-02 14:53:53 +02:00
from collections import OrderedDict
2021-02-21 16:41:37 +01:00
import confini
from cic_types.models.person import (
2021-06-02 14:53:53 +02:00
Person,
generate_vcard_from_contact_data,
get_contact_data_from_vcard,
)
from chainlib.eth.address import to_checksum_address
import phonenumbers
2021-02-21 16:41:37 +01:00
# Module-level setup: logging, fake-data provider, CLI arguments, config,
# time window for generated registration dates, and generation parameters.
logging.basicConfig(level=logging.WARNING)
logg = logging.getLogger()

# Faker instance drawing names/emails from several locales
fake = Faker(['sl', 'en_US', 'no', 'de', 'ro'])

script_dir = os.path.realpath(os.path.dirname(__file__))
# config_dir = os.environ.get('CONFINI_DIR', '/usr/local/etc/cic')
config_dir = os.environ.get('CONFINI_DIR', os.path.join(script_dir, 'config'))

argparser = argparse.ArgumentParser()
argparser.add_argument('-c', type=str, default=config_dir, help='Config dir')
argparser.add_argument('--tag', type=str, action='append',
                       help='Tags to add to record')
argparser.add_argument('--gift-threshold', type=int,
                       help='If set, users will be funded with additional random balance (in token integer units)')
argparser.add_argument('-v', action='store_true', help='Be verbose')
argparser.add_argument('-vv', action='store_true', help='Be more verbose')
argparser.add_argument('--dir', default='out', type=str,
                       help='path to users export dir tree')
argparser.add_argument('user_count', type=int,
                       help='amount of users to generate')
args = argparser.parse_args()

# Test the more verbose flag first so that it wins when both -v and -vv
# are supplied; otherwise -v would silently cap the level at INFO.
if args.vv:
    logg.setLevel(logging.DEBUG)
elif args.v:
    logg.setLevel(logging.INFO)

config = confini.Config(args.c, os.environ.get('CONFINI_ENV_PREFIX'))
config.process()
logg.info('loaded config\n{}'.format(config))

# Time window used by genDate(): from 150 weeks ago until now.
# A timezone-aware "now" is required here: calling .timestamp() on the naive
# result of utcnow() interprets it as *local* time, which shifts both bounds
# by the machine's UTC offset (and can put registration dates in the future).
dt_now = datetime.datetime.now(tz=datetime.timezone.utc)
dt_then = dt_now - datetime.timedelta(weeks=150)
ts_now = int(dt_now.timestamp())
ts_then = int(dt_then.timestamp())

celery_app = celery.Celery(broker=config.get(
    'CELERY_BROKER_URL'), backend=config.get('CELERY_RESULT_URL'))

# Upper bound (in whole units) for the random gift; 0 disables gifting
gift_max = args.gift_threshold or 0
# Multiplier applied to the random gift units in genAmount()
gift_factor = (10**6)

categories = [
    "food/water",
    "fuel/energy",
    "education",
    "health",
    "shop",
    "environment",
    "transport",
    "farming/labor",
    "savingsgroup",
]

phone_idx = []

user_dir = args.dir
user_count = args.user_count

# --tag is an "append" option, so it is None when never given
tags = args.tag or ['individual']

random.seed()
2021-06-02 14:53:53 +02:00
2021-02-21 16:41:37 +01:00
def genPhoneIndex(phone):
    """Return the hex SHA-256 digest of the phone number salted with ':cic.phone'.

    :param phone: Phone number string; it is UTF-8 encoded before hashing.
    :returns: 64-character lowercase hex digest.
    """
    salted = phone.encode('utf-8') + b':cic.phone'
    return hashlib.sha256(salted).hexdigest()
def genId(addr, typ):
    """Return the hex SHA-256 digest of an address' raw bytes followed by a type tag.

    :param addr: Hex string whose first two characters are skipped
        (the callers in this file pass '0x'-prefixed addresses).
    :param typ: Type tag string; it is UTF-8 encoded and appended to the
        address bytes before hashing.
    :returns: 64-character lowercase hex digest.
    :raises ValueError: if the part of addr after the first two characters
        is not valid hex.
    """
    address_bytes = bytes.fromhex(addr[2:])
    type_bytes = typ.encode('utf-8')
    hasher = hashlib.sha256()
    for chunk in (address_bytes, type_bytes):
        hasher.update(chunk)
    return hasher.hexdigest()
def genDate():
    """Pick a random integer timestamp between ts_then and ts_now (inclusive).

    The value is passed through a datetime object and converted back to an
    integer timestamp before being returned.

    :returns: Timestamp as int.
    """
    picked = random.randint(ts_then, ts_now)
    as_datetime = datetime.datetime.fromtimestamp(picked)
    return int(as_datetime.timestamp())
2021-02-21 16:41:37 +01:00
def genPhone():
    """Generate a random phone number with the +254 prefix, formatted as E.164.

    Nine random digits (first digit never zero) are appended to '+254', and
    the result is parsed and re-formatted by the phonenumbers library.

    :returns: Phone number string in E.164 format.
    """
    subscriber = random.randint(100000000, 999999999)
    parsed = phonenumbers.parse('+254{}'.format(subscriber))
    e164 = phonenumbers.PhoneNumberFormat.E164
    return phonenumbers.format_number(parsed, e164)
2021-02-21 16:41:37 +01:00
def genPersonal(phone):
    """Build a vCard for a fake person who owns the given phone number.

    First name, last name and email are drawn from the module-level Faker
    instance, in that order.

    :param phone: Phone number to embed in the contact data.
    :returns: Whatever generate_vcard_from_contact_data returns for the
        (last name, first name, phone, email) arguments.
    """
    first_name = fake.first_name()
    last_name = fake.last_name()
    email = fake.email()
    return generate_vcard_from_contact_data(last_name, first_name, phone, email)
def genCats():
    """Return between zero and three randomly picked entries from categories.

    Picking is done with random.choices, i.e. with replacement, so the same
    category may appear more than once in the result.

    :returns: List of category strings (possibly empty).
    """
    how_many = random.randint(0, 3)
    return random.choices(categories, k=how_many)
def genAmount():
    """Return a random gift amount.

    A random integer between 0 and gift_max (inclusive) is scaled by
    gift_factor.

    :returns: Amount as int; always 0 when gift_max is 0.
    """
    units = random.randint(0, gift_max)
    return units * gift_factor
2021-05-15 09:40:34 +02:00
2021-02-21 16:41:37 +01:00
def genDob():
    """Generate a partially populated date-of-birth dict.

    A birth date (minimum age 15) is drawn from Faker. The result is then
    either empty, contains only 'year', or contains 'year', 'month' and 'day'.

    :returns: dict with zero or more of the keys 'year', 'month', 'day'.
    """
    born = fake.date_of_birth(minimum_age=15)

    # Roughly half of the generated users get no date of birth at all
    if not random.random() < 0.5:
        return {}

    result = {'year': born.year}
    # NOTE(review): month/day are assumed to be set only when a year is set
    # (i.e. this check is nested under the year check) - confirm.
    if random.random() > 0.5:
        result['month'] = born.month
        result['day'] = born.day
    return result
def gen():
    """Generate one fake user record.

    A random 20-byte address is created and checksummed, a phone number and
    vCard are generated, and a Person object is populated with registration
    date, date of birth, gender, identities, one product, an area name and
    (about half of the time) a latitude/longitude pair.

    :returns: Tuple of (checksummed address, phone number, Person object).
    """
    old_blockchain_address = '0x' + os.urandom(20).hex()
    old_blockchain_checksum_address = to_checksum_address(
        old_blockchain_address)
    gender = random.choice(['female', 'male', 'other'])
    phone = genPhone()
    v = genPersonal(phone)
    contact_data = get_contact_data_from_vcard(v)

    p = Person()
    p.load_vcard(contact_data)
    p.date_registered = genDate()
    p.date_of_birth = genDob()
    p.gender = gender
    p.identities = {
        'evm': {
            'oldchain:1': [
                old_blockchain_checksum_address,
            ],
        },
    }

    # Weighted choice of a single product. 'pastor' used to be listed twice
    # (0.1 and 0.01); as a duplicate dict key the later entry replaced the
    # earlier one, leaving it with weight 0.01 and the weights summing to
    # 0.9. The two entries are merged so the weights sum to 1.0 again.
    p.products = [fake.random_element(elements=OrderedDict(
        [('fruit', 0.25),
         ('maji', 0.05),
         ('milk', 0.1),
         ('teacher', 0.1),
         ('doctor', 0.05),
         ('boutique', 0.15),
         ('recycling', 0.05),
         ('farmer', 0.05),
         ('oil', 0.05),
         ('pastor', 0.11),
         ('chama', 0.03),
         ('bzrTsuZieaq', 0.01)
         ]))]

    # Weighted choice of the area name (weights sum to 1.0)
    p.location['area_name'] = fake.random_element(elements=OrderedDict(
        [('mnarani', 0.05),
         ('chilumani', 0.1),
         ('madewani', 0.1),
         ('kisauni', 0.05),
         ('bangla', 0.1),
         ('kabiro', 0.1),
         ('manyani', 0.05),
         ('ruben', 0.15),
         ('makupa', 0.05),
         ('kingston', 0.05),
         ('rangala', 0.05),
         ('homabay', 0.1),
         ('nakuru', 0.03),
         ('kajiado', 0.01),
         ('zurtWicKtily', 0.01)
         ]))

    # Only about half of the users get coordinates
    if random.randint(0, 1):
        # The previous "(random.random() + 180) - 90" added instead of
        # scaling, which produced latitudes in [90, 91) and longitudes in
        # [180, 181). Draw uniformly from the valid ranges instead.
        p.location['latitude'] = random.uniform(-90, 90)
        p.location['longitude'] = random.uniform(-180, 180)

    return (old_blockchain_checksum_address, phone, p)
def prepareLocalFilePath(datadir, address):
    """Create and return a two-level shard directory for an address.

    The directory is '<datadir>/<address[0:2]>/<address[2:4]>'. It is created
    together with any missing parents; an already existing directory is not
    an error.

    :param datadir: Root directory under which the shard is created.
    :param address: String whose first four characters select the shard.
    :returns: Path of the shard directory, joined with '/'.
    """
    first_level = address[:2]
    second_level = address[2:4]
    shard_path = '/'.join((str(datadir), first_level, second_level))
    os.makedirs(shard_path, exist_ok=True)
    return shard_path
if __name__ == '__main__':
    # Script entry point: generate user_count fake users and write, per user,
    #   <dir>/old/<shard>/<UID>.json              serialized person record
    #   <dir>/ussd/<shard>/<UID>_ussd_data.json   ussd account settings
    #   <dir>/phone/<shard>/<phone index>         address for the phone hash
    # plus one line each in <dir>/balances.csv and <dir>/tags.csv.
    base_dir = os.path.join(user_dir, 'old')
    ussd_dir = os.path.join(user_dir, 'ussd')
    phone_dir = os.path.join(user_dir, 'phone')

    os.makedirs(base_dir, exist_ok=True)

    # Context managers guarantee that every file is flushed and closed even
    # if serialization or a write fails part-way through the run.
    with open(os.path.join(user_dir, 'balances.csv'), 'w') as fa, \
            open(os.path.join(user_dir, 'tags.csv'), 'w') as ft:

        i = 0
        while i < user_count:
            # A failed generation is logged and retried; it does not count
            # towards user_count.
            try:
                (eth, phone, o) = gen()
            except Exception as e:
                logg.warning('generate failed, trying anew: {}'.format(e))
                continue

            uid = eth[2:].upper()

            print(o)

            ussd_data = {
                'phone': phone,
                'is_activated': 1,
                'preferred_language': random.choice(['en', 'sw']),
                'is_disabled': False
            }

            d = prepareLocalFilePath(base_dir, uid)
            with open('{}/{}'.format(d, uid + '.json'), 'w') as f:
                json.dump(o.serialize(), f)

            d = prepareLocalFilePath(ussd_dir, uid)
            with open('{}/{}'.format(d, uid + '_ussd_data.json'), 'w') as x:
                json.dump(ussd_data, x)

            # Phone index file: named after the phone hash, contains the address
            pidx = genPhoneIndex(phone)
            d = prepareLocalFilePath(phone_dir, pidx)
            with open('{}/{}'.format(d, pidx), 'w') as f:
                f.write(eth)

            ft.write('{}:{}\n'.format(eth, ','.join(tags)))
            amount = genAmount()
            fa.write('{},{}\n'.format(eth, amount))
            logg.debug('pidx {}, uid {}, eth {}, amount {}, phone {}'.format(
                pidx, uid, eth, amount, phone))

            i += 1