通过 Python 生成随机数据,并批量插入到 Amazon DocumentDB(或 MongoDB)中。
使用 Python 的 random 模块生成随机数据,例如:
随机整数 (0 - 999999)
# Draw one integer uniformly from the closed range [0, 999999].
# Named random_id rather than id so the builtin id() is not shadowed.
random_id = random.randint(0, 999999)
随机选择一个 item
# Pool of candidate cities; random.choice() returns one element of the list.
enum_city = [
    'Beijing',
    'Shanghai',
    'Guangzhou',
    'Shenzhen',
    'Hangzhou',
    'Wuhan',
]
city = random.choice(enum_city)
随机字符串
import random
import string

# random.sample() picks 16 distinct characters (no repeats) from the
# 62 ASCII letters and digits. The result is not bound to the name `str`,
# which would shadow the builtin type for the rest of the script.
chars = random.sample(string.ascii_letters + string.digits, 16)
random_str = ''.join(chars)
print(random_str)
生成想要的数据格式(json)
# Value pools for the randomized fields. The consent flags are the strings
# 'true'/'false' because they are substituted into quoted JSON values below.
enum_bool = ['true', 'false']
enum_sexy = ['male', 'female']
enum_city = ['Beijing', 'Shanghai', 'Guangzhou', 'Shenzhen', 'Hangzhou', 'Wuhan']
enum_device = ['IOS', 'Android']

random_id = random.randint(0, 99999999)
# Zero-pad to 8 digits so the mobile number always has 11 digits in total.
mobile = '138%08d' % random_id
# The template has an "email" placeholder, so a value must be generated too.
email = '%s@csdn.com' % random_id
smsConsent = random.choice(enum_bool)
emailConsent = random.choice(enum_bool)
sexual = random.choice(enum_sexy)
city = random.choice(enum_city)
device = random.choice(enum_device)

# JSON text of one document; the seven %s placeholders are filled, in order,
# by the tuple after the closing quotes.
insertdata = '''
{
"journeyId" : 1,
"mobile": "%s",
"email": "%s",
"smsConsent": "%s",
"emailConsent": "%s",
"nextStepId": 1,
"traits": [
{"tag": "sexual", "value": "%s"},
{"tag": "city", "value": "%s" },
{"tag": "device", "value": "%s"}
]
}
''' % (mobile, email, smsConsent, emailConsent, sexual, city, device)
连接 DocumentDB,批量插入数据
import pymongo

# Connection URI for the DocumentDB cluster (password and host are
# placeholders). It is written as adjacent string literals, which Python
# joins into one string, so the URI contains no line break.
myclient = pymongo.MongoClient(
    'mongodb://dbadmin:XXX@docdb.XXXXX.docdb.cn-north-1.amazonaws.com.cn:27017/'
    '?tls=true&tlsCAFile=rds-combined-ca-cn-bundle.pem'
    '&replicaSet=rs0&readPreference=secondaryPreferred&retryWrites=false'
)

# insert_many() takes a list of documents (dicts); append more as needed.
data = [{"item1": "1"}, {"item2": "2"}]
db = myclient["dbname"]
col = db.col_test01
col.insert_many(data)
并行执行
from multiprocessing import Pool

p = Pool()
# Pool.apply() blocks until the call returns, so calling it in a loop would
# run the jobs one after another. apply_async() submits each job and returns
# immediately, so the five jobs run in parallel across the pool's workers.
results = [p.apply_async(func=insert_data, args=()) for _ in range(5)]
p.close()
p.join()
# get() re-raises any exception that occurred inside a worker; without it,
# failures in the worker processes would go unnoticed.
for result in results:
    result.get()
把以上连起来的最终代码
import pymongo
import sys
from multiprocessing import Pool
import random
import json
# Value pools for the randomized fields. The consent flags are stored as the
# strings 'true'/'false', matching the documents the original code produced.
_ENUM_BOOL = ('true', 'false')
_ENUM_SEXY = ('male', 'female')
_ENUM_CITY = ('Beijing', 'Shanghai', 'Guangzhou', 'Shenzhen', 'Hangzhou', 'Wuhan')
_ENUM_DEVICE = ('IOS', 'Android')

# Connection URI (password and host are placeholders). Adjacent literals are
# joined by Python, so the URI contains no line break.
_MONGO_URI = (
    'mongodb://dbadmin:XXX@docdb.XXXXX.docdb.cn-north-1.amazonaws.com.cn:27017/'
    '?tls=true&tlsCAFile=rds-combined-ca-cn-bundle.pem'
    '&replicaSet=rs0&readPreference=secondaryPreferred&retryWrites=false'
)


def _random_document():
    """Return one randomly populated user document as a dict."""
    random_id = random.randint(0, 99999999)
    return {
        "Id": 1,
        # Zero-pad to 8 digits so the mobile number always has 11 digits.
        "mobile": '138%08d' % random_id,
        "email": '%s@csdn.com' % random_id,
        "smsConsent": random.choice(_ENUM_BOOL),
        "emailConsent": random.choice(_ENUM_BOOL),
        "nextId": 1,
        "traits": [
            {"tag": "sexual", "value": random.choice(_ENUM_SEXY)},
            {"tag": "city", "value": random.choice(_ENUM_CITY)},
            {"tag": "device", "value": random.choice(_ENUM_DEVICE)},
        ],
    }


def insert_data(batches=1000, batch_size=1000):
    """Insert random documents into the ``dbname.col_test01`` collection.

    Args:
        batches: Number of ``insert_many`` calls to issue.
        batch_size: Number of documents sent in each call.

    With the defaults, one call inserts 1000 x 1000 documents. The client is
    created inside the function so each worker process that runs it opens
    its own connection, and it is closed when the function returns or raises.
    """
    if batches <= 0 or batch_size <= 0:
        # Nothing to insert; also avoids calling insert_many() with an empty
        # list.
        return

    with pymongo.MongoClient(_MONGO_URI) as myclient:
        col = myclient["dbname"].col_test01
        for _ in range(batches):
            data = [_random_document() for _ in range(batch_size)]
            col.insert_many(data)
if __name__ == '__main__':
    p = Pool()
    # Pool.apply() blocks until each call returns, which would run the five
    # jobs one after another. apply_async() submits them all up front so
    # they execute in parallel across the pool's worker processes.
    results = [p.apply_async(func=insert_data, args=()) for _ in range(5)]
    p.close()
    p.join()
    # get() re-raises any exception raised inside a worker, so a failed
    # insert job does not go unnoticed.
    for result in results:
        result.get()