# Sample inputs for the concat/merge examples: two name tables whose
# subject_id values overlap on '4' and '5', and a table of test ids keyed by
# subject_id. subject_id is stored as strings in all three tables.
raw_data_1 = {
    'subject_id': ['1', '2', '3', '4', '5'],
    'first_name': ['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],
    'last_name': ['Anderson', 'Ackerman', 'Ali', 'Aoni', 'Atiches'],
}
raw_data_2 = {
    'subject_id': ['4', '5', '6', '7', '8'],
    'first_name': ['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],
    'last_name': ['Bonder', 'Black', 'Balwner', 'Brice', 'Btisan'],
}
raw_data_3 = {
    'subject_id': ['1', '2', '3', '4', '5', '7', '8', '9', '10', '11'],
    'test_id': [51, 15, 15, 61, 16, 14, 15, 1, 61, 16],
}

# Column order follows each dict's own key order.
data1 = pd.DataFrame(raw_data_1, columns=list(raw_data_1))
data2 = pd.DataFrame(raw_data_2, columns=list(raw_data_2))
data3 = pd.DataFrame(raw_data_3, columns=list(raw_data_3))
# pandas.concat() is typically used to join DataFrame objects. By default it
# stacks them vertically (row-wise); by setting the axis argument it can also
# join them horizontally (column-wise).
# NOTE: the bare expressions below only show output in an interactive
# session (notebook/REPL); they have no effect when run as a script.
all_data = pd.concat([data1, data2], axis=0)
all_data
all_data_col = pd.concat([data1, data2], axis='columns')
all_data_col
# Join the stacked name rows with the test ids on subject_id
# (merge defaults to an inner join).
all_data.merge(data3, on='subject_id')
# Only subject_ids present in both name tables.
data1.merge(data2, on='subject_id', how='inner')
# Every subject_id from either name table.
data1.merge(data2, on='subject_id', how='outer')
# Stack the rows of cars2 below those of cars1.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is the supported replacement and yields the same result.
# NOTE(review): assumes cars1 and cars2 are pandas DataFrames defined earlier
# in the file — confirm.
cars = pd.concat([cars1, cars2])
cars
# Draw 398 random integers in [15000, 73000]; `high` is exclusive, hence 73001.
# dtype 'l' is the platform C long.
# NOTE(review): 398 presumably matches the number of rows in `cars` — confirm.
nr_owners = np.random.randint(
    low=15000,
    high=73001,
    size=(398,),
    dtype='l',
)
nr_owners