# Notes source: the book from the official pandas website
# Flat mapping of labels to scalars (sample input for the Series examples).
simple_dict = dict(a=1, b=2, c=3)
# Two-level mapping (sample input for the DataFrame example).
complie_dict = dict(
    a=dict(b=2, c=3),
    d=dict(e=4, f=5),
)
import numpy as np
import pandas as pd
# Dict -> Series: the dict keys become the index labels.
pd.Series(data=simple_dict)
"""
a 1
b 2
c 3
dtype: int64
"""
# Dict -> Series with an explicit index, which fixes the order of the entries.
pd.Series(data=simple_dict, index=list('acb'))
"""
a 1
c 3
b 2
dtype: int64
"""
# Nested dict -> DataFrame: outer keys become columns, inner keys become rows;
# combinations missing from the dict show up as NaN.
pd.DataFrame(data=complie_dict)
"""
a d
b 2.0 NaN
c 3.0 NaN
e NaN 4.0
f NaN 5.0
"""
# Series from values only (a default 0..n-1 index is generated).
pd.Series(data=[1, 2, 3, 4])
"""
0 1
1 2
2 3
3 4
dtype: int64
"""
# Series from values plus explicit index labels.
pd.Series(data=[1, 2, 3, 4], index=list('abcd'))
"""
a 1
b 2
c 3
d 4
dtype: int64
"""
# Sample Series with string labels; every snippet below operates on it.
obj = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
# Get the values
obj.values
# Get the index
obj.index
"""
array([1, 2, 3, 4], dtype=int64)
Index(['a', 'b', 'c', 'd'], dtype='object')
"""
# Select by a single label
obj['a']
"""
1
"""
# Select by several labels (a label slice includes both endpoints)
obj['a':'b']
obj[['a', 'b']]
"""
a 1
b 2
dtype: int64
"""
# Select by a single position.
# Use .iloc: plain obj[0] on a string-labelled Series relies on a deprecated
# "integer key means position" fallback.
obj.iloc[0]
"""
1
"""
# Select by several positions
obj.iloc[0:2]
obj.iloc[[0, 1]]
"""
a 1
b 2
dtype: int64
"""
# Keep only the entries whose value is greater than 1
obj[obj > 1]
"""
b 2
c 3
d 4
dtype: int64
"""
# Modify a value, by label or by position
obj['a'] = 100
obj.iloc[0] = 100
"""
a 100
b 2
c 3
d 4
dtype: int64
"""
# Modify several values at once, by label slice, label list, or position slice
obj['a':'c'] = [100, 200, 300]
obj[['a', 'b', 'c']] = [100, 200, 300]
obj.iloc[0:3] = [100, 200, 300]
"""
a 100
b 200
c 300
d 4
dtype: int64
"""
# Assigning to a new label appends an entry
obj['f'] = 5
"""
a 100
b 200
c 300
d 4
f 5
dtype: int64
"""
# Check whether a label is in the index (`in` tests labels, not values)
'd' in obj
"""
True
"""
# Check whether a value is present: select the entries equal to 4
obj[obj == 4]
"""
d 4
dtype: int64
"""
# Element-wise "is missing" mask
obj.isnull()
"""
a False
b False
c False
d False
f False
dtype: bool
"""
# Element-wise "is not missing" mask
obj.notnull()
"""
a True
b True
c True
d True
f True
dtype: bool
"""
# DataFrame from values only (default integer row and column labels)
frame = pd.DataFrame(np.arange(9).reshape(3, 3))
"""
0 1 2
0 0 1 2
1 3 4 5
2 6 7 8
"""
# DataFrame from values plus row labels
frame = pd.DataFrame(np.arange(9).reshape(3, 3), index=['jasmine', 'qiqi', 'jasmine_qiqi'])
"""
0 1 2
jasmine 0 1 2
qiqi 3 4 5
jasmine_qiqi 6 7 8
"""
# DataFrame from values plus row and column labels
frame = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=['jasmine', 'qiqi', 'jasmine_qiqi'],
    columns=['name', 'age', 'sex'],
)
"""
name age sex
jasmine 0 1 2
qiqi 3 4 5
jasmine_qiqi 6 7 8
"""
# Get the values
frame.values
"""
array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8]])
"""
# Get the row index
frame.index
"""
Index(['jasmine', 'qiqi', 'jasmine_qiqi'], dtype='object')
"""
# Get the column index
frame.columns
"""
Index(['name', 'age', 'sex'], dtype='object')
"""
# Select a single row by label
frame.loc['jasmine']
"""
name 0
age 1
sex 2
Name: jasmine, dtype: int32
"""
# Select several rows by label slice (both endpoints included)
frame.loc['jasmine':'jasmine_qiqi']
"""
name age sex
jasmine 0 1 2
qiqi 3 4 5
jasmine_qiqi 6 7 8
"""
# Select rows and columns by label
frame.loc[['jasmine', 'qiqi'], ['name', 'age']]
# Label slices select the same rows and columns
frame.loc[:'qiqi', :'age']
"""
name age
jasmine 0 1
qiqi 3 4
"""
# Select the first 2 rows by position
frame.iloc[0:2]
"""
name age sex
jasmine 0 1 2
qiqi 3 4 5
"""
# Select rows and columns by position (first 2 rows, first 2 columns)
frame.iloc[:2, :2]
"""
name age
jasmine 0 1
qiqi 3 4
"""
# Select a single column (by key or by attribute access)
frame['name']
frame.name
"""
jasmine 0
qiqi 3
jasmine_qiqi 6
Name: name, dtype: int32
"""
# Select several columns
frame.loc[:, ['name', 'age']]
"""
name age
jasmine 0 1
qiqi 3 4
jasmine_qiqi 6 7
"""
# Add a row by assigning to a new row label
frame.loc['QiQi'] = [9, 10, 11]
"""
name age sex
jasmine 0.0 1.0 2.0
qiqi 3.0 4.0 5.0
jasmine_qiqi 6.0 7.0 8.0
QiQi 9.0 10.0 11.0
"""
# Drop a row (axis defaults to 'index'); this returns a new frame and
# leaves `frame` itself unchanged
frame.drop('QiQi', axis='index')
"""
name age sex
jasmine 0.0 1.0 2.0
qiqi 3.0 4.0 5.0
jasmine_qiqi 6.0 7.0 8.0
"""
# Actually remove the row from `frame` itself
frame.drop('QiQi', axis='index', inplace=True)
"""
name age sex
jasmine 0.0 1.0 2.0
qiqi 3.0 4.0 5.0
jasmine_qiqi 6.0 7.0 8.0
"""
# Add a column; the scalar is broadcast to every row
frame['telephone'] = '123456'
"""
name age sex telephone
jasmine 0 1 2 123456
qiqi 3 4 5 123456
jasmine_qiqi 6 7 8 123456
"""
# Drop a column; this returns a new frame and leaves `frame` itself unchanged
frame.drop('telephone', axis='columns')
"""
name age sex
jasmine 0 1 2
qiqi 3 4 5
jasmine_qiqi 6 7 8
"""
# Actually remove the column from `frame` itself.
# The column to drop is 'telephone'; 'QiQi' was a row label, and dropping it
# along the column axis raises KeyError.
frame.drop('telephone', axis='columns', inplace=True)
"""
name age sex
jasmine 0 1 2
qiqi 3 4 5
jasmine_qiqi 6 7 8
"""
# Transpose
frame.T
"""
jasmine qiqi jasmine_qiqi
name 0.0 3.0 6.0
age 1.0 4.0 7.0
sex 2.0 5.0 8.0
"""
# First 5 rows (the frame only has 3, so all of them are returned)
frame.head()
"""
name age sex
jasmine 0 1 2
qiqi 3 4 5
jasmine_qiqi 6 7 8
"""
# Sample dict: each key maps to a list of six values.
data = dict(
    state=['Ohio'] * 3 + ['Nevada'] * 3,
    year=[2000, 2001, 2002, 2001, 2002, 2003],
    pop=[1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
)
"""
{'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
'year': [2000, 2001, 2002, 2001, 2002, 2003],
'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}
"""
# Dict -> Series: each key becomes a label and each whole list becomes one
# (object-typed) element.
series = pd.Series(data=data)
"""
state [Ohio, Ohio, Ohio, Nevada, Nevada, Nevada]
year [2000, 2001, 2002, 2001, 2002, 2003]
pop [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]
dtype: object
"""
# Dict -> DataFrame: each key becomes a column and each list supplies its rows.
frame = pd.DataFrame(data=data)
"""
state year pop
0 Ohio 2000 1.5
1 Ohio 2001 1.7
2 Ohio 2002 3.6
3 Nevada 2001 2.4
4 Nevada 2002 2.9
5 Nevada 2003 3.2
"""