import matplotlib.pyplot as plt

# Category labels for the x-axis and the value plotted for each of them.
x = ["1-3", "4-6", "7-9", "10-12"]
y = [257, 301, 428, 475]

# Draw the line chart on an explicit Axes rather than the implicit pyplot state.
fig, ax = plt.subplots()
ax.plot(x, y)
# Dashed, fully opaque grid lines.
ax.grid(True, linestyle="--", alpha=1)
plt.show()
任务说明:
(1)NumPy提供的array函数可以创建一维数组或多维数组。使用array函数把如下图示的员工的年龄创建为一维数组
(2)使用数组的索引,分别获取“Mary”和“LiLi”的年龄
(3)对数组进行切片(分割),分别获取“Mary”和“LiLi”的年龄、“LiLi”和“Cendy”的年龄以及“Mary”和“Cendy”的年龄
代码实现:
# (1) Build the employee table, then extract the ages as a 1-D integer array.
import numpy as np

# Mixing str/int/float in one array makes NumPy store every cell as a string,
# so the age column is converted back to integers before it is used.
score = np.array([["Mary", 22, 95.5], ["LiLi", 23, 56], ["Cendy", 22, 90]])
ages = score[:, 1].astype(int)
print(ages)

# (2) Index single elements: position 0 is Mary, position 1 is LiLi.
mary_age = ages[0]
lili_age = ages[1]
print(mary_age, lili_age)

# (3) Slice the array to get two ages at a time.
mary_lili_ages = ages[:2]    # Mary and LiLi
lili_cendy_ages = ages[1:]   # LiLi and Cendy
mary_cendy_ages = ages[::2]  # Mary and Cendy (a step of 2 skips LiLi)
print(mary_lili_ages, lili_cendy_ages, mary_cendy_ages)
任务说明:
(1)算术函数
使用NumPy 算术函数add()、subtract()、multiply()、divide()实现如下两个数组之间的加减乘除。
代码实现:
import numpy as np

n1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
n2 = np.array([1, 2, 3])  # 1-D array; it is broadcast across every row of n1

# Element-wise arithmetic between the 3x3 matrix and the broadcast row.
n3 = np.add(n1, n2)
n4 = np.subtract(n1, n2)
n5 = np.divide(n1, n2)    # true division, so the result is floating point
# The task lists four operations; multiplication was missing from the script.
n6 = np.multiply(n1, n2)
(2)排序函数
import numpy as np

# Structured dtype describing one employee record.
# - name is a unicode string so it reads back as str rather than bytes.
# - KPI is a float: with an int field the value 95.5 was truncated to 95.
dt = np.dtype([("name", "U10"), ("age", int), ("KPI", float)])
n1 = np.array(
    [("Mary", 22, 95.5), ("LiLi", 23, 56), ("Cendy", 22, 90)],
    dtype=dt,
)
# In-place sort of the records by ascending KPI.
n1.sort(axis=0, order="KPI")
n1
任务说明:以员工薪资构成为例,其包含基本薪资、绩效薪资和补贴,数据如下:
代码实现:
import pandas as pd

# One record per employee: name, base salary, performance allowance, subsidy.
# The amounts are kept as strings here.
data = [
    ['Mary', "2500", "5800", "500"],
    ["LiLi", "2500", "7800", "650"],
    ["Gendy", "2500", "10500", "200"],
]
header = ['姓名', '基本薪资', '绩效津贴', '补贴']
df = pd.DataFrame(data=data, columns=header)
df
import numpy as np

raw_values = [4, 7, 1, 6, 1, 8, 4, 9, 1, 6]
arr = np.array(raw_values)
# np.unique removes duplicates and returns the remaining values sorted.
arr1 = np.unique(arr)
arr1
# Expected result: array([1, 4, 6, 7, 8, 9])
任务说明:
代码实现:
# (1) Extract the first row with both the loc and the iloc accessor.
import pandas as pd

data = [['Mary', "2500", "5800", "500"], ["LiLi", "2500", "7800", "650"], ["Gendy", "2500", "10500", "200"]]
df = pd.DataFrame(data, columns=['姓名', '基本薪资', '绩效津贴', '补贴'])

# loc selects by index label; iloc selects by integer position. With the
# default 0..n-1 index both return the same first row.
first_row_by_label = df.loc[0]
first_row_by_position = df.iloc[0]
print(first_row_by_label)
print(first_row_by_position)
# (2) Add a bonus column, approach 1: plain column assignment.
import pandas as pd

data = [
    ['Mary', "2500", "5800", "500"],
    ["LiLi", "2500", "7800", "650"],
    ["Gendy", "2500", "10500", "200"],
]
header = ['姓名', '基本薪资', '绩效津贴', '补贴']
df = pd.DataFrame(data=data, columns=header)
# Assigning to a column label that does not exist yet appends it on the right.
bonus = ['122', '222', '333']
df['奖金'] = bonus
df
# (2) Add a bonus column, approach 2: assignment through the loc accessor.
import pandas as pd

data = [
    ['Mary', "2500", "5800", "500"],
    ["LiLi", "2500", "7800", "650"],
    ["Gendy", "2500", "10500", "200"],
]
header = ['姓名', '基本薪资', '绩效津贴', '补贴']
df = pd.DataFrame(data=data, columns=header)
# All rows (":") of the new column label receive the listed values in order.
bonus = ['122', '222', '333']
df.loc[:, '奖金'] = bonus
df
# (2) Add a bonus column, approach 3: insert at a chosen column position.
import pandas as pd

data = [
    ['Mary', "2500", "5800", "500"],
    ["LiLi", "2500", "7800", "650"],
    ["Gendy", "2500", "10500", "200"],
]
header = ['姓名', '基本薪资', '绩效津贴', '补贴']
df = pd.DataFrame(data=data, columns=header)
# Insert the bonus column at position 2 (third column); the scalar 100 is
# repeated for every row.
df.insert(2, '奖金', 100)
df
# (3) Add a row: write a full record at a chosen row label.
import pandas as pd

data = [
    ['Mary', "2500", "5800", "500"],
    ["LiLi", "2500", "7800", "650"],
    ["Gendy", "2500", "10500", "200"],
]
header = ['姓名', '基本薪资', '绩效津贴', '补贴']
df = pd.DataFrame(data=data, columns=header)
# Insert the bonus column at position 2 with the value 100 in every row.
df.insert(2, '奖金', 100)
# Row label 3 does not exist yet, so this assignment appends a new row.
new_row = ['Alice', '1000', '1000', '1000', '100']
df.loc[3] = new_row
df
(4)增加多行数据:把另一个DataFrame的所有行追加到末尾
import pandas as pd

data = [['Mary', "2500", "5800", "500"], ["LiLi", "2500", "7800", "650"], ["Gendy", "2500", "10500", "200"]]
df = pd.DataFrame(data, columns=['姓名', '基本薪资', '绩效津贴', '补贴'])
# Insert the bonus column at position 2 with the value 100 in every row.
df.insert(loc=2, column='奖金', value=100)
# Row label 3 does not exist yet, so this assignment appends a new row.
df.loc[3] = ['Alice', '1000', '1000', '1000', '100']

# Second table holding the rows to be appended; it has no bonus column.
data1 = [['Liming', "2500", "5800", "500"], ["LiLi1", "2500", "7800", "650"], ["Gendy1", "2500", "10500", "200"]]
df1 = pd.DataFrame(data1, columns=['姓名', '基本薪资', '绩效津贴', '补贴'])
print(df1)

# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# ignore_index=True renumbers the rows 0..n-1. Columns missing from df1
# (the bonus) are filled with NaN for its rows.
combined = pd.concat([df, df1], ignore_index=True)
print(combined)
任务说明:
1)删除缺失值
使用 dropna 函数删除含有缺失值的行
2)填充缺失值
DataFrame 对象中的 fillna 函数可以实现填充缺失数据
代码实现:
# (1) Inspect missing values: info() prints the non-null count per column.
import pandas as pd

csv_path = 'property-data.csv'
df = pd.read_csv(csv_path)
df.info()
# (2) Drop every row that contains at least one missing value.
import pandas as pd

csv_path = 'property-data.csv'
df = pd.read_csv(csv_path)
# axis=0 / how="any" are the defaults, spelled out: remove a row when any
# of its cells is missing. The source frame df is left untouched.
new_df = df.dropna(axis=0, how="any")
new_df
默认情况下,dropna() 方法返回一个新的 DataFrame,不会修改源数据。
# (3) Replace every missing value with a fixed placeholder.
import pandas as pd

csv_path = 'property-data.csv'
df = pd.read_csv(csv_path)
# Rebind df to the filled frame; the placeholder value is 12345.
df = df.fillna(12345)
# to_string() renders the whole frame without truncating rows or columns.
print(df.to_string())
任务说明:
代码实现:
# (1) Task 1: total salary per employee, using DataFrame.sum along each row.
import pandas as pd

data = [['Mary', 2500, 5800, 500], ["LiLi", 2500, 7800, 650], ["Gendy", 2500, 10500, 200]]
df = pd.DataFrame(data, columns=['姓名', '基本薪资', '绩效津贴', '补贴'])
# The bonus must be numeric: stored as strings it cannot be added to the
# other amounts.
df['奖金'] = [122, 222, 333]
# Sum only the pay columns so the name column never enters the arithmetic.
pay_columns = ['基本薪资', '绩效津贴', '补贴', '奖金']
df.loc[:, '总薪资'] = df[pay_columns].sum(axis=1)
df
# (2) Task 2: mean of the pay items in each row.
import pandas as pd

data = [['Mary', 2500, 5800, 500], ["LiLi", 2500, 7800, 650], ["Gendy", 2500, 10500, 200]]
df = pd.DataFrame(data, columns=['姓名', '基本薪资', '绩效津贴', '补贴'])
# The bonus must be numeric so it can take part in the average.
df['奖金'] = [122, 222, 333]
# Average only the pay columns; the name column is text.
pay_columns = ['基本薪资', '绩效津贴', '补贴', '奖金']
# Stored under a label that says "average" (it was mislabelled as the total).
df.loc[:, '平均值'] = df[pay_columns].mean(axis=1)
df
# Largest pay item in each row.
import pandas as pd

data = [['Mary', 2500, 5800, 500], ["LiLi", 2500, 7800, 650], ["Gendy", 2500, 10500, 200]]
df = pd.DataFrame(data, columns=['姓名', '基本薪资', '绩效津贴', '补贴'])
# The bonus must be numeric so it can be compared with the other amounts.
df['奖金'] = [122, 222, 333]
# Compare only the pay columns; the name column is text.
pay_columns = ['基本薪资', '绩效津贴', '补贴', '奖金']
# Stored under a label that says "maximum" (it was mislabelled as the total).
df.loc[:, '最大值'] = df[pay_columns].max(axis=1)
df
# Smallest pay item in each row.
import pandas as pd

data = [['Mary', 2500, 5800, 500], ["LiLi", 2500, 7800, 650], ["Gendy", 2500, 10500, 200]]
df = pd.DataFrame(data, columns=['姓名', '基本薪资', '绩效津贴', '补贴'])
# The bonus must be numeric so it can be compared with the other amounts.
df['奖金'] = [122, 222, 333]
# Compare only the pay columns; the name column is text.
pay_columns = ['基本薪资', '绩效津贴', '补贴', '奖金']
# The row minimum needs min() along axis=1. The earlier max(axis=0) produced a
# per-column result that cannot line up with the rows of a new column.
df.loc[:, '最小值'] = df[pay_columns].min(axis=1)
df
下面的内容为对每一列分别计算最大值、平均值、最小值与求和,并把结果作为新的一行追加到表中:
import pandas as pd

data = [
    ['Mary', 2500, 5800, 500],
    ["LiLi", 2500, 7800, 650],
    ["Gendy", 2500, 10500, 200],
]
header = ['姓名', '基本薪资', '绩效津贴', '补贴']
df = pd.DataFrame(data=data, columns=header)
# Maximum of every column from position 1 onward (the name column is skipped).
column_max = df.iloc[:, 1:5].max(axis=0)
# Append the result as row 3; the name cell has no matching value and is NaN.
df.loc[3] = column_max
df
import pandas as pd

data = [
    ['Mary', 2500, 5800, 500],
    ["LiLi", 2500, 7800, 650],
    ["Gendy", 2500, 10500, 200],
]
header = ['姓名', '基本薪资', '绩效津贴', '补贴']
df = pd.DataFrame(data=data, columns=header)
# Mean of every column from position 1 onward (the name column is skipped).
column_mean = df.iloc[:, 1:5].mean(axis=0)
# Append the result as row 3; the name cell has no matching value and is NaN.
df.loc[3] = column_mean
df
import pandas as pd

data = [
    ['Mary', 2500, 5800, 500],
    ["LiLi", 2500, 7800, 650],
    ["Gendy", 2500, 10500, 200],
]
header = ['姓名', '基本薪资', '绩效津贴', '补贴']
df = pd.DataFrame(data=data, columns=header)
# Minimum of every column from position 1 onward (the name column is skipped).
column_min = df.iloc[:, 1:5].min(axis=0)
# Append the result as row 3; the name cell has no matching value and is NaN.
df.loc[3] = column_min
df
import pandas as pd

data = [
    ['Mary', 2500, 5800, 500],
    ["LiLi", 2500, 7800, 650],
    ["Gendy", 2500, 10500, 200],
]
header = ['姓名', '基本薪资', '绩效津贴', '补贴']
df = pd.DataFrame(data=data, columns=header)
# Sum of every column from position 1 onward (the name column is skipped).
column_sum = df.iloc[:, 1:5].sum(axis=0)
# Append the result as row 3; the name cell has no matching value and is NaN.
df.loc[3] = column_sum
df
import matplotlib.pyplot as plt
import random  # used to generate the simulated temperature readings

# Temperature variation chart for two cities over 60 sample points.

# Prepare the x/y data. Shanghai is generated before Beijing so the random
# draws happen in the same order as before.
x = range(60)
y_shanghai = [random.uniform(15, 18) for _ in x]
y_beijing = [random.uniform(1, 3) for _ in x]

# Create the canvas.
plt.figure(figsize=(20, 8), dpi=80)

# One plot() call per series. The Shanghai series used to be drawn twice,
# which stacked two identical lines on top of each other.
plt.plot(x, y_shanghai, label="shanghai")
plt.plot(x, y_beijing, color='r', linestyle='--', label="beijing")

# Dashed, fully opaque grid lines.
plt.grid(True, linestyle='--', alpha=1)

# Tick labels for the x-axis and tick positions for the y-axis; every fifth
# entry is shown to keep the axes readable.
x_ticks_label = ["11h{}min".format(i) for i in x]
y_ticks = range(40)
plt.xticks(x[::5], x_ticks_label[::5])
plt.yticks(y_ticks[::5])

plt.xlabel("time")
plt.ylabel("temperature")
plt.title("temperature variation", fontsize=20)
# The legend tells the two city lines apart.
plt.legend(loc="best")

# Show the figure.
plt.show()