Numpy学习笔记

项目场景：

NumPy 是学习深度学习的基础，因为无论 PyTorch 还是 TensorFlow，它们的数据（张量）在形式上都和 ndarray 类似，都是“几乘几”的多维数组。我觉得 NumPy 最主要的就是索引 [ , ]：逗号前面是行，后面是列，也可以说前面是 0 轴，后面是 1 轴；其他用法照着代码示例敲一遍就行。另外还有用 reshape 改变数组的形状（维度信息）。

numpy数据类型：

# coding=utf-8
import numpy as np
import random

# Build an array from a Python list; the result is an ndarray.
t1 = np.array([1, 2, 3])
print(t1)
print(type(t1))

# Build from a range, then reshape the 10 values into 2 rows x 5 columns.
t2 = np.array(range(10)).reshape((2, 5))
print(t2)
print(type(t2))

# np.arange(start, stop, step) builds the array directly, like array(range(...)).
t3 = np.arange(4, 10, 2)
print(t3)
print(type(t3))

# Explicit element type: "i1" is the 1-byte signed integer, i.e. int8.
t4 = np.array(range(1, 4), dtype=np.int8)
print(t4)
print(t4.dtype)

# Boolean arrays: non-zero becomes True, zero becomes False.
t5 = np.array([1, 1, 0, 1, 0, 0], dtype=np.bool_)
print(t5)
print(t5.dtype)

# astype() returns a converted copy with the requested element type.
t6 = t5.astype(np.int8)
print(t6)
print(t6.dtype)

# Floating-point data: ten random values from the stdlib generator.
t7 = np.array([random.random() for _ in range(10)])
print(t7)
print(t7.dtype)

# Round every element to two decimal places.
t8 = t7.round(2)
print(t8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35

nan处理：

NaN（Not a Number）是浮点数中表示“不是一个数”的特殊值，通常用来表示缺失或无效的数据（例如文件中缺失的项）。在 pandas 中有更简单的处理方式；这里是把每一列中的 NaN 替换为该列其余非 NaN 值的平均数。

# coding=utf-8
import numpy as np

def fill_ndarray(t1):
    """Replace the NaN entries of each column with that column's mean.

    The mean is taken over the column's non-NaN values only. A column
    that holds no NaN is left as is. A column that holds nothing but NaN
    has no mean to fill with, so it is also left as is (this avoids the
    "mean of empty slice" warning that averaging zero values produces).

    Args:
        t1: Array with at least two dimensions; columns are indexed as
            ``t1[:, i]``. For a NumPy ndarray the column slice is a view,
            so the fill is written into ``t1`` itself.

    Returns:
        The same object that was passed in as ``t1``.
    """
    for col_idx in range(t1.shape[1]):
        column = t1[:, col_idx]
        # NaN is the only value that compares unequal to itself, so this
        # mask marks exactly the NaN positions of the column.
        nan_mask = column != column
        if not nan_mask.any():
            continue

        valid_values = column[~nan_mask]
        if valid_values.size == 0:
            # Every entry is NaN: there is nothing to average.
            continue

        column[nan_mask] = valid_values.mean()
    return t1
if __name__ == '__main__':
    # Demo input: the values 0..11 as floats in a 3x4 grid, with the last
    # two entries of the middle row blanked out as NaN.
    t1 = np.arange(12, dtype="float").reshape(3, 4)
    t1[1, 2:] = np.nan
    print(t1)

    # Show the same array after the NaN entries have been filled.
    t1 = fill_ndarray(t1)
    print(t1)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18

数据拼接：

下面的代码先演示 ndarray 的索引与切片（取行、取列、取行列交叉的区域、取多个不相邻的点）；两个 ndarray 的拼接（hstack / vstack）见下一节“综合查询”中的代码。

# coding=utf-8
import numpy as np

us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

# Load the US CSV as integers.
# Passing unpack=True would transpose the result (rows become columns):
#   t1 = np.loadtxt(us_file_path, delimiter=",", dtype="int", unpack=True)
#   print(t1)
t2 = np.loadtxt(us_file_path, dtype=int, delimiter=",")
print(t2)

print(100 * "*")

# Indexing cheat sheet: inside [ , ] rows come first, columns second.
#
# One row:                        print(t2[2])
# Consecutive rows:               print(t2[2:])
# Non-consecutive rows:           print(t2[[2, 8, 10]])
# Same, with an explicit column part:
#                                 print(t2[1, :])
#                                 print(t2[2:, :])
#                                 print(t2[[2, 10, 3], :])
# One column:                     print(t2[:, 0])
# Consecutive columns:            print(t2[:, 2:])
# Non-consecutive columns:        print(t2[:, [0, 2]])
# Single value (3rd row, 4th column):
#                                 a = t2[2, 3]
#                                 print(a)
#                                 print(type(a))

# Rows 3..5 crossed with columns 2..4: the result is the region where the
# selected rows and columns intersect.
b = t2[2:5, 1:4]
# print(b)

# Several unrelated points: the two index lists are paired element by
# element, so this picks (0, 0), (2, 1) and (2, 3).
row_idx = [0, 2, 2]
col_idx = [0, 1, 3]
c = t2[row_idx, col_idx]
print(c)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45

综合查询：

这里说一下随机种子：随机种子用来初始化随机数生成器。设置了相同的种子（例如 np.random.seed(10)）之后，每次运行程序、按相同顺序生成的随机数都是一样的，所以得到的 ndarray 也相同，便于复现结果。

# coding=utf-8
import numpy as np

us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

# Load both countries' data as integers.
us_data = np.loadtxt(us_file_path, dtype=int, delimiter=",")
uk_data = np.loadtxt(uk_file_path, dtype=int, delimiter=",")

# Build a one-column country marker per data set: 0 for US, 1 for UK,
# with one entry per row of the corresponding data.
zero_data = np.zeros((us_data.shape[0], 1), dtype=int)
one_data = np.ones((uk_data.shape[0], 1), dtype=int)
# print(zero_data)

# Append the marker as an extra column on the right of each data set.
us_data = np.hstack([us_data, zero_data])
uk_data = np.hstack([uk_data, one_data])

# Stack the two data sets on top of each other.
final_data = np.vstack([us_data, uk_data])
# print(final_data)

# Further things to try:
#   replace every 1 with -1:      final_data[final_data == 1] = -1
#   index of the maximum along an axis (axis=0 gives one result per
#   column, axis=1 one result per row):
#                                 np.argmax(final_data, axis=0)
#   4 rows x 5 columns of random integers from 10 up to (not including) 20:
#                                 np.random.randint(10, 20, (4, 5))

# Fix the random seed so the generated array is the same on every run.
np.random.seed(10)
t = np.random.randint(0, 20, size=(4, 5))
print(t)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29

结合直方图：

# coding=utf-8
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
# Use a font with CJK glyphs for any text drawn on the figure.
font_options = {'family': 'MicroSoft YaHei', 'weight': 'bold'}
matplotlib.rc('font', **font_options)

# Figure size (inches) and resolution.
plt.figure(figsize=(20, 8), dpi=80)

us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

t_us = np.loadtxt(us_file_path, dtype=int, delimiter=",")

# The last column is used as the comment counts (per the original note).
t_us_comments = t_us[:, -1]

# Keep only the values of at most 5000.
keep_mask = t_us_comments <= 5000
t_us_comments = t_us_comments[keep_mask]

# Intended bin width; the number of bins is the value range divided by it.
d = 50
largest = max(t_us_comments)
smallest = min(t_us_comments)
value_range = largest - smallest
num_bins = value_range // d
print(largest, smallest, value_range)
print(num_bins)

# Draw the histogram, write it to disk, then display it.
plt.hist(t_us_comments, num_bins)
plt.savefig("./1.png")
plt.show()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24

在这里插入图片描述

结合散点图：

# coding=utf-8
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
# Use a font with CJK glyphs for any text drawn on the figure.
matplotlib.rc('font', family='MicroSoft YaHei', weight='bold')

# Figure size (inches) and resolution.
plt.figure(figsize=(20, 8), dpi=80)

us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

t_uk = np.loadtxt(uk_file_path, delimiter=",", dtype=int)

# Keep only the rows whose second column (used below as the like count)
# is at most 50000.
t_uk = t_uk[t_uk[:, 1] <= 50000]

# Last column is used as the comment counts and the second column as the
# like counts (per the original note).
t_uk_comments = t_uk[:, -1]
t_uk_like = t_uk[:, 1]

plt.scatter(t_uk_like, t_uk_comments)
# Without an explicit show() a plain script exits without ever displaying
# the figure; the histogram example in this file ends the same way.
plt.show()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19

相关阅读:
Linux下例行性工作自结
 核酸检测多少人为一组混检合适？
图论------如何使用矩阵来存储图的信息（邻接矩阵表示法）。
OceanBase 分布式数据库【信创/国产化】- OceanBase V4.3 里程碑版本
 如何使用docker pull命令从腾讯云镜像加速源拉取镜像，以提高下载速度？
天锐绿盾数据防泄密系统
 限流与下载接口请求数控制
 C语言之extern关键字实例总结(八十二)
LeaRun.Java快速开发平台高效代码自动化生成
 湖泊河道水质蓝藻浮漂监测案例
原文地址：https://blog.csdn.net/weixin_43636034/article/details/126741318