Objective: In this course project, students individually build a face recognition system. Students first apply principal component analysis (PCA) for dimensionality reduction and visualization in order to understand the underlying data distribution. They are then required to train and apply three classification models, namely linear discriminant analysis (LDA), support vector machines (SVM), and convolutional neural networks (CNN), to classify the face images. Through the project, students are expected to gain working knowledge of both classical and currently popular pattern recognition techniques. Programming language: students may use any language they like, although starting from MATLAB or Python is recommended. MATLAB is suggested because it provides a simple, complete environment and easy visualization of results; plenty of related material for this kind of project can be found online.
[NumPy from-scratch series] Hand-written face recognition with PCA, LDA, SVM, and CNN
• PCA based data distribution visualization
• PCA plus nearest neighbor classification results
• LDA based data distribution visualization
• LDA plus nearest neighbor classification results
Please organize your working directory according to the following structure; the folder names themselves are up to you.

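The structure itself is not reproduced in the text. Judging from the loaders used throughout the code below, the scripts expect a `../PIE` folder with one numbered sub-folder per subject, images named `1.jpg`, `2.jpg`, and so on, and folder `26` holding your own 10 photos. A minimal sanity check of that assumed layout (the `PIE_ROOT` path is an assumption, adjust as needed):

```python
import os

PIE_ROOT = '../PIE'   # assumed location: one numbered sub-folder per subject
for person in sorted(os.listdir(PIE_ROOT), key=int):
    n_images = len(os.listdir(os.path.join(PIE_ROOT, person)))
    # folder 26 should hold your own 10 photos; every other folder holds 170 PIE images
    print('subject {:>2}: {} images'.format(person, n_images))
```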
The project is carried out on the CMU PIE dataset, using 70% of the provided images for training and the remaining 30% for testing. PIE is a face recognition dataset containing 68 subjects with 170 images per subject.
Extraction code: 3x2z
https://pan.baidu.com/s/172uo2gLluo47wBTbn3LfHA
The raw face images are 32×32 pixels, so each image is a 1024-dimensional vector. Randomly sample 500 images from the CMU PIE training set together with your own photos. Apply PCA to reduce the dimensionality of the vectorized images to 2 and 3 respectively, and visualize the projected data vectors in 2D and 3D plots, highlighting the points corresponding to your own photos. Also visualize the corresponding 3 eigenfaces used for the dimensionality reduction. Then apply PCA to reduce the dimensionality of the face images to 40, 80, and 200 respectively, and classify the test images with the nearest-neighbor rule. Report the classification accuracy on the CMU PIE test images and on your own photos separately.
```python
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (registers the 3d projection on older matplotlib)


def img2vector(im_path):
    """Read a grayscale face image and flatten it into a 1 x (rows*cols) row vector."""
    im = cv2.imread(im_path, 0)
    rows, cols = im.shape
    return np.reshape(im, (1, rows * cols))


def load_dataset(k):
    """Split the data: k images per PIE subject for training, the rest for testing.

    Folder 26 holds the 10 self photos (7 for training, 3 for testing); every
    other folder holds 170 PIE images of one subject.
    """
    faceData_path = '../PIE'
    faceData_length = len(os.listdir(faceData_path))
    train_face = np.zeros((faceData_length * k, 32 * 32))
    train_label = np.zeros(faceData_length * k)
    train_my_face = np.zeros((7, 32 * 32))
    train_my_label = np.zeros(7)
    test_face = np.zeros((faceData_length * (170 - k), 32 * 32))
    test_label = np.zeros(faceData_length * (170 - k))
    test_my_face = np.zeros((3, 32 * 32))
    test_my_label = np.zeros(3)
    sample = np.random.permutation(170) + 1  # shuffled image indices 1..170

    for i in range(faceData_length):
        people_idx = i + 1
        if people_idx == 26:  # your own 10 photos
            for j in range(10):
                face_im_path = os.path.join(faceData_path, str(people_idx), str(j + 1) + '.jpg')
                faceVector_im = img2vector(face_im_path)
                if j < 7:
                    train_my_face[j, :] = faceVector_im
                    train_my_label[j] = people_idx
                else:
                    test_my_face[10 - j - 1, :] = faceVector_im
                    test_my_label[10 - j - 1] = people_idx
        else:
            for j in range(170):
                face_im_path = os.path.join(faceData_path, str(people_idx), str(sample[j]) + '.jpg')
                faceVector_im = img2vector(face_im_path)
                if j < k:
                    train_face[i * k + j, :] = faceVector_im
                    train_label[i * k + j] = people_idx
                else:
                    test_face[i * (170 - k) + (j - k), :] = faceVector_im
                    test_label[i * (170 - k) + (j - k)] = people_idx

    return train_face, train_label, test_face, test_label, train_my_face, train_my_label, test_my_face, test_my_label


def PCA_fit(data, r):
    """Project `data` onto its top-r principal components (eigenface trick via A * A.T)."""
    data = np.float32(np.mat(data))
    rows, cols = np.shape(data)
    data_mean = np.mean(data, 0)
    A = data - np.tile(data_mean, (rows, 1))   # center the data
    C = A * A.T                                # rows x rows surrogate of the covariance matrix
    D, V = np.linalg.eig(C)
    D, V = np.real(D), np.real(V)
    idx = np.argsort(-D)                       # eig does not sort, so order by descending eigenvalue
    V_r = A.T * V[:, idx[:r]]                  # map eigenvectors back to the 1024-dim image space
    for i in range(r):
        V_r[:, i] = V_r[:, i] / np.linalg.norm(V_r[:, i])

    final_data = A * V_r
    return final_data, data_mean, V_r


if __name__ == "__main__":
    for r in [2, 3]:
        print('dimensionality of face images reduced to {}:'.format(r))
        k = int(170 * 0.7)  # 70% of each subject's images for training
        train_face, train_label, test_face, test_label, \
            train_my_face, train_my_label, test_my_face, test_my_label = load_dataset(k)
        # drop the all-zero rows reserved for subject 26, then randomly sample 500 training images
        train_face, train_label = train_face[train_label != 0], train_label[train_label != 0]
        idx500 = np.random.choice(train_face.shape[0], 500, replace=False)
        data_train_new, data_mean, V_r = PCA_fit(train_face[idx500], r)
        data_my_train_new, data_my_mean, my_V_r = PCA_fit(train_my_face, r)

        data_train_new = np.array(data_train_new).astype(float)
        data_my_train_new = np.array(data_my_train_new).astype(float)

        if r == 2:
            x_plot, y_plot = [], []
            x_my_plot, y_my_plot = [], []
            for data in data_train_new:
                x_plot.append(data[0])
                y_plot.append(data[1])
            for my_data in data_my_train_new:
                x_my_plot.append(my_data[0])
                y_my_plot.append(my_data[1])
            fig = plt.figure()
            plt.scatter(x_plot, y_plot)
            plt.scatter(x_my_plot, y_my_plot, marker='+')  # highlight your own photos
            plt.show()

        if r == 3:
            x_plot, y_plot, z_plot = [], [], []
            x_my_plot, y_my_plot, z_my_plot = [], [], []
            for data in data_train_new:
                x_plot.append(data[0])
                y_plot.append(data[1])
                z_plot.append(data[2])
            for my_data in data_my_train_new:
                x_my_plot.append(my_data[0])   # fixed: was appending data[0] instead of my_data[0]
                y_my_plot.append(my_data[1])
                z_my_plot.append(my_data[2])
            fig = plt.figure()
            ax = fig.add_subplot(111, projection='3d')
            ax.scatter(x_plot, y_plot, z_plot, marker='o')
            ax.scatter(x_my_plot, y_my_plot, z_my_plot, marker='+')  # highlight your own photos
            ax.set_xlabel('x')
            ax.set_ylabel('y')
            ax.set_zlabel('z')
            plt.show()
```
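The question also asks to visualize the 3 eigenfaces used for the dimensionality reduction, which the script above does not do. A minimal sketch, assuming the script above has just been run so that `V_r` from the final loop iteration (r = 3) is still in scope; each column of `V_r` is one eigenface and can be reshaped back to 32 x 32:

```python
import numpy as np
import matplotlib.pyplot as plt

# assumes V_r (1024 x 3) from the r = 3 run of the script above is in scope
fig, axes = plt.subplots(1, 3)
for i, ax in enumerate(axes):
    eigenface = np.asarray(V_r[:, i]).reshape(32, 32)   # i-th eigenface back to image shape
    ax.imshow(eigenface, cmap='gray')
    ax.set_title('eigenface {}'.format(i + 1))
    ax.axis('off')
plt.show()
```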
```python
import os
import numpy as np
import cv2


def img2vector(im_path):
    """Read a grayscale face image and flatten it into a 1 x (rows*cols) row vector."""
    im = cv2.imread(im_path, 0)
    rows, cols = im.shape
    return np.reshape(im, (1, rows * cols))


def load_dataset(k):
    """Same loader as above: k training images per PIE subject, folder 26 = self photos."""
    faceData_path = '../PIE'
    faceData_length = len(os.listdir(faceData_path))
    train_face = np.zeros((faceData_length * k, 32 * 32))
    train_label = np.zeros(faceData_length * k)
    train_my_face = np.zeros((7, 32 * 32))
    train_my_label = np.zeros(7)
    test_face = np.zeros((faceData_length * (170 - k), 32 * 32))
    test_label = np.zeros(faceData_length * (170 - k))
    test_my_face = np.zeros((3, 32 * 32))
    test_my_label = np.zeros(3)
    sample = np.random.permutation(170) + 1

    for i in range(faceData_length):
        people_idx = i + 1
        if people_idx == 26:  # your own 10 photos
            for j in range(10):
                face_im_path = os.path.join(faceData_path, str(people_idx), str(j + 1) + '.jpg')
                faceVector_im = img2vector(face_im_path)
                if j < 7:
                    train_my_face[j, :] = faceVector_im
                    train_my_label[j] = people_idx
                else:
                    test_my_face[10 - j - 1, :] = faceVector_im
                    test_my_label[10 - j - 1] = people_idx
        else:
            for j in range(170):
                face_im_path = os.path.join(faceData_path, str(people_idx), str(sample[j]) + '.jpg')
                faceVector_im = img2vector(face_im_path)
                if j < k:
                    train_face[i * k + j, :] = faceVector_im
                    train_label[i * k + j] = people_idx
                else:
                    test_face[i * (170 - k) + (j - k), :] = faceVector_im
                    test_label[i * (170 - k) + (j - k)] = people_idx

    return train_face, train_label, test_face, test_label, train_my_face, train_my_label, test_my_face, test_my_label


def PCA_fit(data, r):
    """Project `data` onto its top-r principal components.

    Only as many components exist as there are samples, so r is capped; this
    replaces the `flag` hack of the original version and matters for the 7 self
    photos when r is 40, 80, or 200.
    """
    data = np.float32(np.mat(data))
    rows, cols = np.shape(data)
    r = min(r, rows)
    data_mean = np.mean(data, 0)
    A = data - np.tile(data_mean, (rows, 1))
    C = A * A.T
    D, V = np.linalg.eig(C)
    D, V = np.real(D), np.real(V)
    idx = np.argsort(-D)                  # order components by descending eigenvalue
    V_r = A.T * V[:, idx[:r]]             # back to the 1024-dim image space
    for i in range(r):
        V_r[:, i] = V_r[:, i] / np.linalg.norm(V_r[:, i])

    final_data = A * V_r
    return final_data, data_mean, V_r


if __name__ == "__main__":
    for r in [40, 80, 200]:
        print('dimensionality of face images reduced to {}:'.format(r))
        k = int(170 * 0.7)  # 70% of each subject's images for training
        train_face, train_label, test_face, test_label, \
            train_my_face, train_my_label, test_my_face, test_my_label = load_dataset(k)
        # drop the all-zero rows reserved for subject 26
        train_face, train_label = train_face[train_label != 0], train_label[train_label != 0]
        test_face, test_label = test_face[test_label != 0], test_label[test_label != 0]

        # PCA on the PIE training images, and separately on the 7 self photos
        data_train_new, data_mean, V_r = PCA_fit(train_face, r)
        data_my_train_new, data_my_mean, my_V_r = PCA_fit(train_my_face, r)

        num_train = data_train_new.shape[0]
        num_test = test_face.shape[0]
        num_my_train = data_my_train_new.shape[0]
        num_my_test = test_my_face.shape[0]

        # project the test images with the training mean and eigenvectors
        temp_face = test_face - np.tile(data_mean, (num_test, 1))
        temp_my_face = test_my_face - np.tile(data_my_mean, (num_my_test, 1))

        data_test_new = np.array(temp_face * V_r)
        data_train_new = np.array(data_train_new)
        data_my_test_new = np.array(temp_my_face * my_V_r)
        data_my_train_new = np.array(data_my_train_new)

        # nearest-neighbour rule on the CMU PIE test images
        is_true_count = 0
        for i in range(num_test):
            testFace = data_test_new[i, :]
            diffMat = data_train_new - np.tile(testFace, (num_train, 1))
            sqDistances = (diffMat ** 2).sum(axis=1)
            indexMin = sqDistances.argsort()[0]
            if train_label[indexMin] == test_label[i]:
                is_true_count += 1

        accuracy = float(is_true_count) / num_test
        print('The classification accuracy on the CMU PIE test images is: {:.2f}%'.format(accuracy * 100))

        # nearest-neighbour rule on your own test photos
        is_my_true_count = 0
        for i in range(num_my_test):
            testFace = data_my_test_new[i, :]
            diffMat = data_my_train_new - np.tile(testFace, (num_my_train, 1))
            sqDistances = (diffMat ** 2).sum(axis=1)
            indexMin = sqDistances.argsort()[0]
            if train_my_label[indexMin] == test_my_label[i]:
                is_my_true_count += 1

        accuracy = float(is_my_true_count) / num_my_test
        print('MY: the classification accuracy on my own photos is: {:.2f}%'.format(accuracy * 100))
```
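Note that the script above fits a separate PCA on the 7 self photos, so their features live in a different space from the PIE eigenfaces. An arguably closer reading of the task is to reuse the PIE-trained `data_mean` and `V_r` to project your own photos and classify them with the same nearest-neighbour rule against the combined training set. A minimal sketch, assuming the variables from the last loop iteration of the script above are still in scope:

```python
# project the self photos with the eigenspace learned from the PIE training images
my_train_proj = np.array((train_my_face - np.tile(data_mean, (7, 1))) * V_r)
my_test_proj = np.array((test_my_face - np.tile(data_mean, (3, 1))) * V_r)

# nearest neighbour against the PIE and self training projections together
all_train_proj = np.vstack((data_train_new, my_train_proj))
all_train_label = np.concatenate((train_label, train_my_label))

correct = 0
for i in range(my_test_proj.shape[0]):
    dists = ((all_train_proj - my_test_proj[i]) ** 2).sum(axis=1)
    if all_train_label[dists.argmin()] == test_my_label[i]:
        correct += 1
print('accuracy on my own photos (shared PIE eigenspace): {:.2f}%'.format(100.0 * correct / my_test_proj.shape[0]))
```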

Apply LDA to reduce the data dimensionality to 2, 3, and 9 respectively. Visualize the distribution of the sampled data (as in the PCA part) with dimensionality 2 and 3. Report the classification accuracy for data of dimensionality 2, 3, and 9 based on the nearest-neighbor classifier, and report the accuracy on the CMU PIE test images and on your own photos separately.
Unlike the PCA part, the dimensions required here (2 and 3) are exactly the ones needed for visualization, so one script answers both the visualization and the classification questions.
```python
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (registers the 3d projection on older matplotlib)


def img2vector(im_path):
    """Read a grayscale face image and flatten it into a 1 x (rows*cols) row vector."""
    im = cv2.imread(im_path, 0)
    rows, cols = im.shape
    return np.reshape(im, (1, rows * cols))


def load_dataset(k):
    """Same loader as in the PCA part: k training images per PIE subject, folder 26 = self photos."""
    faceData_path = '../PIE'
    faceData_length = len(os.listdir(faceData_path))
    train_face = np.zeros((faceData_length * k, 32 * 32))
    train_label = np.zeros(faceData_length * k)
    train_my_face = np.zeros((7, 32 * 32))
    train_my_label = np.zeros(7)
    test_face = np.zeros((faceData_length * (170 - k), 32 * 32))
    test_label = np.zeros(faceData_length * (170 - k))
    test_my_face = np.zeros((3, 32 * 32))
    test_my_label = np.zeros(3)
    sample = np.random.permutation(170) + 1

    for i in range(faceData_length):
        people_idx = i + 1
        if people_idx == 26:  # your own 10 photos
            for j in range(10):
                face_im_path = os.path.join(faceData_path, str(people_idx), str(j + 1) + '.jpg')
                faceVector_im = img2vector(face_im_path)
                if j < 7:
                    train_my_face[j, :] = faceVector_im
                    train_my_label[j] = people_idx
                else:
                    test_my_face[10 - j - 1, :] = faceVector_im
                    test_my_label[10 - j - 1] = people_idx
        else:
            for j in range(170):
                face_im_path = os.path.join(faceData_path, str(people_idx), str(sample[j]) + '.jpg')
                faceVector_im = img2vector(face_im_path)
                if j < k:
                    train_face[i * k + j, :] = faceVector_im
                    train_label[i * k + j] = people_idx
                else:
                    test_face[i * (170 - k) + (j - k), :] = faceVector_im
                    test_label[i * (170 - k) + (j - k)] = people_idx

    return train_face, train_label, test_face, test_label, train_my_face, train_my_label, test_my_face, test_my_label


def LDA(face, label, k):
    """Return the d x k projection matrix that maximises the Fisher criterion."""
    x, y = face.shape
    classes = np.unique(label)
    meanAll = face.mean(axis=0)
    Sb = np.zeros((y, y), dtype=np.float32)   # between-class scatter
    Sw = np.zeros((y, y), dtype=np.float32)   # within-class scatter
    for i in classes:
        face_i = face[np.where(label == i)[0], :]
        mean_i = face_i.mean(axis=0)
        n = face_i.shape[0]
        Sw = Sw + np.dot((face_i - mean_i).T, (face_i - mean_i))
        # outer (not inner) product, so Sb stays a y x y matrix
        Sb = Sb + n * np.outer(mean_i - meanAll, mean_i - meanAll)
    # small ridge so Sw stays invertible even when it is rank deficient
    Sw = Sw + np.eye(y, dtype=np.float32) / 500
    matrix = np.linalg.inv(Sw) @ Sb
    # Sw^-1 Sb is generally not symmetric, so eig (not eigh) is the right call
    eigenvalue, eigenvector = np.linalg.eig(matrix)
    eigenvalue, eigenvector = np.real(eigenvalue), np.real(eigenvector)
    index = np.argsort(eigenvalue)       # ascending
    index = index[:-(k + 1):-1]          # keep the k largest
    select = eigenvector[:, index]
    return select


def plot_projection(train, k):
    """Scatter-plot a random sample of 500 projected training vectors in 2D or 3D."""
    idx = np.random.choice(train.shape[0], min(500, train.shape[0]), replace=False)
    sampled = train[idx]
    fig = plt.figure()
    if k == 2:
        plt.scatter(sampled[:, 0], sampled[:, 1])
    else:
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(sampled[:, 0], sampled[:, 1], sampled[:, 2])
        ax.set_xlabel('x')
        ax.set_ylabel('y')
        ax.set_zlabel('z')
    plt.show()


def KNN_classify(face, train_face, train_label, k):
    """Weighted k-nearest-neighbour vote for a single test vector."""
    dis = np.sum(np.power(train_face - face, 2), axis=1)
    index = np.argsort(dis)[:k]
    weight = []
    for i in range(k):
        # normalised distance weight in [0, 1]; parentheses fixed, epsilon avoids 0/0
        weight.append((dis[index[k - 1]] - dis[index[i]]) / (dis[index[k - 1]] - dis[index[0]] + 1e-12))
    count = np.zeros(int(train_label.max()) + 1)   # one vote bin per possible label
    for tmp, i in enumerate(index):
        count[int(train_label[i])] += 1 + weight[tmp]
    return np.argmax(count)


def KNN(train_faces, train_labels, test_faces, test_labels, k):
    """Accuracy of the weighted k-NN classifier over a whole test set."""
    total = test_faces.shape[0]
    err = 0
    for i in range(total):
        if KNN_classify(test_faces[i], train_faces, train_labels, k) != test_labels[i]:
            err += 1
    return (total - err) / total


def recognize_lda():
    """LDA fitted on the CMU PIE training images, evaluated on the PIE test images."""
    eachNum = int(170 * 0.7)  # training images per subject
    train_face, train_label, test_face, test_label, _, _, _, _ = load_dataset(eachNum)
    # drop the all-zero rows reserved for subject 26
    train_face, train_label = train_face[train_label != 0], train_label[train_label != 0]
    test_face, test_label = test_face[test_label != 0], test_label[test_label != 0]

    for k in [2, 3, 9]:
        print('when the reduced dimension is {}:'.format(k), end=' ')
        W = LDA(train_face, train_label, k)
        train = np.dot(train_face, W)
        test = np.dot(test_face, W)
        if k in (2, 3):
            plot_projection(train, k)   # visualize the sampled data, as in the PCA part
        acc = KNN(train, train_label, test, test_label, 1)  # nearest-neighbour rule
        print('PIE test accuracy = {:.2f}%'.format(acc * 100))


def recognizeMy_lda():
    """Evaluate your own photos.

    LDA needs at least two classes, so the 7 self training photos are combined
    with the PIE training images before fitting W (fitting LDA on the self
    photos alone, as in the original version, is degenerate); only the 3 self
    test photos are scored here.
    """
    eachNum = int(170 * 0.7)
    train_face, train_label, _, _, train_my_face, train_my_label, test_my_face, test_my_label = load_dataset(eachNum)
    train_face, train_label = train_face[train_label != 0], train_label[train_label != 0]
    all_train = np.vstack((train_face, train_my_face))
    all_label = np.concatenate((train_label, train_my_label))

    for k in [2, 3, 9]:
        print('when the reduced dimension is {}:'.format(k), end=' ')
        W = LDA(all_train, all_label, k)
        train = np.dot(all_train, W)
        test = np.dot(test_my_face, W)
        if k in (2, 3):
            plot_projection(train, k)
        acc = KNN(train, all_label, test, test_my_label, 1)  # nearest-neighbour rule
        print('my-photo accuracy = {:.2f}%'.format(acc * 100))


if __name__ == "__main__":
    recognize_lda()
    recognizeMy_lda()
```
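As a sanity check of the hand-written LDA above, the same experiment can be reproduced with scikit-learn, which the SVM part below already uses. A minimal sketch, assuming `load_dataset` from the script above; `LinearDiscriminantAnalysis` and a 1-nearest-neighbour classifier stand in for the manual `LDA` and `KNN` functions:

```python
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier

(train_face, train_label, test_face, test_label,
 my_train, my_train_label, my_test, my_test_label) = load_dataset(int(170 * 0.7))
# drop the unused all-zero rows and add the self photos as one extra class
train_face, train_label = train_face[train_label != 0], train_label[train_label != 0]
test_face, test_label = test_face[test_label != 0], test_label[test_label != 0]
X_train = np.vstack((train_face, my_train))
y_train = np.concatenate((train_label, my_train_label))

for dim in [2, 3, 9]:
    lda = LinearDiscriminantAnalysis(n_components=dim)
    Z_train = lda.fit_transform(X_train, y_train)
    knn = KNeighborsClassifier(n_neighbors=1).fit(Z_train, y_train)
    pie_acc = knn.score(lda.transform(test_face), test_label)
    my_acc = knn.score(lda.transform(my_test), my_test_label)
    print('dim={}: PIE test accuracy {:.2f}%, my photos {:.2f}%'.format(dim, 100 * pie_acc, 100 * my_acc))
```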


Use the raw (vectorized) face images and the PCA-preprocessed face vectors (with dimensionality 80 and 200) as inputs to a linear SVM. Try the penalty parameter C with the values {0.01, 0.1, 1}. Report the classification accuracy for the different parameter values and dimensions, and discuss the effect of the data dimensionality and of C on the final classification accuracy.
```python
import os
import numpy as np
import cv2
from sklearn import svm


def img2vector(im_path):
    """Read a grayscale face image and flatten it into a 1 x (rows*cols) row vector."""
    im = cv2.imread(im_path, 0)
    rows, cols = im.shape
    return np.reshape(im, (1, rows * cols))


def load_dataset(k):
    """k training images per PIE subject; the 10 self photos in folder 26 join the
    same train/test arrays (7 for training, 3 for testing)."""
    faceData_path = '../PIE'
    faceData_length = len(os.listdir(faceData_path))
    train_face = np.zeros((faceData_length * k, 32 * 32))
    train_label = np.zeros(faceData_length * k)
    test_face = np.zeros((faceData_length * (170 - k), 32 * 32))
    test_label = np.zeros(faceData_length * (170 - k))

    sample = np.random.permutation(170) + 1

    for i in range(faceData_length):
        people_idx = i + 1
        if people_idx == 26:  # your own 10 photos
            for j in range(10):
                face_im_path = os.path.join(faceData_path, str(people_idx), str(j + 1) + '.jpg')
                faceVector_im = img2vector(face_im_path)
                if j < 7:
                    train_face[i * k + j, :] = faceVector_im
                    train_label[i * k + j] = people_idx
                else:
                    test_face[i * (170 - k) + (j - 7), :] = faceVector_im
                    test_label[i * (170 - k) + (j - 7)] = people_idx
        else:
            for j in range(170):
                face_im_path = os.path.join(faceData_path, str(people_idx), str(sample[j]) + '.jpg')
                faceVector_im = img2vector(face_im_path)
                if j < k:
                    train_face[i * k + j, :] = faceVector_im
                    train_label[i * k + j] = people_idx
                else:
                    test_face[i * (170 - k) + (j - k), :] = faceVector_im
                    test_label[i * (170 - k) + (j - k)] = people_idx

    return train_face, train_label, test_face, test_label


def PCA_fit(data, r):
    """Project `data` onto its top-r principal components (same as in the PCA part)."""
    data = np.float32(np.mat(data))
    rows, cols = np.shape(data)
    data_mean = np.mean(data, 0)
    A = data - np.tile(data_mean, (rows, 1))
    C = A * A.T
    D, V = np.linalg.eig(C)
    D, V = np.real(D), np.real(V)
    idx = np.argsort(-D)                  # order components by descending eigenvalue
    V_r = A.T * V[:, idx[:r]]
    for i in range(r):
        V_r[:, i] = V_r[:, i] / np.linalg.norm(V_r[:, i])

    final_data = A * V_r
    return final_data, data_mean, V_r


if __name__ == "__main__":
    k = int(170 * 0.7)
    train_face, train_label, test_face, test_label = load_dataset(k)
    # drop the all-zero padding rows left in subject 26's unused slots
    train_face, train_label = train_face[train_label != 0], train_label[train_label != 0]
    test_face, test_label = test_face[test_label != 0], test_label[test_label != 0]

    for r in [80, 200]:
        data_train_new, data_mean, V_r = PCA_fit(train_face, r)
        data_train_new = np.array(data_train_new).astype(float)
        # project the test images with the training mean and eigenvectors
        temp_face = test_face - np.tile(data_mean, (test_face.shape[0], 1))
        data_test_new = np.array(temp_face * V_r).astype(float)

        for c in [0.01, 0.1, 1]:
            print('dimensionality of face images reduced to {}, C = {}'.format(r, c))
            lin = svm.SVC(kernel='linear', C=c)
            lin.fit(data_train_new, train_label)
            predict = lin.predict(data_test_new)
            acc = np.sum(test_label == predict) / data_test_new.shape[0]   # float division, test set
            print('test accuracy: {:.2f}%'.format(acc * 100))
```
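The task also asks for the raw (vectorized) face images as SVM input, which the script above skips. A minimal sketch, assuming `load_dataset` from the script above, that trains the same linear SVM directly on the 1024-dimensional raw vectors and scores it on the test set:

```python
import numpy as np
from sklearn import svm

train_face, train_label, test_face, test_label = load_dataset(int(170 * 0.7))
# drop the all-zero padding rows left in subject 26's unused slots
train_face, train_label = train_face[train_label != 0], train_label[train_label != 0]
test_face, test_label = test_face[test_label != 0], test_label[test_label != 0]

for c in [0.01, 0.1, 1]:
    lin = svm.SVC(kernel='linear', C=c)
    lin.fit(train_face, train_label)
    acc = lin.score(test_face, test_label)   # mean accuracy on the test images
    print('raw 1024-dim input, C = {}: test accuracy {:.2f}%'.format(c, 100 * acc))
```

Comparing these numbers against the 80- and 200-dimensional PCA inputs above is the basis for the requested discussion of how dimensionality and C affect the final accuracy.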

Train a CNN with two convolutional layers and one fully connected layer, with the following architecture in terms of the number of nodes: 20-50-500-21. The number of nodes in the last layer is fixed at 21 to perform 21-way classification (20 CMU PIE subjects plus yourself). The convolution kernel size is set to 5. Each convolutional layer is followed by a max-pooling layer with kernel size 2 and stride 2, and the fully connected layer is followed by ReLU. Train the network and report the final classification performance.
```python
import os
import numpy as np
import cv2
import tensorflow as tf


def load_dataset(k):
    """Load the last 20 PIE subjects as 20 of the 21 CNN classes (labels 1..20)."""
    faceData_path = '../PIE'
    faceData_length = len(os.listdir(faceData_path))

    train_face, train_label = [], []
    test_face, test_label = [], []
    sample = np.random.permutation(170) + 1

    # subjects 49..68, i.e. the last 20 folders
    for i in range(faceData_length - 20, faceData_length):
        people_idx = i + 1
        for j in range(170):
            face_im_path = os.path.join(faceData_path, str(people_idx), str(sample[j]) + '.jpg')
            face_im = cv2.imread(face_im_path)          # 32 x 32 x 3 colour image
            if j < k:
                train_face.append(face_im)
                train_label.append(people_idx - (faceData_length - 20))   # label 0 is kept for the self photos
            else:
                test_face.append(face_im)
                test_label.append(people_idx - (faceData_length - 20))

    return (np.array(train_face).astype(float), np.array(train_label),
            np.array(test_face).astype(float), np.array(test_label))


if __name__ == '__main__':
    train_face, train_label, test_face, test_label = load_dataset(int(170 * 0.7))
    train_face, test_face = train_face / 255.0, test_face / 255.0   # scale pixels to [0, 1]

    # 20-50-500-21 architecture: two 5x5 conv layers, each followed by 2x2 max pooling
    # with stride 2, then a 500-unit ReLU fully connected layer and a 21-way softmax
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(filters=20, kernel_size=(5, 5), padding='same',
                               input_shape=(32, 32, 3), activation='relu'),
        tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2),
        tf.keras.layers.Conv2D(filters=50, kernel_size=(5, 5), padding='same', activation='relu'),
        tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(500, activation='relu'),
        tf.keras.layers.Dense(21, activation='softmax')])

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(train_face, train_label, epochs=5)                     # train the model
    model.evaluate(test_face, test_label, batch_size=32, verbose=2)  # report test accuracy
```
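The loader above only covers 20 PIE subjects, while the task calls for 21 classes (20 PIE faces plus your own). A minimal sketch for appending your own photos as class 0, assuming, as in the earlier parts, that they sit in folder `26` as ten 32x32 colour JPEGs (7 for training, 3 for testing), and that the snippet is spliced in after the rescaling step and before `model.fit`:

```python
my_train_face, my_train_label = [], []
my_test_face, my_test_label = [], []
for j in range(10):
    im = cv2.imread(os.path.join('../PIE', '26', str(j + 1) + '.jpg'))
    if j < 7:
        my_train_face.append(im)
        my_train_label.append(0)   # class 0 = your own face; PIE subjects keep labels 1..20
    else:
        my_test_face.append(im)
        my_test_label.append(0)

# append to the already rescaled arrays produced by load_dataset above
train_face = np.concatenate([train_face, np.array(my_train_face).astype(float) / 255.0])
train_label = np.concatenate([train_label, np.array(my_train_label)])
test_face = np.concatenate([test_face, np.array(my_test_face).astype(float) / 255.0])
test_label = np.concatenate([test_label, np.array(my_test_label)])
```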
