• Neural Network Principles and Code Implementation


1. Deep Learning

Machine learning workflow: data acquisition, feature engineering, model building, evaluation and application.

The role of feature engineering:

The features of the data determine the upper bound of what a model can achieve.

Preprocessing and feature extraction are the most critical steps.
The choice of algorithm and its parameters only determines how closely that upper bound is approached.

Traditional feature extraction methods:

Deep learning feature extraction:

2. Linear Function

A mapping from input --> output.

A score for each class.

Mathematical representation:

Computation method:

Multiple sets of weight parameters define the decision boundaries.
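As a sketch of what the figures above would show, assuming the usual linear score function f(x, W) = Wx + b (all shapes and values below are illustrative, not taken from the original):

import numpy as np

# Minimal sketch of a linear score function f(x, W) = W x + b.
num_classes, num_features = 3, 4                       # e.g. 3 categories, 4 input features
W = np.random.randn(num_classes, num_features) * 0.01  # one row of weights per class
b = np.zeros((num_classes, 1))                         # one bias value per class

x = np.random.randn(num_features, 1)                   # a single flattened input
scores = W.dot(x) + b                                  # one score per class
print(scores.ravel(), "predicted class:", int(np.argmax(scores)))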

     

3. Loss Function

There are actually many kinds of loss functions; let's try one out.

If two sets of weights give the same loss value, does that mean the two models are the same?

As we can see, different weights can produce the same loss value. We therefore introduce regularization to penalize large weights.

Loss function = data loss + regularization penalty

Regularization penalty:
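As a hedged sketch, assuming the multiclass hinge (SVM) loss as the data loss and an L2 penalty as the regularization term (the margin of 1 and the penalty strength below are illustrative choices, not fixed by the text above):

import numpy as np

def svm_loss(scores, correct_class, W, reg_strength=0.1):
    # Data loss: multiclass hinge (SVM) loss with a margin of 1.
    margins = np.maximum(0, scores - scores[correct_class] + 1)
    margins[correct_class] = 0                 # the correct class contributes no loss
    data_loss = np.sum(margins)
    # Regularization penalty: L2 norm of the weights, weighted by lambda.
    reg_loss = reg_strength * np.sum(W * W)
    return data_loss + reg_loss                # loss = data loss + regularization penalty

W = np.array([[0.2, -0.5], [1.5, 1.3], [0.0, 0.25]])   # 3 classes, 2 features (illustrative)
x = np.array([1.0, 2.0])
print(svm_loss(W.dot(x), correct_class=0, W=W))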

4. Softmax Classifier

Converts the scores into probability values.

Normalization:

Computing the loss value:
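A minimal sketch of the softmax normalization and the resulting loss (the max-subtraction is only for numerical stability and does not change the result):

import numpy as np

def softmax_loss(scores, correct_class):
    # Normalization: exponentiate and divide by the sum -> values in (0, 1) summing to 1.
    shifted = scores - np.max(scores)            # subtract the max for numerical stability only
    probs = np.exp(shifted) / np.sum(np.exp(shifted))
    # Loss value: negative log probability assigned to the correct class.
    return probs, -np.log(probs[correct_class])

scores = np.array([3.2, 5.1, -1.7])
probs, loss = softmax_loss(scores, correct_class=0)
print(probs, loss)   # low probability on the correct class -> high loss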

     

5. Forward and Backward Propagation

How do we update the model? That is the job of backpropagation (gradient descent).

Backpropagation uses the chain rule.

The gradient is propagated backwards one step at a time.

A more complex example:

Can we compute the gradient for a whole block of operations at once?

Add gate: distributes the gradient equally to its inputs.

Max gate: passes the full gradient to the largest input.

Multiply gate: the inputs "swap" -- each input's gradient is scaled by the other input's value (see the sketch below).
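A toy sketch of these three gate behaviours on the graph f = max(x, y) * (x + z); the input values are made up, and only the way the gradient is routed matters:

# Toy computational graph: f = max(x, y) * (x + z)
x, y, z = 3.0, -1.0, 2.0

# forward pass
q = max(x, y)        # max gate
p = x + z            # add gate
f = q * p            # multiply gate

# backward pass (chain rule, propagated step by step)
df_dq = p            # multiply gate: each input's gradient is the *other* input's value
df_dp = q
dq_dx, dq_dy = (1.0, 0.0) if x >= y else (0.0, 1.0)  # max gate: all gradient to the larger input
dp_dx, dp_dz = 1.0, 1.0                              # add gate: gradient distributed equally

dx = df_dq * dq_dx + df_dp * dp_dx   # x sits on two paths, so its gradients add up
dy = df_dq * dq_dy
dz = df_dp * dp_dz
print(dx, dy, dz)    # 8.0 0.0 3.0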

6. The Role of Regularization

Effect of the penalty strength on the result:

Increasing the penalty strength reduces the risk of overfitting.

Effect of the number of parameters on the result:

Generally speaking, the more parameters, the stronger the fitting capacity.

7. Activation Functions

Commonly used activation functions (Sigmoid, ReLU, Tanh, etc.)

Sigmoid: suffers from the vanishing-gradient problem.

ReLU:
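A minimal sketch of these activations, showing why the sigmoid gradient vanishes for large |x| while ReLU does not saturate on the positive side:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_grad(x):
    s = sigmoid(x)
    return s * (1 - s)          # at most 0.25, and ~0 for large |x| -> gradients vanish

def relu(x):
    return np.maximum(0, x)     # gradient is 1 for x > 0, so it does not saturate there

def tanh(x):
    return np.tanh(x)

x = np.array([-10.0, -1.0, 0.0, 1.0, 10.0])
print(sigmoid_grad(x))          # tiny values at the ends -> vanishing gradient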

8. Data Preprocessing

Different preprocessing choices can change model performance dramatically; the data usually needs to be normalized.
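A minimal sketch of the usual normalization (zero-centering each feature and scaling it to unit variance; in practice the statistics are computed on the training set only):

import numpy as np

def normalize(X, eps=1e-8):
    # X: num_examples x num_features. Center each feature, then scale to unit variance.
    mean = X.mean(axis=0)
    std = X.std(axis=0)
    return (X - mean) / (std + eps), mean, std  # keep mean/std to apply to test data later

X_train = np.random.rand(100, 784) * 255.0      # e.g. raw pixel values in [0, 255]
X_norm, mu, sigma = normalize(X_train)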

9. Code Implementation

The most important part of a neural network is backpropagation. The backpropagation formulas are as follows:
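The original figure is not reproduced here; as a sketch, these are the standard formulas the implementation below follows (assuming sigmoid activations throughout, with $a^{(l)}$ the activation of layer $l$, $z^{(l)}$ its pre-activation, $\Theta^{(l)}$ its weight matrix, $y$ the one-hot label, $g$ the sigmoid, and $m$ the number of examples):

$$\delta^{(L)} = a^{(L)} - y$$
$$\delta^{(l)} = \left(\Theta^{(l)}\right)^{T} \delta^{(l+1)} \odot g'\!\left(z^{(l)}\right)$$
$$\frac{\partial J}{\partial \Theta^{(l)}} = \frac{1}{m} \sum_{i=1}^{m} \delta^{(l+1)} \left(a^{(l)}\right)^{T}$$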

     

Code implementation:

import numpy as np
from utils.features import prepare_for_training
from utils.hypothesis import sigmoid, sigmoid_gradient


class MultilayerPerceptron:
    def __init__(self, data, labels, layers, normalize_data=False):
        data_processed = prepare_for_training(data, normalize_data=normalize_data)[0]
        self.data = data_processed
        self.labels = labels
        self.layers = layers  # e.g. [784, 25, 10]
        self.normalize_data = normalize_data
        self.thetas = MultilayerPerceptron.thetas_init(layers)

    def predict(self, data):
        data_processed = prepare_for_training(data, normalize_data=self.normalize_data)[0]
        num_examples = data_processed.shape[0]
        predictions = MultilayerPerceptron.feedforward_propagation(data_processed, self.thetas, self.layers)
        return np.argmax(predictions, axis=1).reshape((num_examples, 1))

    def train(self, max_iterations=1000, alpha=0.1):
        unrolled_theta = MultilayerPerceptron.thetas_unroll(self.thetas)
        (optimized_theta, cost_history) = MultilayerPerceptron.gradient_descent(
            self.data, self.labels, unrolled_theta, self.layers, max_iterations, alpha)
        self.thetas = MultilayerPerceptron.thetas_roll(optimized_theta, self.layers)
        return self.thetas, cost_history

    @staticmethod
    def thetas_init(layers):
        num_layers = len(layers)
        thetas = {}
        for layer_index in range(num_layers - 1):
            # For layers = [784, 25, 10] this loop runs twice, producing
            # weight matrices of shape 25x785 and 10x26.
            in_count = layers[layer_index]
            out_count = layers[layer_index + 1]
            # The +1 accounts for the bias term; there is one bias unit per output unit.
            # Random initialization with deliberately small values.
            thetas[layer_index] = np.random.rand(out_count, in_count + 1) * 0.05
        return thetas

    @staticmethod
    def thetas_unroll(thetas):
        # Flatten all weight matrices into a single 1-D vector.
        num_theta_layers = len(thetas)
        unrolled_theta = np.array([])
        for theta_layer_index in range(num_theta_layers):
            unrolled_theta = np.hstack((unrolled_theta, thetas[theta_layer_index].flatten()))
        return unrolled_theta

    @staticmethod
    def gradient_descent(data, labels, unrolled_theta, layers, max_iterations, alpha):
        optimized_theta = unrolled_theta
        cost_history = []
        for _ in range(max_iterations):
            cost = MultilayerPerceptron.cost_function(
                data, labels, MultilayerPerceptron.thetas_roll(optimized_theta, layers), layers)
            cost_history.append(cost)
            theta_gradient = MultilayerPerceptron.gradient_step(data, labels, optimized_theta, layers)
            optimized_theta = optimized_theta - alpha * theta_gradient
        return optimized_theta, cost_history

    @staticmethod
    def gradient_step(data, labels, optimized_theta, layers):
        theta = MultilayerPerceptron.thetas_roll(optimized_theta, layers)
        thetas_rolled_gradients = MultilayerPerceptron.back_propagation(data, labels, theta, layers)
        thetas_unrolled_gradients = MultilayerPerceptron.thetas_unroll(thetas_rolled_gradients)
        return thetas_unrolled_gradients

    @staticmethod
    def back_propagation(data, labels, thetas, layers):
        num_layers = len(layers)
        (num_examples, num_features) = data.shape
        num_label_types = layers[-1]
        deltas = {}
        # Initialize the accumulated gradients with zeros.
        for layer_index in range(num_layers - 1):
            in_count = layers[layer_index]
            out_count = layers[layer_index + 1]
            deltas[layer_index] = np.zeros((out_count, in_count + 1))  # 25x785, 10x26
        for example_index in range(num_examples):
            layers_inputs = {}
            layers_activations = {}
            layers_activation = data[example_index, :].reshape((num_features, 1))  # 785x1
            layers_activations[0] = layers_activation
            # Forward pass, layer by layer.
            for layer_index in range(num_layers - 1):
                layer_theta = thetas[layer_index]  # current weights: 25x785, then 10x26
                layer_input = np.dot(layer_theta, layers_activation)  # 25x1, then 10x1
                layers_activation = np.vstack((np.array([[1]]), sigmoid(layer_input)))
                layers_inputs[layer_index + 1] = layer_input  # pre-activation of the next layer
                layers_activations[layer_index + 1] = layers_activation  # activation of the next layer
            output_layer_activation = layers_activation[1:, :]
            delta = {}
            # One-hot encode the label of this example.
            bitwise_label = np.zeros((num_label_types, 1))
            bitwise_label[labels[example_index][0]] = 1
            # Error between the output layer and the ground truth.
            delta[num_layers - 1] = output_layer_activation - bitwise_label
            # Propagate the error backwards through the hidden layers: L-1, L-2, ..., 1.
            for layer_index in range(num_layers - 2, 0, -1):
                layer_theta = thetas[layer_index]
                next_delta = delta[layer_index + 1]
                layer_input = layers_inputs[layer_index]
                layer_input = np.vstack((np.array([[1]]), layer_input))
                # Apply the backpropagation formula.
                delta[layer_index] = np.dot(layer_theta.T, next_delta) * sigmoid_gradient(layer_input)
                # Drop the bias component.
                delta[layer_index] = delta[layer_index][1:, :]
            # Accumulate the gradient contribution of this example.
            for layer_index in range(num_layers - 1):
                layer_delta = np.dot(delta[layer_index + 1], layers_activations[layer_index].T)
                deltas[layer_index] = deltas[layer_index] + layer_delta  # 25x785, then 10x26
        # Average the gradients over all examples.
        for layer_index in range(num_layers - 1):
            deltas[layer_index] = deltas[layer_index] * (1 / num_examples)
        return deltas

    @staticmethod
    def cost_function(data, labels, thetas, layers):
        num_layers = len(layers)
        num_examples = data.shape[0]
        num_labels = layers[-1]
        # Run one forward pass.
        predictions = MultilayerPerceptron.feedforward_propagation(data, thetas, layers)
        # Build the labels: every example gets a one-hot vector.
        bitwise_labels = np.zeros((num_examples, num_labels))
        for example_index in range(num_examples):
            bitwise_labels[example_index][labels[example_index][0]] = 1
        bit_set_cost = np.sum(np.log(predictions[bitwise_labels == 1]))
        bit_not_set_cost = np.sum(np.log(1 - predictions[bitwise_labels == 0]))
        cost = (-1 / num_examples) * (bit_set_cost + bit_not_set_cost)
        return cost

    @staticmethod
    def feedforward_propagation(data, thetas, layers):
        num_layers = len(layers)
        num_examples = data.shape[0]
        in_layer_activation = data
        # Layer-by-layer computation.
        for layer_index in range(num_layers - 1):
            theta = thetas[layer_index]
            out_layer_activation = sigmoid(np.dot(in_layer_activation, theta.T))
            # The plain result is num_examples x 25; prepend the bias column to get num_examples x 26.
            out_layer_activation = np.hstack((np.ones((num_examples, 1)), out_layer_activation))
            in_layer_activation = out_layer_activation
        # Return the output-layer result without the bias column.
        return in_layer_activation[:, 1:]

    @staticmethod
    def thetas_roll(unrolled_thetas, layers):
        # Reshape the flat parameter vector back into per-layer weight matrices.
        num_layers = len(layers)
        thetas = {}
        unrolled_shift = 0
        for layer_index in range(num_layers - 1):
            in_count = layers[layer_index]
            out_count = layers[layer_index + 1]
            thetas_width = in_count + 1
            thetas_height = out_count
            thetas_volume = thetas_width * thetas_height
            start_index = unrolled_shift
            end_index = unrolled_shift + thetas_volume
            layer_theta_unrolled = unrolled_thetas[start_index:end_index]
            thetas[layer_index] = layer_theta_unrolled.reshape((thetas_height, thetas_width))
            unrolled_shift = unrolled_shift + thetas_volume
        return thetas

10. Testing

The model is tested on the MNIST dataset.
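A hedged usage sketch (the file name data/mnist-demo.csv, its column layout, and the 80/20 split below are assumptions for illustration; the class itself expects the features as a num_examples x 784 array and the labels as a num_examples x 1 integer array):

import numpy as np
import pandas as pd

# Assumed CSV layout: first column is the digit label, remaining 784 columns are pixel values.
data = pd.read_csv('data/mnist-demo.csv')
train = data.sample(frac=0.8)
test = data.drop(train.index)

x_train = train.values[:, 1:].astype(float)
y_train = train.values[:, [0]].astype(int)
x_test = test.values[:, 1:].astype(float)
y_test = test.values[:, [0]].astype(int)

layers = [784, 25, 10]                      # input, hidden, output sizes
mlp = MultilayerPerceptron(x_train, y_train, layers, normalize_data=True)
thetas, cost_history = mlp.train(max_iterations=500, alpha=0.1)

predictions = mlp.predict(x_test)
accuracy = np.mean(predictions == y_test) * 100
print('test accuracy: %.2f%%' % accuracy)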

Dataset preview:

     

Results:


• Original article: https://blog.csdn.net/qq_52053775/article/details/126094711