flyfish
Showing the direction of the computation
In the 2D case, BatchNorm computes its statistics down the columns (per feature), while LayerNorm computes them across the rows (per sample).
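A tiny sketch of just that direction before the full example below, using nothing but NumPy reductions (the axis choices here are the whole point):

import numpy as np

data = np.array([[1.0, 2.0, 3.0],
                 [4.0, 5.0, 6.0],
                 [7.0, 8.0, 9.0]])

# axis=0 reduces down each column -> the BatchNorm direction
print(np.mean(data, axis=0))   # [4. 5. 6.]
# axis=1 reduces across each row -> the LayerNorm direction
print(np.mean(data, axis=1))   # [2. 5. 8.]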
import numpy as np
import torch
import torch.nn as nn

class CustomLayerNorm:
    def __init__(self, eps=1e-5):
        self.eps = eps

    def __call__(self, x):
        # LayerNorm: statistics over the last axis, i.e. per row
        mean = np.mean(x, axis=-1, keepdims=True)
        std = np.std(x, axis=-1, keepdims=True)
        # Note: eps is added to std here; PyTorch adds it inside sqrt(var + eps)
        normalized = (x - mean) / (std + self.eps)
        return normalized

class CustomBatchNorm:
    def __init__(self, eps=1e-5):
        self.eps = eps

    def __call__(self, x):
        # BatchNorm: statistics over axis 0, i.e. per column (per feature)
        mean = np.mean(x, axis=0)
        std = np.std(x, axis=0)
        normalized = (x - mean) / (std + self.eps)
        return normalized
# Original Data
data = np.array([[1.0, 2.0, 3.0],
                 [4.0, 5.0, 6.0],
                 [7.0, 8.0, 9.0]])
# Apply Custom LayerNorm
custom_layer_norm = CustomLayerNorm()
custom_layer_norm_data = custom_layer_norm(data)
# Apply Custom BatchNorm
custom_batch_norm = CustomBatchNorm()
custom_batch_norm_data = custom_batch_norm(data)
# Apply PyTorch LayerNorm
data_tensor = torch.tensor(data, dtype=torch.float32)
layer_norm = nn.LayerNorm(data_tensor.size()[1:])
pytorch_layer_norm_data = layer_norm(data_tensor).detach().numpy()
# Compare Custom and PyTorch LayerNorm
print("Original Data:\n", data)
print("Custom LayerNorm Data:\n", custom_layer_norm_data)
print("PyTorch LayerNorm Data:\n", pytorch_layer_norm_data)
Original Data:
[[1. 2. 3.]
[4. 5. 6.]
[7. 8. 9.]]
Custom LayerNorm Data:
[[-1.22472987 0. 1.22472987]
[-1.22472987 0. 1.22472987]
[-1.22472987 0. 1.22472987]]
PyTorch LayerNorm Data:
[[-1.2247356 0. 1.2247356]
[-1.2247356 0. 1.2247356]
[-1.2247356 0. 1.2247356]]
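The script computes custom_batch_norm_data but never prints it. For completeness, a minimal sketch of the analogous comparison, using nn.BatchNorm1d with affine=False as the PyTorch counterpart (this snippet and its parameter choices are an addition, not part of the original script):

# Sketch: compare the custom BatchNorm against PyTorch's nn.BatchNorm1d.
# affine=False drops the learnable gamma/beta; train() makes the layer
# use the current batch statistics rather than running averages.
batch_norm = nn.BatchNorm1d(num_features=3, affine=False)
batch_norm.train()
pytorch_batch_norm_data = batch_norm(data_tensor).detach().numpy()
print("Custom BatchNorm Data:\n", custom_batch_norm_data)
print("PyTorch BatchNorm Data:\n", pytorch_batch_norm_data)

Each column then normalizes to roughly [-1.2247, 0, 1.2247] from top to bottom (column means 4, 5, 6; population std ≈ 2.4495), with the same tiny eps-placement discrepancy as in the LayerNorm comparison.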
The step-by-step calculation is as follows. (The small numerical gap between the two outputs above, 1.22472987 vs. 1.2247356, comes from where eps sits: the custom version divides by (std + eps), while nn.LayerNorm divides by sqrt(var + eps).)
Normalization formula: normalized = (x - mean) / (std + eps)
Row 1: mean = 2, std = sqrt(((1-2)² + (2-2)² + (3-2)²) / 3) ≈ 0.8165
[(1-2)/(0.8165+1e-5), (2-2)/(0.8165+1e-5), (3-2)/(0.8165+1e-5)]
= [-1.2247, 0, 1.2247]
Row 2: mean = 5, std ≈ 0.8165
[(4-5)/(0.8165+1e-5), (5-5)/(0.8165+1e-5), (6-5)/(0.8165+1e-5)]
= [-1.2247, 0, 1.2247]
Row 3: mean = 8, std ≈ 0.8165
[(7-8)/(0.8165+1e-5), (8-8)/(0.8165+1e-5), (9-8)/(0.8165+1e-5)]
= [-1.2247, 0, 1.2247]
The final normalized matrix is:
[[-1.2247, 0, 1.2247]
 [-1.2247, 0, 1.2247]
 [-1.2247, 0, 1.2247]]
All three rows normalize to the same values because each row has the same within-row spread (std ≈ 0.8165).
Meta Llama 3 uses RMSNorm.
Suppose we have the following 2D input tensor X (for simplicity, 2 rows and 3 columns):
[[1, 2, 3],
 [4, 5, 6]]
RMSNorm works as follows: for each row x, compute the root mean square RMS(x) = sqrt(mean(x²) + eps), then divide the row by it and scale by a learnable per-dimension weight (initialized to ones): output = x / RMS(x) * weight. Unlike LayerNorm, no mean is subtracted and no bias is added.
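Mirroring the row-by-row LayerNorm walkthrough above, the same arithmetic worked by hand for this tensor (eps = 1e-6 is too small to matter at this precision):

Row 1: RMS = sqrt((1² + 2² + 3²)/3) = sqrt(14/3) ≈ 2.1602
[1/2.1602, 2/2.1602, 3/2.1602] = [0.4629, 0.9258, 1.3887]
Row 2: RMS = sqrt((4² + 5² + 6²)/3) = sqrt(77/3) ≈ 5.0662
[4/5.0662, 5/5.0662, 6/5.0662] = [0.7895, 0.9869, 1.1843]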
Here is a PyTorch implementation of these steps (essentially the RMSNorm module from Meta's Llama reference code):
import torch
import torch.nn as nn

class RMSNorm(nn.Module):
    def __init__(self, dim: int, eps: float = 1e-6):
        super().__init__()
        self.eps = eps
        # Learnable per-dimension scale, initialized to ones
        self.weight = nn.Parameter(torch.ones(dim))

    def _norm(self, x):
        # x / sqrt(mean(x^2) + eps), via the reciprocal square root
        return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)

    def forward(self, x):
        # Normalize in float32 for stability, then cast back to the input dtype
        output = self._norm(x.float()).type_as(x)
        return output * self.weight
# Example data
data = torch.tensor([[1.0, 2.0, 3.0],
                     [4.0, 5.0, 6.0]])
# Instantiate the RMSNorm layer
rms_norm = RMSNorm(dim=data.size(-1))
# Compute the normalized output
normalized_data = rms_norm(data)
print("Original Data:\n", data)
print("RMSNorm Normalized Data:\n", normalized_data)
Running the code prints the original and the normalized data:
Original Data:
 tensor([[1., 2., 3.],
        [4., 5., 6.]])
RMSNorm Normalized Data:
tensor([[0.4629, 0.9258, 1.3887],
        [0.7895, 0.9869, 1.1843]], grad_fn=<MulBackward0>)
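As a quick cross-check, recent PyTorch versions ship a built-in RMSNorm module; a minimal sketch, assuming PyTorch ≥ 2.4 (where torch.nn.RMSNorm was added):

# Sketch: compare against the built-in nn.RMSNorm (PyTorch >= 2.4 only)
builtin = nn.RMSNorm(normalized_shape=3, eps=1e-6)
print(builtin(data))  # expected to match the custom RMSNorm output above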