参考论文1:《Convolutional Neural Networks for Sentence Classification》(用于句子分类的卷积神经网络),2014
参考论文2:《A Sensitivity Analysis of (and Practitioners’ Guide to) Convolutional Neural Networks for Sentence Classification》(用于句子分类的卷积神经网络的敏感性分析及从业者指南),2016
配套文章:https://wmathor.com/index.php/archives/1445/ (写得非常详细)
import torch
import numpy as np
import torch.optim as optim
import torch.utils.data as Data
import torch.nn.functional as F
# Default tensor type and compute device (GPU when one is available, else CPU).
dtype = torch.FloatTensor
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Toy corpus: every sentence has exactly three words (sequence_length == 3).
sentences = [
    "i love you",
    "he loves me",
    "she likes baseball",
    "i hate you",
    "sorry for that",
    "this is awful",
]
labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.

# TextCNN hyper-parameters.
embedding_size = 2  # each word is represented by a 2-dimensional vector
sequence_length = len(sentences[0].split())  # 3; all sentences share this length
num_classes = len(set(labels))  # 2

# Vocabulary: every token of the corpus (with repeats), then the unique words
# and a word -> index lookup table.
word_list = [token for sentence in sentences for token in sentence.split()]
vocab = list(set(word_list))
word2idx = dict(zip(vocab, range(len(vocab))))
vocab_size = len(vocab)
def make_data(sentences, labels, word_index=None):
    """Convert raw sentences and labels into index sequences and targets.

    Args:
        sentences: Iterable of whitespace-separated sentences.
        labels: Iterable of integer class labels, one per sentence.
        word_index: Optional mapping from word to integer index. When
            omitted, the module-level ``word2idx`` table is used.

    Returns:
        A tuple ``(inputs, targets)`` where ``inputs`` is a list holding one
        list of word indices per sentence and ``targets`` is a list of the
        labels. Both are plain Python lists; the caller converts them to
        tensors.

    Raises:
        KeyError: If a sentence contains a word missing from the mapping.
    """
    if word_index is None:
        word_index = word2idx
    # Turn each sentence into its sequence of vocabulary indices.
    inputs = [[word_index[word] for word in sentence.split()] for sentence in sentences]
    targets = list(labels)
    return inputs, targets
tensor([[ 4, 11, 14],
[ 5, 9, 10],
[15, 0, 12],
[ 4, 6, 14],
[ 1, 2, 13],
[ 7, 3, 8]])
tensor([1, 1, 1, 0, 0, 0])
from torch import nn
class TextCNN(nn.Module):
    """Minimal TextCNN: embedding -> one 2-gram convolution -> max pool -> linear.

    The fixed pooling window of height 2 collapses the time axis to a single
    row for 3-token inputs (the corpus length used in this file); longer
    inputs are not reduced to one row by this layer stack.

    Args:
        n_vocab: Number of rows of the embedding table. Defaults to the
            module-level ``vocab_size``.
        n_embed: Width of each word vector. Defaults to the module-level
            ``embedding_size``.
        n_classes: Number of output classes. Defaults to the module-level
            ``num_classes``.
        out_channels: Number of convolution kernels (feature maps).
    """

    def __init__(self, n_vocab=None, n_embed=None, n_classes=None, out_channels=3):
        super(TextCNN, self).__init__()
        # Fall back to the module-level configuration for omitted arguments so
        # that a bare ``TextCNN()`` keeps working.
        n_vocab = vocab_size if n_vocab is None else n_vocab
        n_embed = embedding_size if n_embed is None else n_embed
        n_classes = num_classes if n_classes is None else n_classes

        # Word-index -> word-vector lookup table.
        self.W = nn.Embedding(n_vocab, n_embed)
        self.conv = nn.Sequential(
            # conv: one input channel, `out_channels` kernels of shape
            # (filter_height=2, filter_width=n_embed), stride 1.
            # For a 3-token sentence the output is [batch, out_channels, 2, 1].
            nn.Conv2d(in_channels=1, out_channels=out_channels, kernel_size=(2, n_embed)),
            nn.ReLU(),
            # pool: max over the 2x1 map -> [batch, out_channels, 1, 1].
            nn.MaxPool2d(kernel_size=(2, 1)),
        )
        # fc: one feature per kernel -> class scores.
        self.fc = nn.Linear(in_features=out_channels, out_features=n_classes)

    def forward(self, x):
        """Compute class scores for a batch of index sequences.

        Args:
            x: Integer tensor of word indices, [batch_size, sequence_length].

        Returns:
            Tensor of unnormalised class scores, [batch_size, n_classes].
        """
        batch_size = x.shape[0]  # number of sentences in the batch
        # Replace every word index by its vector:
        # [batch_size, sequence_length, embedding_size].
        embedding_x = self.W(x)
        # Conv2d expects [batch, in_channel, height, width]; insert a single
        # channel axis, like a grayscale image.
        embedding_x = embedding_x.unsqueeze(1)  # [batch, 1, sequence_length, embedding_size]
        conved = self.conv(embedding_x)  # [batch_size, out_channels, 1, 1]
        flatten = conved.view(batch_size, -1)  # [batch_size, out_channels * 1 * 1]
        output = self.fc(flatten)
        return output
# Build the mini-batch loader: index sequences and labels become LongTensors
# and are served in shuffled batches of 3 sentences (two batches per epoch).
input_batch, target_batch = make_data(sentences, labels)
dataset = Data.TensorDataset(torch.LongTensor(input_batch), torch.LongTensor(target_batch))
loader = Data.DataLoader(dataset, batch_size=3, shuffle=True)

model = TextCNN().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Training
for epoch in range(5000):
    for batch_x, batch_y in loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        pred = model(batch_x)  # [batch_size, num_classes]
        loss = criterion(pred, batch_y)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'loss =', '{:.6f}'.format(loss.item()))

        # Clear stale gradients, back-propagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Epoch: 1000 loss = 0.030200
Epoch: 1000 loss = 0.054546
Epoch: 2000 loss = 0.014919
Epoch: 2000 loss = 0.007824
Epoch: 3000 loss = 0.002666
Epoch: 3000 loss = 0.005158
Epoch: 4000 loss = 0.001931
Epoch: 4000 loss = 0.000988
Epoch: 5000 loss = 0.000379
Epoch: 5000 loss = 0.000743
# Test
test_text = 'i hate me'
# Every word must already be in the vocabulary; unknown words raise KeyError.
tests = [[word2idx[n] for n in test_text.split()]]
test_batch = torch.LongTensor(tests).to(device)

# Predict
model = model.eval()
with torch.no_grad():  # inference only, no gradient tracking needed
    # Index of the highest class score, kept as a [1, 1] tensor.
    predict = model(test_batch).max(1, keepdim=True)[1]
if predict[0][0] == 0:
    print(test_text, "is Bad Mean...")
else:
    print(test_text, "is Good Mean!!")
i hate me is Bad Mean...
# %%
# code by Tae Hwan Jung @graykode
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
class TextCNN(nn.Module):
    """TextCNN classifier with one convolution branch per n-gram window size.

    Each branch convolves the embedded sentence with kernels spanning the
    full embedding width, applies ReLU and max-pools over the whole time
    axis. The pooled features of all branches are concatenated and mapped to
    class scores by a bias-free linear layer plus a separate bias vector.

    Args:
        n_vocab: Number of rows of the embedding table. Defaults to the
            module-level ``vocab_size``.
        n_embed: Width of each word vector. Defaults to the module-level
            ``embedding_size``.
        n_classes: Number of output classes. Defaults to the module-level
            ``num_classes``.
        n_filters: Kernels per window size. Defaults to the module-level
            ``num_filters``.
        kernel_heights: Sequence of window sizes (kernel heights). Defaults
            to the module-level ``filter_sizes``.
    """

    def __init__(self, n_vocab=None, n_embed=None, n_classes=None,
                 n_filters=None, kernel_heights=None):
        super(TextCNN, self).__init__()
        # Fall back to the module-level configuration for omitted arguments so
        # that a bare ``TextCNN()`` keeps working.
        n_vocab = vocab_size if n_vocab is None else n_vocab
        n_embed = embedding_size if n_embed is None else n_embed
        n_classes = num_classes if n_classes is None else n_classes
        n_filters = num_filters if n_filters is None else n_filters
        kernel_heights = filter_sizes if kernel_heights is None else kernel_heights

        self.num_filters_total = n_filters * len(kernel_heights)
        self.W = nn.Embedding(n_vocab, n_embed)
        self.Weight = nn.Linear(self.num_filters_total, n_classes, bias=False)
        self.Bias = nn.Parameter(torch.ones([n_classes]))
        self.filter_list = nn.ModuleList([
            nn.Conv2d(1, n_filters, kernel_size=(size, n_embed))
            for size in kernel_heights
        ])

    def forward(self, X):
        """Compute class scores for a batch of index sequences.

        Args:
            X: Integer tensor of word indices, [batch_size, sequence_length].
                The sequence must be at least as long as the largest window
                size, otherwise the convolution raises.

        Returns:
            Tensor of unnormalised class scores, [batch_size, n_classes].
        """
        embedded_chars = self.W(X)  # [batch_size, sequence_length, embedding_size]
        # Add the single input channel expected by Conv2d.
        embedded_chars = embedded_chars.unsqueeze(1)  # [batch, 1, sequence_length, embedding_size]

        pooled_outputs = []
        for conv in self.filter_list:
            # h: [batch, n_filters, sequence_length - size + 1, 1]
            h = F.relu(conv(embedded_chars))
            # Pool over the whole time axis; taking the height from `h` keeps
            # this correct for any input length, not just the global default.
            pooled = F.max_pool2d(h, kernel_size=(h.size(2), 1))  # [batch, n_filters, 1, 1]
            pooled_outputs.append(pooled.permute(0, 3, 2, 1))  # [batch, 1, 1, n_filters]

        # Concatenate along the channel (last) axis; this must not depend on
        # how many window sizes there are.
        h_pool = torch.cat(pooled_outputs, dim=-1)  # [batch, 1, 1, n_filters * n_windows]
        h_pool_flat = torch.reshape(h_pool, [-1, self.num_filters_total])  # [batch, num_filters_total]
        return self.Weight(h_pool_flat) + self.Bias  # [batch_size, n_classes]
if __name__ == '__main__':
    # Hyper-parameters (read by TextCNN as module-level settings).
    embedding_size = 2  # embedding size
    sequence_length = 3  # sequence length
    num_classes = 2  # number of classes
    # The paper uses window sizes 2, 3 and 4.
    filter_sizes = [2, 2, 2]  # n-gram windows; each kernel is [filter_size, embedding_size]
    num_filters = 3  # number of filters; 3 kernels turn the input into 3 channels

    # 3 words sentences (=sequence_length is 3)
    sentences = ["i love you", "he loves me", "she likes baseball", "i hate you", "sorry for that", "this is awful"]
    labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.

    # Vocabulary: unique words and their indices.
    word_list = " ".join(sentences).split()
    word_list = list(set(word_list))
    word_dict = {w: i for i, w in enumerate(word_list)}
    vocab_size = len(word_dict)

    model = TextCNN()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Plain nested lists convert directly to a [num_sentences, sequence_length] tensor.
    inputs = torch.LongTensor([[word_dict[n] for n in sen.split()] for sen in sentences])
    targets = torch.LongTensor(labels)  # class indices, as CrossEntropyLoss expects

    # Training
    for epoch in range(5000):
        optimizer.zero_grad()
        output = model(inputs)

        # output : [batch_size, num_classes], targets : [batch_size] (LongTensor, not one-hot)
        loss = criterion(output, targets)
        if (epoch + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss.item()))

        # Back-propagate and update the weights; without this nothing is learned.
        loss.backward()
        optimizer.step()

    # Test
    test_text = 'sorry hate you'
    tests = [[word_dict[n] for n in test_text.split()]]
    test_batch = torch.LongTensor(tests)

    # Predict
    model.eval()
    with torch.no_grad():  # inference only, no gradient tracking needed
        predict = model(test_batch).max(1, keepdim=True)[1]
    if predict[0][0] == 0:
        print(test_text, "is Bad Mean...")
    else:
        print(test_text, "is Good Mean!!")
【参考:手写AI出品: TextCNN文本分类,逐行代码复现! 可加UP免费答疑!_哔哩哔哩_bilibili】
