Paper: Deep Domain Confusion: Maximizing for Domain Invariance
https://arxiv.org/abs/1412.3474
Source code: https://github.com/guoXuehong77/transferlearning/tree/master/code/deep/DDC_DeepCoral
The repository is mainly a reproduction of Deep CORAL: Correlation Alignment for Deep Domain Adaptation, but simply swapping the CORAL loss for an MMD loss turns it into a reproduction of DDC.
The DDC architecture uses an adaptation layer together with a domain confusion loss based on the maximum mean discrepancy (MMD) to learn a jointly trained representation that is optimized for both classification and domain invariance. The paper inserts an fc_adapt layer after AlexNet's fc7; the source code actually implements several backbones, including alexnet and resnet18 through resnet152.
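In symbols, the joint objective from the paper combines the source classification loss with an MMD penalty between source and target representations, traded off by λ (lamb in the code):

$$\mathcal{L} = \mathcal{L}_C(X_L, y) + \lambda\,\mathrm{MMD}^2(X_S, X_T)$$

where $X_L$ are the labeled source examples and $X_S$, $X_T$ are the source and target data passing through the adaptation layer.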
1. backbone.py
This file defines several classic CNN backbones: alexnet, resnet18, resnet34, resnet50, resnet101, and resnet152, each initialized from an ImageNet-pretrained model and stripped of its final classification layer.
- import numpy as np
- import torch
- import torch.nn as nn
- import torchvision
- from torchvision import models
- from torch.autograd import Variable
-
-
- # convnet without the last layer
- class AlexNetFc(nn.Module):
- def __init__(self):
- super(AlexNetFc, self).__init__()
- model_alexnet = models.alexnet(pretrained=True)
- self.features = model_alexnet.features
- self.classifier = nn.Sequential()
- for i in range(6):
- self.classifier.add_module(
- "classifier"+str(i), model_alexnet.classifier[i])
- self.__in_features = model_alexnet.classifier[6].in_features
-
- def forward(self, x):
- x = self.features(x)
- x = x.view(x.size(0), 256*6*6)
- x = self.classifier(x)
- return x
-
- def output_num(self):
- return self.__in_features
-
-
- class ResNet18Fc(nn.Module):
- def __init__(self):
- super(ResNet18Fc, self).__init__()
- model_resnet18 = models.resnet18(pretrained=True)
- self.conv1 = model_resnet18.conv1
- self.bn1 = model_resnet18.bn1
- self.relu = model_resnet18.relu
- self.maxpool = model_resnet18.maxpool
- self.layer1 = model_resnet18.layer1
- self.layer2 = model_resnet18.layer2
- self.layer3 = model_resnet18.layer3
- self.layer4 = model_resnet18.layer4
- self.avgpool = model_resnet18.avgpool
- self.__in_features = model_resnet18.fc.in_features
-
- def forward(self, x):
- x = self.conv1(x)
- x = self.bn1(x)
- x = self.relu(x)
- x = self.maxpool(x)
- x = self.layer1(x)
- x = self.layer2(x)
- x = self.layer3(x)
- x = self.layer4(x)
- x = self.avgpool(x)
- x = x.view(x.size(0), -1)
- return x
-
- def output_num(self):
- return self.__in_features
-
-
- class ResNet34Fc(nn.Module):
- def __init__(self):
- super(ResNet34Fc, self).__init__()
- model_resnet34 = models.resnet34(pretrained=True)
- self.conv1 = model_resnet34.conv1
- self.bn1 = model_resnet34.bn1
- self.relu = model_resnet34.relu
- self.maxpool = model_resnet34.maxpool
- self.layer1 = model_resnet34.layer1
- self.layer2 = model_resnet34.layer2
- self.layer3 = model_resnet34.layer3
- self.layer4 = model_resnet34.layer4
- self.avgpool = model_resnet34.avgpool
- self.__in_features = model_resnet34.fc.in_features
-
- def forward(self, x):
- x = self.conv1(x)
- x = self.bn1(x)
- x = self.relu(x)
- x = self.maxpool(x)
- x = self.layer1(x)
- x = self.layer2(x)
- x = self.layer3(x)
- x = self.layer4(x)
- x = self.avgpool(x)
- x = x.view(x.size(0), -1)
- return x
-
- def output_num(self):
- return self.__in_features
-
-
- class ResNet50Fc(nn.Module):
- def __init__(self):
- super(ResNet50Fc, self).__init__()
- model_resnet50 = models.resnet50(pretrained=True)
- self.conv1 = model_resnet50.conv1
- self.bn1 = model_resnet50.bn1
- self.relu = model_resnet50.relu
- self.maxpool = model_resnet50.maxpool
- self.layer1 = model_resnet50.layer1
- self.layer2 = model_resnet50.layer2
- self.layer3 = model_resnet50.layer3
- self.layer4 = model_resnet50.layer4
- self.avgpool = model_resnet50.avgpool
- self.__in_features = model_resnet50.fc.in_features
-
- def forward(self, x):
- x = self.conv1(x)
- x = self.bn1(x)
- x = self.relu(x)
- x = self.maxpool(x)
- x = self.layer1(x)
- x = self.layer2(x)
- x = self.layer3(x)
- x = self.layer4(x)
- x = self.avgpool(x)
- x = x.view(x.size(0), -1)
- return x
-
- def output_num(self):
- return self.__in_features
-
-
- class ResNet101Fc(nn.Module):
- def __init__(self):
- super(ResNet101Fc, self).__init__()
- model_resnet101 = models.resnet101(pretrained=True)
- self.conv1 = model_resnet101.conv1
- self.bn1 = model_resnet101.bn1
- self.relu = model_resnet101.relu
- self.maxpool = model_resnet101.maxpool
- self.layer1 = model_resnet101.layer1
- self.layer2 = model_resnet101.layer2
- self.layer3 = model_resnet101.layer3
- self.layer4 = model_resnet101.layer4
- self.avgpool = model_resnet101.avgpool
- self.__in_features = model_resnet101.fc.in_features
-
- def forward(self, x):
- x = self.conv1(x)
- x = self.bn1(x)
- x = self.relu(x)
- x = self.maxpool(x)
- x = self.layer1(x)
- x = self.layer2(x)
- x = self.layer3(x)
- x = self.layer4(x)
- x = self.avgpool(x)
- x = x.view(x.size(0), -1)
- return x
-
- def output_num(self):
- return self.__in_features
-
-
- class ResNet152Fc(nn.Module):
- def __init__(self):
- super(ResNet152Fc, self).__init__()
- model_resnet152 = models.resnet152(pretrained=True)
- self.conv1 = model_resnet152.conv1
- self.bn1 = model_resnet152.bn1
- self.relu = model_resnet152.relu
- self.maxpool = model_resnet152.maxpool
- self.layer1 = model_resnet152.layer1
- self.layer2 = model_resnet152.layer2
- self.layer3 = model_resnet152.layer3
- self.layer4 = model_resnet152.layer4
- self.avgpool = model_resnet152.avgpool
- self.__in_features = model_resnet152.fc.in_features
-
- def forward(self, x):
- x = self.conv1(x)
- x = self.bn1(x)
- x = self.relu(x)
- x = self.maxpool(x)
- x = self.layer1(x)
- x = self.layer2(x)
- x = self.layer3(x)
- x = self.layer4(x)
- x = self.avgpool(x)
- x = x.view(x.size(0), -1)
- return x
-
- def output_num(self):
- return self.__in_features
-
-
- network_dict = {"alexnet": AlexNetFc,
- "resnet18": ResNet18Fc,
- "resnet34": ResNet34Fc,
- "resnet50": ResNet50Fc,
- "resnet101": ResNet101Fc,
- "resnet152": ResNet152Fc}
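As a quick usage sketch (mine, not from the repo), a backbone can be looked up in network_dict by name; output_num() reports the feature dimension that downstream layers should expect:

- import torch
- import backbone
-
- # instantiate a ResNet-50 feature extractor and check its output size
- net = backbone.network_dict['resnet50']()
- net.eval()
- with torch.no_grad():
-     feats = net(torch.randn(2, 3, 224, 224))  # dummy batch of two RGB images
- print(feats.shape)       # torch.Size([2, 2048])
- print(net.output_num())  # 2048, the in_features of the removed fc layer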
2. data_loader.py
The data-loading code. Training images are resized to [256, 256] and then randomly cropped to [224, 224], followed by random horizontal flipping, conversion to a tensor, and normalization; test images are resized directly to [224, 224], then converted to a tensor and normalized.
- from torchvision import datasets, transforms
- import torch
-
- def load_data(data_folder, batch_size, train, kwargs):
- transform = {
- 'train': transforms.Compose(
- [transforms.Resize([256, 256]),
- transforms.RandomCrop(224),
- transforms.RandomHorizontalFlip(),
- transforms.ToTensor(),
- transforms.Normalize(mean=[0.485, 0.456, 0.406],
- std=[0.229, 0.224, 0.225])]),
- 'test': transforms.Compose(
- [transforms.Resize([224, 224]),
- transforms.ToTensor(),
- transforms.Normalize(mean=[0.485, 0.456, 0.406],
- std=[0.229, 0.224, 0.225])])
- }
- data = datasets.ImageFolder(root = data_folder, transform=transform['train' if train else 'test'])
- data_loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=True, **kwargs, drop_last = True if train else False)
- return data_loader
-
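A minimal usage sketch of load_data (the folder path is a placeholder of mine; each domain directory must follow the ImageFolder layout, one subdirectory per class):

- import torch
- from data_loader import load_data
-
- kwargs = {'num_workers': 1}
- source_loader = load_data('/data/office31/amazon', batch_size=32, train=True, kwargs=kwargs)
- images, labels = next(iter(source_loader))
- print(images.shape)  # torch.Size([32, 3, 224, 224])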
3. mmd.py
This file implements the MMD loss proposed in the paper. MMD (maximum mean discrepancy) is currently the most widely used loss in transfer learning, and in domain adaptation in particular; it measures the distance between two different but related distributions. Given source samples $X = \{x_1, \dots, x_n\}$ and target samples $Y = \{y_1, \dots, y_m\}$, the squared distance is defined as

$$\mathrm{MMD}^2(X, Y) = \Bigl\| \frac{1}{n}\sum_{i=1}^{n}\phi(x_i) - \frac{1}{m}\sum_{j=1}^{m}\phi(y_j) \Bigr\|_{\mathcal{H}}^2,$$

where $\phi(\cdot)$ maps samples into a reproducing kernel Hilbert space. For a line-by-line explanation of the implementation, see the CSDN post 【代码阅读】最大均值差异(Maximum Mean Discrepancy, MMD)损失函数代码解读(Pytroch版) by Vincent_gc.
- import torch
- import torch.nn as nn
-
-
- class MMD_loss(nn.Module):
- def __init__(self, kernel_type='rbf', kernel_mul=2.0, kernel_num=5):
-         '''
-         source: source-domain data (n * len(x))
-         target: target-domain data (m * len(y))
-         kernel_mul: multiplicative factor between successive kernel bandwidths
-         kernel_num: number of Gaussian kernels
-         fix_sigma: fixed sigma for the kernels (None = estimate the bandwidth from the data)
-         '''
- super(MMD_loss, self).__init__()
- self.kernel_num = kernel_num
- self.kernel_mul = kernel_mul
- self.fix_sigma = None
- self.kernel_type = kernel_type
-
-     def guassian_kernel(self, source, target, kernel_mul=2.0, kernel_num=5, fix_sigma=None):
-         # total number of rows; source and target usually have the same size, which simplifies things
-         n_samples = int(source.size()[0]) + int(target.size()[0])
-         total = torch.cat([source, target], dim=0)  # stack source and target along the batch dimension
-         # broadcast total against itself so that every pair of rows can be compared
-         total0 = total.unsqueeze(0).expand(
-             int(total.size(0)), int(total.size(0)), int(total.size(1)))
-         total1 = total.unsqueeze(1).expand(
-             int(total.size(0)), int(total.size(0)), int(total.size(1)))
-         # entry (i, j) is the squared L2 distance between row i and row j of total (0 when i == j)
-         L2_distance = ((total0-total1)**2).sum(2)
-         # bandwidth (sigma) of the Gaussian kernels
-         if fix_sigma:
-             bandwidth = fix_sigma
-         else:
-             # mean of the off-diagonal pairwise distances
-             bandwidth = torch.sum(L2_distance.data) / (n_samples**2-n_samples)
-         # take kernel_num bandwidths centered on the estimate, spaced by factors of kernel_mul
-         # (e.g. a base bandwidth of 1 yields [0.25, 0.5, 1, 2, 4])
-         bandwidth /= kernel_mul ** (kernel_num // 2)
-         bandwidth_list = [bandwidth * (kernel_mul**i)
-                           for i in range(kernel_num)]
-         # Gaussian kernel: exp(-||x - y||^2 / bandwidth)
-         kernel_val = [torch.exp(-L2_distance / bandwidth_temp)
-                       for bandwidth_temp in bandwidth_list]
-         # the final kernel matrix is the sum over all bandwidths (multi-kernel MMD)
-         return sum(kernel_val)
-
- def linear_mmd2(self, f_of_X, f_of_Y):
- loss = 0.0
- delta = f_of_X.float().mean(0) - f_of_Y.float().mean(0)
- loss = delta.dot(delta.T)
- return loss
-
-     def forward(self, source, target):
-         if self.kernel_type == 'linear':
-             return self.linear_mmd2(source, target)
-         elif self.kernel_type == 'rbf':
-             batch_size = int(source.size()[0])  # source and target are assumed to use the same batch size
-             kernels = self.guassian_kernel(
-                 source, target, kernel_mul=self.kernel_mul, kernel_num=self.kernel_num, fix_sigma=self.fix_sigma)
-             # split the kernel matrix into its four blocks: K(s,s), K(t,t), K(s,t), K(t,s)
-             # note: torch.no_grad() detaches everything below from the autograd graph,
-             # so this RBF MMD term yields no gradient for the network
-             with torch.no_grad():
-                 XX = torch.mean(kernels[:batch_size, :batch_size])
-                 YY = torch.mean(kernels[batch_size:, batch_size:])
-                 XY = torch.mean(kernels[:batch_size, batch_size:])
-                 YX = torch.mean(kernels[batch_size:, :batch_size])
-                 loss = torch.mean(XX + YY - XY - YX)  # since usually n == m, the weighting matrix L is omitted
-             torch.cuda.empty_cache()
-             return loss
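A quick sanity check of MMD_loss (my sketch): samples from the same distribution should give a value near zero, and a mean shift should increase it. It also exposes the no_grad issue flagged above:

- import torch
- from mmd import MMD_loss
-
- torch.manual_seed(0)
- mmd_fn = MMD_loss(kernel_type='rbf')
- a = torch.randn(64, 256, requires_grad=True)
- b = torch.randn(64, 256)        # same distribution as a
- c = torch.randn(64, 256) + 1.0  # mean-shifted distribution
- print(mmd_fn(a, b).item())         # close to zero
- print(mmd_fn(a, c).item())         # clearly larger
- print(mmd_fn(a, c).requires_grad)  # False: the no_grad block detaches the loss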
4. models.py
- import torch.nn as nn
- from Coral import CORAL
- import mmd
- import backbone
-
-
- class Transfer_Net(nn.Module):
- def __init__(self, num_class, base_net='resnet50', transfer_loss='mmd', use_bottleneck=True, bottleneck_width=256, width=1024):
- super(Transfer_Net, self).__init__()
- self.base_network = backbone.network_dict[base_net]()
- self.use_bottleneck = use_bottleneck
- self.transfer_loss = transfer_loss
- bottleneck_list = [nn.Linear(self.base_network.output_num(
- ), bottleneck_width), nn.BatchNorm1d(bottleneck_width), nn.ReLU(), nn.Dropout(0.5)]
- self.bottleneck_layer = nn.Sequential(*bottleneck_list)
- classifier_layer_list = [nn.Linear(self.base_network.output_num(), width), nn.ReLU(), nn.Dropout(0.5),
- nn.Linear(width, num_class)]
- self.classifier_layer = nn.Sequential(*classifier_layer_list)
-
- self.bottleneck_layer[0].weight.data.normal_(0, 0.005)
- self.bottleneck_layer[0].bias.data.fill_(0.1)
- for i in range(2):
- self.classifier_layer[i * 3].weight.data.normal_(0, 0.01)
- self.classifier_layer[i * 3].bias.data.fill_(0.0)
-
- def forward(self, source, target):
- source = self.base_network(source)
- target = self.base_network(target)
- source_clf = self.classifier_layer(source)
- if self.use_bottleneck:
- source = self.bottleneck_layer(source)
- target = self.bottleneck_layer(target)
- transfer_loss = self.adapt_loss(source, target, self.transfer_loss)
- return source_clf, transfer_loss
-
- def predict(self, x):
- features = self.base_network(x)
- clf = self.classifier_layer(features)
- return clf
-
- def adapt_loss(self, X, Y, adapt_loss):
- """Compute adaptation loss, currently we support mmd and coral
- Arguments:
- X {tensor} -- source matrix
- Y {tensor} -- target matrix
- adapt_loss {string} -- loss type, 'mmd' or 'coral'. You can add your own loss
- Returns:
- [tensor] -- adaptation loss tensor
- """
- if adapt_loss == 'mmd':
- mmd_loss = mmd.MMD_loss()
- loss = mmd_loss(X, Y)
- elif adapt_loss == 'coral':
- loss = CORAL(X, Y)
- else:
- loss = 0
- return loss
Taking AlexNet as an example, the base_network defined in models.py simply calls the alexnet entry from backbone.py, i.e. conv1-5 plus fc6-7 of the original architecture; since this part is used by both the source and target domains with shared parameters, it serves as the base_network.
The bottleneck_layer is the fc_adapt layer from the paper: a Linear layer down to bottleneck_width (256 by default), followed by BatchNorm1d, ReLU, and Dropout. Note that in this implementation the classifier_layer takes the backbone features directly (its input size is base_network.output_num()), so the bottleneck output is used only for the transfer loss.
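A shape-level sketch of how the pieces fit together (the batch size and the resnet50 default are just for illustration):

- import torch
- from models import Transfer_Net
-
- model = Transfer_Net(num_class=31, base_net='resnet50', transfer_loss='mmd')
- model.train()
- src = torch.randn(8, 3, 224, 224)  # source batch
- tgt = torch.randn(8, 3, 224, 224)  # target batch
- logits, transfer_loss = model(src, tgt)
- print(logits.shape)          # torch.Size([8, 31]): class scores for the source batch
- print(float(transfer_loss))  # scalar MMD between the bottleneck features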
5. main.py
(1) Argument parsing
- import argparse
-
- # Command setting
- parser = argparse.ArgumentParser(description='DDC_DCORAL')
- parser.add_argument('--model', type=str, default='alexnet')
- parser.add_argument('--batchsize', type=int, default=2)
- parser.add_argument('--src', type=str, default='amazon')
- parser.add_argument('--tar', type=str, default='webcam')
- parser.add_argument('--n_class', type=int, default=31)
- parser.add_argument('--lr', type=float, default=1e-3)
- parser.add_argument('--n_epoch', type=int, default=100)
- parser.add_argument('--momentum', type=float, default=0.9)
- parser.add_argument('--decay', type=float, default=5e-4)
- parser.add_argument('--data', type=str, default=r'E:\Myself\Office31\Original_images')  # raw string keeps the backslashes literal
- parser.add_argument('--early_stop', type=int, default=20)
- parser.add_argument('--lamb', type=float, default=10)
- parser.add_argument('--trans_loss', type=str, default='mmd')
- args = parser.parse_args()
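To reproduce the DDC setting (MMD loss) rather than Deep CORAL, the relevant flags are --trans_loss mmd plus a backbone of choice; for example (the values and data path are illustrative):

- # equivalent to: python main.py --model alexnet --trans_loss mmd --batchsize 32 --data /data/office31
- args = parser.parse_args(['--model', 'alexnet', '--trans_loss', 'mmd',
-                           '--batchsize', '32', '--data', '/data/office31'])
- print(args.model, args.trans_loss, args.batchsize)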
(2) The test function
model.eval() disables the training-time behavior of Batch Normalization and Dropout, so it should be called before evaluation whenever the model contains BN or Dropout layers. In eval mode, BN normalizes with the running mean and variance accumulated over the whole training set, keeping them fixed during testing; Dropout uses all connections instead of randomly dropping neurons (see the small demo after the function below).
- def test(model, target_test_loader):
- model.eval()
- test_loss = utils.AverageMeter()
- correct = 0
- criterion = torch.nn.CrossEntropyLoss()
- len_target_dataset = len(target_test_loader.dataset)
- with torch.no_grad():
- for data, target in target_test_loader:
- data, target = data.to(DEVICE), target.to(DEVICE)
- s_output = model.predict(data)
- loss = criterion(s_output, target)
- test_loss.update(loss.item())
- pred = torch.max(s_output, 1)[1]
- correct += torch.sum(pred == target)
- acc = 100. * correct / len_target_dataset
- return acc
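A tiny demo of the eval/train difference for Dropout (independent of the repo):

- import torch
- import torch.nn as nn
-
- drop = nn.Dropout(p=0.5)
- x = torch.ones(1, 8)
- drop.train()
- print(drop(x))  # about half the entries zeroed, survivors scaled to 2.0
- drop.eval()
- print(drop(x))  # identity: all ones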
(3) The train function
model.train() enables the training-time behavior of Batch Normalization and Dropout and should be called before training when the model contains such layers. In train mode, BN normalizes each mini-batch with that batch's mean and variance (while updating the running statistics), and Dropout randomly drops a subset of connections on each forward pass.
- def train(source_loader, target_train_loader, target_test_loader, model, optimizer):
- len_source_loader = len(source_loader)
- len_target_loader = len(target_train_loader)
- best_acc = 0
- stop = 0
- for e in range(args.n_epoch):
- stop += 1
- train_loss_clf = utils.AverageMeter()
- train_loss_transfer = utils.AverageMeter()
- train_loss_total = utils.AverageMeter()
- model.train()
- iter_source, iter_target = iter(source_loader), iter(target_train_loader)
- n_batch = min(len_source_loader, len_target_loader)
- criterion = torch.nn.CrossEntropyLoss()
- for _ in range(n_batch):
-             data_source, label_source = next(iter_source)  # the original .next() is Python 2 style and fails on Python 3
-             data_target, _ = next(iter_target)
- data_source, label_source = data_source.to(
- DEVICE), label_source.to(DEVICE)
- data_target = data_target.to(DEVICE)
-
- optimizer.zero_grad()
- label_source_pred, transfer_loss = model(data_source, data_target)
- clf_loss = criterion(label_source_pred, label_source)
- loss = clf_loss + args.lamb * transfer_loss
- loss.backward()
- optimizer.step()
- train_loss_clf.update(clf_loss.item())
- train_loss_transfer.update(transfer_loss.item())
- train_loss_total.update(loss.item())
- # Test
- acc = test(model, target_test_loader)
- log.append([train_loss_clf.avg, train_loss_transfer.avg, train_loss_total.avg])
- np_log = np.array(log, dtype=float)
- np.savetxt('train_log.csv', np_log, delimiter=',', fmt='%.6f')
- print('Epoch: [{:2d}/{}], cls_loss: {:.4f}, transfer_loss: {:.4f}, total_Loss: {:.4f}, acc: {:.4f}'.format(
- e, args.n_epoch, train_loss_clf.avg, train_loss_transfer.avg, train_loss_total.avg, acc))
- if best_acc < acc:
- best_acc = acc
- stop = 0
- if stop >= args.early_stop:
- break
- print('Transfer result: {:.4f}'.format(best_acc))
(4) Data loading and main
-
- # num_workers lowered from the original 4 to 1 so the script runs on my machine
- # (this snippet assumes DEVICE, log, and the imports defined earlier in main.py)
- def load_data(src, tar, root_dir):
- folder_src = os.path.join(root_dir, src)
- folder_tar = os.path.join(root_dir, tar)
- source_loader = data_loader.load_data(
- folder_src, args.batchsize, True, {'num_workers': 1})
- target_train_loader = data_loader.load_data(
- folder_tar, args.batchsize, True, {'num_workers': 1})
- target_test_loader = data_loader.load_data(
- folder_tar, args.batchsize, False, {'num_workers': 1})
- return source_loader, target_train_loader, target_test_loader
-
-
- if __name__ == '__main__':
- torch.manual_seed(0)
-
-     source_name = "amazon"  # note: hardcoded here rather than read from args.src
-     target_name = "webcam"  # likewise, args.tar is not used
-
-     print('Src: %s, Tar: %s' % (source_name, target_name))
-     print('Backbone: %s, Loss: %s' % (args.model, args.trans_loss))
-
- source_loader, target_train_loader, target_test_loader = load_data(
- source_name, target_name, args.data)
-
- model = models.Transfer_Net(
- args.n_class, transfer_loss=args.trans_loss, base_net=args.model).to(DEVICE)
- print(model)
- optimizer = torch.optim.SGD([
- {'params': model.base_network.parameters()},
- {'params': model.bottleneck_layer.parameters(), 'lr': 10 * args.lr},
- {'params': model.classifier_layer.parameters(), 'lr': 10 * args.lr},
- ], lr=args.lr, momentum=args.momentum, weight_decay=args.decay)
-
- train(source_loader, target_train_loader,
- target_test_loader, model, optimizer)
Debugging~ it runs end to end, but the results are still quite far from those reported in the paper and on GitHub??
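Two possible culprits worth checking (my speculation, not a confirmed diagnosis): (a) the torch.no_grad() block in mmd.py detaches the RBF MMD from the autograd graph, so with --trans_loss mmd the transfer term never actually updates the network; (b) --batchsize 2 makes both the MMD estimate and the bottleneck's BatchNorm1d statistics extremely noisy. A minimal, untested patch for (a) is to compute the block means with gradients enabled:

- import torch
-
- # hypothetical replacement for the RBF branch of MMD_loss.forward
- def rbf_mmd2(self, source, target):
-     batch_size = int(source.size()[0])
-     kernels = self.guassian_kernel(
-         source, target, kernel_mul=self.kernel_mul,
-         kernel_num=self.kernel_num, fix_sigma=self.fix_sigma)
-     XX = torch.mean(kernels[:batch_size, :batch_size])
-     YY = torch.mean(kernels[batch_size:, batch_size:])
-     XY = torch.mean(kernels[:batch_size, batch_size:])
-     YX = torch.mean(kernels[batch_size:, :batch_size])
-     return XX + YY - XY - YX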