Core idea: the paper proposes a new framework for estimating generative models via an adversarial process. Two models are trained simultaneously: a generative model G that captures the data distribution, and a discriminative model D that estimates the probability that a sample came from the training data rather than from the generator. G is trained to maximize the probability that D makes a mistake. The framework corresponds to a minimax two-player game. In the space of arbitrary functions G and D, a unique solution exists in which G recovers the training data distribution and D equals 1/2 everywhere.
Note: the output of D is a probability, namely the probability that a sample comes from the training data rather than from the generator.
(1) For the discriminator D:
1. Maximize the probability of classifying real images as real when real images are fed to D, i.e. maximize log D(x).
2. Minimize the probability of classifying the fake images produced by G as real when they are fed to D, i.e. maximize log(1 - D(G(z))).
(2) For the generator G, the goal is to fool the discriminator into classifying the generated fake images as real, i.e. maximize D(G(z)), which is equivalent to minimizing log(1 - D(G(z))).
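Putting (1) and (2) together gives the standard minimax objective from the original GAN paper (written here in LaTeX for reference):

\min_G \max_D V(D, G) = \mathbb{E}_{x \sim p_{\text{data}}(x)}[\log D(x)] + \mathbb{E}_{z \sim p_z(z)}[\log(1 - D(G(z)))]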
Code implementation:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File              : test_gan.py
# Author            : none
# Date              : 14.04.2022
# Last Modified Date: 15.04.2022
# Last Modified By  : none
"""A generative adversarial network (GAN) implemented on MNIST."""

import torch
import torchvision
import torch.nn as nn
import numpy as np

image_size = [1, 28, 28]
latent_dim = 96
batch_size = 64
use_gpu = torch.cuda.is_available()

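# The generator is a plain MLP that maps a latent vector z to a flattened 28x28
# image; the final Sigmoid keeps pixel values in [0, 1], matching the
# un-normalized MNIST tensors produced by ToTensor().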
class Generator(nn.Module):

    def __init__(self):
        super(Generator, self).__init__()

        self.model = nn.Sequential(
            nn.Linear(latent_dim, 128),
            nn.BatchNorm1d(128),
            nn.GELU(),

            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.GELU(),
            nn.Linear(256, 512),
            nn.BatchNorm1d(512),
            nn.GELU(),
            nn.Linear(512, 1024),
            nn.BatchNorm1d(1024),
            nn.GELU(),
            nn.Linear(1024, np.prod(image_size, dtype=np.int32)),
            # nn.Tanh(),
            nn.Sigmoid(),
        )

    def forward(self, z):
        # shape of z: [batchsize, latent_dim]
        output = self.model(z)
        image = output.reshape(z.shape[0], *image_size)
        return image


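# The discriminator is an MLP that flattens the input image and outputs a single
# probability (via Sigmoid) that the sample comes from the real data.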
class Discriminator(nn.Module):

    def __init__(self):
        super(Discriminator, self).__init__()

        self.model = nn.Sequential(
            nn.Linear(np.prod(image_size, dtype=np.int32), 512),
            nn.GELU(),
            nn.Linear(512, 256),
            nn.GELU(),
            nn.Linear(256, 128),
            nn.GELU(),
            nn.Linear(128, 64),
            nn.GELU(),
            nn.Linear(64, 32),
            nn.GELU(),
            nn.Linear(32, 1),
            nn.Sigmoid(),
        )

    def forward(self, image):
        # shape of image: [batchsize, 1, 28, 28]
        prob = self.model(image.reshape(image.shape[0], -1))
        return prob


# Training
dataset = torchvision.datasets.MNIST(
    r"D:\1APythonSpace\Use_model\gan\data\mnist", train=True, download=True,
    transform=torchvision.transforms.Compose(
        [
            torchvision.transforms.Resize(28),
            torchvision.transforms.ToTensor(),
            # torchvision.transforms.Normalize([0.5], [0.5]),
        ]
    ),
)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)

generator = Generator()
discriminator = Discriminator()

g_optimizer = torch.optim.Adam(generator.parameters(), lr=0.0003, betas=(0.4, 0.8), weight_decay=0.0001)
d_optimizer = torch.optim.Adam(discriminator.parameters(), lr=0.0003, betas=(0.4, 0.8), weight_decay=0.0001)

loss_fn = nn.BCELoss()
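# Fixed label tensors reused at every step: 1 = "real", 0 = "fake" targets for BCELoss.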
labels_one = torch.ones(batch_size, 1)
labels_zero = torch.zeros(batch_size, 1)

if use_gpu:
    print("use gpu for training")
    generator = generator.cuda()
    discriminator = discriminator.cuda()
    loss_fn = loss_fn.cuda()
    labels_one = labels_one.to("cuda")
    labels_zero = labels_zero.to("cuda")

num_epoch = 200
for epoch in range(num_epoch):
    for i, mini_batch in enumerate(dataloader):
        gt_images, _ = mini_batch

        z = torch.randn(batch_size, latent_dim)

        if use_gpu:
            gt_images = gt_images.to("cuda")
            z = z.to("cuda")

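        # Generator update: with labels_one as the target, BCELoss equals
        # -log D(G(z)), i.e. the non-saturating generator loss; the small L1
        # reconstruction term is an extra stabilizer on top of the GAN objective.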
        pred_images = generator(z)
        g_optimizer.zero_grad()

        recons_loss = torch.abs(pred_images - gt_images).mean()

        g_loss = recons_loss*0.05 + loss_fn(discriminator(pred_images), labels_one)

        g_loss.backward()
        g_optimizer.step()

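        # Discriminator update: detach() keeps gradients from flowing back into
        # the generator; D is pushed towards 1 on real images and towards 0 on
        # generated ones.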
        d_optimizer.zero_grad()

        real_loss = loss_fn(discriminator(gt_images), labels_one)
        fake_loss = loss_fn(discriminator(pred_images.detach()), labels_zero)
        d_loss = (real_loss + fake_loss)

        # Watch real_loss and fake_loss: when they decrease together, reach their
        # minimum together, and stay roughly equal, D has stabilized.

        d_loss.backward()
        d_optimizer.step()

        if i % 50 == 0:
            print(f"step:{len(dataloader)*epoch+i}, recons_loss:{recons_loss.item()}, g_loss:{g_loss.item()}, d_loss:{d_loss.item()}, real_loss:{real_loss.item()}, fake_loss:{fake_loss.item()}")

        if i % 400 == 0:
            image = pred_images[:16].data
            torchvision.utils.save_image(image, f"image_{len(dataloader)*epoch+i}.png", nrow=4)

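After training, new digits can be generated by pushing random latent vectors through the generator. A minimal sketch, assuming it is appended after the training loop above so that generator, latent_dim and use_gpu are still in scope:

# Sampling sketch (assumption: appended after the training loop above).
with torch.no_grad():
    generator.eval()                   # use BatchNorm running statistics
    z = torch.randn(16, latent_dim)
    if use_gpu:
        z = z.to("cuda")
    samples = generator(z)             # [16, 1, 28, 28], pixel values in [0, 1]
    torchvision.utils.save_image(samples, "samples.png", nrow=4)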