• GAN-Tutorial procedural record


    DCGAN

    This code defines a Deep Convolutional Generative Adversarial Network (DCGAN) for generating images of cars.

    1. import torch
    2. import torchvision
    3. import torch.nn as nn
    4. import torch.nn.functional as F
    5. from torch.utils.data import DataLoader
    6. from torch.utils.data.dataset import Dataset
    7. from torchvision import datasets
    8. from torchvision import transforms
    9. from torchvision.utils import save_image
    10. import numpy as np
    11. import datetime
    12. import os, sys
    13. import glob
    14. from PIL import Image
    15. from matplotlib.pyplot import imshow, imsave
    16. %matplotlib inline
    # Name of this model variant (not referenced elsewhere in the visible listing).
    MODEL_NAME = 'DCGAN'
    # Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
    DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Image shape as (height, width, channels): indices 0/1 feed Resize and
    # the generator's spatial size, index -1 is used as the channel count.
    IMAGE_DIM = (32, 32, 3)
    def get_sample_image(G, n_noise, n_samples=10):
        """Generate a batch of sample images from the generator.

        Nothing is written to disk; the images are only returned.

        Args:
            G: generator, called as ``G(z)`` with ``z`` of shape
                ``(n_samples, n_noise)``.
            n_noise: length of each latent noise vector.
            n_samples: number of images to generate (defaults to 10, the
                value that was previously hard-coded).

        Returns:
            numpy array of shape ``(n_samples, H, W, C)`` where
            ``(H, W, C) = IMAGE_DIM``, shifted by ``(x + 1) / 2`` -- i.e.
            into ``[0, 1]`` assuming the generator ends in a tanh.
        """
        height, width, channels = IMAGE_DIM
        z = torch.randn(n_samples, n_noise).to(DEVICE)
        # Take the image shape from IMAGE_DIM: a hard-coded 28x28 view cannot
        # hold the 32x32 output that the current IMAGE_DIM produces.
        y_hat = G(z).view(n_samples, channels, height, width)
        # (N, C, H, W) -> (N, H, W, C), the channel-last layout used for plotting.
        y_hat = y_hat.permute(0, 2, 3, 1)
        result = (y_hat.detach().cpu().numpy() + 1) / 2.
        return result
    class Discriminator(nn.Module):
        """Convolutional discriminator that scores images as real or fake.

        Four stride-2 convolutions followed by global average pooling, so any
        input size works (e.g. 28x28 MNIST digits or 32x32 RGB images).

        Args:
            in_channel: number of channels of the input images.
            num_classes: unused; kept so existing call sites keep working.
        """

        def __init__(self, in_channel=1, num_classes=1):
            super(Discriminator, self).__init__()
            # Each conv (kernel 3, stride 2, padding 1) maps a side of length
            # H to ceil(H / 2): 32 -> 16 -> 8 -> 4 -> 2 (or 28 -> 14 -> 7 -> 4 -> 2).
            self.conv = nn.Sequential(
                nn.Conv2d(in_channel, 512, 3, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(512),
                nn.LeakyReLU(0.2),
                nn.Conv2d(512, 256, 3, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(256),
                nn.LeakyReLU(0.2),
                nn.Conv2d(256, 128, 3, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(128),
                nn.LeakyReLU(0.2),
                nn.Conv2d(128, 128, 3, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(128),
                nn.LeakyReLU(0.2),
                # Collapse the remaining spatial grid to 1x1 per channel.
                nn.AdaptiveAvgPool2d(1),
            )
            self.fc = nn.Sequential(
                # 128 pooled features -> one probability
                nn.Linear(128, 1),
                nn.Sigmoid(),
            )

        def forward(self, x, y=None):
            """Return a ``(batch, 1)`` tensor of sigmoid scores for images ``x``.

            ``y`` is accepted for interface compatibility but is not used.
            """
            y_ = self.conv(x)
            # (batch, 128, 1, 1) -> (batch, 128)
            y_ = y_.view(y_.size(0), -1)
            y_ = self.fc(y_)
            # NOTE: the listing had an unreachable `return result` after this
            # line; it belongs to get_sample_image and was dropped from here.
            return y_
    class Generator(nn.Module):
        """Convolutional generator mapping latent vectors to images.

        A linear layer produces a ``512 x (H/16) x (W/16)`` feature map, which
        four stride-2 transposed convolutions upsample by 16x to ``H x W``.

        Args:
            out_channel: number of channels of the generated images.
            input_size: length of the latent noise vector.
            num_classes: unused; kept so existing call sites keep working.
            image_dim: optional ``(height, width, ...)`` override; when None
                the module-level ``IMAGE_DIM`` is used, as before.

        Raises:
            ValueError: if the height or width is not divisible by 16.
        """

        def __init__(self, out_channel=1, input_size=100, num_classes=784,
                     image_dim=None):
            super(Generator, self).__init__()
            if image_dim is None:
                image_dim = IMAGE_DIM
            height, width = image_dim[0], image_dim[1]
            # Both sides must survive four exact x2 upsamplings; previously
            # only the height was checked (and only via a strippable assert).
            if height % 2**4 != 0 or width % 2**4 != 0:
                raise ValueError(
                    'image height and width must be divisible by 16, got '
                    '{}x{}'.format(height, width)
                )
            self.init_dim = (height // 2**4, width // 2**4)
            self.fc = nn.Sequential(
                nn.Linear(input_size, self.init_dim[0] * self.init_dim[1] * 512),
                nn.ReLU(),
            )
            self.conv = nn.Sequential(
                nn.Conv2d(512, 512, 3, padding=1, bias=False),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                # x2 (kernel 4, stride 2, padding 1 doubles each side)
                nn.ConvTranspose2d(512, 256, 4, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                # x2
                nn.ConvTranspose2d(256, 128, 4, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                # x2
                nn.ConvTranspose2d(128, 128, 4, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                # x2
                nn.ConvTranspose2d(128, out_channel, 4, stride=2, padding=1, bias=False),
                # tanh keeps pixel values in [-1, 1]
                nn.Tanh(),
            )

        def forward(self, x, y=None):
            """Return images of shape ``(batch, out_channel, H, W)`` for noise ``x``.

            ``y`` is accepted for interface compatibility but is not used.
            """
            x = x.view(x.size(0), -1)
            y_ = self.fc(x)
            # Flat features -> (batch, 512, H/16, W/16) seed feature map.
            y_ = y_.view(y_.size(0), 512, self.init_dim[0], self.init_dim[1])
            y_ = self.conv(y_)
            return y_
    class CARS(Dataset):
        """CARS dataset: the ``*.jpg`` images found directly in a directory.

        You should download this dataset from the url below.
        url: https://ai.stanford.edu/~jkrause/cars/car_dataset.html
        """

        # Image numbers left out of the dataset (``gray_lst`` in the original
        # listing -- presumably grayscale images that would break the
        # 3-channel Normalize; TODO confirm).
        GRAY_IMAGE_IDS = (
            266, 1085, 2176, 3048, 3439, 3469, 3539, 4577, 4848,
            5177, 5502, 5713, 6947, 7383, 7693, 7774, 8137, 8144,
        )

        def __init__(self, data_path, transform=None):
            """
            Args:
                data_path (str): path to dataset
                transform: optional callable applied to each PIL image.
            """
            self.data_path = data_path
            self.transform = transform
            excluded = {
                os.path.join(data_path, '{:05d}.jpg'.format(num))
                for num in self.GRAY_IMAGE_IDS
            }
            # Filter through a set instead of list.remove(): remove() raised
            # ValueError as soon as one listed image was missing from disk.
            self.fpaths = [
                fpath
                for fpath in sorted(glob.glob(os.path.join(data_path, '*.jpg')))
                if fpath not in excluded
            ]

        def __getitem__(self, idx):
            """Return image ``idx``, transformed when a transform was given."""
            img = Image.open(self.fpaths[idx])
            # transform defaults to None; calling it unconditionally crashed.
            if self.transform is not None:
                img = self.transform(img)
            return img

        def __len__(self):
            """Return the number of images kept after the exclusion filter."""
            return len(self.fpaths)
    # Build both networks for images with IMAGE_DIM[-1] channels.
    D = Discriminator(in_channel=IMAGE_DIM[-1]).to(DEVICE)
    G = Generator(out_channel=IMAGE_DIM[-1]).to(DEVICE)
    # To resume from saved weights, load the checkpoint first: load_state_dict
    # expects a state dict, not a file path (assumes the .pkl files were
    # written with torch.save(model.state_dict(), ...) -- confirm).
    # D.load_state_dict(torch.load('D_dc.pkl'))
    # G.load_state_dict(torch.load('G_dc.pkl'))

    # Resize to the training resolution, convert to a tensor, then normalize
    # each channel with mean 0.5 / std 0.5.
    transform = transforms.Compose([
        transforms.Resize((IMAGE_DIM[0], IMAGE_DIM[1])),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
    dataset = CARS(data_path='/home/yangyangii/git/cars_train', transform=transform)
    逐行解释代码:

    1. 这段代码定义了DCGAN的判别器(`Discriminator`)和生成器(`Generator`)模型、用于读取汽车图像的`CARS`数据集类,以及一个获取样本图像的函数。
    2. `MODEL_NAME`是DCGAN模型的名称;`DEVICE`根据CUDA是否可用选择`cuda:0`或`cpu`作为计算设备;`IMAGE_DIM`是图像的维度(高、宽、通道数),此处为(32, 32, 3)。
    3. `get_sample_image`函数用于生成样本图像:代码生成的是10张而不是100张,并且只返回数组,不会写入磁盘。首先从标准正态分布中采样形状为(10, `n_noise`)的随机张量`z`,并将其发送到设备上。然后将`z`输入生成器`G`得到生成的图像`y_hat`,将其reshape为(10, 3, 28, 28)的形状,并把维度重新排列为(10, 28, 28, 3)。最后把生成的图像转换为numpy数组,通过(x+1)/2将取值范围从[-1, 1]映射到[0, 1],并返回该结果。
    4. `Discriminator`类是一个卷积判别器模型,这里通过`in_channel=IMAGE_DIM[-1]`用于3通道的汽车图像。该模型包含四个卷积层和一个全连接层。卷积层部分使用`nn.Conv2d`进行步长为2的卷积操作,`nn.BatchNorm2d`进行批归一化操作,`nn.LeakyReLU`进行LeakyReLU激活操作,最后用`nn.AdaptiveAvgPool2d`进行自适应平均池化,把每个通道压缩为1×1。全连接层部分使用`nn.Linear`把128维特征映射为1维,再用`nn.Sigmoid`进行Sigmoid激活操作。
    5. `forward`函数是判别器模型的前向传播方法。输入`x`经过卷积层后被展平为(batch, 128)的形状,再经过全连接层和Sigmoid得到取值在0到1之间的输出`y_`并返回。参数`y`在该方法中没有被使用,代码中也没有softmax操作。
    6. `return result`属于`get_sample_image`函数,用于返回归一化后的样本图像数组。

     

  • 相关阅读:
    每天学习一个Linux命令之gzip
    在 Spring Boot 中使用 JDBI
    【Django-GAGA系统】添加用户时自动追加权限-20220801
    阿里架构师吐血整理:从源码到架构的Spring全系列笔记,已全部分享
    k8s笔记20--基于 K8S 的 cicd 概述
    GitLab EE 企业版破解
    Python入门学习15(面向对象)
    JVM性能调优
    day01-GUI坦克大战01
    system_error错误处理库学习
  • 原文地址:https://blog.csdn.net/qq_53826699/article/details/132639747