• [Reading and Studying Code] VoxelNet


    Converting point features into voxel features

    https://github.com/skyhehe123/VoxelNet-pytorch/blob/master/data/kitti.py

    [Python] np.unique(): introduction and usage
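
    The preprocess code below leans on np.unique(axis=0) to deduplicate voxel coordinates. A minimal sketch of the three return values it relies on (the coordinates here are made up for illustration):

    import numpy as np

    # three points, the first two of which fall into the same voxel cell
    coords = np.array([[0, 1, 2],
                       [0, 1, 2],
                       [3, 4, 5]])

    uniq, inv_ind, counts = np.unique(coords, axis=0, return_inverse=True, return_counts=True)
    print(uniq)     # [[0 1 2], [3 4 5]] -- the distinct voxel coordinates
    print(inv_ind)  # [0 0 1]            -- which unique row each input row maps to
    print(counts)   # [2 1]              -- how many points landed in each voxel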

    self.T : # maximum number of points per voxel

        def preprocess(self, lidar):

            # shuffle the points
            np.random.shuffle(lidar)

            # map each point's (x, y, z) onto integer voxel-grid indices
            voxel_coords = ((lidar[:, :3] - np.array([self.xrange[0], self.yrange[0], self.zrange[0]])) / (
                            self.vw, self.vh, self.vd)).astype(np.int32)

            # reorder (x, y, z) -> (z, y, x), i.e. (D, H, W)
            voxel_coords = voxel_coords[:, [2, 1, 0]]
            voxel_coords, inv_ind, voxel_counts = np.unique(voxel_coords, axis=0,
                                                            return_inverse=True, return_counts=True)

            voxel_features = []

            for i in range(len(voxel_coords)):
                voxel = np.zeros((self.T, 7), dtype=np.float32)
                pts = lidar[inv_ind == i]  # the points that fall into this voxel
                if voxel_counts[i] > self.T:
                    pts = pts[:self.T, :]
                    voxel_counts[i] = self.T
                # augment each point (x, y, z, r) with its offset from the voxel centroid -> 7 dims
                voxel[:pts.shape[0], :] = np.concatenate((pts, pts[:, :3] - np.mean(pts[:, :3], 0)), axis=1)
                voxel_features.append(voxel)
            return np.array(voxel_features), voxel_coords
    

    Input/output explanation: preprocess takes the raw lidar point cloud, an (N, 4) array of (x, y, z, reflectance), and returns voxel_features of shape (K, T, 7) together with voxel_coords of shape (K, 3) in (D, H, W) order, where K is the number of non-empty voxels.
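
    A quick shape check of preprocess, treating it as a plain function with self as its first argument and using a stub for the dataset's attributes (the ranges and voxel sizes below follow the VoxelNet paper's car setting, used here purely for illustration):

    import numpy as np

    class Stub:
        xrange, yrange, zrange = (0, 70.4), (-40, 40), (-3, 1)
        vw, vh, vd = 0.2, 0.2, 0.4   # voxel width / height / depth in meters
        T = 35                       # max points kept per voxel

    lidar = np.random.rand(1000, 4).astype(np.float32)  # fake (x, y, z, reflectance) points
    lidar[:, 0] = lidar[:, 0] * 70.4      # x in [0, 70.4)
    lidar[:, 1] = lidar[:, 1] * 80 - 40   # y in [-40, 40)
    lidar[:, 2] = lidar[:, 2] * 4 - 3     # z in [-3, 1)

    features, coords = preprocess(Stub(), lidar)
    print(features.shape)  # (K, 35, 7) -- K non-empty voxels
    print(coords.shape)    # (K, 3)     -- (d, h, w) integer indices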

    Sparse tensor to dense tensor, and the inverse index

    https://github.com/skyhehe123/VoxelNet-pytorch/blob/master/voxelnet.py

    How this works is explained in this article: [Code Learning] voxel or pillar — understanding the sparse-to-dense tensor conversion code (it took me a long time to understand).
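
    Before reading the full model, here is a stripped-down sketch of the sparse-to-dense scatter that voxel_indexing performs (tiny shapes for readability). Note the transpose: indexing dims 1-4 of the dense tensor with N-long index arrays yields a channel-first [C, N] slice, so the [N, C] features must be transposed to match:

    import torch

    C, B, D, H, W = 2, 1, 2, 3, 3
    sparse = torch.arange(6, dtype=torch.float32).view(3, C)   # 3 voxels, C features each
    coords = torch.tensor([[0, 0, 0, 0],                       # one (batch, d, h, w) row per voxel
                           [0, 1, 2, 1],
                           [0, 0, 1, 2]])

    dense = torch.zeros(C, B, D, H, W)
    dense[:, coords[:, 0], coords[:, 1], coords[:, 2], coords[:, 3]] = sparse.t()
    dense = dense.transpose(0, 1)    # -> [B, C, D, H, W]
    print(dense.shape)               # torch.Size([1, 2, 2, 3, 3])
    print(dense[0, :, 1, 2, 1])      # tensor([2., 3.]) -- the features of voxel 1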

    Code I studied together with ChatGPT:

    import torch.nn as nn
    import torch.nn.functional as F
    import torch
    from config import config as cfg
    
    # conv2d + bn + relu
    class Conv2d(nn.Module):
    
        def __init__(self,in_channels,out_channels,k,s,p, activation=True, batch_norm=True):
            super(Conv2d, self).__init__()
            self.conv = nn.Conv2d(in_channels,out_channels,kernel_size=k,stride=s,padding=p)
            if batch_norm:
                self.bn = nn.BatchNorm2d(out_channels)
            else:
                self.bn = None
            self.activation = activation
        def forward(self,x):
            x = self.conv(x)
            if self.bn is not None:
                x=self.bn(x)
            if self.activation:
                return F.relu(x,inplace=True)
            else:
                return x
    
    # conv3d + bn + relu
    class Conv3d(nn.Module):
    
        def __init__(self, in_channels, out_channels, k, s, p, batch_norm=True):
            super(Conv3d, self).__init__()
            self.conv = nn.Conv3d(in_channels, out_channels, kernel_size=k, stride=s, padding=p)
            if batch_norm:
                self.bn = nn.BatchNorm3d(out_channels)
            else:
                self.bn = None
    
        def forward(self, x):
            x = self.conv(x)
            if self.bn is not None:
                x = self.bn(x)
    
            return F.relu(x, inplace=True)
    
    # Fully Connected Network
    class FCN(nn.Module):
    
        def __init__(self,cin,cout):
            super(FCN, self).__init__()
            self.cout = cout
            self.linear = nn.Linear(cin, cout)
            self.bn = nn.BatchNorm1d(cout)
    
        def forward(self,x):
            # kk = number of voxels stacked across the batch, t = points per voxel
            kk, t, _ = x.shape
            x = self.linear(x.view(kk*t,-1))
            x = F.relu(self.bn(x))
            return x.view(kk,t,-1)
    
    # Voxel Feature Encoding layer
    class VFE(nn.Module):
    
        def __init__(self,cin,cout):
            super(VFE, self).__init__()
            assert cout % 2 == 0
            self.units = cout // 2
            self.fcn = FCN(cin,self.units)
    
        def forward(self, x, mask):  # x: [N, T, C]; N = number of voxels in the batch (not fixed)
            # point-wise feature
            pwf = self.fcn(x)
            # locally aggregated feature
            laf = torch.max(pwf, 1)[0].unsqueeze(1).repeat(1, cfg.T, 1)  # laf: [N, T, cout // 2]
            # point-wise concatenated feature
            pwcf = torch.cat((pwf, laf), dim=2)
            # apply the mask: each voxel has T=35 slots, and voxels with fewer points are
            # zero-padded; the mask keeps those padding slots out of the computation
            mask = mask.unsqueeze(2).repeat(1, 1, self.units * 2)
            pwcf = pwcf * mask.float()

            return pwcf  # [N, T, cout]
    
    # Stacked Voxel Feature Encoding
    class SVFE(nn.Module):
    
        def __init__(self):
            super(SVFE, self).__init__()
            self.vfe_1 = VFE(7,32)
            self.vfe_2 = VFE(32,128)
            self.fcn = FCN(128,128)
        def forward(self, x):
            mask = torch.ne(torch.max(x, 2)[0], 0)  # a slot is a real point iff some channel is nonzero
            x = self.vfe_1(x, mask)
            x = self.vfe_2(x, mask)
            x = self.fcn(x)
            # element-wise max pooling
            x = torch.max(x,1)[0]
            return x 
    
    # Convolutional Middle Layer
    class CML(nn.Module):
        def __init__(self):
            super(CML, self).__init__()
            self.conv3d_1 = Conv3d(128, 64, 3, s=(2, 1, 1), p=(1, 1, 1))
            self.conv3d_2 = Conv3d(64, 64, 3, s=(1, 1, 1), p=(0, 1, 1))
            self.conv3d_3 = Conv3d(64, 64, 3, s=(2, 1, 1), p=(1, 1, 1))
    
        def forward(self, x): 
            x = self.conv3d_1(x)
            x = self.conv3d_2(x)
            x = self.conv3d_3(x)
            return x
    
    # # Region Proposal Network
    # class RPN(nn.Module):
    #     def __init__(self):
    #         super(RPN, self).__init__()
    #         self.block_1 = [Conv2d(128, 128, 3, 2, 1)]
    #         self.block_1 += [Conv2d(128, 128, 3, 1, 1) for _ in range(3)]
    #         self.block_1 = nn.Sequential(*self.block_1)
    
    #         self.block_2 = [Conv2d(128, 128, 3, 2, 1)]
    #         self.block_2 += [Conv2d(128, 128, 3, 1, 1) for _ in range(5)]
    #         self.block_2 = nn.Sequential(*self.block_2)
    
    #         self.block_3 = [Conv2d(128, 256, 3, 2, 1)]
    #         self.block_3 += [nn.Conv2d(256, 256, 3, 1, 1) for _ in range(5)]
    #         self.block_3 = nn.Sequential(*self.block_3)
    
    #         self.deconv_1 = nn.Sequential(nn.ConvTranspose2d(256, 256, 4, 4, 0),nn.BatchNorm2d(256))
    #         self.deconv_2 = nn.Sequential(nn.ConvTranspose2d(128, 256, 2, 2, 0),nn.BatchNorm2d(256))
    #         self.deconv_3 = nn.Sequential(nn.ConvTranspose2d(128, 256, 1, 1, 0),nn.BatchNorm2d(256))
    
    #         self.score_head = Conv2d(768, cfg.anchors_per_position, 1, 1, 0, activation=False, batch_norm=False)
    #         self.reg_head = Conv2d(768, 7 * cfg.anchors_per_position, 1, 1, 0, activation=False, batch_norm=False)
    
    #     def forward(self,x):
    #         x = self.block_1(x)
    #         x_skip_1 = x
    #         x = self.block_2(x)
    #         x_skip_2 = x
    #         x = self.block_3(x)
    #         x_0 = self.deconv_1(x)
    #         x_1 = self.deconv_2(x_skip_2)
    #         x_2 = self.deconv_3(x_skip_1)
    #         x = torch.cat((x_0,x_1,x_2),1)
    #         return self.score_head(x),self.reg_head(x)
    
    
    class VoxelNet(nn.Module):
    
        def __init__(self):
            super(VoxelNet, self).__init__()
            self.svfe = SVFE()
            self.cml = CML()
            # self.rpn = RPN()
    
        def voxel_indexing(self, sparse_features, coords):  # sparse_features: [N, C]; N = number of voxels in the batch (not fixed)

            dim = sparse_features.shape[-1]

            dense_feature = torch.zeros(dim, cfg.N, cfg.D, cfg.H, cfg.W,
                                        device=sparse_features.device)  # cfg.N = batch size
            
            """
            这段代码的操作可以通过一个for循环来实现,但是需要注意,使用for循环的效率通常会比使用向量化操作低。下面是一个可能的实现:
            for i in range(len(coords)):
                dense_feature[:, coords[i,0], coords[i,1], coords[i,2], coords[i,3]] = sparse_features[i]
    
            这个for循环遍历coords的每一行(即每一个坐标),然后在dense_feature中找到对应的位置,将sparse_features中的对应元素赋给这个位置。这与原始代码的操作是一样的。
            但是,需要注意的是,这种方法的效率通常会比使用向量化操作低,特别是当处理大量数据时。在实际的代码中,我们通常会优先使用向量化操作,因为它们可以利用现代硬件的并行计算能力,从而大大提高计算效率
            
            
            
    这是一种常见的将稀疏张量转换为密集张量的方法。在稀疏张量中,只存储非零元素和它们的位置,而在密集张量中,所有元素都被存储。
    这段代码就是在将 sparse_features 中的元素放入 dense_feature 的对应位置,从而将稀疏表示转换为密集表示。
            """
    
            # the indexed slice has shape [C, N], so the [N, C] features are transposed to match
            dense_feature[:, coords[:, 0], coords[:, 1], coords[:, 2], coords[:, 3]] = sparse_features.t()
            # dense_feature: [C, B, D, H, W]
            return dense_feature.transpose(0, 1)  # -> [B, C, D, H, W]: the dense tensor
    
        def forward(self, voxel_features, voxel_coords):  # voxel_features: [N, T, C]; N voxels in the batch, T=35 points per voxel, C dims per point
                                                          # voxel_coords: [N, 4] = [batch_id, d, h, w] (preprocess already reordered to (D, H, W))
            # feature learning network
            vwfs = self.svfe(voxel_features)
            print(f"vwfs.shape = {vwfs.shape}")  # [N, C]
            vwfs = self.voxel_indexing(vwfs, voxel_coords)  # scatter via the coordinate index
            print(f"voxel_indexing ==> vwfs.shape = {vwfs.shape}")  # [B, C, D, H, W]
            # convolutional middle network
            # cml_out = self.cml(vwfs)
    
            # region proposal network
    
            # merge the depth and feature dim into one, output probability score map and regression map
            # psm,rm = self.rpn(cml_out.view(cfg.N,-1,cfg.H, cfg.W))
    
            # return psm, rm
    
    
    if __name__ == '__main__':
        model = VoxelNet()

        # fake batch: 100 voxels, 35 points each, 7 dims per point
        voxel_features = torch.rand(100, 35, 7)
        # every coordinate must lie inside (cfg.N, cfg.D, cfg.H, cfg.W); high=10 assumes the grid is at least that large
        voxel_coords = torch.randint(low=0, high=10, size=(100, 4))

        model(voxel_features, voxel_coords)
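
    One detail worth isolating: SVFE builds its padding mask with torch.ne(torch.max(x, 2)[0], 0), i.e. a slot counts as a real point only if at least one of its channels is nonzero. A tiny sketch:

    import torch

    x = torch.tensor([[[1.0, 2.0],     # one voxel, T=3 slots, 2 channels per point
                       [0.5, 0.0],
                       [0.0, 0.0]]])   # the last slot is zero padding

    mask = torch.ne(torch.max(x, 2)[0], 0)
    print(mask)  # tensor([[ True,  True, False]])

    Caveat: a genuine point whose channels all happen to be exactly zero would be masked as padding too; with the 7-dim augmented features this is effectively never the case.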
    

    Reference blogs

    Notes on the paper "VoxelNet: End-to-End Learning for Point Cloud Based 3D Object Detection"

    VoxelNet: an end-to-end 3D object detection network based on point clouds

  • Original post: https://blog.csdn.net/weixin_43154149/article/details/134061044