Students working in CV may have noticed this problem when reading papers: the core idea of a paper is often simple, but when you go looking for its core code, you find that the author's module is embedded in a classification, detection, or segmentation framework. If you are unfamiliar with that particular framework, the core code can be hard to dig out even when it is only a dozen lines long.

Today I'll help solve part of that problem. Remember the attention paper collection I shared last time? If you missed it, click here.

This time I've compiled the core code from those 30 attention papers, plus some papers from other series, such as re-parameterization (Rep) and convolution variants; both the core code and the original papers have been organized.

Due to limited space and time, only a portion is shared for now; if you need all the papers and the complete core code, see the end of this post.
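Before the individual snippets, here is a minimal sketch (my own illustration, not taken from any of the papers below) of how these plug-and-play modules are typically used: the module sits between the convolutions of an existing backbone block and must preserve the feature map's shape, so you can swap in any module from this post whose input and output shapes match. `ChannelGate` here is a hypothetical stand-in attention module.

```python
import torch
from torch import nn

class ChannelGate(nn.Module):
    """Hypothetical stand-in for any plug-and-play attention module."""
    def __init__(self, channels, reduction=16):
        super().__init__()
        self.gate = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),                        # (B, C, 1, 1)
            nn.Conv2d(channels, channels // reduction, 1),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels // reduction, channels, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        return x * self.gate(x)  # reweight channels; shape is unchanged

class BlockWithAttention(nn.Module):
    """Residual conv block with an attention module dropped in between."""
    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, 3, padding=1)
        self.attn = ChannelGate(channels)   # <- the pluggable module
        self.conv2 = nn.Conv2d(channels, channels, 3, padding=1)

    def forward(self, x):
        return x + self.conv2(self.attn(self.conv1(x)))

if __name__ == '__main__':
    x = torch.randn(2, 64, 56, 56)
    print(BlockWithAttention(64)(x).shape)  # torch.Size([2, 64, 56, 56])
```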
Axial Attention:

```python
from model.attention.Axial_attention import AxialImageTransformer
import torch

if __name__ == '__main__':
    input = torch.randn(3, 128, 7, 7)  # (B, C, H, W); C must match dim
    model = AxialImageTransformer(
        dim=128,
        depth=12,
        reversible=True
    )
    outputs = model(input)
    print(outputs.shape)  # torch.Size([3, 128, 7, 7])
```
Criss-Cross Attention (CCNet):

```python
from model.attention.CrissCrossAttention import CrissCrossAttention
import torch

if __name__ == '__main__':
    input = torch.randn(3, 64, 7, 7)
    model = CrissCrossAttention(64)  # argument is the channel count
    outputs = model(input)
    print(outputs.shape)  # torch.Size([3, 64, 7, 7])
```
MOA Transformer:

```python
from model.attention.MOATransformer import MOATransformer
import torch

if __name__ == '__main__':
    input = torch.randn(1, 3, 224, 224)  # an ImageNet-sized image batch
    model = MOATransformer(
        img_size=224,
        patch_size=4,
        in_chans=3,
        num_classes=1000,
        embed_dim=96,
        depths=[2, 2, 6],
        num_heads=[3, 6, 12],
        window_size=14,
        mlp_ratio=4.,
        qkv_bias=True,
        qk_scale=None,
        drop_rate=0.0,
        drop_path_rate=0.1,
        ape=False,
        patch_norm=True,
        use_checkpoint=False
    )
    output = model(input)
    print(output.shape)  # (1, num_classes) classification logits
```
CrossFormer:

```python
from model.attention.Crossformer import CrossFormer
import torch

if __name__ == '__main__':
    input = torch.randn(1, 3, 224, 224)
    model = CrossFormer(
        img_size=224,
        patch_size=[4, 8, 16, 32],
        in_chans=3,
        num_classes=1000,
        embed_dim=48,
        depths=[2, 2, 6, 2],
        num_heads=[3, 6, 12, 24],
        group_size=[7, 7, 7, 7],
        mlp_ratio=4.,
        qkv_bias=True,
        qk_scale=None,
        drop_rate=0.0,
        drop_path_rate=0.1,
        ape=False,
        patch_norm=True,
        use_checkpoint=False,
        merge_size=[[2, 4], [2, 4], [2, 4]]
    )
    output = model(input)
    print(output.shape)  # (1, num_classes) classification logits
```
DAT (Deformable Attention Transformer):

```python
from model.attention.DAT import DAT
import torch

if __name__ == '__main__':
    input = torch.randn(1, 3, 224, 224)
    model = DAT(
        img_size=224,
        patch_size=4,
        num_classes=1000,
        expansion=4,
        dim_stem=96,
        dims=[96, 192, 384, 768],
        depths=[2, 2, 6, 2],
        # stage_spec picks the attention type per block within each stage
        stage_spec=[['L', 'S'], ['L', 'S'], ['L', 'D', 'L', 'D', 'L', 'D'], ['L', 'D']],
        heads=[3, 6, 12, 24],
        window_sizes=[7, 7, 7, 7],
        groups=[-1, -1, 3, 6],
        use_pes=[False, False, True, True],
        dwc_pes=[False, False, False, False],
        strides=[-1, -1, 1, 1],
        sr_ratios=[-1, -1, -1, -1],
        offset_range_factor=[-1, -1, 2, 2],
        no_offs=[False, False, False, False],
        fixed_pes=[False, False, False, False],
        use_dwc_mlps=[False, False, False, False],
        use_conv_patches=False,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        drop_path_rate=0.2,
    )
    output = model(input)
    print(output[0].shape)  # forward returns a tuple; element 0 is the logits
```
MobileViTv2 Attention:

```python
from model.attention.MobileViTv2Attention import MobileViTv2Attention
import torch

if __name__ == '__main__':
    input = torch.randn(50, 49, 512)  # (B, N, d_model) token sequence
    sa = MobileViTv2Attention(d_model=512)
    output = sa(input)
    print(output.shape)  # torch.Size([50, 49, 512])
```
ACmix:

```python
from model.attention.ACmix import ACmix
import torch

if __name__ == '__main__':
    input = torch.randn(50, 256, 7, 7)
    acmix = ACmix(in_planes=256, out_planes=256)
    output = acmix(input)
    print(output.shape)  # torch.Size([50, 256, 7, 7])
```
ParNet Attention:

```python
from model.attention.ParNetAttention import *
import torch

if __name__ == '__main__':
    input = torch.randn(50, 512, 7, 7)
    pna = ParNetAttention(channel=512)
    output = pna(input)
    print(output.shape)  # torch.Size([50, 512, 7, 7])
```
UFO Attention:

```python
from model.attention.UFOAttention import *
import torch

if __name__ == '__main__':
    input = torch.randn(50, 49, 512)  # (B, N, d_model)
    ufo = UFOAttention(d_model=512, d_k=512, d_v=512, h=8)
    output = ufo(input, input, input)  # self-attention: Q, K, V share one tensor
    print(output.shape)  # torch.Size([50, 49, 512])
```
Coordinate Attention:

```python
from model.attention.CoordAttention import CoordAtt
import torch

inp = torch.rand([2, 96, 56, 56])
inp_dim, oup_dim = 96, 96
reduction = 32

coord_attention = CoordAtt(inp_dim, oup_dim, reduction=reduction)
output = coord_attention(inp)
print(output.shape)  # torch.Size([2, 96, 56, 56])
```
RepVGG:

```python
from model.rep.repvgg import RepBlock
import torch

input = torch.randn(50, 512, 49, 49)
repblock = RepBlock(512, 512)
repblock.eval()  # deploy-time fusion assumes inference mode
out = repblock(input)
repblock._switch_to_deploy()  # fuse the training-time branches into one conv
out2 = repblock(input)
print('difference between vgg and repvgg')
print(((out2 - out) ** 2).sum())  # should be ~0: both forms compute the same mapping
```
ACNet:

```python
from model.rep.acnet import ACNet
import torch

input = torch.randn(50, 512, 49, 49)
acnet = ACNet(512, 512)
acnet.eval()
out = acnet(input)
acnet._switch_to_deploy()  # fold the asymmetric conv branches into one kernel
out2 = acnet(input)
print('difference:')
print(((out2 - out) ** 2).sum())  # should be ~0
```
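Both re-parameterization examples above print a near-zero difference because a conv followed by BN (and parallel branches of such convs) can be algebraically folded into a single convolution at inference time. Here is a minimal sketch of the underlying conv+BN fusion (my own illustration, not the repo's code):

```python
import torch
from torch import nn

def fuse_conv_bn(conv: nn.Conv2d, bn: nn.BatchNorm2d) -> nn.Conv2d:
    """Fold an eval-mode BatchNorm2d into the preceding Conv2d."""
    fused = nn.Conv2d(conv.in_channels, conv.out_channels,
                      conv.kernel_size, conv.stride,
                      conv.padding, bias=True)
    # BN(y) = gamma * (y - mean) / sqrt(var + eps) + beta, with y = W*x + b
    scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)
    fused.weight.data = conv.weight * scale.reshape(-1, 1, 1, 1)
    bias = conv.bias if conv.bias is not None else torch.zeros(conv.out_channels)
    fused.bias.data = bn.bias + (bias - bn.running_mean) * scale
    return fused

if __name__ == '__main__':
    conv, bn = nn.Conv2d(8, 16, 3, padding=1), nn.BatchNorm2d(16)
    bn.running_mean.normal_()               # fake running stats so the
    bn.running_var.uniform_(0.5, 1.5)       # check is non-trivial
    bn.eval()  # fusion reproduces BN's inference-time behavior only
    x = torch.randn(2, 8, 32, 32)
    out = bn(conv(x))
    out2 = fuse_conv_bn(conv, bn)(x)
    print(((out2 - out) ** 2).sum())  # ~0: one conv replaces conv + BN
```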
CondConv (conditional convolution):

```python
from model.conv.CondConv import *
import torch

if __name__ == '__main__':
    input = torch.randn(2, 32, 64, 64)
    m = CondConv(in_planes=32, out_planes=64, kernel_size=3,
                 stride=1, padding=1, bias=False)
    out = m(input)
    print(out.shape)  # torch.Size([2, 64, 64, 64])
```
Dynamic Convolution:

```python
from model.conv.DynamicConv import *
import torch

if __name__ == '__main__':
    input = torch.randn(2, 32, 64, 64)
    m = DynamicConv(in_planes=32, out_planes=64, kernel_size=3,
                    stride=1, padding=1, bias=False)
    out = m(input)
    print(out.shape)  # torch.Size([2, 64, 64, 64])
```
Involution:

```python
from model.conv.Involution import Involution
import torch

input = torch.randn(1, 4, 64, 64)
involution = Involution(kernel_size=3, in_channel=4, stride=2)
out = involution(input)
print(out.shape)  # torch.Size([1, 4, 32, 32]); stride 2 halves H and W
```
Follow 《学姐带你玩AI》 below 🚀🚀🚀
Reply with "核心代码" (core code) to get the complete collection of papers + code.
Writing all this up is not easy, so likes, comments, and bookmarks are very welcome!