深度学习论文: Pyramidal Convolution: Rethinking CNN for Visual Recognition及其PyTorch实现

网友投稿 956 2022-08-31

深度学习论文: Pyramidal Convolution: Rethinking CNN for Visual Recognition及其PyTorch实现

深度学习论文: Pyramidal Convolution: Rethinking CNN for Visual Recognition及其PyTorch实现

深度学习论文: Pyramidal Convolution: Rethinking Convolutional Neural Networks for Visual Recognition及其PyTorch实现

Pyramidal Convolution: Rethinking Convolutional Neural Networks for Visual Recognition

PDF: https://arxiv.org/abs/2006.11538

1 概述

该文提出一种金字塔卷积(Pyramidal Convolution, PyConv),它可以在多个滤波器尺度上对输入进行处理。PyConv包含一个核金字塔,其中每一层包含不同类型的滤波器(滤波器的大小与深度可变,因此可以提取不同尺度的细节信息)。除了能够提取多尺度信息之外,PyConv的实现也很高效:相比标准卷积,它不会增加额外的计算量与参数量。更进一步,它更为灵活并具有可扩展性,为不同的应用提供了更大的架构设计空间。

2 Pyramidal Convolution

def ConvBNReLU(in_channels, out_channels, kernel_size, stride, groups=1):
    """Return a ``Conv2d -> BatchNorm2d -> ReLU`` stack as an ``nn.Sequential``.

    The convolution is padded by ``kernel_size // 2`` on each side.
    """
    conv = nn.Conv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=kernel_size // 2,
        groups=groups,
    )
    norm = nn.BatchNorm2d(out_channels)
    activation = nn.ReLU(inplace=True)
    return nn.Sequential(conv, norm, activation)


def Conv1x1BNReLU(in_channels, out_channels, groups=1):
    """Return a 1x1 ``Conv2d -> BatchNorm2d -> ReLU6`` stack as an ``nn.Sequential``.

    Note that the activation here is ``ReLU6``, unlike ``ConvBNReLU``.
    """
    pointwise = nn.Conv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=1,
        stride=1,
        groups=groups,
    )
    norm = nn.BatchNorm2d(out_channels)
    activation = nn.ReLU6(inplace=True)
    return nn.Sequential(pointwise, norm, activation)


class PyConv(nn.Module):
    """Pyramidal convolution: parallel conv branches concatenated on dim 1.

    Branch ``i`` is a ``ConvBNReLU`` with ``out_channels[i]`` output channels,
    kernel size ``kernel_sizes[i]`` and ``groups[i]`` groups; every branch
    receives the same input and uses the same ``stride``.
    """

    def __init__(self, in_channels, out_channels, kernel_sizes, groups, stride=1):
        super(PyConv, self).__init__()
        if out_channels is None:
            out_channels = []
        # The three per-branch lists must describe the same number of branches.
        assert len(out_channels) == len(kernel_sizes) == len(groups)

        branches = [
            ConvBNReLU(
                in_channels=in_channels,
                out_channels=out_channels[level],
                kernel_size=kernel,
                stride=stride,
                groups=groups[level],
            )
            for level, kernel in enumerate(kernel_sizes)
        ]
        self.pyconv_list = nn.ModuleList(branches)

    def forward(self, x):
        """Apply every branch to ``x`` and concatenate the results along dim 1."""
        return torch.cat([branch(x) for branch in self.pyconv_list], 1)

3 应用

3-1 PyConv Networks for Image Classification

将PyConv嵌入到Bottleneck中,提出PyConvResNet与PyConvHGResNet两种结构。

3-2 PyConv Network on Semantic Segmentation

针对语义分割任务,提出PyConv解析头(PyConvPH)。PyConvPH包含三个主要成分:局部PyConv模块(LocalPyConv)、全局PyConv模块(GlobalPyConv)以及融合模块(MergePyConv)。

# Channel width used inside the local/global branches of the parsing head.
_PYCONV_HEAD_WIDTH = 512


class LocalPyConv(nn.Module):
    """Local branch: 1x1 reduce -> 4-level PyConv -> 1x1 combine.

    Args:
        planes: number of input channels; the branch also outputs ``planes``
            channels.
    """

    def __init__(self, planes):
        super(LocalPyConv, self).__init__()
        # Each pyramid level gets a quarter of the internal width so that the
        # concatenated PyConv output matches what ``_combine`` expects. The
        # previous ``planes // 4`` only matched when ``planes == 512``.
        level_channels = _PYCONV_HEAD_WIDTH // 4
        self._reduce = Conv1x1BNReLU(planes, _PYCONV_HEAD_WIDTH)
        self._pyConv = PyConv(
            in_channels=_PYCONV_HEAD_WIDTH,
            out_channels=[level_channels] * 4,
            kernel_sizes=[3, 5, 7, 9],
            groups=[1, 4, 8, 16],
        )
        self._combine = Conv1x1BNReLU(_PYCONV_HEAD_WIDTH, planes)

    def forward(self, x):
        """Return ``combine(pyconv(reduce(x)))``."""
        return self._combine(self._pyConv(self._reduce(x)))


class GlobalPyConv(nn.Module):
    """Global branch: pool to 9x9 -> 1x1 reduce -> 4-level PyConv -> 1x1 fuse.

    The result is bilinearly resized back to the input's spatial size and has
    512 channels.

    Args:
        planes: number of input channels.
    """

    def __init__(self, planes):
        super(GlobalPyConv, self).__init__()
        # Same fix as in LocalPyConv: level width follows the internal width,
        # not ``planes``, so ``_fuse`` always receives 512 channels.
        level_channels = _PYCONV_HEAD_WIDTH // 4
        self.global_pool = nn.AdaptiveAvgPool2d(output_size=9)
        self._reduce = Conv1x1BNReLU(planes, _PYCONV_HEAD_WIDTH)
        self._pyConv = PyConv(
            in_channels=_PYCONV_HEAD_WIDTH,
            out_channels=[level_channels] * 4,
            kernel_sizes=[3, 5, 7, 9],
            groups=[1, 4, 8, 16],
        )
        self._fuse = Conv1x1BNReLU(_PYCONV_HEAD_WIDTH, _PYCONV_HEAD_WIDTH)

    def forward(self, x):
        """Process a pooled copy of ``x`` and resize it to ``x``'s last two dims.

        ``x`` must be 4-D (the shape is unpacked into four values).
        """
        _, _, size_2, size_3 = x.shape
        pooled = self.global_pool(x)
        fused = self._fuse(self._pyConv(self._reduce(pooled)))
        return F.interpolate(
            fused, (size_2, size_3), align_corners=True, mode='bilinear'
        )


class MergePyConv(nn.Module):
    """Merge block: 3x3 ConvBNReLU to 256 channels, resize, 1x1 classifier.

    Args:
        img_size: target spatial size passed to ``F.interpolate``.
        in_channels: number of input channels.
        num_classes: number of output channels of the final 1x1 convolution.
    """

    def __init__(self, img_size, in_channels, num_classes):
        super(MergePyConv, self).__init__()
        self.img_size = img_size
        self.conv3 = ConvBNReLU(
            in_channels=in_channels, out_channels=256, kernel_size=3, stride=1
        )
        self.conv1 = nn.Conv2d(
            in_channels=256,
            out_channels=num_classes,
            kernel_size=1,
            stride=1,
            groups=1,
        )

    def forward(self, x):
        """Return per-class maps resized to ``img_size``."""
        x = self.conv3(x)
        x = F.interpolate(x, self.img_size, align_corners=True, mode='bilinear')
        return self.conv1(x)


class PyConvParsingHead(nn.Module):
    """Parsing head: concatenated global and local PyConv branches, then merge.

    Args:
        img_size: output spatial size of the head.
        planes: number of channels of the incoming feature map.
        num_classes: number of output channels.
    """

    def __init__(self, img_size=(473, 473), planes=512, num_classes=150):
        super(PyConvParsingHead, self).__init__()
        self.globalPyConv = GlobalPyConv(planes=planes)
        self.localPyConv = LocalPyConv(planes=planes)
        # Global branch emits 512 channels and the local branch ``planes``;
        # for the default planes=512 this is the original 1024.
        merged_channels = _PYCONV_HEAD_WIDTH + planes
        self.mergePyConv = MergePyConv(img_size, merged_channels, num_classes)

    def forward(self, x):
        """Return the merged output for feature map ``x``."""
        g_x = self.globalPyConv(x)
        l_x = self.localPyConv(x)
        merged = torch.cat([g_x, l_x], dim=1)
        # The original ended in a bare ``return`` and therefore yielded None.
        return self.mergePyConv(merged)

3-3 PyConv Network on Object Detection

class PyConv4(nn.Module):
    """Four-branch pyramidal convolution; each branch emits ``places // 4`` channels."""

    def __init__(self, inplaces, places, kernel_sizes=[3, 5, 7, 9], groups=[1, 4, 8, 16], stride=2):
        super(PyConv4, self).__init__()
        assert len(kernel_sizes) == len(groups)
        quarter = places // 4
        widths = [quarter, quarter, quarter, quarter]
        branches = [
            ConvBNReLU(
                in_channels=inplaces,
                out_channels=widths[level],
                kernel_size=kernel,
                stride=stride,
                groups=groups[level],
            )
            for level, kernel in enumerate(kernel_sizes)
        ]
        self.pyconv_list = nn.ModuleList(branches)

    def forward(self, x):
        """Run every branch on ``x`` and concatenate the results along dim 1."""
        return torch.cat([branch(x) for branch in self.pyconv_list], 1)


class PyConv3(nn.Module):
    """Three-branch pyramidal convolution.

    Branch widths are ``places // 4``, ``places // 4`` and ``places // 2``.
    """

    def __init__(self, inplaces, places, kernel_sizes=[3, 5, 7], groups=[1, 4, 8], stride=2):
        super(PyConv3, self).__init__()
        assert len(kernel_sizes) == len(groups)
        widths = [places // 4, places // 4, places // 2]
        branches = [
            ConvBNReLU(
                in_channels=inplaces,
                out_channels=widths[level],
                kernel_size=kernel,
                stride=stride,
                groups=groups[level],
            )
            for level, kernel in enumerate(kernel_sizes)
        ]
        self.pyconv_list = nn.ModuleList(branches)

    def forward(self, x):
        """Run every branch on ``x`` and concatenate the results along dim 1."""
        return torch.cat([branch(x) for branch in self.pyconv_list], 1)


class PyConv2(nn.Module):
    """Two-branch pyramidal convolution; each branch emits ``places // 2`` channels."""

    def __init__(self, inplaces, places, kernel_sizes=[3, 5], groups=[1, 4], stride=2):
        super(PyConv2, self).__init__()
        assert len(kernel_sizes) == len(groups)
        half = places // 2
        widths = [half, half]
        branches = [
            ConvBNReLU(
                in_channels=inplaces,
                out_channels=widths[level],
                kernel_size=kernel,
                stride=stride,
                groups=groups[level],
            )
            for level, kernel in enumerate(kernel_sizes)
        ]
        self.pyconv_list = nn.ModuleList(branches)

    def forward(self, x):
        """Run every branch on ``x`` and concatenate the results along dim 1."""
        return torch.cat([branch(x) for branch in self.pyconv_list], 1)


class PyConv1(nn.Module):
    """Single-branch variant: one 3x3, ungrouped ``ConvBNReLU``.

    NOTE: ``kernel_sizes`` and ``groups`` are only checked for equal length;
    their values are not used when building the convolution.
    """

    def __init__(self, inplaces, places, kernel_sizes, groups, stride=1):
        super(PyConv1, self).__init__()
        assert len(kernel_sizes) == len(groups)
        self.pyconv = ConvBNReLU(
            in_channels=inplaces,
            out_channels=places,
            kernel_size=3,
            stride=stride,
            groups=1,
        )

    def forward(self, x):
        """Apply the single convolution stack to ``x``."""
        return self.pyconv(x)

3-4 PyConv Network on Video Classification

版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。

上一篇:深度学习论文: Fast-SCNN: Fast Semantic Segmentation Network及其PyTorch实现
下一篇:Go 语言面向对象教程 — 接口篇:接口赋值(google play)
相关文章

 发表评论

暂时没有评论,来抢沙发吧~