Semantic Segmentation Paper: DeepLabv3+: Encoder-Decoder with Atrous Separable Convolution, and Its PyTorch Implementation

DeepLabv3+: Encoder-Decoder with Atrous Separable Convolution
PDF:
PyTorch code:

1 Innovations

To fuse multi-scale information, the paper introduces a new encoder-decoder architecture: DeepLabv3 serves as the encoder module, and a simple yet effective decoder module is added on top of it.

In the proposed encoder-decoder architecture, the resolution of the features extracted by the encoder can be controlled directly via atrous (dilated) convolution, trading accuracy against runtime.

The paper adapts the Xception model to the segmentation task and applies depthwise separable convolutions in both the ASPP module and the decoder module, yielding a model that is both stronger and faster.
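The "atrous separable convolution" of the title factors a dilated convolution into a dilated depthwise step followed by a 1x1 pointwise step, which is what makes the ASPP and decoder cheap. Below is a minimal PyTorch sketch of this building block; the module name AtrousSeparableConv2d and the BN/ReLU placement are illustrative assumptions, not taken from the paper's reference code.

import torch
import torch.nn as nn

class AtrousSeparableConv2d(nn.Module):
    """Dilated depthwise conv followed by a 1x1 pointwise conv (illustrative sketch)."""
    def __init__(self, in_ch, out_ch, kernel_size=3, dilation=2):
        super().__init__()
        # Depthwise: one dilated k x k filter per input channel (groups=in_ch).
        # padding = dilation * (k - 1) // 2 preserves the spatial size.
        self.depthwise = nn.Conv2d(in_ch, in_ch, kernel_size,
                                   padding=dilation * (kernel_size - 1) // 2,
                                   dilation=dilation, groups=in_ch, bias=False)
        # Pointwise: a 1x1 convolution mixes information across channels.
        self.pointwise = nn.Conv2d(in_ch, out_ch, 1, bias=False)
        self.bn = nn.BatchNorm2d(out_ch)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        return self.relu(self.bn(self.pointwise(self.depthwise(x))))

if __name__ == '__main__':
    x = torch.randn(1, 64, 32, 32)
    y = AtrousSeparableConv2d(64, 128, dilation=2)(x)
    print(y.shape)  # torch.Size([1, 128, 32, 32])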

2 Network Architecture

DeepLabv3 is used as the encoder, and a lightweight decoder module is added.
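For orientation, this whole structure collapses to four steps. The sketch below is only a schematic restatement of the DeepLabv3Plus.forward method in the full listing further down, assuming backbone, aspp, and decoder modules with the same interfaces as in that listing:

import torch.nn.functional as F

def deeplabv3plus_forward(backbone, aspp, decoder, imgs):
    # Encoder: DeepLabv3 = backbone features refined by the ASPP context module.
    high, low = backbone(imgs)   # high-level (output stride 16) and low-level features
    x = aspp(high)
    # Decoder: upsample the ASPP output, fuse it with low-level features, refine.
    x = decoder(x, low)
    # Restore the prediction to the input resolution.
    return F.interpolate(x, size=imgs.shape[2:], mode='bilinear', align_corners=True)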

3 Xception Improvements

4 Experimental Results

4-1 PASCAL VOC 2012

4-2 Cityscapes

PyTorch code:

# !/usr/bin/env python
# -- coding: utf-8 --
# @Time : 2020/9/25 12:50
# @Author : liumin
# @File : DeeplabV3.py

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models.resnet import resnet18, resnet34, resnet50, resnet101, resnet152


class ResNet(nn.Module):
    """ResNet backbone returning both high-level and low-level feature maps."""

    def __init__(self, backbone='resnet50', pretrained_path=None):
        super().__init__()
        if backbone == 'resnet18':
            backbone = resnet18(pretrained=not pretrained_path)
            self.final_out_channels = 256
            self.low_level_inplanes = 64
        elif backbone == 'resnet34':
            backbone = resnet34(pretrained=not pretrained_path)
            self.final_out_channels = 256
            self.low_level_inplanes = 64
        elif backbone == 'resnet50':
            backbone = resnet50(pretrained=not pretrained_path)
            self.final_out_channels = 1024
            self.low_level_inplanes = 256
        elif backbone == 'resnet101':
            backbone = resnet101(pretrained=not pretrained_path)
            self.final_out_channels = 1024
            self.low_level_inplanes = 256
        else:  # backbone == 'resnet152'
            backbone = resnet152(pretrained=not pretrained_path)
            self.final_out_channels = 1024
            self.low_level_inplanes = 256
        if pretrained_path:
            backbone.load_state_dict(torch.load(pretrained_path))

        # children()[:5] -> stem + layer1 (low-level features); [5:7] -> layer2 + layer3
        self.early_extractor = nn.Sequential(*list(backbone.children())[:5])
        self.later_extractor = nn.Sequential(*list(backbone.children())[5:7])

        # Remove the stride in the first block of layer3 to keep a larger feature map
        conv4_block1 = self.later_extractor[-1][0]
        conv4_block1.conv1.stride = (1, 1)
        conv4_block1.conv2.stride = (1, 1)
        conv4_block1.downsample[0].stride = (1, 1)

    def forward(self, x):
        x = self.early_extractor(x)
        out = self.later_extractor(x)
        return out, x


class _ASPPModule(nn.Module):
    """A single ASPP branch: atrous conv + BN + ReLU."""

    def __init__(self, inplanes, planes, kernel_size, padding, dilation):
        super(_ASPPModule, self).__init__()
        self.atrous_conv = nn.Conv2d(inplanes, planes, kernel_size=kernel_size,
                                     stride=1, padding=padding, dilation=dilation, bias=False)
        self.bn = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self._init_weight()

    def forward(self, x):
        x = self.atrous_conv(x)
        x = self.bn(x)
        return self.relu(x)

    def _init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                torch.nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()


class ASPP(nn.Module):
    """Atrous Spatial Pyramid Pooling: four parallel atrous branches plus image-level pooling."""

    def __init__(self, inplanes=2048, output_stride=16):
        super(ASPP, self).__init__()
        if output_stride == 16:
            dilations = [1, 6, 12, 18]
        elif output_stride == 8:
            dilations = [1, 12, 24, 36]
        else:
            raise NotImplementedError

        self.aspp1 = _ASPPModule(inplanes, 256, 1, padding=0, dilation=dilations[0])
        self.aspp2 = _ASPPModule(inplanes, 256, 3, padding=dilations[1], dilation=dilations[1])
        self.aspp3 = _ASPPModule(inplanes, 256, 3, padding=dilations[2], dilation=dilations[2])
        self.aspp4 = _ASPPModule(inplanes, 256, 3, padding=dilations[3], dilation=dilations[3])
        self.global_avg_pool = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
                                             nn.Conv2d(inplanes, 256, 1, stride=1, bias=False),
                                             nn.BatchNorm2d(256),
                                             nn.ReLU(inplace=True))
        self.conv1 = nn.Conv2d(1280, 256, 1, bias=False)  # 1280 = 5 branches x 256
        self.bn1 = nn.BatchNorm2d(256)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(0.5)
        self._init_weight()

    def forward(self, x):
        x1 = self.aspp1(x)
        x2 = self.aspp2(x)
        x3 = self.aspp3(x)
        x4 = self.aspp4(x)
        # Image-level features: pool to 1x1, then upsample back to the branch size
        x5 = self.global_avg_pool(x)
        x5 = F.interpolate(x5, size=x4.size()[2:], mode='bilinear', align_corners=True)
        x = torch.cat((x1, x2, x3, x4, x5), dim=1)

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        return self.dropout(x)

    def _init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                # m.weight.data.normal_(0, math.sqrt(2. / n))
                torch.nn.init.kaiming_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.Linear):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)


class Decoder(nn.Module):
    """Decoder: reduce low-level features to 48 channels, fuse with ASPP output, refine."""

    def __init__(self, num_classes, low_level_inplanes=256):
        super(Decoder, self).__init__()
        self.conv1 = nn.Conv2d(low_level_inplanes, 48, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(48)
        self.relu = nn.ReLU()
        self.last_conv = nn.Sequential(nn.Conv2d(304, 256, kernel_size=3, stride=1, padding=1, bias=False),  # 304 = 256 + 48
                                       nn.BatchNorm2d(256),
                                       nn.ReLU(inplace=True),
                                       nn.Dropout(0.5),
                                       nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False),
                                       nn.BatchNorm2d(256),
                                       nn.ReLU(inplace=True),
                                       nn.Dropout(0.1),
                                       nn.Conv2d(256, num_classes, kernel_size=1, stride=1))
        self._init_weight()

    def forward(self, x, low_level_feat):
        low_level_feat = self.conv1(low_level_feat)
        low_level_feat = self.bn1(low_level_feat)
        low_level_feat = self.relu(low_level_feat)

        # Upsample the ASPP output to the low-level resolution before concatenation
        x = F.interpolate(x, size=low_level_feat.size()[2:], mode='bilinear', align_corners=True)
        x = torch.cat((x, low_level_feat), dim=1)
        x = self.last_conv(x)
        return x

    def _init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                # m.weight.data.normal_(0, math.sqrt(2. / n))
                torch.nn.init.kaiming_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.Linear):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)


class DeepLabv3Plus(nn.Module):
    def __init__(self, num_classes=None):
        super().__init__()
        self.num_classes = num_classes
        self.backbone = ResNet('resnet50', None)
        self.aspp = ASPP(inplanes=self.backbone.final_out_channels)
        self.decoder = Decoder(self.num_classes, self.backbone.low_level_inplanes)

    def forward(self, imgs, labels=None, mode='infer', **kwargs):
        x, low_level_feat = self.backbone(imgs)
        x = self.aspp(x)
        x = self.decoder(x, low_level_feat)
        outputs = F.interpolate(x, size=imgs.size()[2:], mode='bilinear', align_corners=True)
        return outputs


if __name__ == '__main__':
    model = DeepLabv3Plus(num_classes=19)
    print(model)
    input = torch.randn(2, 3, 1024, 2048)
    output = model(input)
    print(output.shape)
