yolov4+cbam
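
The code below is a YOLOv4 model definition (in the style of the common pytorch-YOLOv4 implementation, which provides the `tool.torch_utils` and `tool.yolo_layer` dependencies) with a CBAM (Convolutional Block Attention Module) block added: the CSPDarknet53 backbone (DownSample1–DownSample5) feeds a CBAM module that applies channel attention followed by spatial attention to the deepest 1024-channel feature map, and its output then passes through the SPP + PAN neck and the three YOLO detection heads.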


import sys

import torch
from torch import nn
import torch.nn.functional as F

from tool.torch_utils import *
from tool.yolo_layer import YoloLayer

class BasicConv(nn.Module):
    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True, bias=False):
        super(BasicConv, self).__init__()
        self.out_channels = out_planes
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias)
        self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None
        self.relu = nn.ReLU() if relu else None

    def forward(self, x):
        x = self.conv(x)
        if self.bn is not None:
            x = self.bn(x)
        if self.relu is not None:
            x = self.relu(x)
        return x

class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.size(0), -1)

class ChannelGate(nn.Module):
    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max']):
        super(ChannelGate, self).__init__()
        self.gate_channels = gate_channels
        self.mlp = nn.Sequential(
            Flatten(),
            nn.Linear(gate_channels, gate_channels // reduction_ratio),  # use reduction_ratio instead of a hard-coded 16
            nn.ReLU(),
            nn.Linear(gate_channels // reduction_ratio, gate_channels)
        )
        self.pool_types = pool_types

    def forward(self, x):
        channel_att_sum = None
        for pool_type in self.pool_types:
            if pool_type == 'avg':
                avg_pool = F.avg_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp(avg_pool)
            elif pool_type == 'max':
                max_pool = F.max_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp(max_pool)
            elif pool_type == 'lp':
                lp_pool = F.lp_pool2d(x, 2, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp(lp_pool)
            elif pool_type == 'lse':
                # LSE (log-sum-exp) pooling
                lse_pool = logsumexp_2d(x)
                channel_att_raw = self.mlp(lse_pool)

            if channel_att_sum is None:
                channel_att_sum = channel_att_raw
            else:
                channel_att_sum = channel_att_sum + channel_att_raw

        scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
        return x * scale

def logsumexp_2d(tensor):
    tensor_flatten = tensor.view(tensor.size(0), tensor.size(1), -1)
    s, _ = torch.max(tensor_flatten, dim=2, keepdim=True)
    outputs = s + (tensor_flatten - s).exp().sum(dim=2, keepdim=True).log()
    return outputs

class ChannelPool(nn.Module):
    def forward(self, x):
        return torch.cat((torch.max(x, 1)[0].unsqueeze(1), torch.mean(x, 1).unsqueeze(1)), dim=1)

class SpatialGate(nn.Module):
    def __init__(self):
        super(SpatialGate, self).__init__()
        kernel_size = 7
        self.compress = ChannelPool()
        self.spatial = BasicConv(2, 1, kernel_size, stride=1, padding=(kernel_size - 1) // 2, relu=False)

    def forward(self, x):
        x_compress = self.compress(x)
        x_out = self.spatial(x_compress)
        scale = torch.sigmoid(x_out)  # broadcasting over channels
        return x * scale

class CBAM(nn.Module):
    def __init__(self, gate_channels=1024, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False):
        super(CBAM, self).__init__()
        self.ChannelGate = ChannelGate(gate_channels, reduction_ratio, pool_types)
        self.no_spatial = no_spatial
        if not no_spatial:
            self.SpatialGate = SpatialGate()

    def forward(self, x):
        x_out = self.ChannelGate(x)
        if not self.no_spatial:
            x_out = self.SpatialGate(x_out)
        return x_out
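
# A minimal shape sanity check for the CBAM block (illustrative sketch only; the
# 1024-channel, 16x16 feature map below is an assumption, not part of the network):
#   cbam = CBAM(gate_channels=1024)
#   feat = torch.rand(1, 1024, 16, 16)
#   out = cbam(feat)   # channel attention, then spatial attention; shape stays (1, 1024, 16, 16)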

class Mish(torch.nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        x = x * (torch.tanh(torch.nn.functional.softplus(x)))
        return x

class Upsample(nn.Module):
    def __init__(self):
        super(Upsample, self).__init__()

    def forward(self, x, target_size, inference=False):
        assert (x.data.dim() == 4)
        # _, _, tH, tW = target_size

        if inference:

            #B = x.data.size(0)
            #C = x.data.size(1)
            #H = x.data.size(2)
            #W = x.data.size(3)

            return x.view(x.size(0), x.size(1), x.size(2), 1, x.size(3), 1).\
                    expand(x.size(0), x.size(1), x.size(2), target_size[2] // x.size(2), x.size(3), target_size[3] // x.size(3)).\
                    contiguous().view(x.size(0), x.size(1), target_size[2], target_size[3])
        else:
            return F.interpolate(x, size=(target_size[2], target_size[3]), mode='nearest')

class Conv_Bn_Activation(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, activation, bn=True, bias=False):
        super().__init__()
        pad = (kernel_size - 1) // 2

        self.conv = nn.ModuleList()
        if bias:
            self.conv.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, pad))
        else:
            self.conv.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, pad, bias=False))
        if bn:
            self.conv.append(nn.BatchNorm2d(out_channels))
        if activation == "mish":
            self.conv.append(Mish())
        elif activation == "relu":
            self.conv.append(nn.ReLU(inplace=True))
        elif activation == "leaky":
            self.conv.append(nn.LeakyReLU(0.1, inplace=True))
        elif activation == "linear":
            pass
        else:
            print("activate error !!! {} {} {}".format(sys._getframe().f_code.co_filename,
                                                       sys._getframe().f_code.co_name, sys._getframe().f_lineno))

    def forward(self, x):
        for l in self.conv:
            x = l(x)
        return x

class ResBlock(nn.Module):
    """
    Sequential residual blocks, each of which consists of
    two convolution layers.
    Args:
        ch (int): number of input and output channels.
        nblocks (int): number of residual blocks.
        shortcut (bool): if True, residual tensor addition is enabled.
    """

    def __init__(self, ch, nblocks=1, shortcut=True):
        super().__init__()
        self.shortcut = shortcut
        self.module_list = nn.ModuleList()
        for i in range(nblocks):
            resblock_one = nn.ModuleList()
            resblock_one.append(Conv_Bn_Activation(ch, ch, 1, 1, 'mish'))
            resblock_one.append(Conv_Bn_Activation(ch, ch, 3, 1, 'mish'))
            self.module_list.append(resblock_one)

    def forward(self, x):
        for module in self.module_list:
            h = x
            for res in module:
                h = res(h)
            x = x + h if self.shortcut else h
        return x

class DownSample1(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv_Bn_Activation(3, 32, 3, 1, 'mish')

        self.conv2 = Conv_Bn_Activation(32, 64, 3, 2, 'mish')
        self.conv3 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')
        # [route]
        # layers = -2
        self.conv4 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')

        self.conv5 = Conv_Bn_Activation(64, 32, 1, 1, 'mish')
        self.conv6 = Conv_Bn_Activation(32, 64, 3, 1, 'mish')
        # [shortcut]
        # from=-3
        # activation = linear

        self.conv7 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')
        # [route]
        # layers = -1, -7
        self.conv8 = Conv_Bn_Activation(128, 64, 1, 1, 'mish')

    def forward(self, input):
        x1 = self.conv1(input)
        x2 = self.conv2(x1)
        x3 = self.conv3(x2)
        # route -2
        x4 = self.conv4(x2)
        x5 = self.conv5(x4)
        x6 = self.conv6(x5)
        # shortcut -3
        x6 = x6 + x4

        x7 = self.conv7(x6)
        # [route]
        # layers = -1, -7
        x7 = torch.cat([x7, x3], dim=1)
        x8 = self.conv8(x7)
        return x8

class DownSample2(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv_Bn_Activation(64, 128, 3, 2, 'mish')
        self.conv2 = Conv_Bn_Activation(128, 64, 1, 1, 'mish')
        # route -2
        self.conv3 = Conv_Bn_Activation(128, 64, 1, 1, 'mish')

        self.resblock = ResBlock(ch=64, nblocks=2)

        # shortcut -3
        self.conv4 = Conv_Bn_Activation(64, 64, 1, 1, 'mish')
        # route -1, -10
        self.conv5 = Conv_Bn_Activation(128, 128, 1, 1, 'mish')

    def forward(self, input):
        x1 = self.conv1(input)
        x2 = self.conv2(x1)
        x3 = self.conv3(x1)

        r = self.resblock(x3)
        x4 = self.conv4(r)

        x4 = torch.cat([x4, x2], dim=1)
        x5 = self.conv5(x4)
        return x5

class DownSample3(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv_Bn_Activation(128, 256, 3, 2, 'mish')
        self.conv2 = Conv_Bn_Activation(256, 128, 1, 1, 'mish')
        self.conv3 = Conv_Bn_Activation(256, 128, 1, 1, 'mish')

        self.resblock = ResBlock(ch=128, nblocks=8)
        self.conv4 = Conv_Bn_Activation(128, 128, 1, 1, 'mish')
        self.conv5 = Conv_Bn_Activation(256, 256, 1, 1, 'mish')

    def forward(self, input):
        x1 = self.conv1(input)
        x2 = self.conv2(x1)
        x3 = self.conv3(x1)

        r = self.resblock(x3)
        x4 = self.conv4(r)

        x4 = torch.cat([x4, x2], dim=1)
        x5 = self.conv5(x4)
        return x5

class DownSample4(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv_Bn_Activation(256, 512, 3, 2, 'mish')
        self.conv2 = Conv_Bn_Activation(512, 256, 1, 1, 'mish')
        self.conv3 = Conv_Bn_Activation(512, 256, 1, 1, 'mish')

        self.resblock = ResBlock(ch=256, nblocks=8)
        self.conv4 = Conv_Bn_Activation(256, 256, 1, 1, 'mish')
        self.conv5 = Conv_Bn_Activation(512, 512, 1, 1, 'mish')

    def forward(self, input):
        x1 = self.conv1(input)
        x2 = self.conv2(x1)
        x3 = self.conv3(x1)

        r = self.resblock(x3)
        x4 = self.conv4(r)

        x4 = torch.cat([x4, x2], dim=1)
        x5 = self.conv5(x4)
        return x5

class DownSample5(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv_Bn_Activation(512, 1024, 3, 2, 'mish')
        self.conv2 = Conv_Bn_Activation(1024, 512, 1, 1, 'mish')
        self.conv3 = Conv_Bn_Activation(1024, 512, 1, 1, 'mish')

        self.resblock = ResBlock(ch=512, nblocks=4)
        self.conv4 = Conv_Bn_Activation(512, 512, 1, 1, 'mish')
        self.conv5 = Conv_Bn_Activation(1024, 1024, 1, 1, 'mish')

    def forward(self, input):
        x1 = self.conv1(input)
        x2 = self.conv2(x1)
        x3 = self.conv3(x1)

        r = self.resblock(x3)
        x4 = self.conv4(r)

        x4 = torch.cat([x4, x2], dim=1)
        x5 = self.conv5(x4)
        return x5

class Neck(nn.Module):
    def __init__(self, inference=False):
        super().__init__()
        self.inference = inference

        self.conv1 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
        self.conv2 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
        self.conv3 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
        # SPP
        self.maxpool1 = nn.MaxPool2d(kernel_size=5, stride=1, padding=5 // 2)
        self.maxpool2 = nn.MaxPool2d(kernel_size=9, stride=1, padding=9 // 2)
        self.maxpool3 = nn.MaxPool2d(kernel_size=13, stride=1, padding=13 // 2)

        # R -1 -3 -5 -6
        # SPP
        self.conv4 = Conv_Bn_Activation(2048, 512, 1, 1, 'leaky')
        self.conv5 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
        self.conv6 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
        self.conv7 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
        # UP
        self.upsample1 = Upsample()
        # R 85
        self.conv8 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
        # R -1 -3
        self.conv9 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
        self.conv10 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
        self.conv11 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
        self.conv12 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
        self.conv13 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
        self.conv14 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
        # UP
        self.upsample2 = Upsample()
        # R 54
        self.conv15 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
        # R -1 -3
        self.conv16 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
        self.conv17 = Conv_Bn_Activation(128, 256, 3, 1, 'leaky')
        self.conv18 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')
        self.conv19 = Conv_Bn_Activation(128, 256, 3, 1, 'leaky')
        self.conv20 = Conv_Bn_Activation(256, 128, 1, 1, 'leaky')

    def forward(self, input, downsample4, downsample3, inference=False):
        x1 = self.conv1(input)
        x2 = self.conv2(x1)
        x3 = self.conv3(x2)
        # SPP
        m1 = self.maxpool1(x3)
        m2 = self.maxpool2(x3)
        m3 = self.maxpool3(x3)
        spp = torch.cat([m3, m2, m1, x3], dim=1)
        # SPP end
        x4 = self.conv4(spp)
        x5 = self.conv5(x4)
        x6 = self.conv6(x5)
        x7 = self.conv7(x6)
        # UP
        up = self.upsample1(x7, downsample4.size(), self.inference)
        # R 85
        x8 = self.conv8(downsample4)
        # R -1 -3
        x8 = torch.cat([x8, up], dim=1)

        x9 = self.conv9(x8)
        x10 = self.conv10(x9)
        x11 = self.conv11(x10)
        x12 = self.conv12(x11)
        x13 = self.conv13(x12)
        x14 = self.conv14(x13)

        # UP
        up = self.upsample2(x14, downsample3.size(), self.inference)
        # R 54
        x15 = self.conv15(downsample3)
        # R -1 -3
        x15 = torch.cat([x15, up], dim=1)

        x16 = self.conv16(x15)
        x17 = self.conv17(x16)
        x18 = self.conv18(x17)
        x19 = self.conv19(x18)
        x20 = self.conv20(x19)
        return x20, x13, x6
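
# For a 512x512 network input (an illustrative assumption), the three Neck outputs are:
#   x20: 128 channels at stride 8  (64x64)  -> finest detection scale
#   x13: 256 channels at stride 16 (32x32)  -> middle detection scale
#   x6 : 512 channels at stride 32 (16x16)  -> coarsest detection scale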

class Yolov4Head(nn.Module):
    def __init__(self, output_ch, n_classes, inference=False):
        super().__init__()
        self.inference = inference

        self.conv1 = Conv_Bn_Activation(128, 256, 3, 1, 'leaky')
        self.conv2 = Conv_Bn_Activation(256, output_ch, 1, 1, 'linear', bn=False, bias=True)

        self.yolo1 = YoloLayer(
                                anchor_mask=[0, 1, 2], num_classes=n_classes,
                                anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
                                num_anchors=9, stride=8)

        # R -4
        self.conv3 = Conv_Bn_Activation(128, 256, 3, 2, 'leaky')

        # R -1 -16
        self.conv4 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
        self.conv5 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
        self.conv6 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
        self.conv7 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
        self.conv8 = Conv_Bn_Activation(512, 256, 1, 1, 'leaky')
        self.conv9 = Conv_Bn_Activation(256, 512, 3, 1, 'leaky')
        self.conv10 = Conv_Bn_Activation(512, output_ch, 1, 1, 'linear', bn=False, bias=True)

        self.yolo2 = YoloLayer(
                                anchor_mask=[3, 4, 5], num_classes=n_classes,
                                anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
                                num_anchors=9, stride=16)

        # R -4
        self.conv11 = Conv_Bn_Activation(256, 512, 3, 2, 'leaky')

        # R -1 -37
        self.conv12 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
        self.conv13 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
        self.conv14 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
        self.conv15 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
        self.conv16 = Conv_Bn_Activation(1024, 512, 1, 1, 'leaky')
        self.conv17 = Conv_Bn_Activation(512, 1024, 3, 1, 'leaky')
        self.conv18 = Conv_Bn_Activation(1024, output_ch, 1, 1, 'linear', bn=False, bias=True)

        self.yolo3 = YoloLayer(
                                anchor_mask=[6, 7, 8], num_classes=n_classes,
                                anchors=[12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
                                num_anchors=9, stride=32)

    def forward(self, input1, input2, input3):
        x1 = self.conv1(input1)
        x2 = self.conv2(x1)

        x3 = self.conv3(input1)
        # R -1 -16
        x3 = torch.cat([x3, input2], dim=1)
        x4 = self.conv4(x3)
        x5 = self.conv5(x4)
        x6 = self.conv6(x5)
        x7 = self.conv7(x6)
        x8 = self.conv8(x7)
        x9 = self.conv9(x8)
        x10 = self.conv10(x9)

        # R -4
        x11 = self.conv11(x8)
        # R -1 -37
        x11 = torch.cat([x11, input3], dim=1)

        x12 = self.conv12(x11)
        x13 = self.conv13(x12)
        x14 = self.conv14(x13)
        x15 = self.conv15(x14)
        x16 = self.conv16(x15)
        x17 = self.conv17(x16)
        x18 = self.conv18(x17)

        if self.inference:
            y1 = self.yolo1(x2)
            y2 = self.yolo2(x10)
            y3 = self.yolo3(x18)

            return get_region_boxes([y1, y2, y3])

        else:
            return [x2, x10, x18]

class Yolov4(nn.Module):
    def __init__(self, yolov4conv137weight=None, n_classes=80, inference=False):
        super().__init__()

        output_ch = (4 + 1 + n_classes) * 3

        # backbone
        self.down1 = DownSample1()
        self.down2 = DownSample2()
        self.down3 = DownSample3()
        self.down4 = DownSample4()
        self.down5 = DownSample5()
        # CBAM attention on the deepest (1024-channel) backbone feature map
        self.cbam = CBAM()
        # neck
        self.neck = Neck(inference)
        # optionally initialise backbone + neck from a yolov4.conv.137 checkpoint
        if yolov4conv137weight:
            _model = nn.Sequential(self.down1, self.down2, self.down3, self.down4, self.down5, self.cbam, self.neck)
            pretrained_dict = torch.load(yolov4conv137weight)

            model_dict = _model.state_dict()
            # 1. map the pretrained weights onto this model's keys by position
            #    (note: with the extra CBAM parameters in _model, this positional mapping
            #    no longer lines up exactly with the original yolov4.conv.137 weights)
            pretrained_dict = {k1: v for (k, v), k1 in zip(pretrained_dict.items(), model_dict)}
            # 2. overwrite entries in the existing state dict
            model_dict.update(pretrained_dict)
            _model.load_state_dict(model_dict)

        # head
        self.head = Yolov4Head(output_ch, n_classes, inference)

    def forward(self, input):
        d1 = self.down1(input)
        d2 = self.down2(d1)
        d3 = self.down3(d2)
        d4 = self.down4(d3)
        d5 = self.down5(d4)
        # apply CBAM before the SPP/PAN neck
        d6 = self.cbam(d5)
        x20, x13, x6 = self.neck(d6, d4, d3)

        output = self.head(x20, x13, x6)
        return output
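
# Worked example for the output channel count: each of the 3 anchors per scale predicts
# 4 box offsets + 1 objectness score + n_classes class scores, so
#   n_classes = 80  ->  output_ch = (4 + 1 + 80) * 3 = 255
#   n_classes = 3   ->  output_ch = (4 + 1 + 3) * 3 = 24   (used by the smoke test below)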

if __name__ == "__main__":
    import sys
    import cv2

    # namesfile = None
    # if len(sys.argv) == 6:
    #     n_classes = int(sys.argv[1])
    #     weightfile = sys.argv[2]
    #     imgfile = sys.argv[3]
    #     height = int(sys.argv[4])
    #     width = int(sys.argv[5])
    # elif len(sys.argv) == 7:
    #     n_classes = int(sys.argv[1])
    #     weightfile = sys.argv[2]
    #     imgfile = sys.argv[3]
    #     height = int(sys.argv[4])
    #     width = int(sys.argv[5])
    #     namesfile = sys.argv[6]
    # else:
    #     print('Usage: ')
    #     print('  python models.py num_classes weightfile imgfile namefile')

    # quick smoke test: build the model and run a random 512x512 image through it
    x = torch.rand(1, 3, 512, 512)
    model = Yolov4(yolov4conv137weight=None, n_classes=3, inference=False)
    y = model(x)
    print(model)
    for i in range(len(y)):
        print(y[i].shape)
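
    # With n_classes=3 (output_ch = 24) and a 512x512 input, the loop above should print
    # the three training-mode head outputs at strides 8 / 16 / 32, assuming the model
    # builds as defined here:
    #   torch.Size([1, 24, 64, 64])
    #   torch.Size([1, 24, 32, 32])
    #   torch.Size([1, 24, 16, 16])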

# pretrained_dict = torch.load(weightfile, map_location=torch.device('cuda'))
# model.load_state_dict(pretrained_dict)

# use_cuda = True
# if use_cuda:
#     model.cuda()

# img = cv2.imread(imgfile)

# # The inference input size of 416*416 does not have to match the training size
# # (training could be 608*608 or other sizes)
# # Optional inference sizes:
# #   Height in {320, 416, 512, 608, ... 320 + 96 * n}
# #   Width  in {320, 416, 512, 608, ... 320 + 96 * m}
# sized = cv2.resize(img, (width, height))
# sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB)

# from tool.utils import load_class_names, plot_boxes_cv2
# from tool.torch_utils import do_detect

# for i in range(2):  # This 'for' loop is for speed check
#                     # Because the first iteration is usually longer
#     boxes = do_detect(model, sized, 0.4, 0.6, use_cuda)

# if namesfile == None:
#     if n_classes == 20:
#         namesfile = 'data/voc.names'
#     elif n_classes == 80:
#         namesfile = 'data/coco.names'
#     else:
#         print("please give namefile")

# class_names = load_class_names(namesfile)
# plot_boxes_cv2(img, boxes[0], 'predictions.jpg', class_names)

Source: https://blog.csdn.net/qq_38102943/article/details/122537312
