yolov5中head修改为decouple head详解

2022-06-30 08:33:13 阅读：255 来源： 互联网

标签：head conv nn self yolov5 inplace grid decouple

现成的YOLOv5代码真的很香,不管口碑怎么样,我用着反正是挺爽的,下面这篇文章主要给大家介绍了关于yolov5中head修改为decouple head的相关资料,需要的朋友可以参考下

yolox的decoupled head结构

本来想将yolov5的head修改为decoupled head，与yolox的decoupled head对齐，但是没注意，改成了如下结构：

感谢少年肩上杨柳依依的指出，如还有问题欢迎指出

1.修改models下的yolo.py文件中的Detect

class Detect(nn.Module):
    """YOLOv5 detection layer with a YOLOX-style decoupled head.

    Every input feature map goes through a 1x1 stem (``base_conv``) and then
    splits into two 3x3 branches.  The regression branch feeds the box conv
    and the objectness conv; the classification branch feeds the class conv.
    The three outputs are merged *per anchor* as ``(box[4], conf[1], cls[nc])``
    so the last dimension has the layout the decode step in ``forward`` uses.

    Args:
        nc: number of classes.
        anchors: one flat ``(w0, h0, w1, h1, ...)`` sequence per detection layer.
        ch: input channel count of each detection layer.
        inplace: decode boxes with in-place slice assignment when True.
        hidden_ch: channel width of the decoupled branches.  ``None`` keeps
            each level at its own input width (no channel compression).

    NOTE: ``forward`` calls ``self._make_grid`` and indexes ``self.stride`` in
    inference mode; neither is set up in this block, so both must be provided
    by the surrounding code.
    NOTE(review): this head has no ``self.m`` attribute (the single output conv
    is replaced by ``m_box`` / ``m_conf`` / ``m_labels``); any code outside this
    block that reads ``Detect.m`` needs to be adapted - confirm.
    NOTE: earlier revisions concatenated the three branch outputs flat on the
    channel axis before splitting it per anchor, which interleaved the branches
    across anchors.  Weights trained with that layout do not map onto this one.
    """

    stride = None  # strides computed during build
    onnx_dynamic = False  # ONNX export parameter

    def __init__(self, nc=80, anchors=(), ch=(), inplace=True, hidden_ch=256):
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.zeros(1)] * self.nl  # init grid
        self.anchor_grid = [torch.zeros(1)] * self.nl  # init anchor grid
        self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2))  # shape(nl,na,2)

        # Branch width for every detection layer: a shared width, or the
        # layer's own input width when hidden_ch is None.
        widths = [c if hidden_ch is None else hidden_ch for c in ch]

        # 1x1 prediction convs, one set per detection layer.
        self.m_box = nn.ModuleList(nn.Conv2d(w, 4 * self.na, 1) for w in widths)
        self.m_conf = nn.ModuleList(nn.Conv2d(w, 1 * self.na, 1) for w in widths)
        self.m_labels = nn.ModuleList(nn.Conv2d(w, self.nc * self.na, 1) for w in widths)

        # Stem (1x1) followed by the two decoupled 3x3 branches.
        self.base_conv = nn.ModuleList(
            BaseConv(in_channels=c, out_channels=w, ksize=1, stride=1) for c, w in zip(ch, widths)
        )
        self.cls_convs = nn.ModuleList(
            BaseConv(in_channels=w, out_channels=w, ksize=3, stride=1) for w in widths
        )
        self.reg_convs = nn.ModuleList(
            BaseConv(in_channels=w, out_channels=w, ksize=3, stride=1) for w in widths
        )

        self.inplace = inplace  # use in-place ops (e.g. slice assignment)
        self.ch = ch

    def forward(self, x):
        """Run the head on a list of feature maps (one per detection layer).

        ``x`` is modified in place: each entry is replaced by its raw
        prediction tensor of shape ``(bs, na, ny, nx, no)``.

        Returns:
            ``x`` in training mode; otherwise ``(torch.cat(z, 1), x)`` where
            ``z`` holds the decoded predictions flattened to ``(bs, -1, no)``.
        """
        z = []  # inference output
        for i in range(self.nl):
            x_feature = self.base_conv[i](x[i])
            cls_feature = self.cls_convs[i](x_feature)
            reg_feature = self.reg_convs[i](x_feature)

            m_box = self.m_box[i](reg_feature)
            m_conf = self.m_conf[i](reg_feature)
            m_labels = self.m_labels[i](cls_feature)

            bs, _, ny, nx = m_box.shape
            # Split every branch per anchor BEFORE concatenating, so each
            # anchor's `no` slots are exactly (box, conf, cls).  Concatenating
            # flat on the channel axis and splitting afterwards would hand
            # anchor 0 a mix of all branches and the other anchors mostly
            # class-branch channels.
            x[i] = torch.cat(
                (
                    m_box.view(bs, self.na, 4, ny, nx),
                    m_conf.view(bs, self.na, 1, ny, nx),
                    m_labels.view(bs, self.na, self.nc, ny, nx),
                ),
                2,
            ).permute(0, 1, 3, 4, 2).contiguous()  # x(bs,na,no,ny,nx) to x(bs,na,ny,nx,no)

            if not self.training:  # inference
                if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                y = x[i].sigmoid()
                if self.inplace:
                    y[..., 0:2] = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
                    xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, y[..., 4:]), -1)
                z.append(y.view(bs, -1, self.no))

        return x if self.training else (torch.cat(z, 1), x)

2.在yolo.py中添加

def get_activation(name="silu", inplace=True):
    """Return a new activation module chosen by ``name``.

    Supported names are ``"silu"``, ``"relu"`` and ``"lrelu"`` (LeakyReLU with
    a negative slope of 0.1).  ``inplace`` is forwarded to the module.

    Raises:
        AttributeError: if ``name`` is not one of the supported names.
    """
    if name == "silu":
        return nn.SiLU(inplace=inplace)
    if name == "relu":
        return nn.ReLU(inplace=inplace)
    if name == "lrelu":
        return nn.LeakyReLU(0.1, inplace=inplace)
    raise AttributeError("Unsupported act type: {}".format(name))


class BaseConv(nn.Module):
    """A Conv2d -> BatchNorm2d -> activation block.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels produced by the convolution.
        ksize: kernel size; padding is derived from it as ``(ksize - 1) // 2``.
        stride: convolution stride.
        groups: convolution groups.
        bias: whether the convolution has a bias term.
        act: activation name understood by ``get_activation``.
    """

    def __init__(self, in_channels, out_channels, ksize, stride, groups=1, bias=False, act="silu"):
        super().__init__()
        same_pad = (ksize - 1) // 2  # "same" padding for odd kernel sizes
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            ksize,
            stride,
            same_pad,
            groups=groups,
            bias=bias,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.act = get_activation(act, inplace=True)

    def forward(self, x):
        """Apply convolution, batch norm and activation, in that order."""
        normalized = self.bn(self.conv(x))
        return self.act(normalized)

    def fuseforward(self, x):
        """Apply convolution and activation only, skipping ``self.bn``.

        Presumably meant for use after batch norm has been folded into the
        convolution weights - confirm against the caller.
        """
        convolved = self.conv(x)
        return self.act(convolved)

decouple head的特点：

训练模型时，本应为 channels = 256 的地方被误写成了 channels = x（失误），因此 decoupled head 部分的参数量比 yolox 要大一些；以下结果均是在 channels = x 的情况下得出的。

比yolov5s参数多，计算量大，在我自己的2.5万的数据量下map提升了3%多

1.模型给出的目标cls较高，需要将conf的阈值设置较大（0.5），不然准确率较低

`parser.add_argument('--conf-thres', type=float, default=0.5, help='confidence threshold')`

2.对于少样本的检测效果较好，召回率的提升比准确率多

3.在conf设置为0.25时，召回率比yolov5s高，但是准确率低；在conf设置为0.5时，召回率与准确率比yolov5s高

4.比yolov5s参数多，计算量大，在2.5万的数据量下map提升了3%多

对于decouple head的改进

改进：

1.将红色框中的conv去掉，缩小参数量和计算量；

2.channels =256 ，512 ，1024是考虑不增加参数，不进行featuremap的信息压缩

class Detect(nn.Module):
    """Anchor-based detection layer using one 1x1 output conv per feature map.

    Args:
        nc: number of classes.
        anchors: one flat ``(w0, h0, w1, h1, ...)`` sequence per detection layer.
        ch: input channel count of each detection layer.
        inplace: decode boxes with in-place slice assignment when True.

    NOTE: in inference mode ``forward`` calls ``self._make_grid`` and indexes
    ``self.stride``; neither is set up in this block, so both must be provided
    by the surrounding code.
    """

    stride = None  # strides computed during build
    onnx_dynamic = False  # ONNX export parameter

    def __init__(self, nc=80, anchors=(), ch=(), inplace=True):  # detection layer
        super().__init__()
        layer_count = len(anchors)
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = layer_count  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        # Placeholders; replaced on the first inference pass.
        self.grid = [torch.zeros(1)] * layer_count
        self.anchor_grid = [torch.zeros(1)] * layer_count
        anchor_table = torch.tensor(anchors).float().view(layer_count, -1, 2)  # shape(nl,na,2)
        self.register_buffer('anchors', anchor_table)
        outputs_per_cell = self.no * self.na
        self.m = nn.ModuleList(nn.Conv2d(c_in, outputs_per_cell, 1) for c_in in ch)  # output conv
        self.inplace = inplace  # use in-place ops (e.g. slice assignment)

    def forward(self, x):
        """Run the head on a list of feature maps (one per detection layer).

        ``x`` is modified in place: each entry is replaced by its raw
        prediction tensor of shape ``(bs, na, ny, nx, no)``.

        Returns:
            ``x`` in training mode; otherwise ``(torch.cat(z, 1), x)`` where
            ``z`` holds the decoded predictions flattened to ``(bs, -1, no)``.
        """
        decoded = []  # inference output
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if self.training:
                continue  # nothing is decoded while training

            # Rebuild the cached grids when exporting dynamically or when the
            # feature-map size differs from the cached one.
            if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

            pred = x[i].sigmoid()
            if self.inplace:
                pred[..., 0:2] = (pred[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                pred[..., 2:4] = (pred[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
            else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
                xy = (pred[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                wh = (pred[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                pred = torch.cat((xy, wh, pred[..., 4:]), -1)
            decoded.append(pred.view(bs, -1, self.no))

        if self.training:
            return x
        return torch.cat(decoded, 1), x

特点

1.模型给出的目标cls较高，需要将conf的阈值设置较大（0.4），不然准确率较低

2.对于少样本的检测效果较好，准确率的提升比召回率多

3. 准确率的提升比召回率多，

该改进不如上面的模型提升多，但是参数量小，计算量少9 GFLOPs，占用显存少。

decoupled head指标提升的原因：yolov5s原本的head不能完全地提取featuremap中的信息，而decoupled head能够较为充分地提取featuremap的信息。

疑问

为什么decoupled head目标的cls会比较高，没想明白

为什么去掉base_conv，召回率要比准确率提升少

原文链接：https://blog.csdn.net/qq_34496674/article/details/124828868

标签：head,conv,nn,self,yolov5,inplace,grid,decouple
来源： https://www.cnblogs.com/chentiao/p/16425546.html

本站声明： 1. iCode9 技术分享网（下文简称本站）提供的所有内容，仅供技术学习、探讨和分享；
2. 关于本站的所有留言、评论、转载及引用，纯属内容发起人的个人观点，与本站观点和立场无关；
3. 关于本站的所有言论和文字，纯属内容发起人的个人观点，与本站观点和立场无关；
4. 本站文章均是网友提供，不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属；如您发现该文章侵犯了您的权益，可联系我们第一时间进行删除；
5. 本站为非盈利性的个人网站，所有内容不会用来进行牟利，也不会利用任何形式的广告来间接获益，纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

ICode9

yolov5中head修改为decouple head详解

目录

yolox的decoupled head结构

对于decouple head的改进

特点

疑问