上一篇讲了怎么训练yolov8,
训练yolov8
但是如果只满足于此,我们就真的只是调参侠了。。。
所以,为了更深入地理解大神的想法,也为了提升我们自己的代码能力、方便以后对深度学习模型做改造,我觉得应该把代码分解一下,这样可以更好地“食用”。
一般我们说的网络就是指前向网络;网络怎么反向训练,我们一般不说,因为这个过程通常是由框架自动完成的。但是训练不仅包括网络,还包括数据和loss,所以我们把yolov8分成3部分,第一部分就是前向网络。前向网络又可以分成3个模块:backbone、neck、head。
假设我们的输入是一个512*640的图片,数据集只有一个class。
一些yolov8用到的模块,后面我们也可以增加自己的模块
# encoding=utf-8
"""Basic convolutional building blocks used by the YOLOv8 backbone, neck and head."""
import math

import torch
import torch.nn as nn

try:
    from utils.module_utils import autopad
except ImportError:
    # NOTE(review): fallback used only when the project helper cannot be
    # imported. It follows the usual YOLO "same" padding rule and is assumed
    # to match utils.module_utils.autopad -- confirm against the project.
    def autopad(k, p=None, d=1):
        """Return `p` if given, otherwise half of the (dilated) kernel size."""
        if d > 1:
            k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]
        if p is None:
            p = k // 2 if isinstance(k, int) else [x // 2 for x in k]
        return p


class CBA(nn.Module):
    """Conv2d (no bias) -> BatchNorm2d -> activation.

    Args:
        input_channel: number of input channels.
        output_channel: number of output channels.
        k: kernel size (int or pair).
        s: stride.
        p: padding; when None it is computed by `autopad`.
        g: convolution groups.
        d: dilation.
        act: True for the default SiLU, an `nn.Module` to use that module,
            anything else for no activation.
    """

    default_act = nn.SiLU()

    def __init__(self, input_channel, output_channel, k=1, s=1, p=None, g=1, d=1, act=True):
        super().__init__()
        self.conv = nn.Conv2d(
            input_channel, output_channel, k, s, autopad(k, p, d),
            groups=g, dilation=d, bias=False,
        )
        self.bn = nn.BatchNorm2d(output_channel)
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))


class DWCBA(CBA):
    """CBA whose group count is gcd(input_channel, output_channel)."""

    def __init__(self, input_channel, output_channel, k=1, s=1, d=1, act=True):
        super().__init__(
            input_channel, output_channel, k, s,
            g=math.gcd(input_channel, output_channel), d=d, act=act,
        )


class Bottleneck(nn.Module):
    """Two stacked CBA layers with an optional residual connection.

    The residual is added only when `shortcut` is true and the input and
    output channel counts are equal.
    """

    def __init__(self, input_channel, output_channel, shortcut=True, g=1, k=(3, 3), e=0.5):
        super().__init__()
        c_ = int(output_channel * e)  # hidden channels
        self.cv1 = CBA(input_channel, c_, k[0], 1)
        self.cv2 = CBA(c_, output_channel, k[1], 1, g=g)
        self.add = shortcut and input_channel == output_channel

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class BottleneckCSP(nn.Module):
    """CSP block: a bottleneck branch and a 1x1 conv branch, concatenated and fused."""

    def __init__(self, input_channel, output_channel, n=1, shortcut=True, g=1, e=0.5):
        super().__init__()
        c_ = int(output_channel * e)  # hidden channels
        self.cv1 = CBA(input_channel, c_, 1, 1)
        self.cv2 = nn.Conv2d(input_channel, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = CBA(2 * c_, output_channel, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)
        self.act = nn.SiLU()
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        # torch.cat takes a sequence of tensors plus the dim; the two branches
        # are joined along the channel dimension before BN and activation.
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))


class C3(nn.Module):
    """CSP block with three CBA layers and `n` bottlenecks on one branch."""

    def __init__(self, input_channel, output_channel, n=1, shortcut=True, g=1, e=0.5):
        super().__init__()
        c_ = int(output_channel * e)  # hidden channels
        self.cv1 = CBA(input_channel, c_, 1, 1)
        self.cv2 = CBA(input_channel, c_, 1, 1)
        self.cv3 = CBA(2 * c_, output_channel, 1)
        self.m = nn.Sequential(
            *(Bottleneck(c_, c_, shortcut, g, k=((1, 1), (3, 3)), e=1.0) for _ in range(n))
        )

    def forward(self, x):
        # Concatenate both branches on the channel dimension, then fuse.
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))


class C2(nn.Module):
    """Split the 1x1 conv output in two halves; only one half goes through the bottlenecks."""

    def __init__(self, input_channel, output_channel, n=1, shortcut=True, g=1, e=0.5):
        super().__init__()
        self.c = int(output_channel * e)  # channels of each half
        self.cv1 = CBA(input_channel, 2 * self.c, 1, 1)
        self.cv2 = CBA(2 * self.c, output_channel, 1)
        self.m = nn.Sequential(
            *(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
        )

    def forward(self, x):
        a, b = self.cv1(x).split((self.c, self.c), 1)
        return self.cv2(torch.cat((self.m(a), b), 1))


class C2f(nn.Module):
    """Like C2, but the output of every bottleneck is kept and concatenated."""

    def __init__(self, input_channel, output_channel, n=1, shortcut=True, g=1, e=0.5):
        super().__init__()
        self.c = int(output_channel * e)  # channels of each chunk
        self.cv1 = CBA(input_channel, 2 * self.c, 1, 1)
        # Two split halves plus one chunk per bottleneck are concatenated.
        self.cv2 = CBA((2 + n) * self.c, output_channel, 1)
        self.m = nn.ModuleList(
            Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)
        )

    def forward(self, x):
        y = list(self.cv1(x).split((self.c, self.c), 1))
        # Each bottleneck consumes the most recently produced chunk.
        y.extend(m(y[-1]) for m in self.m)
        return self.cv2(torch.cat(y, 1))


class C1(nn.Module):
    """1x1 CBA followed by `n` 3x3 CBA layers, added back to the 1x1 output."""

    def __init__(self, input_channel, output_channel, n=1):
        super().__init__()
        self.cv1 = CBA(input_channel, output_channel, 1, 1)
        self.m = nn.Sequential(*(CBA(output_channel, output_channel, 3) for _ in range(n)))

    def forward(self, x):
        y = self.cv1(x)
        return self.m(y) + y


class C3x(C3):
    """C3 variant whose bottlenecks use (1, 3) and (3, 1) kernels."""

    def __init__(self, input_channel, output_channel, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(input_channel, output_channel, n, shortcut, g, e)
        self.c_ = int(output_channel * e)
        self.m = nn.Sequential(
            *(Bottleneck(self.c_, self.c_, shortcut, g, k=((1, 3), (3, 1)), e=1) for _ in range(n))
        )


class C3Ghost(C3):
    """C3 variant whose inner blocks are GhostBottleneck modules."""

    def __init__(self, input_channel, output_channel, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(input_channel, output_channel, n, shortcut, g, e)
        c_ = int(output_channel * e)
        self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))


class GhostCBA(nn.Module):
    """Ghost convolution: half of the output channels come from a regular CBA,
    the other half from a 5x5 CBA with groups equal to its channel count,
    applied to the first half."""

    def __init__(self, input_channel, output_channel, k=1, s=1, g=1, act=True):
        super().__init__()
        c_ = output_channel // 2
        # `act` must be passed by keyword: the 7th positional parameter of CBA
        # is the dilation `d`, not the activation.
        self.cv1 = CBA(input_channel, c_, k, s, None, g, act=act)
        self.cv2 = CBA(c_, c_, 5, 1, None, c_, act=act)

    def forward(self, x):
        y = self.cv1(x)
        return torch.cat((y, self.cv2(y)), 1)


class GhostBottleneck(nn.Module):
    """Bottleneck built from GhostCBA layers.

    With `s == 2` a depthwise CBA is inserted in the main path and the
    shortcut becomes DWCBA + 1x1 CBA; otherwise both are identities.
    """

    def __init__(self, input_channel, output_channel, k=3, s=1):
        super().__init__()
        c_ = output_channel // 2
        self.conv = nn.Sequential(
            GhostCBA(input_channel, c_, 1, 1),
            DWCBA(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),
            GhostCBA(c_, output_channel, 1, 1, act=False),
        )
        self.shortcut = nn.Sequential(
            DWCBA(input_channel, input_channel, k, s, act=False),
            CBA(input_channel, output_channel, 1, 1, act=False),
        ) if s == 2 else nn.Identity()

    def forward(self, x):
        return self.conv(x) + self.shortcut(x)
代码参考了paddleyolo
# encoding=utf-8
"""YOLOv8 CSPDarkNet backbone."""
import torch
import torch.nn as nn

from backbone.basemodules import CBA, C2f
from enhance.other import SPPCSPC


class YOLOv8CSPDarkNet(nn.Module):
    """Stem plus four stride-2 stages (CBA -> C2f), returning selected stage outputs.

    Args:
        return_idx: indices of the feature levels to return. Index 0 is the
            stem output and index ``i`` (1..4) is the output of stage ``i``.

    Attributes:
        _output_channels: channel count of every returned level.
        strides: entry of ``[2, 4, 8, 16, 32, 64]`` for every returned level.
    """

    def __init__(self, return_idx=(2, 3, 4)):
        super().__init__()
        # Copy so that neither a caller's list nor the default is shared.
        self.return_idx = list(return_idx)

        # (input_channel, output_channel, num_blocks, shortcut, use_sppf)
        arch_setting = [
            [64, 128, 3, True, False],
            [128, 256, 6, True, False],
            [256, 512, 6, True, False],
            [512, 1024, 3, True, True],
        ]
        base_channels = arch_setting[0][0]
        self.stem = CBA(3, base_channels, k=3, s=2)

        _output_channels = [base_channels]
        # nn.ModuleList (not a plain list) so the stages are registered as
        # sub-modules: their parameters then show up in .parameters(),
        # .state_dict() and follow .to(device).
        self.csp_dark_blocks = nn.ModuleList()
        for input_channel, output_channel, num_blocks, shortcut, use_sppf in arch_setting:
            _output_channels.append(output_channel)
            stage = [
                CBA(input_channel, output_channel, 3, 2),
                C2f(output_channel, output_channel, num_blocks, shortcut),
            ]
            if use_sppf:
                # The flag is named "sppf" but the module appended is SPPCSPC.
                stage.append(SPPCSPC(output_channel, output_channel))
            self.csp_dark_blocks.append(nn.Sequential(*stage))

        self._output_channels = [_output_channels[i] for i in self.return_idx]
        self.strides = [[2, 4, 8, 16, 32, 64][i] for i in self.return_idx]

    def forward(self, x):
        """Run the backbone and return the list of selected stage outputs.

        Only stage outputs (indices 1..4) are collected here; the stem output
        is never appended even if 0 is listed in ``return_idx``.
        """
        outputs = []
        x = self.stem(x)
        for i, layer in enumerate(self.csp_dark_blocks):
            x = layer(x)
            if i + 1 in self.return_idx:
                outputs.append(x)
        return outputs


if __name__ == "__main__":
    # Smoke test: print the shape of the first returned feature map.
    dummy_input = torch.randn(1, 3, 512, 640)
    model = YOLOv8CSPDarkNet()
    output = model(dummy_input)
    print(output[0].shape)
neck模块,其实就是对backbone输出的多层特征做融合(先FPN自顶向下,再PAN自底向上)
# encoding=utf-8
"""YOLOv8 neck: FPN (top-down) followed by PAN (bottom-up), built from C2f blocks."""
import torch
import torch.nn as nn

from backbone.basemodules import C2f, CBA
from utils.module_utils import Concat


class YOLOV8C2FPAN(nn.Module):
    """Fuse three backbone feature maps with an FPN pass and a PAN pass.

    Args:
        n: number of bottlenecks inside every C2f block.
        input_chaneels: channel counts of the three inputs, ordered from the
            highest-resolution level to the lowest (spelling kept for
            backward compatibility with keyword callers).

    The three outputs have the same channel counts as the three inputs.
    """

    def __init__(self, n=3, input_chaneels=(256, 512, 1024)):
        super().__init__()
        # Independent copies: the two attributes no longer alias each other
        # or the argument that was passed in.
        channels = list(input_chaneels)
        self.input_channels = list(channels)
        self._output_channels = list(channels)

        self.concat = Concat(1)
        self.upsample = nn.Upsample(scale_factor=2, mode='nearest')

        # fpn
        self.fpn_p4 = C2f(int(channels[2] + channels[1]), channels[1], n)
        self.fpn_p3 = C2f(int(channels[1] + channels[0]), channels[0], n)

        # pan
        self.down_conv2 = CBA(channels[0], channels[0], k=3, s=2)
        self.pan_n3 = C2f(int(channels[0] + channels[1]), channels[1], n)
        self.down_conv1 = CBA(channels[1], channels[1], k=3, s=2)
        self.pan_n4 = C2f(int(channels[1] + channels[2]), channels[2], n)

    def forward(self, x):
        """Fuse ``[c3, c4, c5]`` and return three maps, highest resolution first.

        Each level is expected to have half the spatial size of the previous
        one, because levels are aligned with a x2 upsample / stride-2 conv.
        """
        c3, c4, c5 = x

        # fpn: propagate coarse features to the finer levels
        up_x1 = self.upsample(c5)
        f_out1 = self.fpn_p4(self.concat((up_x1, c4)))
        up_x2 = self.upsample(f_out1)
        f_out0 = self.fpn_p3(self.concat((up_x2, c3)))

        # pan: propagate the fused fine features back to the coarser levels
        down_x1 = self.down_conv2(f_out0)
        pan_out1 = self.pan_n3(self.concat((down_x1, f_out1)))
        down_x2 = self.down_conv1(pan_out1)
        pan_out0 = self.pan_n4(self.concat((down_x2, c5)))

        return [f_out0, pan_out1, pan_out0]


if __name__ == "__main__":
    # Smoke test with feature maps shaped like the backbone output for a
    # 512x640 input.
    c3 = torch.randn(1, 256, 64, 80)
    c4 = torch.randn(1, 512, 32, 40)
    c5 = torch.randn(1, 1024, 16, 20)
    m = YOLOV8C2FPAN()
    output = m([c3, c4, c5])
    print(output[0].shape)
    print(output[1].shape)
    print(output[2].shape)
yolov8的检测头模块,这里只写了训练的部分,推理部分下次补上
# encoding=utf-8
"""YOLOv8 detection head (raw outputs only; box decoding is not implemented)."""
import torch
import torch.nn as nn

from backbone.basemodules import CBA


class Detect(nn.Module):
    """Per-level box-distribution and class prediction branches.

    Args:
        nc: number of classes.
        ch: channel count of every input feature level; must not be empty
            because ``ch[0]`` sizes the hidden layers.

    Attributes:
        nl: number of feature levels.
        reg_max: number of bins per box side.
        no: channels per output map, ``nc + 4 * reg_max``.
        stride: zero tensor of length ``nl``; nothing in this class fills it.
    """

    def __init__(self, nc=1, ch=()):
        super().__init__()
        self.nc = nc
        self.nl = len(ch)
        # ch[0] // 16. The distances (l, r, t, d) divided by the stride are
        # expected to fall in [0, 16]; use a larger value for large images
        # containing large objects.
        self.reg_max = 16
        self.no = nc + self.reg_max * 4
        self.stride = torch.zeros(self.nl)

        c2 = max((16, ch[0] // 4, self.reg_max * 4))  # hidden width, box branch
        c3 = max(ch[0], self.nc)                      # hidden width, class branch

        # Both branches end in a plain 1x1 Conv2d so they emit unconstrained
        # logits. Ending the box branch with CBA would push them through
        # BatchNorm + SiLU, unlike the class branch.
        self.cv2 = nn.ModuleList(
            nn.Sequential(CBA(x, c2, 3), CBA(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1))
            for x in ch
        )
        self.cv3 = nn.ModuleList(
            nn.Sequential(CBA(x, c3, 3), CBA(c3, c3, 3), nn.Conv2d(c3, self.nc, 1))
            for x in ch
        )

    def forward(self, x):
        """Return one raw map per level: box channels followed by class channels.

        ``x`` is a sequence with at least ``nl`` feature maps; it is not
        modified. The same raw maps are returned in training and in eval
        mode, since inference-time decoding is not implemented here.
        """
        return [
            torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
            for i in range(self.nl)
        ]


if __name__ == "__main__":
    # Smoke test: neck followed by the head on backbone-shaped inputs.
    from necks.yolov8_pafpn import YOLOV8C2FPAN

    c3 = torch.randn(1, 256, 64, 80)
    c4 = torch.randn(1, 512, 32, 40)
    c5 = torch.randn(1, 1024, 16, 20)
    m = YOLOV8C2FPAN()
    m1 = Detect(ch=[256, 512, 1024])
    output = m([c3, c4, c5])
    output = m1(output)
    print(output[0].shape)
把前面的3个部分组合一下就是我们的前向推理网络了
class Yolov8(nn.Module):
    """Chain a backbone, a neck and a head into one forward network.

    Args:
        backbone: module applied to the input first.
        neck: module applied to the backbone output.
        head: module applied to the neck output; its result is returned.
    """

    def __init__(self, backbone, neck, head):
        super().__init__()
        self.backbone, self.neck, self.head = backbone, neck, head

    def forward(self, x):
        """Return ``head(neck(backbone(x)))``."""
        features = self.backbone(x)
        fused = self.neck(features)
        return self.head(fused)
if __name__ == "__main__":
    # Smoke test: assemble the three parts and push one random image through.
    # YOLOv8CSPDarkNet, YOLOV8C2FPAN, Detect and Yolov8 must already be in scope.
    backbone = YOLOv8CSPDarkNet()
    neck = YOLOV8C2FPAN()
    head = Detect(nc=1, ch=[256, 512, 1024])
    model = Yolov8(backbone, neck, head)
    # Named dummy_input so the builtin input() is not shadowed.
    dummy_input = torch.randn(1, 3, 512, 640)
    output = model(dummy_input)
    print(output[0].shape)