ICode9

精准搜索请尝试: 精确搜索
首页 > 其他分享> 文章详细

关于YOLOV4 dataloader的详细理解

2021-11-22 10:30:46  阅读:277  来源: 互联网

标签:box rand YOLOV4 int image dataloader 详细 np new


讲解之前
dataloader 程序通过对图片进行不同的调整来扩充数据量,同时对与之对应的目标框做相应的调整。

这里引用了 [Bubbliiiing](https://blog.csdn.net/weixin_44791964?spm=1001.2014.3001.5509) 关于 YOLOV4 讲解中的一部分代码,并进行了更改。YOLOV4

get_random_data


import cv2
import numpy as np
from PIL import Image
from torch.utils.data.dataset import Dataset

from utils import merge_bboxes


# Read the annotation list. Each line is later split on whitespace into an
# image path followed by comma-separated integer box records.
path = 'xml.txt'
with open(path) as f:
    lines = list(f)

# Shuffle with a fixed seed so the train/validation split is reproducible,
# then call seed(None) so the augmentation below is not tied to that seed.
np.random.seed(10101)
np.random.shuffle(lines)
np.random.seed(None)

# Hold out 10% of the samples for validation; the rest are training samples.
num_val = int(len(lines) * 0.1)
num_train = len(lines) - num_val
train_lines = lines[:num_train]
train_batches = len(train_lines)

# Target size, unpacked below as (height, width).
image_size = [416, 416]
input_shape = image_size
jitter = 0.3
random = False

# Walk through a single sample: the first annotation after the shuffle.
annotation_line = lines[0]
line = annotation_line.split()
print("lines[0]为{},line为{}".format(lines[0],line))

# The first token is the image path.
image = Image.open(line[0])
iw, ih = image.size
h, w = input_shape
# Example values from the article: iw=1920, ih=1080, w=416, h=416.
print("iw:{},ih:{},w:{},h:{}".format(iw,ih,w,h))

# Every remaining token is one box: comma-separated integers (box corners
# followed by a class index, per the original author's note).
box = np.array([np.array([int(v) for v in item.split(',')]) for item in line[1:]])
print(box)
#在random为FALSE时进行
# np.random.rand() draws from a uniform distribution on [0, 1) (not a normal
# distribution); call that draw n, so the result below is n * (b - a) + a.
def rand(a, b):
    """Return one random float drawn uniformly between ``a`` and ``b``.

    For ``a <= b`` the result lies in the half-open interval [a, b).
    """
    unit = np.random.rand()
    return unit * (b - a) + a

if not random:
    # Letterbox path: shrink by one common factor so the whole image fits
    # inside w x h with its aspect ratio kept (1920x1080 -> 416x234 here).
    scale = min(w / iw, h / ih)
    nw = int(iw * scale)
    nh = int(ih * scale)
    # Offsets that centre the resized image on the canvas (dx=0, dy=91 here).
    dx = (w - nw) // 2
    dy = (h - nh) // 2
    image = image.resize((nw, nh), Image.BICUBIC)

    # Grey (128, 128, 128) canvas of the target size with the resized image
    # pasted at the centring offsets.
    new_image = Image.new('RGB', (w, h), (128, 128, 128))
    new_image.paste(image, (dx, dy))
    image_data = np.array(new_image, np.float32)

    # One all-zero row of five values per box until the boxes are remapped.
    box_data = np.zeros((len(box), 5))
    print("box_data",box_data)
    if len(box) > 0:
        # Shuffle the rows (boxes) in place.
        np.random.shuffle(box)
        print(box)
        # Map x columns (0, 2) and y columns (1, 3) from source-image
        # coordinates onto the letterboxed canvas.
        box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
        box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
        # Clamp the first corner to be non-negative.
        box[:, 0:2] = np.maximum(box[:, 0:2], 0)
        print("box[:, 0:2]={},[box[:, 0:2] < 0]={}".format(box[:, 0:2],[box[:, 0:2] < 0]))
        # Clamp the second corner to the canvas width / height.
        box[:, 2] = np.minimum(box[:, 2], w)
        box[:, 3] = np.minimum(box[:, 3], h)
        print("box[:, 2]={},[box[:, 2] >w]={}".format(box[:, 2], [box[:, 2] > w]))
        # Keep only boxes that are more than one pixel wide and tall.
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        keep = (box_w > 1) & (box_h > 1)
        box = box[keep]
        box_data = np.zeros((len(box), 5))
        box_data[:len(box)] = box
        print(box_data)

# rand() samples uniformly, so with two independent draws n1, n2 in [0, 1):
# new_ar = w / h * (0.6 * n1 + 0.7) / (0.6 * n2 + 0.7) for jitter = 0.3.
ar_numerator = rand(1 - jitter, 1 + jitter)
ar_denominator = rand(1 - jitter, 1 + jitter)
new_ar = w / h * ar_numerator / ar_denominator
print(new_ar)

# Random zoom factor, uniform between 0.25 and 2 (i.e. 1.75 * n + 0.25).
scale = rand(.25, 2)
# Scale the longer side of the jittered aspect ratio and derive the other.
if new_ar >= 1:
    nw = int(scale * w)
    nh = int(nw / new_ar)
else:
    nh = int(scale * h)
    nw = int(nh * new_ar)
image = image.resize((nw, nh), Image.BICUBIC)
# Size of the image after the random rescale.
print(image.size)

# Random paste offset between 0 and the free space (w - nw, h - nh); the
# offset is negative when the resized image is larger than the canvas.
dx = int(rand(0, w - nw))
dy = int(rand(0, h - nh))
# Canvas filled with a random colour; randint's upper bound is exclusive, so
# each channel is drawn from 0..254.
fill_colour = tuple(np.random.randint(0, 255) for _ in range(3))
new_image = Image.new('RGB', (w, h), fill_colour)
new_image.paste(image, (dx, dy))
image = new_image

# Mirror the canvas left-right with probability 0.5.
flip = rand(0,1) < .5
if flip:
    image = image.transpose(Image.FLIP_LEFT_RIGHT)
# Colour jitter in HSV space.
hue=.1
sat=1.5
val=1.5
# Hue shift as a fraction of the colour wheel: uniform in [-0.1, 0.1).
hue = rand(-hue, hue)
# Saturation / value gain: uniform in [1, 1.5), or its reciprocal, each with
# probability 0.5.
sat = rand(1, sat) if rand(0,1) < .5 else 1 / rand(1, sat)
val = rand(1, val) if rand(0,1) < .5 else 1 / rand(1, val)
# RGB -> HSV on a float32 image scaled to [0, 1]. For 32-bit float input
# OpenCV reports H in degrees (0..360) and S, V in 0..1.
x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
# Rotate the hue and wrap it around the colour wheel. The shift is expressed
# in degrees, so the wrap period has to be 360; wrapping by 1 (as before)
# never brought the value back into range and hues past 360 were clamped.
x[..., 0] += hue * 360
x[..., 0][x[..., 0] > 360] -= 360
x[..., 0][x[..., 0] < 0] += 360
# Scale saturation and value.
x[..., 1] *= sat
x[..., 2] *= val
# Safety clamps: H <= 360, S and V <= 1, nothing below 0.
x[x[:, :, 0] > 360, 0] = 360
x[:, :, 1:][x[:, :, 1:] > 1] = 1
x[x < 0] = 0
# Back to RGB, rescaled to the 0..255 range.
image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255
# Remap the boxes to follow the random resize, paste offset and flip.
#
# The letterbox branch above always runs (random is False) and rewrites
# ``box`` into letterbox coordinates, so the boxes are parsed again from the
# annotation tokens here; otherwise the nw / iw mapping below would be applied
# to coordinates that are no longer in source-image space.
box = np.array([np.array(list(map(int, item.split(',')))) for item in line[1:]])
box_data = np.zeros((len(box), 5))
if len(box) > 0:
    # Shuffle the rows (boxes) in place.
    np.random.shuffle(box)
    # Source-image coordinates -> canvas coordinates.
    box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
    box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
    if flip:
        # Mirror the x range; the two x columns swap so column 0 stays the
        # smaller one.
        box[:, [0, 2]] = w - box[:, [2, 0]]
    # Clamp to the canvas.
    box[:, 0:2][box[:, 0:2] < 0] = 0
    box[:, 2][box[:, 2] > w] = w
    box[:, 3][box[:, 3] > h] = h
    # Keep only boxes that are more than one pixel wide and tall.
    box_w = box[:, 2] - box[:, 0]
    box_h = box[:, 3] - box[:, 1]
    box = box[np.logical_and(box_w > 1, box_h > 1)]
    box_data = np.zeros((len(box), 5))
    box_data[:len(box)] = box

同样地,get_random_data_with_Mosaic 的处理过程与上述过程类似:

# Jitter ranges for the HSV colour augmentation.
hue = 0.1
sat = 1.5
val = 1.5


def rand(a, b):
    """Return one random float drawn uniformly between ``a`` and ``b``."""
    unit = np.random.rand()
    return unit * (b - a) + a


random = False
# Target size, unpacked as (height, width); 416 x 416 in the article.
h, w = input_shape

# Minimum fraction of the canvas by which tiles are offset.
min_offset_x = 0.3
min_offset_y = 0.3
# Per-tile zoom range: [0.7, 0.9) for the offsets above.
scale_low = 1 - min(min_offset_x, min_offset_y)
scale_high = scale_low + 0.2

# Per-tile outputs, appended in tile order.
image_datas = []
box_datas = []
index = 0

# Paste offsets of the four tiles; with w = h = 416 these are
# x: [0, 0, 124, 124] and y: [0, 124, 124, 0].
offset_x = int(w * min_offset_x)
offset_y = int(h * min_offset_y)
place_x = [0, 0, offset_x, offset_x]
print(place_x)
place_y = [0, offset_y, offset_y, 0]
# Mosaic combines four samples. ``annotation_line`` was bound earlier to a
# single annotation string, and iterating a string yields characters rather
# than annotation lines, so in that case take the first four shuffled lines.
if isinstance(annotation_line, str):
    mosaic_lines = lines[:4]
else:
    mosaic_lines = list(annotation_line)[:4]
if len(mosaic_lines) < 4:
    raise ValueError(
        "mosaic augmentation needs 4 annotation lines, got {}".format(len(mosaic_lines))
    )

for index, line in enumerate(mosaic_lines):
    # Split the annotation into the image path and the box tokens.
    line_content = line.split()
    # Open the image and force three RGB channels.
    image = Image.open(line_content[0])
    image = image.convert("RGB")
    iw, ih = image.size
    # All boxes of this image: one row of comma-separated integers per token.
    box = np.array([np.array(list(map(int, box.split(',')))) for box in line_content[1:]])

    # Mirror left-right with probability 0.5 (only when the image has boxes),
    # mirroring the x range of every box accordingly.
    flip = rand(0,1) < .5
    if flip and len(box) > 0:
        image = image.transpose(Image.FLIP_LEFT_RIGHT)
        box[:, [0, 2]] = iw - box[:, [2, 0]]

    # Resize the tile to a random fraction of the canvas, keeping the canvas
    # aspect ratio (1 for a square canvas).
    new_ar = w / h
    scale = rand(scale_low, scale_high)
    if new_ar < 1:
        nh = int(scale * h)
        nw = int(nh * new_ar)
    else:
        nw = int(scale * w)
        nh = int(nw / new_ar)
    image = image.resize((nw, nh), Image.BICUBIC)

    # Colour jitter. The sampled values get their own names so that the
    # configured ranges (hue, sat, val) stay the same for every tile instead
    # of being replaced by the previous tile's sample.
    hue_shift = rand(-hue, hue)
    sat_gain = rand(1, sat) if rand(0,1) < .5 else 1 / rand(1, sat)
    val_gain = rand(1, val) if rand(0,1) < .5 else 1 / rand(1, val)
    # RGB -> HSV on a float32 image in [0, 1]; for float input OpenCV gives H
    # in degrees (0..360) and S, V in 0..1.
    x = cv2.cvtColor(np.array(image,np.float32)/255, cv2.COLOR_RGB2HSV)
    # Rotate the hue in degrees and wrap with a period of 360.
    x[..., 0] += hue_shift*360
    x[..., 0][x[..., 0]>360] -= 360
    x[..., 0][x[..., 0]<0] += 360
    x[..., 1] *= sat_gain
    x[..., 2] *= val_gain
    # Safety clamps: H <= 360, S and V <= 1, nothing below 0.
    x[x[:,:, 0]>360, 0] = 360
    x[:, :, 1:][x[:, :, 1:]>1] = 1
    x[x<0] = 0
    image = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)

    image = Image.fromarray((image * 255).astype(np.uint8))
    # Paste the tile at the offset reserved for this position on a canvas
    # filled with a random colour.
    dx = place_x[index]
    dy = place_y[index]
    new_image = Image.new('RGB', (w, h),(np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)))
    new_image.paste(image, (dx, dy))
    image_data = np.array(new_image)

    box_data = []
    # Remap the boxes from source-image coordinates to canvas coordinates,
    # clamp them to the canvas and drop boxes not larger than one pixel.
    if len(box) > 0:
        np.random.shuffle(box)
        box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
        box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
        box[:, 0:2][box[:, 0:2] < 0] = 0
        box[:, 2][box[:, 2] > w] = w
        box[:, 3][box[:, 3] > h] = h
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        box = box[np.logical_and(box_w > 1, box_h > 1)]
        box_data = np.zeros((len(box), 5))
        box_data[:len(box)] = box

    image_datas.append(image_data)
    box_datas.append(box_data)

# Pick the cut point where the four tiles meet. randint's upper bound is
# exclusive, so for a 416 x 416 canvas each coordinate is in 124..290.
cutx = np.random.randint(int(w * min_offset_x), int(w * (1 - min_offset_x)))
cuty = np.random.randint(int(h * min_offset_y), int(h * (1 - min_offset_y)))

# Assemble the mosaic: each tile contributes the quadrant it was pasted into.
new_image = np.zeros([h, w, 3])
top, bottom = slice(None, cuty), slice(cuty, None)
left, right = slice(None, cutx), slice(cutx, None)
# Tile order: top-left, bottom-left, bottom-right, top-right.
quadrants = ((top, left), (bottom, left), (bottom, right), (top, right))
for tile_index, (rows, cols) in enumerate(quadrants):
    new_image[rows, cols, :] = image_datas[tile_index][rows, cols, :]

# Post-process the per-tile boxes against the cut point; see merge_bboxes in
# utils for the details.
new_boxes = np.array(merge_bboxes(box_datas, cutx, cuty))

标签:box,rand,YOLOV4,int,image,dataloader,详细,np,new
来源: https://blog.csdn.net/weixin_46484389/article/details/121466286

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有