# DataAugmentforYolo.py

# encoding='UTF-8'
# author: pureyang
# TIME: 2019/8/26 17:22
# Description: data augmentation for object detection (YOLO-format txt labels)
##############################################################

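# Includes:
#     1. brightness change
#     2. added noise
#     3. random black/white points
#     4. mirroring / flipping (the label points must be updated)
#     5. shifting (the label points must be updated)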

import time
import random
import cv2
import os
import numpy as np
from skimage.util import random_noise
import re
from copy import deepcopy
import argparse
import base64


def xywh2xyxy(x, h, w):
    """Convert a centre/size box into its two pixel corner points.

    Args:
        x: sequence whose first four items are the box centre x, centre y,
            width and height (each is scaled by ``w`` or ``h`` below, so
            they are presumably fractions of the image size).
        h: image height in pixels.
        w: image width in pixels.

    Returns:
        ``[[x_min, y_min], [x_max, y_max]]``.
    """
    half_w = x[2] / 2
    half_h = x[3] / 2

    # The scratch array inherits its dtype from ``x``; values written into it
    # are cast to that dtype.
    corners = np.zeros_like(x)
    corners[0] = w * (x[0] - half_w)
    corners[1] = h * (x[1] - half_h)
    corners[2] = w * (x[0] + half_w)
    corners[3] = h * (x[1] + half_h)

    top_left = [corners[0], corners[1]]
    bottom_right = [corners[2], corners[3]]
    return [top_left, bottom_right]


def xywh2xyxy_list(x, h, w):
    """Convert a ``[cls, xc, yc, bw, bh]`` label into ``[cls, x1, y1, x2, y2]``.

    Args:
        x: sequence of at least five numbers: class id, box centre x,
            centre y, width and height (the last four are scaled by ``w``
            or ``h`` below).
        h: image height in pixels.
        w: image width in pixels.

    Returns:
        A numpy array shaped and typed like ``x`` holding the class id
        followed by the left, top, right and bottom box edges.
    """
    cls_id, xc, yc, bw, bh = x[0], x[1], x[2], x[3], x[4]

    out = np.zeros_like(x)
    out[0] = cls_id
    out[1] = w * (xc - bw / 2)  # left
    out[2] = h * (yc - bh / 2)  # top
    out[3] = w * (xc + bw / 2)  # right
    out[4] = h * (yc + bh / 2)  # bottom
    return out


def xyxy2xywh(x, h, w):
    """Format a ``[cls, x1, y1, x2, y2]`` pixel box as a ``cls xc yc bw bh`` line.

    Args:
        x: sequence of at least five numbers: class id and the left, top,
            right and bottom box edges.
        h: image height used to scale the y values.
        w: image width used to scale the x values.

    Returns:
        The space-separated fields followed by a newline. The class id is
        written as an int.
    """
    cls_id = int(x[0])
    x_center = ((x[1] + x[3]) / 2) / w
    y_center = ((x[2] + x[4]) / 2) / h
    box_w = (x[3] - x[1]) / w
    box_h = (x[4] - x[2]) / h

    fields = [cls_id, x_center, y_center, box_w, box_h]
    # Items past the fifth are not converted: each is written out as its own
    # position index.
    fields.extend(range(5, len(x)))
    return ' '.join(map(str, fields)) + '\n'


def xywh2xywh(x):
    """Format a ``[cls, xc, yc, bw, bh]`` label as one text line.

    Args:
        x: sequence of at least five items; the first is converted with
            ``int`` and the next four are written unchanged.

    Returns:
        The space-separated fields followed by a newline.
    """
    fields = [int(x[0]), x[1], x[2], x[3], x[4]]
    # Items past the fifth are replaced by their own position index.
    fields.extend(range(5, len(x)))
    return ' '.join(map(str, fields)) + '\n'

# All images are read with cv2


class DataAugmentForObjectDetection():
    """Random image augmentation that keeps YOLO-style labels in sync.

    Images are arrays as produced by cv2. Labels are text lines of the form
    ``"cls xc yc bw bh"`` where the box centre and size are fractions of the
    image width/height. Only the first five fields of a line are used and
    blank lines are ignored by the box-aware augmentations.
    """

    def __init__(self, change_light_rate=0.15,
                 add_noise_rate=0.3, random_point=0.3, flip_rate=0.3, shift_rate=0.3, rand_point_percent=0.03,
                 is_addNoise=True, is_changeLight=True, is_random_point=True, is_shift_pic_bboxes=True,
                 is_filp_pic_bboxes=True):
        """Store the per-augmentation thresholds and on/off switches.

        Args:
            change_light_rate: threshold for the brightness change (see the
                note in ``dataAugment`` about how it is compared).
            add_noise_rate: probability of adding noise.
            random_point: probability of adding random black/white points.
            flip_rate: probability of flipping.
            shift_rate: probability of shifting.
            rand_point_percent: fraction of pixel positions overwritten by
                ``_addRandPoint``.
            is_addNoise, is_changeLight, is_random_point,
            is_shift_pic_bboxes, is_filp_pic_bboxes: enable/disable the
                corresponding augmentation.
        """
        # Per-augmentation thresholds
        self.change_light_rate = change_light_rate
        self.add_noise_rate = add_noise_rate
        self.random_point = random_point
        self.flip_rate = flip_rate
        self.shift_rate = shift_rate

        self.rand_point_percent = rand_point_percent

        # Which augmentations may be used at all
        self.is_addNoise = is_addNoise
        self.is_changeLight = is_changeLight
        self.is_random_point = is_random_point
        self.is_filp_pic_bboxes = is_filp_pic_bboxes
        self.is_shift_pic_bboxes = is_shift_pic_bboxes

    # ------------------------------------------------------------------
    # Pure label helpers (no image / cv2 dependency)
    # ------------------------------------------------------------------
    @staticmethod
    def _parse_labels(txt_info):
        """Parse label lines into ``[cls, xc, yc, bw, bh]`` float lists.

        Blank lines are skipped and any whitespace separates fields.

        Raises:
            ValueError: if a non-blank line has fewer than five fields or a
                field is not a number.
        """
        labels = []
        for line in txt_info:
            fields = line.split()
            if not fields:
                continue
            cls_id, xc, yc, bw, bh = (float(v) for v in fields[:5])
            labels.append([cls_id, xc, yc, bw, bh])
        return labels

    @staticmethod
    def _format_labels(labels):
        """Serialise parsed labels back to ``"cls xc yc bw bh\\n"`` lines."""
        return [' '.join(str(v) for v in (int(cls_id), xc, yc, bw, bh)) + '\n'
                for cls_id, xc, yc, bw, bh in labels]

    @staticmethod
    def _bbox_extent(labels, h, w):
        """Return ``(x_min, y_min, x_max, y_max)`` in pixels over all boxes.

        With no labels the result is ``(w, h, 0, 0)``.
        """
        x_min, y_min, x_max, y_max = w, h, 0, 0
        for _, xc, yc, bw, bh in labels:
            x_min = min(x_min, w * (xc - bw / 2))
            y_min = min(y_min, h * (yc - bh / 2))
            x_max = max(x_max, w * (xc + bw / 2))
            y_max = max(y_max, h * (yc + bh / 2))
        return x_min, y_min, x_max, y_max

    @staticmethod
    def _shift_labels(labels, dx, dy, h, w):
        """Move every box centre by ``(dx, dy)`` pixels; sizes are unchanged."""
        return [[cls_id, xc + dx / w, yc + dy / h, bw, bh]
                for cls_id, xc, yc, bw, bh in labels]

    @staticmethod
    def _flip_labels(labels, flip_code):
        """Mirror box centres for a cv2 flip code (0, 1 or -1)."""
        mirror_x = flip_code in (1, -1)
        mirror_y = flip_code in (0, -1)
        return [[cls_id,
                 1 - xc if mirror_x else xc,
                 1 - yc if mirror_y else yc,
                 bw, bh]
                for cls_id, xc, yc, bw, bh in labels]

    # ------------------------------------------------------------------
    # Image augmentations
    # ------------------------------------------------------------------
    # Add noise
    def _addNoise(self, img):
        """Return ``img`` with scikit-image's default random noise applied.

        No explicit seed is passed, so images processed within the same
        second no longer share one identical noise pattern. The result is
        converted back to uint8 to match the other augmentations.
        """
        noisy = random_noise(img) * 255
        return np.clip(np.rint(noisy), 0, 255).astype(np.uint8)

    # Change brightness
    def _changeLight(self, img):
        """Darken ``img`` by a random factor drawn from [0.35, 1]."""
        alpha = random.uniform(0.35, 1)
        # Blending with an all-zero image scales every pixel by alpha.
        blank = np.zeros(img.shape, img.dtype)
        return cv2.addWeighted(img, alpha, blank, 1 - alpha, 0)

    # Randomly overwrite pixels
    def _addRandPoint(self, img):
        """Set randomly chosen pixels of ``img`` to 0 or 255, in place.

        ``rand_point_percent`` of the pixel count is the number of draws;
        positions may repeat. Returns the same (modified) array.
        """
        rows, cols = img.shape[0], img.shape[1]
        num = int(self.rand_point_percent * rows * cols)
        for _ in range(num):
            row = random.randint(0, rows - 1)
            col = random.randint(0, cols - 1)
            img[row, col] = 0 if random.randint(0, 1) == 0 else 255
        return img

    # Shift
    def _shift_pic_bboxes(self, img, txt_info):
        """Translate the image by a random offset and move the labels with it.

        The offset is limited to a third of the free margin between the
        union of all boxes and the image border.

        Args:
            img: image array; the first two dimensions are height and width.
            txt_info: label lines (not modified).

        Returns:
            ``(shifted_image, new_label_lines)``.
        """
        h, w = img.shape[:2]
        labels = self._parse_labels(txt_info)
        x_min, y_min, x_max, y_max = self._bbox_extent(labels, h, w)

        d_to_left = x_min  # largest left shift that keeps all boxes inside
        d_to_right = w - x_max  # largest right shift
        d_to_top = y_min  # largest upward shift
        d_to_bottom = h - y_max  # largest downward shift

        # x > 0 moves right, x < 0 moves left; y > 0 moves down, y < 0 moves up
        x = random.uniform(-(d_to_left - 1) / 3, (d_to_right - 1) / 3)
        y = random.uniform(-(d_to_top - 1) / 3, (d_to_bottom - 1) / 3)

        M = np.float32([[1, 0, x], [0, 1, y]])
        shift_img = cv2.warpAffine(img, M, (w, h))

        shifted = self._shift_labels(labels, x, y, h, w)
        return shift_img, self._format_labels(shifted)

    # Mirror
    def _filp_pic_bboxes(self, img, txt_info):
        """Flip the image along a randomly chosen axis and mirror the labels.

        Args:
            img: image array.
            txt_info: label lines (not modified).

        Returns:
            ``(flipped_image, new_label_lines)``.
        """
        sed = random.random()

        if 0 < sed < 0.33:
            inver = 0  # around the x-axis: top <-> bottom
        elif 0.33 < sed < 0.66:
            inver = 1  # around the y-axis: left <-> right
        else:
            inver = -1  # both axes
        flip_img = cv2.flip(img, inver)

        flipped = self._flip_labels(self._parse_labels(txt_info), inver)
        return flip_img, self._format_labels(flipped)

    # Augmentation entry point
    def dataAugment(self, img, dic_info):
        """Apply a random subset (at least one) of the enabled augmentations.

        Args:
            img: image array as read by cv2.
            dic_info: list of label lines for ``img``.

        Returns:
            ``(augmented_image, label_lines)``. If every augmentation is
            disabled the inputs are returned unchanged.
        """
        enabled = (self.is_changeLight, self.is_addNoise, self.is_random_point,
                   self.is_shift_pic_bboxes, self.is_filp_pic_bboxes)
        if not any(enabled):
            # Nothing can ever fire; avoid spinning forever in the loop below.
            return img, dic_info

        change_num = 0  # number of augmentations applied so far
        while change_num < 1:  # retry until at least one augmentation fires
            # NOTE(review): unlike the other rates this test uses '>', so the
            # brightness change fires with probability 1 - change_light_rate.
            # Kept as-is to preserve existing behaviour; confirm the intent.
            if self.is_changeLight and random.random() > self.change_light_rate:
                change_num += 1
                img = self._changeLight(img)

            if self.is_addNoise and random.random() < self.add_noise_rate:
                change_num += 1
                img = self._addNoise(img)

            if self.is_random_point and random.random() < self.random_point:
                change_num += 1
                img = self._addRandPoint(img)

            if self.is_shift_pic_bboxes and random.random() < self.shift_rate:
                change_num += 1
                img, dic_info = self._shift_pic_bboxes(img, dic_info)

            if self.is_filp_pic_bboxes and random.random() < self.flip_rate:
                change_num += 1
                img, dic_info = self._filp_pic_bboxes(img, dic_info)

        return img, dic_info


# Helper for reading label txt files and saving results
class ToolHelper():
    """File helpers: read label files, encode images, save images and labels."""

    # Read the original annotation lines from a txt file
    def parse_txt(self, path):
        """Return all lines of the text file at ``path`` (newlines kept)."""
        with open(path) as f:
            return f.readlines()

    # Encode an image file as text
    def img2str(self, img_name):
        """Return the base64 text of the raw bytes of the file ``img_name``."""
        with open(img_name, "rb") as f:
            # base64 output is pure ASCII, so decode it directly instead of
            # stripping the b'...' wrapper from the bytes repr.
            return base64.b64encode(f.read()).decode('ascii')

    # Save an image
    def save_img(self, save_path, img):
        """Write ``img`` to ``save_path`` with cv2, reporting a failed write."""
        # cv2.imwrite signals failure by returning False rather than raising.
        if not cv2.imwrite(save_path, img):
            print(f'failed to save {save_path}')

    # Save label lines
    def save_txt(self, file_name, save_folder, txt_info):
        """Write the label lines ``txt_info`` to ``save_folder/file_name``.

        Lines are written as given (each is expected to carry its own
        newline). An I/O failure is reported and otherwise ignored, so one
        bad file does not abort a batch run.
        """
        target = os.path.join(save_folder, file_name)
        try:
            with open(target, 'w') as f:
                f.writelines(txt_info)
        except OSError as err:
            print(f'failed to save {target}: {err}')


if __name__ == '__main__':
    # Script entry point: for every .jpg/.png image in the source folder that
    # has a same-named .txt label file, write `need_aug_num` augmented copies
    # (image + labels) into the save folder with a `zq_` name prefix.

    need_aug_num = 1  # number of augmented copies to produce per image

    toolhelper = ToolHelper()  # file I/O helper

    is_endwidth_dot = True  # whether image names end with an extension such as .jpg / .png

    dataAug = DataAugmentForObjectDetection()  # augmentation engine

    # Command-line parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--source_img_txt_path', type=str,
                        default=r'Y:\diska\dataset\gcdataset\juhua_data\one_datas_focus\raw\4.20_add/')
    parser.add_argument('--save_img_txt_path', type=str,
                        default=r'Y:\diska\dataset\gcdataset\juhua_data\one_datas_focus\raw\4.20_add/')

    args = parser.parse_args()
    source_img_txt_path = args.source_img_txt_path  # folder holding the original images and txt files
    save_img_txt_path = args.save_img_txt_path  # folder receiving the augmented results

    # Create the output folder (including missing parents) if needed
    os.makedirs(save_img_txt_path, exist_ok=True)

    for file in os.listdir(source_img_txt_path):
        if not file.lower().endswith(('.jpg', '.png')):
            continue

        # Split the file name into stem and extension
        if is_endwidth_dot:
            _file_prefix, _file_suffix = os.path.splitext(file)
        else:
            _file_prefix, _file_suffix = file, ''

        pic_path = os.path.join(source_img_txt_path, file)
        txt_path = os.path.join(source_img_txt_path,
                                os.path.splitext(file)[0] + '.txt')

        # An image without labels cannot be augmented consistently; skip it
        # instead of aborting the whole run.
        if not os.path.isfile(txt_path):
            print(f'skip {file}: label file not found: {txt_path}')
            continue
        txt_dic = toolhelper.parse_txt(txt_path)

        img = cv2.imread(pic_path)
        if img is None:  # cv2.imread returns None when the file cannot be decoded
            print(f'skip {file}: image could not be read')
            continue
        print("emd")

        for cnt in range(need_aug_num):
            # Work on copies so every round starts from the original data
            auged_img, txt_info = dataAug.dataAugment(
                deepcopy(img), deepcopy(txt_dic))
            img_name = f'zq_{_file_prefix}_{cnt}{_file_suffix}'  # output image name
            img_save_path = os.path.join(save_img_txt_path, img_name)
            toolhelper.save_img(img_save_path, auged_img)  # save the augmented image

            toolhelper.save_txt(f'zq_{_file_prefix}_{cnt}.txt',
                                save_img_txt_path, txt_info)  # save the matching txt labels