VisualStudio 2022 cuda11.7
cuda 11.7.0
cudnn 8.9.6
cmake 3.26.0
conda 23.11.0
labelme 5.4.1
numpy 1.24.3
torch 1.13.1+cu117
torchaudio 0.13.1+cu117
torchvision 0.14.1+cu117
ultralytics 8.1.30
1 数据处理
1.1 数据集标注
1.1.1 文件树结构
├── JPEGImages ← 方便手动打标签、标签转换、数据集处理
│ ├── images ← 存储未标注的原始图片
│ │ └── ······
│ ├── json_labels ← json标签
│ │ └── ······
│ ├── labels ← txt标签
│ └── classes.txt ← 存放标签的种类
└── dataset ← 数据集
├── test
├── images
│ ├── train
│ │ └── ······
│ └── val
│ └── ······
├── labels
│ ├── train
│ │ └── ······
│ └── val
│ └── ······
├── data.yaml
└── yolov8n.yaml
1.1.2 classes.txt
1.1.3 data.yaml
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Example usage: yolo detect train data=data.yaml model=yolov8n.pt epochs=150 imgsz=640 batch=4 workers=0
# dataset
# ├── test
# ├── images
# │ ├── train
# │ │ └── ······
# │ └── val
# │ └── ······
# ├── labels
# │ ├── train
# │ │ └── ······
# │ └── val
# │ └── ······
# ├── data.yaml
# └── yolov8n.yaml
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: D:/GraduationDesign/YOLOv8/test/dataset # dataset root dir
train: images/train # train images (relative to 'path') 4 images
val: images/val # val images (relative to 'path') 4 images
test: images/test # test images (relative to 'path') 4 images
# Classes
names:
  0: surface
1.2 开始标注
1.2.1 labelme
conda activate yolov8
# 使用classes.txt限制标签
labelme JPEGImages --labels JPEGImages/classes.txt --output JPEGImages/json_labels --nodata --validatelabel exact --config "{shift_auto_shape_color: -2}" # windows环境
labelme JPEGImages --labels JPEGImages/classes.txt --output JPEGImages/json_labels --nodata --validatelabel exact --config '{shift_auto_shape_color: -2}' # linux环境
1.2.2 Windows(bat):
call activate yolov8
labelme JPEGImages --labels JPEGImages/classes.txt --output JPEGImages/json_labels --nodata --validatelabel exact --config "{shift_auto_shape_color: -2}"
1.2.3 Linux(sh):
1.3 数据集转换
在进行目标检测任务时,常见的标注格式有 labelme json、voc、coco、yolo 等。labelme json 是由 anylabeling、labelme 等软件生成的标注格式;voc 和 coco 是通用目标检测框架(mmdetection、paddledetection)所支持的格式;yolo 格式是 yolo 系列项目所支持的格式。
1.3.1 json2txt
# -*- coding: utf-8 -*-
import argparse
import json
import os
from pathlib import Path
import yaml
from tqdm import tqdm
def convert(img_h, img_w, box):
    """Normalise a corner-format box to a centre/size box.

    Args:
        img_h: Image height in pixels.
        img_w: Image width in pixels.
        box: Four numbers ``[x1, y1, x2, y2]`` giving two opposite corners
            in pixels. The corners may be given in either order.

    Returns:
        ``(x, y, w, h)``: box centre and box size, each divided by the
        matching image dimension.

    Raises:
        ZeroDivisionError: If ``img_h`` or ``img_w`` is zero.
    """
    x_center = (box[0] + box[2]) / 2.0
    y_center = (box[1] + box[3]) / 2.0
    # abs() keeps the size positive whichever corner was recorded first.
    box_w = abs(box[2] - box[0])
    box_h = abs(box[3] - box[1])
    return x_center / img_w, y_center / img_h, box_w / img_w, box_h / img_h
def convert_label_json(json_dir, save_dir, classes):
    """Convert labelme rectangle annotations to YOLO detection txt labels.

    For every ``*.json`` file in ``json_dir`` a txt file with the same stem
    is written to ``save_dir``. Each shape becomes one line:
    ``<class_index> <x_center> <y_center> <width> <height>`` with the four
    box values normalised by the image size stored in the JSON.

    Args:
        json_dir: Directory containing the labelme JSON files.
        save_dir: Existing directory that receives the txt files.
        classes: List of class names; the position of a name is its index.

    Raises:
        SystemExit: If a shape does not have exactly two points.
        ValueError: If a shape label is not present in ``classes``.
    """
    # Only annotation files are converted; anything else in the directory
    # would fail in json.load.
    json_names = [name for name in os.listdir(json_dir) if name.lower().endswith('.json')]
    for json_name in tqdm(json_names):
        path = os.path.join(json_dir, json_name)
        with open(path, 'r', encoding='utf-8') as load_f:
            json_dict = json.load(load_f)
        h, w = json_dict['imageHeight'], json_dict['imageWidth']

        label_lines = []
        for shape_dict in json_dict['shapes']:
            label_index = classes.index(shape_dict['label'])
            points = shape_dict['points']
            if len(points) != 2:
                raise SystemExit("The shape of json_label is error")
            # Flatten [[x1, y1], [x2, y2]] into [x1, y1, x2, y2].
            box = [coord for point in points for coord in point]
            bbox = convert(h, w, box)
            bbox_nor_str = ' '.join(str(value) for value in bbox)
            label_lines.append(str(label_index) + ' ' + bbox_nor_str + '\n')

        # Swap only the extension, so a stem containing "json" stays intact.
        txt_path = os.path.join(save_dir, os.path.splitext(json_name)[0] + '.txt')
        with open(txt_path, 'w') as txt_file:
            txt_file.writelines(label_lines)
def save_data_yaml(classes):
    """Write ``dataset/data.yaml`` under the current working directory.

    The file holds the dataset root (``path``), the relative train/val/test
    image directories and the ``names`` mapping from class index to name.

    Args:
        classes: List of class names; the position of a name is its index.
            With an empty list the default ``{0: 'surface'}`` is written.
    """
    data = {
        'path': '',
        'train': 'images/train',
        'val': 'images/val',
        'test': 'images/test',
        'names': {0: 'surface'},
    }
    dataset_path = Path.cwd().joinpath('dataset')
    # Forward slashes keep the path usable in YAML on Windows as well.
    data['path'] = str(dataset_path).replace('\\', '/')
    for index, class_name in enumerate(classes):
        data['names'][index] = class_name
    # The dataset directory may not have been created yet.
    dataset_path.mkdir(parents=True, exist_ok=True)
    with open(dataset_path.joinpath('data.yaml'), 'w', encoding='utf-8') as file:
        yaml.dump(data, file, default_flow_style=False)
if __name__ == "__main__":
    # Command-line entry point: convert labelme JSON labels to YOLO txt labels.
    parser = argparse.ArgumentParser(description='json convert to txt params')
    parser.add_argument('--json-dir', type=str, default='JPEGImages/json_labels', help='json path dir')
    parser.add_argument('--save-dir', type=str, default='JPEGImages/labels', help='txt save dir')
    args = parser.parse_args()
    json_dir = args.json_dir
    save_dir = args.save_dir

    # Class names are read from the local classes.txt.
    # NOTE(review): the first two lines are skipped - presumably labelme's
    # `__ignore__` / `_background_` placeholder entries; confirm against
    # your classes.txt.
    with open(r'JPEGImages/classes.txt', 'r', encoding='utf-8') as f:
        raw_lines = f.readlines()[2:]
    # Strip line endings; otherwise no name ever equals a JSON label.
    classes = [line.strip() for line in raw_lines if line.strip()]
    if not classes:
        raise SystemExit('No classes')

    os.makedirs(save_dir, exist_ok=True)
    convert_label_json(json_dir, save_dir, classes)
# -*- coding: utf-8 -*-
import argparse
import json
import os
from pathlib import Path
import yaml
from tqdm import tqdm
def convert_label_json(json_dir, save_dir, classes):
    """Convert labelme polygon annotations to YOLO segmentation txt labels.

    For every ``*.json`` file in ``json_dir`` a txt file with the same stem
    is written to ``save_dir``. Each shape becomes one line:
    ``<class_index> <x1> <y1> <x2> <y2> ...`` with x divided by the image
    width and y by the image height stored in the JSON.

    Args:
        json_dir: Directory containing the labelme JSON files.
        save_dir: Existing directory that receives the txt files.
        classes: List of class names; the position of a name is its index.

    Raises:
        SystemExit: If a shape has fewer than three points.
        ValueError: If a shape label is not present in ``classes``.
    """
    # Only annotation files are converted; anything else in the directory
    # would fail in json.load.
    json_names = [name for name in os.listdir(json_dir) if name.lower().endswith('.json')]
    for json_name in tqdm(json_names):
        path = os.path.join(json_dir, json_name)
        with open(path, 'r', encoding='utf-8') as load_f:
            json_dict = json.load(load_f)
        h, w = json_dict['imageHeight'], json_dict['imageWidth']

        label_lines = []
        for shape_dict in json_dict['shapes']:
            label_index = classes.index(shape_dict['label'])
            points = shape_dict['points']
            # One or two points (point, line, rectangle) are not a polygon.
            if len(points) < 3:
                raise SystemExit("The shape of json_label is error")
            points_nor_list = []
            for point in points:
                points_nor_list.append(str(point[0] / w))
                points_nor_list.append(str(point[1] / h))
            points_nor_str = ' '.join(points_nor_list)
            label_lines.append(str(label_index) + ' ' + points_nor_str + '\n')

        # Swap only the extension, so a stem containing "json" stays intact.
        txt_path = os.path.join(save_dir, os.path.splitext(json_name)[0] + '.txt')
        with open(txt_path, 'w') as txt_file:
            txt_file.writelines(label_lines)
def save_data_yaml(classes):
    """Write ``dataset/data.yaml`` under the current working directory.

    The file holds the dataset root (``path``), the relative train/val/test
    image directories and the ``names`` mapping from class index to name.

    Args:
        classes: List of class names; the position of a name is its index.
            With an empty list the default ``{0: 'surface'}`` is written.
    """
    data = {
        'path': '',
        'train': 'images/train',
        'val': 'images/val',
        'test': 'images/test',
        'names': {0: 'surface'},
    }
    dataset_path = Path.cwd().joinpath('dataset')
    # Forward slashes keep the path usable in YAML on Windows as well.
    data['path'] = str(dataset_path).replace('\\', '/')
    for index, class_name in enumerate(classes):
        data['names'][index] = class_name
    # The dataset directory may not have been created yet.
    dataset_path.mkdir(parents=True, exist_ok=True)
    with open(dataset_path.joinpath('data.yaml'), 'w', encoding='utf-8') as file:
        yaml.dump(data, file, default_flow_style=False)
if __name__ == "__main__":
    # Command-line entry point: convert labelme JSON labels to YOLO txt labels.
    parser = argparse.ArgumentParser(description='json convert to txt params')
    parser.add_argument('--json-dir', type=str, default='JPEGImages/json_labels', help='json path dir')
    parser.add_argument('--save-dir', type=str, default='JPEGImages/labels', help='txt save dir')
    args = parser.parse_args()
    json_dir = args.json_dir
    save_dir = args.save_dir

    # Class names are read from the local classes.txt.
    # NOTE(review): the first two lines are skipped - presumably labelme's
    # `__ignore__` / `_background_` placeholder entries; confirm against
    # your classes.txt.
    with open(r'JPEGImages/classes.txt', 'r', encoding='utf-8') as load_f:
        raw_lines = load_f.readlines()[2:]
    # Strip line endings; otherwise no name ever equals a JSON label.
    classes = [line.strip() for line in raw_lines if line.strip()]
    if not classes:
        raise SystemExit('No classes')

    os.makedirs(save_dir, exist_ok=True)
    convert_label_json(json_dir, save_dir, classes)
0 0.4173280423280423 0.31617647058823534 0.3862044817927171 0.3526493930905696 0.4192732648615002 0.38620448179271705 0.465958605664488 0.38912231559290383 0.4834656084656085 0.35848506069094305 0.4640133831310302 0.330765639589169
1.3.2 验证标签
# -*- coding: utf-8 -*-
import glob
import os
import random
import cv2
def bbox_cxcywh_to_xyxy(img_h, img_w, bbox):
    """Convert a normalised centre/size box to pixel corner points.

    Args:
        img_h: Image height in pixels.
        img_w: Image width in pixels.
        bbox: ``[cx, cy, w, h]`` where each value is a fraction of the
            image size.

    Returns:
        ``((x1, y1), (x2, y2))``: the two opposite corners as integer pixel
        coordinates (fractions are truncated by ``int``).
    """
    cx, cy, box_w, box_h = bbox[0], bbox[1], bbox[2], bbox[3]
    x1 = int(img_w * (cx - box_w / 2))
    y1 = int(img_h * (cy - box_h / 2))
    x2 = int(img_w * (cx + box_w / 2))
    y2 = int(img_h * (cy + box_h / 2))
    return (x1, y1), (x2, y2)
def check_labels(txt_labels, images_dir):
    """Draw YOLO detection labels on their images for visual checking.

    For every ``*.txt`` file in ``txt_labels`` the image
    ``<images_dir>/<stem>.jpg`` is loaded, each labelled box is drawn as a
    rectangle and the result is shown in a window named ``img2``. The next
    image is shown after a key press.

    Args:
        txt_labels: Directory containing the YOLO txt label files.
        images_dir: Directory containing the matching ``.jpg`` images.

    Raises:
        SystemExit: If a non-empty label line does not have five fields.
    """
    # One random, fairly dark colour per class id 0..10.
    color_map = {}
    for i in range(0, 11):
        r = random.randint(0, 128)
        g = random.randint(0, 128)
        b = random.randint(0, 128)
        color_map[str(i)] = (r, g, b)

    txt_files = glob.glob(txt_labels + "/*.txt")
    for txt_file in txt_files:
        filename = os.path.splitext(os.path.basename(txt_file))[0]
        pic_path = images_dir + '/' + filename + ".jpg"
        img = cv2.imread(pic_path)
        if img is None:
            # imread returns None instead of raising for an unreadable file.
            print(f"Cannot read image {pic_path}, skipped")
            continue
        height, width = img.shape[:2]

        with open(txt_file) as file_handle:
            # Blank lines (e.g. a trailing newline) carry no label.
            rows = [line.split() for line in file_handle if line.strip()]

        for new_info in rows:
            if len(new_info) != 5:
                raise SystemExit("The shape of json_label is error")
            class_id = new_info[0]
            bbox = [float(item) for item in new_info[1:]]
            point_1, point_2 = bbox_cxcywh_to_xyxy(height, width, bbox)
            # Fall back to a fixed colour for class ids outside the map.
            color = color_map.get(class_id, (0, 0, 255))
            cv2.rectangle(img, point_1, point_2, color, thickness=3)

        cv2.namedWindow('img2', 0)
        cv2.imshow('img2', img)
        # imshow only renders once the GUI event loop runs in waitKey.
        cv2.waitKey(0)
    cv2.destroyAllWindows()
if __name__ == "__main__":
    # Visual check of the detection labels produced by the converter.
    check_labels(txt_labels=r"JPEGImages/labels", images_dir=r'JPEGImages/images')
# -*- coding: utf-8 -*-
import glob
import os
import random
import cv2
import numpy as np
def check_labels(txt_labels, images_dir):
    """Draw YOLO segmentation labels on their images for visual checking.

    For every ``*.txt`` file in ``txt_labels`` the image
    ``<images_dir>/<stem>.jpg`` is loaded, each labelled polygon is drawn as
    a closed polyline and the result is shown in a window named ``img2``.
    The next image is shown after a key press.

    Args:
        txt_labels: Directory containing the YOLO txt label files.
        images_dir: Directory containing the matching ``.jpg`` images.

    Raises:
        SystemExit: If a non-empty label line is not a class id followed by
            at least three complete ``x y`` pairs.
    """
    # One random, fairly dark colour per class id 0..10.
    color_map = {}
    for i in range(0, 11):
        r = random.randint(0, 128)
        g = random.randint(0, 128)
        b = random.randint(0, 128)
        color_map[str(i)] = (r, g, b)

    txt_files = glob.glob(txt_labels + "/*.txt")
    for txt_file in txt_files:
        filename = os.path.splitext(os.path.basename(txt_file))[0]
        pic_path = images_dir + '/' + filename + ".jpg"
        img = cv2.imread(pic_path)
        if img is None:
            # imread returns None instead of raising for an unreadable file.
            print(f"Cannot read image {pic_path}, skipped")
            continue
        height, width = img.shape[:2]

        with open(txt_file) as file_handle:
            # Blank lines (e.g. a trailing newline) carry no label.
            rows = [line.split() for line in file_handle if line.strip()]

        for new_info in rows:
            # Class id + at least 3 (x, y) pairs: an odd count of 7 or more.
            # This also rejects 5-field detection-style lines.
            if len(new_info) < 7 or len(new_info) % 2 == 0:
                raise SystemExit("The shape of json_label is error")
            polygon = []
            for i in range(1, len(new_info), 2):
                x_norm, y_norm = float(new_info[i]), float(new_info[i + 1])
                polygon.append([int(x_norm * width), int(y_norm * height)])
            # Fall back to a fixed colour for class ids outside the map.
            color = color_map.get(new_info[0], (0, 0, 255))
            cv2.polylines(img, [np.array(polygon, np.int32)], True, color, thickness=3)

        cv2.namedWindow('img2', 0)
        cv2.imshow('img2', img)
        # imshow only renders once the GUI event loop runs in waitKey.
        cv2.waitKey(0)
    cv2.destroyAllWindows()
if __name__ == "__main__":
    # Visual check of the segmentation labels produced by the converter.
    check_labels(txt_labels=r"JPEGImages/labels", images_dir=r'JPEGImages/images')
1.4 数据集分割
数据量考虑: 数据集的大小是选择划分比例的一个关键因素。如果数据集较小,可能希望将更大的比例分配给训练集,以确保模型有足够的数据来学习。
数据的随机性: 确保在划分数据集时要随机混洗数据,以防止数据集中的任何特定模式或顺序影响模型的性能评估。
代表性: 确保训练集和验证集都代表了整个数据集的不同方面,以避免在验证模型性能时出现偏差。
交叉验证: 对于较小的数据集,您可以考虑使用交叉验证,将数据划分为多个折(folds),并在每次训练中使用不同的折作为验证集,从而更全面地评估模型性能。
超参数调整: 验证集通常用于调整模型的超参数,例如学习率、正则化强度等,以获得更好的性能。
不要在验证集上过拟合: 避免在验证集上进行过多的超参数调整或模型选择,以免模型在验证集上产生过拟合。
# -*- coding:utf-8 -*
import random
import shutil
from pathlib import Path
from typing import List
def data_split(full_list, ratio, shuffle=False):
    """Split a list into two consecutive parts.

    Args:
        full_list: The items to split.
        ratio: Fraction of the items that goes into the first part; the
            first part holds ``int(len(full_list) * ratio)`` items.
        shuffle: When true, split a randomly shuffled copy of the items.
            ``full_list`` itself is never modified.

    Returns:
        ``(sublist_1, sublist_2)``. When the input is empty or the first
        part would be empty, ``([], full_list)`` is returned.
    """
    n_total = len(full_list)
    offset = int(n_total * ratio)
    if n_total == 0 or offset < 1:
        return [], full_list
    if shuffle:
        # Shuffle a copy so the caller's list keeps its order.
        full_list = list(full_list)
        random.shuffle(full_list)
    return full_list[:offset], full_list[offset:]
# Destination layout: dataset/{images,labels}/{train,val,test}.
images_p = Path("dataset/images")
labels_p = Path("dataset/labels")
test_p = Path("dataset/test")
train_images_p = images_p.joinpath("train")
train_labels_p = labels_p.joinpath("train")
val_images_p = images_p.joinpath("val")
val_labels_p = labels_p.joinpath("val")
test_images_p = images_p.joinpath("test")
test_labels_p = labels_p.joinpath("test")

# Source data: annotated images and their txt labels.
images_dir = Path("JPEGImages/images")
labels_dir = Path("JPEGImages/labels")
if not images_dir.exists() or not labels_dir.exists():
    # Nothing to split without the source directories.
    raise SystemExit(f"Source directory not found: {images_dir} or {labels_dir}")

# Create the output directories.
for target_dir in (
    train_images_p,
    train_labels_p,
    val_images_p,
    val_labels_p,
    test_images_p,
    test_labels_p,
):
    target_dir.mkdir(parents=True, exist_ok=True)

# Share of the images that goes into the training set.
proportion_ = 0.9
labels_files: List[Path] = [label_file for label_file in labels_dir.iterdir() if label_file.is_file()]
images_files: List[Path] = [images_file for images_file in images_dir.iterdir() if images_file.is_file()]
num = len(images_files)

# Shuffle the indices so the split does not follow the directory order.
# A fixed seed keeps the split identical across runs, so a re-run cannot
# copy an image into both train and val.
shuffled_indices = list(range(num))
random.Random(0).shuffle(shuffled_indices)
train_list, val_list = data_split(shuffled_indices, proportion_)
# Sets give O(1) membership tests in the loop below.
train_indices = set(train_list)
val_indices = set(val_list)

for i, image in enumerate(images_files):
    print(i, ' - ', image)
    file_stem = image.stem
    if i in train_indices:
        dst_images_p, dst_labels_p = train_images_p, train_labels_p
    elif i in val_indices:
        dst_images_p, dst_labels_p = val_images_p, val_labels_p
    else:
        continue
    label = labels_dir.joinpath(f"{file_stem}.txt")
    if label.exists():
        shutil.copy(image, dst_images_p.joinpath(image.name))
        shutil.copy(label, dst_labels_p.joinpath(f"{file_stem}.txt"))
    else:
        # Images without a label file are reported and left out.
        print(f"No label file found {str(label)}")

print("数据集划分完成: 总数量:", num, " 训练集数量:", len(train_list), " 验证集数量:", len(val_list))
2 训练
2.1 模型训练
2.1.1 CLI
开始在 dataset 上对 YOLOv8n 进行 100 个 epoch(轮次)的训练,图像大小为 640。
yolo detect train data=dataset/data.yaml model=yolov8n.pt epochs=100 imgsz=640
2.1.2 Python
2.2 参数
3 模型预测
可修改的参数很多,建议查看 官方文档
yolo detect predict model=best.pt source=dataset/test save=True
