534
社区成员




今天是学习OpenMMLab第七次课,讲解MMDetection代码课程
主要包括以下流程:
1.数据集准备和可视化
2.自定义配置文件
3.训练前可视化验证
4.模型训练
5.模型测试和推理
6.可视化分析
环境检测代码
from mmengine.utils import get_git_hash
from mmengine.utils.dl_utils import collect_env as collect_base_env

import mmdet


# Collect and print environment information.
def collect_env():
    """Collect environment info and append the MMDetection version.

    Returns:
        dict: base environment info from mmengine, plus an 'MMDetection'
        entry of the form '<version>+<short git hash>'.
    """
    env_info = collect_base_env()
    # Tag the report with the installed mmdet version and short git hash.
    env_info['MMDetection'] = f'{mmdet.__version__}+{get_git_hash()[:7]}'
    return env_info


if __name__ == '__main__':
    for name, val in collect_env().items():
        print(f'{name}:{val}')
sys.platform:linux
Python:3.10.11 (main, Apr 20 2023, 19:02:41) [GCC 11.2.0]
CUDA available:True
numpy_random_seed:2147483648
GPU 0:NVIDIA GeForce RTX 3060 Laptop GPU
CUDA_HOME:/usr/local/cuda
NVCC:Cuda compilation tools, release 11.1, V11.1.74
GCC:gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0
PyTorch:2.0.1+cu117
PyTorch compiling details:PyTorch built with:
- GCC 9.3
- C++ Version: 201703
- Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications
- Intel(R) MKL-DNN v2.7.3 (Git Hash 6dbeffbae1f23cbbeae17adb7b5b13f1f37c080e)
- OpenMP 201511 (a.k.a. OpenMP 4.5)
- LAPACK is enabled (usually provided by MKL)
- NNPACK is enabled
- CPU capability usage: AVX2
- CUDA Runtime 11.7
- NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86
- CuDNN 8.5
- Magma 2.6.1
- Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.7, CUDNN_VERSION=8.5.0, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_DISABLE_GPU_ASSERTS=ON, TORCH_VERSION=2.0.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF,
TorchVision:0.15.2+cu117
OpenCV:4.7.0
MMEngine:0.7.4
MMDetection:3.0.0+97a36bb
下载作业中要求的气球数据集
cd mmdetection/data
wget https://download.openmmlab.com/mmyolo/data/balloon_dataset.zip
unzip balloon_dataset.zip
import glob
import os

import matplotlib.pyplot as plt
from PIL import Image

# Preview the first 8 training images of the balloon dataset in a 2x4 grid.
# (The original cell also created `original_images`, `images` and `texts`
# lists that were never used — removed as dead code.)
plt.figure(figsize=(16, 5))
image_paths = glob.glob(os.path.join('./data/balloon/train', '*.jpg'))[:8]
for i, filename in enumerate(image_paths):
    name = filename.split('/')[-1]
    image = Image.open(filename).convert('RGB')
    plt.subplot(2, 4, i + 1)
    plt.imshow(image)
    plt.title(f"{name}")
    plt.xticks([])
    plt.yticks([])
plt.tight_layout()
plt.show()
查看气球数据json文件,需要将其转换为COCO格式,转换过程需要将mmcv改为full模式
import os.path as osp

import mmcv


def convert_balloon_to_coco(ann_file, out_file, image_prefix):
    """Convert VIA-format balloon annotations to a COCO-style json file.

    Args:
        ann_file (str): path to the VIA `via_region_data.json` file.
        out_file (str): path of the COCO json to write.
        image_prefix (str): directory containing the images.
    """
    data_infos = mmcv.load(ann_file)

    images = []
    annotations = []
    obj_count = 0
    for idx, v in enumerate(mmcv.track_iter_progress(data_infos.values())):
        filename = v['filename']
        img_path = osp.join(image_prefix, filename)
        # Read the image once only to record its spatial size.
        height, width = mmcv.imread(img_path).shape[:2]
        images.append(
            dict(id=idx, file_name=filename, height=height, width=width))

        # NOTE: the original cell also declared unused `bboxes`, `labels`
        # and `masks` lists here — removed as dead code.
        for _, obj in v['regions'].items():
            assert not obj['region_attributes']
            obj = obj['shape_attributes']
            px = obj['all_points_x']
            py = obj['all_points_y']
            # Shift polygon points to pixel centers, then flatten to
            # [x0, y0, x1, y1, ...] as COCO segmentation expects.
            poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
            poly = [p for x in poly for p in x]

            x_min, y_min, x_max, y_max = (
                min(px), min(py), max(px), max(py))
            data_anno = dict(
                image_id=idx,
                id=obj_count,
                category_id=0,
                bbox=[x_min, y_min, x_max - x_min, y_max - y_min],
                area=(x_max - x_min) * (y_max - y_min),
                segmentation=[poly],
                iscrowd=0)
            annotations.append(data_anno)
            obj_count += 1

    coco_format_json = dict(
        images=images,
        annotations=annotations,
        categories=[{'id': 0, 'name': 'balloon'}])
    mmcv.dump(coco_format_json, out_file)


convert_balloon_to_coco('./data/balloon/train/via_region_data.json',
                        './data/balloon/train/coco.json',
                        './data/balloon/train')
convert_balloon_to_coco('./data/balloon/val/via_region_data.json',
                        './data/balloon/val/coco.json',
                        './data/balloon/val')
转换完成后,可视化数据集测试
from pycocotools.coco import COCO
import numpy as np
import os.path as osp
from matplotlib.collections import PatchCollection
from matplotlib.patches import Polygon
import matplotlib.pyplot as plt
from PIL import Image
def apply_exif_orientation(image):
    """Return *image* transposed according to its EXIF orientation tag.

    Phone cameras often record rotation in EXIF tag 274 instead of rotating
    pixels; if the image object carries no usable EXIF data, it is returned
    unchanged.
    """
    _EXIF_ORIENT = 274

    if not hasattr(image, 'getexif'):
        return image

    try:
        exif = image.getexif()
    except Exception:
        exif = None

    if exif is None:
        return image

    # Map EXIF orientation codes to the PIL transpose operations.
    transpose_ops = {
        2: Image.FLIP_LEFT_RIGHT,
        3: Image.ROTATE_180,
        4: Image.FLIP_TOP_BOTTOM,
        5: Image.TRANSPOSE,
        6: Image.ROTATE_270,
        7: Image.TRANSVERSE,
        8: Image.ROTATE_90,
    }
    method = transpose_ops.get(exif.get(_EXIF_ORIENT))
    if method is None:
        return image
    return image.transpose(method)
def show_bbox_only(coco, anns, show_label_bbox=True, is_filling=True):
    """Show bounding box of annotations Only."""
    if len(anns) == 0:
        return

    ax = plt.gca()
    ax.set_autoscale_on(False)

    # Pick one random (brightened) color per category id.
    image2color = dict()
    for cat in coco.getCatIds():
        image2color[cat] = (np.random.random((1, 3)) * 0.7 + 0.3).tolist()[0]

    polygons = []
    colors = []
    for ann in anns:
        color = image2color[ann['category_id']]
        bbox_x, bbox_y, bbox_w, bbox_h = ann['bbox']
        # Rectangle corners, counter-clockwise from the top-left.
        corners = [[bbox_x, bbox_y],
                   [bbox_x, bbox_y + bbox_h],
                   [bbox_x + bbox_w, bbox_y + bbox_h],
                   [bbox_x + bbox_w, bbox_y]]
        polygons.append(Polygon(np.array(corners).reshape((4, 2))))
        colors.append(color)

        label_bbox = dict(facecolor=color) if show_label_bbox else None
        ax.text(
            bbox_x,
            bbox_y,
            '%s' % (coco.loadCats(ann['category_id'])[0]['name']),
            color='white',
            bbox=label_bbox)

    if is_filling:
        # Semi-transparent fill underneath the box outlines.
        ax.add_collection(
            PatchCollection(polygons, facecolor=colors, linewidths=0,
                            alpha=0.4))
    ax.add_collection(
        PatchCollection(polygons, facecolor='none', edgecolors=colors,
                        linewidths=2))
# Preview 8 random training images with their converted COCO annotations.
coco = COCO('./data/balloon/train/coco.json')
image_ids = coco.getImgIds()
print(image_ids)
np.random.shuffle(image_ids)

plt.figure(figsize=(16, 5))
# Visualize only 8 images.
for i in range(8):
    img_info = coco.loadImgs(image_ids[i])[0]
    image_path = osp.join('./data/balloon/train/', img_info['file_name'])
    title = image_path.split('/')[-1]
    ann_ids = coco.getAnnIds(
        imgIds=img_info['id'], catIds=[], iscrowd=0)
    anns = coco.loadAnns(ann_ids)

    ax = plt.subplot(2, 4, i + 1)
    image = Image.open(image_path).convert("RGB")
    # This line is critical — without honoring EXIF orientation the pixels
    # and the labels may not line up.
    image = apply_exif_orientation(image)
    ax.imshow(image)

    show_bbox_only(coco, anns)
    plt.title(f"{title}")
    plt.xticks([])
    plt.yticks([])
plt.tight_layout()
plt.show()
生成配置文件代码
# Current path is mmdetection/data; the config is written to mmdetection/data.
# Fixes vs. the original cell: the comment about `classes` had stray palette
# digits ("220, 20, 60") fused into it, and bbox_head used a redundant
# `dict(dict(...))` wrapper.
config_balloon = """
_base_ = '../configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py'

data_root = './data/balloon/'

# 非常重要
metainfo = {
    # 类别名,注意 classes 需要是一个 tuple,因此即使是单类,
    # 你应该写成 `('balloon',)`,很多初学者经常会在这犯错
    'classes': ('balloon',),
    'palette': [
        (0, 255, 255),
    ]
}
num_classes = 1

# 训练 40 epoch
max_epochs = 40
# 训练单卡 bs= 16
train_batch_size_per_gpu = 16
# 可以根据自己的电脑修改
train_num_workers = 4

# 验证集 batch size 为 1
val_batch_size_per_gpu = 1
val_num_workers = 2

# RTMDet 训练过程分成 2 个 stage,第二个 stage 会切换数据增强 pipeline
num_epochs_stage2 = 5

# batch 改变了,学习率也要跟着改变, 0.004 是 8卡x32 的学习率
base_lr = 16 * 0.004 / (32*8)

# 采用 COCO 预训练权重
load_from = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth'  # noqa

model = dict(
    # 考虑到数据集太小,且训练时间很短,我们把 backbone 完全固定
    # 用户自己的数据集可能需要解冻 backbone
    backbone=dict(frozen_stages=4),
    # 不要忘记修改 num_classes
    bbox_head=dict(num_classes=num_classes))

# 数据集不同,dataset 输入参数也不一样
train_dataloader = dict(
    batch_size=train_batch_size_per_gpu,
    num_workers=train_num_workers,
    pin_memory=False,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='train/coco.json',
        data_prefix=dict(img='train/')))

val_dataloader = dict(
    batch_size=val_batch_size_per_gpu,
    num_workers=val_num_workers,
    dataset=dict(
        metainfo=metainfo,
        data_root=data_root,
        ann_file='val/coco.json',
        data_prefix=dict(img='val/')))

test_dataloader = val_dataloader

# 默认的学习率调度器是 warmup 1000,但是 balloon 数据集太小了,需要修改 为 30 iter
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1.0e-5,
        by_epoch=False,
        begin=0,
        end=30),
    dict(
        type='CosineAnnealingLR',
        eta_min=base_lr * 0.05,
        begin=max_epochs // 2,  # max_epoch 也改变了
        end=max_epochs,
        T_max=max_epochs // 2,
        by_epoch=True,
        convert_to_iter_based=True),
]
optim_wrapper = dict(optimizer=dict(lr=base_lr))

# 第二 stage 切换 pipeline 的 epoch 时刻也改变了
_base_.custom_hooks[1].switch_epoch = max_epochs - num_epochs_stage2

val_evaluator = dict(ann_file=data_root + 'val/coco.json')
test_evaluator = val_evaluator

# 一些打印设置修改
default_hooks = dict(
    checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'),  # 同时保存最好性能权重
    logger=dict(type='LoggerHook', interval=5))
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
"""

with open('./data/rtmdet_tiny_1xb16-40e_balloon.py', 'w') as f:
    f.write(config_balloon)
# Visualize samples after the train pipeline, before any training starts.
import matplotlib.pyplot as plt
import os
import os.path as osp

from mmdet.registry import DATASETS, VISUALIZERS
from mmengine.config import Config
from mmengine.registry import init_default_scope

cfg = Config.fromfile('./data/rtmdet_tiny_1xb16-40e_balloon.py')
init_default_scope(cfg.get('default_scope', 'mmdet'))

dataset = DATASETS.build(cfg.train_dataloader.dataset)
visualizer = VISUALIZERS.build(cfg.visualizer)
visualizer.dataset_meta = dataset.metainfo

plt.figure(figsize=(16, 5))
# Only the first 8 samples.
for i in range(8):
    item = dataset[i]
    # CHW tensor -> HWC array for drawing.
    img = item['inputs'].permute(1, 2, 0).numpy()
    data_sample = item['data_samples'].numpy()
    gt_instances = data_sample.gt_instances
    img_path = osp.basename(item['data_samples'].img_path)

    # Replace box objects with their raw tensor so the visualizer can draw.
    gt_bboxes = gt_instances.get('bboxes', None)
    gt_instances.bboxes = gt_bboxes.tensor
    data_sample.gt_instances = gt_instances

    visualizer.add_datasample(
        osp.basename(img_path),
        img,
        data_sample,
        draw_pred=False,
        show=False)
    drawn = visualizer.get_image()

    plt.subplot(2, 4, i + 1)
    # Channel flip (presumably BGR -> RGB for matplotlib) — same indexing
    # as the original cell.
    plt.imshow(drawn[..., [2, 1, 0]])

    plt.title(f"{osp.basename(img_path)}")
    plt.xticks([])
    plt.yticks([])
plt.tight_layout()
plt.show()
python tools/train.py data/rtmdet_tiny_1xb16-40e_balloon.py
DONE (t=0.01s).
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.710
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.812
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.800
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.434
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.825
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.228
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.772
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.806
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.708
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.883
06/10 15:24:08 - mmengine - INFO - bbox_mAP_copypaste: 0.710 0.812 0.800 0.000 0.434 0.825
python tools/test.py ./data/rtmdet_tiny_1xb16-40e_balloon.py work_dirs/rtmdet_tiny_1xb16-40e_balloon/best_coco_bbox_mAP_epoch_40.pth --show-dir results
git clone -b tutorials https://github.com/open-mmlab/mmyolo.git
python demo/featmap_vis_demo.py \
resized_image.jpg \
../mmdetection/data/rtmdet_tiny_1xb16-40e_balloon.py \
../mmdetection/work_dirs/rtmdet_tiny_1xb16-40e_balloon/best_coco_bbox_mAP_epoch_40.pth \
--target-layers backbone \
--channel-reduction squeeze_mean
Image.open('output/resized_image.jpg')
python demo/boxam_vis_demo.py \
resized_image.jpg \
../mmdetection/data/rtmdet_tiny_1xb16-40e_balloon.py \
../mmdetection/work_dirs/rtmdet_tiny_1xb16-40e_balloon/best_coco_bbox_mAP_epoch_40.pth \
--target-layer neck.out_convs[1]
# Current path is mmdetection/data; the config is written to mmdetection/data.
# Fixes vs. the original cell: stray palette digits ("220, 20, 60") fused
# into the `classes` comment, a redundant `dict(dict(...))` wrapper in
# bbox_head, and a copy-pasted "balloon" in the scheduler comment.
config_drink = """
_base_ = '../configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py'

data_root = './data/Drink_284_Detection_coco/'

# 非常重要
metainfo = {
    # 类别名,注意 classes 需要是一个 tuple,因此即使是单类,
    # 你应该写成 `('cola',)`,很多初学者经常会在这犯错
    'classes': ('cola','pepsi','sprite','fanta','spring','ice','scream','milk','red','king',),
    'palette': [
        (255, 255, 255),(0, 255, 255),(255, 0, 255),(255, 255, 0),(0, 0, 255),
        (0, 255, 0),(255, 0, 0),(0, 0, 0),(128, 255, 255),(255, 128, 255),
    ]
}
num_classes = 10

# 训练 40 epoch
max_epochs = 40
# 训练单卡 bs= 16
train_batch_size_per_gpu = 16
# 可以根据自己的电脑修改
train_num_workers = 4

# 验证集 batch size 为 1
val_batch_size_per_gpu = 1
val_num_workers = 2

# RTMDet 训练过程分成 2 个 stage,第二个 stage 会切换数据增强 pipeline
num_epochs_stage2 = 5

# batch 改变了,学习率也要跟着改变, 0.004 是 8卡x32 的学习率
base_lr = 16 * 0.004 / (32*8)

# 采用 COCO 预训练权重
load_from = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth'  # noqa

model = dict(
    # 考虑到数据集太小,且训练时间很短,我们把 backbone 完全固定
    # 用户自己的数据集可能需要解冻 backbone
    backbone=dict(frozen_stages=4),
    # 不要忘记修改 num_classes
    bbox_head=dict(num_classes=num_classes))

# 数据集不同,dataset 输入参数也不一样
train_dataloader = dict(
    batch_size=train_batch_size_per_gpu,
    num_workers=train_num_workers,
    pin_memory=False,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='train_coco.json',
        data_prefix=dict(img='images/')))

val_dataloader = dict(
    batch_size=val_batch_size_per_gpu,
    num_workers=val_num_workers,
    dataset=dict(
        metainfo=metainfo,
        data_root=data_root,
        ann_file='val_coco.json',
        data_prefix=dict(img='images/')))

test_dataloader = val_dataloader

# 默认的学习率调度器是 warmup 1000,但是数据集太小了,需要修改 为 30 iter
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1.0e-5,
        by_epoch=False,
        begin=0,
        end=30),
    dict(
        type='CosineAnnealingLR',
        eta_min=base_lr * 0.05,
        begin=max_epochs // 2,  # max_epoch 也改变了
        end=max_epochs,
        T_max=max_epochs // 2,
        by_epoch=True,
        convert_to_iter_based=True),
]
optim_wrapper = dict(optimizer=dict(lr=base_lr))

# 第二 stage 切换 pipeline 的 epoch 时刻也改变了
_base_.custom_hooks[1].switch_epoch = max_epochs - num_epochs_stage2

val_evaluator = dict(ann_file=data_root + 'val_coco.json')
test_evaluator = val_evaluator

# 一些打印设置修改
default_hooks = dict(
    checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'),  # 同时保存最好性能权重
    logger=dict(type='LoggerHook', interval=5))
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
"""

with open('./data/rtmdet_tiny_1xb16-40e_drink.py', 'w') as f:
    f.write(config_drink)
DONE (t=0.06s).
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.931
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.993
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.993
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.931
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.921
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.949
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.949
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.949
06/10 18:41:24 - mmengine - INFO - bbox_mAP_copypaste: 0.931 0.993 0.993 -1.000 -1.000 0.931