KITTI dataset
Download dataset
- KITTI 3D Object Detection Evaluation 2017 link
- Download all four parts, 41.4 GB in total.
- After extraction there are four kinds of content (camera calibration matrices calib, RGB images image_2, labels label_2, and point cloud data velodyne), each split into testing and training data. The training set has 7481 frames (paired images and point clouds of the same scenes); the testing set has 7518 frames (with no label_2 data).
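As a quick sanity check after extraction, the frame counts should match the numbers above (the extraction root below is illustrative):

import glob

# Hypothetical extraction root; point it at wherever you unpacked the archives.
train = len(glob.glob('./KITTI/training/velodyne/*.bin'))
test = len(glob.glob('./KITTI/testing/velodyne/*.bin'))
print(train, test)  # expected: 7481 7518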
Data Preprocessing
Part 1 3D Point Cloud Data Cropping
- Clone this TensorFlow implementation of VoxelNet, which contains the relevant data preprocessing code: voxelnet
- This TensorFlow voxelnet first crops all the point clouds in the training set. Concretely, it uses the camera calibration matrices stored in calib to project the 3D point cloud points into the 2D RGB image (Tr_velo_to_cam maps the 3D point cloud coordinates into camera 0's 3D coordinate system, R_rect rectifies the cameras so their image planes are coplanar, and the projection matrix P of the target camera projects the points onto that camera's image plane); points that fall outside the 2D RGB image are removed.
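In homogeneous coordinates the whole chain collapses into one matrix product. A minimal NumPy sketch with placeholder matrices (not real calibration values):

import numpy as np

# Placeholders: a 4x4 rigid transform (Tr_velo_to_cam padded with [0, 0, 0, 1]),
# a 4x4 rectification (R0_rect embedded in an identity), and a 3x4 projection P2.
Tr_velo_to_cam = np.eye(4, dtype=np.float32)
R0_rect = np.eye(4, dtype=np.float32)
P2 = np.eye(3, 4, dtype=np.float32)

x_velo = np.array([10.0, 1.0, 2.0, 1.0], dtype=np.float32)  # homogeneous LiDAR point
x_img = P2 @ R0_rect @ Tr_velo_to_cam @ x_velo              # (u*w, v*w, w)
u, v = x_img[:2] / x_img[2]                                 # pixel coordinates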
Two points to note here:
- Image reading: scipy 1.6.3 can no longer import imread. Two fixes: 1) downgrade scipy, or 2) import from a different library: from imageio import imread (a compatibility shim follows this list).
- Output file names must match the originals: six digits, zero-padded on the left.
  output_name = PC_NEW_ROOT + str(frame).zfill(6) + '.bin'  # str(frame): int -> str
  print(output_name)                                        # zfill(6): pad to six digits with leading zeros
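scipy.misc.imread was deprecated and later removed from scipy, so on recent versions the import fails; a drop-in shim (a sketch of option 2 above) keeps old scripts working either way:

try:
    from scipy.misc import imread   # only available on old scipy releases
except ImportError:
    from imageio import imread      # drop-in replacement on modern scipy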
- For anyone reproducing voxelnet: the parts of the project originally compiled from C need to be deleted and rebuilt on your own machine. Also, when pip install is slow the only real fix is switching mirrors; the USTC mirror is strongly recommended (e.g. pip install -i https://pypi.mirrors.ustc.edu.cn/simple/ <package>):
https://pypi.mirrors.ustc.edu.cn/simple/
- Detailed walkthrough of crop.py below. It is worth stepping through the flow once with breakpoints in a debugger; the intermediate matrices become much clearer that way.
import numpy as np
from imageio import imread
# Use imageio instead of scipy for image reading

CAM = 2  # project onto camera 2 (out of cameras 0, 1, 2, 3)

def load_velodyne_points(filename):
    # Read the points in one .bin file. 000000.bin holds roughly 19000 points, i.e. 19000 rows.
    # The 4 columns are x, y, z, intensity. Intensity is the return strength; it depends on the
    # distance between the object and the LiDAR and on the object's reflectivity.
    points = np.fromfile(filename, dtype=np.float32).reshape(-1, 4)
    return points

def load_calib(calib_dir):
    # The final transform is: P2 * R0_rect * Tr_velo_to_cam * y
    # Read the calibration file line by line.
    lines = open(calib_dir).readlines()
    lines = [line.split()[1:] for line in lines][:-1]
    # Projection matrix, 3x4.
    # We project onto camera 2, the left color camera.
    P = np.array(lines[CAM]).reshape(3, 4)
    # Tr_velo_to_cam, padded to 4x4.
    # Maps point cloud coordinates into camera 0's (3D) coordinate system.
    Tr_velo_to_cam = np.array(lines[5]).reshape(3, 4)
    Tr_velo_to_cam = np.concatenate([Tr_velo_to_cam, np.array([0, 0, 0, 1]).reshape(1, 4)], 0)
    # R_cam_to_rect, embedded into a 4x4 identity.
    # Rectification: brings the images of all cameras into one common plane.
    R_cam_to_rect = np.eye(4)
    R_cam_to_rect[:3, :3] = np.array(lines[4][:9]).reshape(3, 3)
    # Type conversion.
    P = P.astype('float32')
    Tr_velo_to_cam = Tr_velo_to_cam.astype('float32')
    R_cam_to_rect = R_cam_to_rect.astype('float32')
    return P, Tr_velo_to_cam, R_cam_to_rect

def prepare_velo_points(pts3d_raw):
    # Set the reflectance value (the intensity) to 1 and transpose the point matrix,
    # e.g. 19000x4 becomes 4x19000, so it can be multiplied by the projection matrices directly.
    pts3d = pts3d_raw
    # Keep only points with reflectance > 0: a length-19000 boolean array.
    indices = pts3d[:, 3] > 0
    pts3d = pts3d[indices, :]
    pts3d[:, 3] = 1  # set the 4th component to 1 (homogeneous coordinate)
    return pts3d.transpose(), indices

def project_velo_points_in_img(pts3d, T_cam_velo, Rrect, Prect):
    '''Project 3D point cloud points into the 2D image coordinate system.
    The point cloud is a 4xN matrix.
    Only projections of points in front of the camera are kept.'''
    # 3D points in the camera reference frame.
    pts3d_cam = Rrect.dot(T_cam_velo.dot(pts3d))
    # Before projecting, keep only points with z >= 0
    # (points that are in front of the camera).
    idx = (pts3d_cam[2, :] >= 0)
    pts2d_cam = Prect.dot(pts3d_cam[:, idx])
    return pts3d[:, idx], pts2d_cam / pts2d_cam[2, :], idx

def align_img_and_pc(img_dir, pc_dir, calib_dir):
    img = imread(img_dir)
    pts = load_velodyne_points(pc_dir)
    P, Tr_velo_to_cam, R_cam_to_rect = load_calib(calib_dir)
    pts3d, indices = prepare_velo_points(pts)
    reflectances = pts[indices, 3]
    pts3d, pts2d_normed, idx = project_velo_points_in_img(pts3d, Tr_velo_to_cam, R_cam_to_rect, P)
    reflectances = reflectances[idx]
    assert reflectances.shape[0] == pts3d.shape[1] == pts2d_normed.shape[1]
    rows, cols = img.shape[:2]
    points = []
    for i in range(pts2d_normed.shape[1]):
        c = int(np.round(pts2d_normed[0, i]))
        r = int(np.round(pts2d_normed[1, i]))
        # Keep only points whose projection lands inside the image.
        if c < cols and r < rows and r > 0 and c > 0:
            color = img[r, c, :]
            point = [pts3d[0, i], pts3d[1, i], pts3d[2, i], reflectances[i],
                     color[0], color[1], color[2],
                     pts2d_normed[0, i], pts2d_normed[1, i]]
            points.append(point)
    points = np.array(points)
    return points

# update the following directories
IMG_ROOT = '/home/vanessa/Desktop/voxelnet/data/KITTI/image/training/image_2/'
PC_ROOT = '/home/vanessa/Desktop/voxelnet/data/KITTI/point_cloud/training/velodyne/'
PC_NEW_ROOT = '/home/vanessa/Desktop/voxelnet/data/KITTI/point_cloud/training/velodyne_new/'
CALIB_ROOT = '/home/vanessa/Desktop/voxelnet/data/KITTI/calib/training/calib/'

for frame in range(0, 7481):
    img_dir = IMG_ROOT + '%06d.png' % frame
    pc_dir = PC_ROOT + '%06d.bin' % frame
    calib_dir = CALIB_ROOT + '%06d.txt' % frame
    points = align_img_and_pc(img_dir, pc_dir, calib_dir)
    output_name = PC_NEW_ROOT + str(frame).zfill(6) + '.bin'
    print(output_name)
    points[:, :4].astype('float32').tofile(output_name)
- Change the dataset paths to your own, then run crop.py to start cropping. The process takes a while, so be patient.
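Once cropping finishes, a small sanity check on one frame (a sketch, reusing the frame-0 paths from crop.py above) confirms the output keeps the Nx4 layout while dropping points:

import numpy as np

PC_ROOT = '/home/vanessa/Desktop/voxelnet/data/KITTI/point_cloud/training/velodyne/'
PC_NEW_ROOT = '/home/vanessa/Desktop/voxelnet/data/KITTI/point_cloud/training/velodyne_new/'

raw = np.fromfile(PC_ROOT + '000000.bin', dtype=np.float32).reshape(-1, 4)
cropped = np.fromfile(PC_NEW_ROOT + '000000.bin', dtype=np.float32).reshape(-1, 4)
print(raw.shape, cropped.shape)         # cropped keeps the Nx4 (x, y, z, reflectance) layout...
assert cropped.shape[0] < raw.shape[0]  # ...with fewer points, since out-of-image points were removed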
Part 2 Split dataset
- voxelnet dataset split protocol. For comparison:
  - complex-yolov4 split: train 6000 / val 1481 / test 7518
  - voxelnet split: train 3712 / val 3769 / test 7518
  We follow the voxelnet split for all subsequent steps.
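The train.txt / val.txt index files come from the ImageSets folder referenced by split.py below; counting their non-empty lines is a quick way to confirm the protocol (paths mirror split.py):

IMAGESETS = '/home/vanessa/Desktop/voxelnet/data/KITTI/ImageSets/'
for name, expected in [('train.txt', 3712), ('val.txt', 3769)]:
    with open(IMAGESETS + name) as f:
        n = sum(1 for line in f if line.strip())
    print(name, n)      # should print 3712 and 3769
    assert n == expected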
- Clone the repo and set up the basic environment: Complex-YOLOv4-Pytorch Github Link. Complex_YOLOv4 also ships a classes_names.txt; only three object classes are detected: cars, pedestrians, and cyclists.
- split.py: change the data paths to your own, and note that the training point cloud data must be the newly cropped data from the previous step!
import os
from shutil import copyfile

def rearrange(original_folders, target_folders, split_file):
    # Validate the source folders; create target folders on demand.
    for _, v in original_folders.items():
        if not os.path.isdir(v):
            raise Exception('No such folder: %s' % v)
    for _, v in target_folders.items():
        if not os.path.isdir(v):
            os.makedirs(v)
    if not os.path.exists(split_file):
        raise Exception('No such split file: %s' % split_file)
    else:
        with open(split_file) as f:
            content = f.readlines()
        content = [x.strip() for x in content]
        for file in content:
            src_img = os.path.join(original_folders['IMG_ROOT'], file + '.png')
            src_pc = os.path.join(original_folders['PC_ROOT'], file + '.bin')
            src_label = os.path.join(original_folders['LABEL_ROOT'], file + '.txt')
            src_calib = os.path.join(original_folders['CALIB_ROOT'], file + '.txt')
            if (not os.path.exists(src_img)) or (not os.path.exists(src_pc)) \
                    or (not os.path.exists(src_label)) or (not os.path.exists(src_calib)):
                print('No such file: %s' % file)
            else:
                dst_img = os.path.join(target_folders['IMG_ROOT'], file + '.png')
                dst_pc = os.path.join(target_folders['PC_ROOT'], file + '.bin')
                dst_label = os.path.join(target_folders['LABEL_ROOT'], file + '.txt')
                dst_calib = os.path.join(target_folders['CALIB_ROOT'], file + '.txt')
                copyfile(src_img, dst_img)
                copyfile(src_pc, dst_pc)
                copyfile(src_label, dst_label)
                copyfile(src_calib, dst_calib)

if __name__ == '__main__':
    # calib files are split along with everything else.
    # Original folders
    original_folders = dict()
    original_folders['IMG_ROOT'] = '/home/vanessa/Desktop/voxelnet/data/KITTI/image/training/image_2/'
    original_folders['PC_ROOT'] = '/home/vanessa/Desktop/voxelnet/data/KITTI/point_cloud/training/velodyne_new/'
    original_folders['LABEL_ROOT'] = '/home/vanessa/Desktop/voxelnet/data/KITTI/label/training/label_2/'
    original_folders['CALIB_ROOT'] = '/home/vanessa/Desktop/voxelnet/data/KITTI/calib/training/calib/'
    # Target folders
    train_folders = dict()
    train_folders['IMG_ROOT'] = './data/MD_KITTI/training/image_2/'
    train_folders['PC_ROOT'] = './data/MD_KITTI/training/velodyne/'
    train_folders['LABEL_ROOT'] = './data/MD_KITTI/training/label_2/'
    train_folders['CALIB_ROOT'] = './data/MD_KITTI/training/calib/'
    val_folders = dict()
    val_folders['IMG_ROOT'] = './data/MD_KITTI/validation/image_2/'
    val_folders['PC_ROOT'] = './data/MD_KITTI/validation/velodyne/'
    val_folders['LABEL_ROOT'] = './data/MD_KITTI/validation/label_2/'
    val_folders['CALIB_ROOT'] = './data/MD_KITTI/validation/calib/'
    # Split files
    train_split_file = '/home/vanessa/Desktop/voxelnet/data/KITTI/ImageSets/train.txt'
    val_split_file = '/home/vanessa/Desktop/voxelnet/data/KITTI/ImageSets/val.txt'
    rearrange(original_folders, train_folders, train_split_file)
    rearrange(original_folders, val_folders, val_split_file)
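After both rearrange calls finish, ./data/MD_KITTI/training/ should hold 3712 frames and ./data/MD_KITTI/validation/ 3769, matching the voxelnet protocol above; any "No such file" lines printed along the way point at frames missing from the source folders.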
Dataset folder structure
- Build the dataset with the following structure:
└── dataset/
    └── kitti/
        ├── ImageSets/
        │   ├── train.txt
        │   ├── val.txt
        │   ├── trainval.txt
        │   └── test.txt
        ├── training/
        │   ├── image_2/ <-- for visualization
        │   ├── calib/
        │   ├── label_2/
        │   └── velodyne/
        ├── validation/
        │   ├── image_2/ <-- for visualization
        │   ├── calib/
        │   ├── label_2/
        │   └── velodyne/
        ├── testing/
        │   ├── image_2/ <-- for visualization
        │   ├── calib/
        │   └── velodyne/
        └── classes_names.txt
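A small helper sketch to create this skeleton up front (the root ./dataset/kitti simply mirrors the tree above; exist_ok avoids clobbering existing folders):

import os

ROOT = './dataset/kitti'
SPLITS = {
    'training':   ['image_2', 'calib', 'label_2', 'velodyne'],
    'validation': ['image_2', 'calib', 'label_2', 'velodyne'],
    'testing':    ['image_2', 'calib', 'velodyne'],  # no label_2 for the test set
}

os.makedirs(os.path.join(ROOT, 'ImageSets'), exist_ok=True)
for split, subdirs in SPLITS.items():
    for sub in subdirs:
        os.makedirs(os.path.join(ROOT, split, sub), exist_ok=True)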
With this, our KITTI dataset preprocessing and preparation is complete!