Chapter 7 Machine Vision: Homework
- Code package
- Homework
  - Building the code packages
  - Problem 1
    - (1) face_detector.py
    - (2) test1.cpp
    - (3) Results
      - Face moves forward, the robot drives forward
      - Face moves backward, the robot backs up
      - Face moves left, the robot turns left
      - Face moves right, the robot turns right
  - Problem 2
    - (1) Installing TensorFlow
    - (2) detect_ros.py
    - (3) test2.cpp
    - (4) Results
      - Cup moves backward, the robot backs up
      - Cup moves forward, the robot drives forward
      - Cup moves left, the robot turns left
      - Cup moves right, the robot turns right
Code package
All of the code has been uploaded to GitHub:
GitHub link: https://github.com/YuemingBi/ros_practice/tree/master
Homework
Building the code packages
This chapter's homework depends on the previous chapter's code package, so ch6 and ch7 are built together. Building them directly may fail, because the executables in ch7 reference custom message headers that have not been generated yet. The workaround is to comment out the executable targets in ch7's CMakeLists.txt, build once so the messages are generated, then restore the targets and build again.
Problem 1
Problem 1 uses OpenCV to detect faces.
(1) face_detector.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import rospy
import cv2
import numpy as np
from sensor_msgs.msg import Image, RegionOfInterest
from cv_bridge import CvBridge, CvBridgeError
from test1.msg import Size

class faceDetector:
    def __init__(self):
        rospy.on_shutdown(self.cleanup)

        # Create the cv_bridge
        self.bridge = CvBridge()
        self.size = Size()
        self.image_pub = rospy.Publisher("cv_bridge_image", Image, queue_size=1)
        self.size_pub = rospy.Publisher("Size", Size, queue_size=1)

        # XML files with the Haar feature cascades; the paths are passed in from the launch file
        cascade_1 = rospy.get_param("~cascade_1", "")
        cascade_2 = rospy.get_param("~cascade_2", "")

        # Initialize the Haar feature detectors from the cascade files
        self.cascade_1 = cv2.CascadeClassifier(cascade_1)
        self.cascade_2 = cv2.CascadeClassifier(cascade_2)

        # Cascade parameters for tuning face detection; they can be overridden in the launch file
        self.haar_scaleFactor = rospy.get_param("~haar_scaleFactor", 1.2)
        self.haar_minNeighbors = rospy.get_param("~haar_minNeighbors", 2)
        self.haar_minSize = rospy.get_param("~haar_minSize", 40)
        self.haar_maxSize = rospy.get_param("~haar_maxSize", 60)
        self.color = (50, 255, 50)

        # Subscribe to the rgb image; the topic name can be remapped in the launch file
        self.image_sub = rospy.Subscriber("input_rgb_image", Image, self.image_callback, queue_size=1)

    def image_callback(self, data):
        # Convert the ROS image into OpenCV format with cv_bridge
        try:
            cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")
            frame = np.array(cv_image, dtype=np.uint8)
        except CvBridgeError as e:
            print(e)
            return

        # Convert to grayscale
        grey_image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Equalize the histogram to reduce the influence of lighting
        grey_image = cv2.equalizeHist(grey_image)

        # Try to detect faces
        faces_result = self.detect_face(grey_image)

        # Draw a box around every detected face in the OpenCV window
        if len(faces_result) > 0:
            for face in faces_result:
                x, y, w, h = face
                self.size.width = abs(int(w))
                self.size.high = abs(int(h))
                self.size.x = abs(int(x))
                self.size_pub.publish(self.size)
                cv2.rectangle(cv_image, (x, y), (x + w, y + h), self.color, 2)

        # Convert the annotated image back into a ROS message and publish it
        self.image_pub.publish(self.bridge.cv2_to_imgmsg(cv_image, "bgr8"))

    def detect_face(self, input_image):
        # Try the frontal-face model first
        faces = []
        if self.cascade_1:
            faces = self.cascade_1.detectMultiScale(input_image,
                    self.haar_scaleFactor,
                    self.haar_minNeighbors,
                    cv2.CASCADE_SCALE_IMAGE,
                    (self.haar_minSize, self.haar_maxSize))

        # If the frontal model finds nothing, fall back to the profile-face model
        if len(faces) == 0 and self.cascade_2:
            faces = self.cascade_2.detectMultiScale(input_image,
                    self.haar_scaleFactor,
                    self.haar_minNeighbors,
                    cv2.CASCADE_SCALE_IMAGE,
                    (self.haar_minSize, self.haar_maxSize))
        return faces

    def cleanup(self):
        print("Shutting down vision node.")
        cv2.destroyAllWindows()

if __name__ == '__main__':
    try:
        # Initialize the ROS node
        rospy.init_node("face_detector")
        faceDetector()
        rospy.loginfo("Face detector is started..")
        rospy.loginfo("Please subscribe to the ROS image topic.")
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down face detector node.")
        cv2.destroyAllWindows()
The main change to the detector is an extra published topic carrying each face box's position and size (x, width, and high), which the control node below uses to track the face's movement.
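The custom test1/Size message itself is not listed in this post. Judging from the fields the code touches (x, width, high), it is a small set of pixel values; the sketch below is an assumption built only from those field names, and publishes a fake box by hand so the test1 node can be exercised without a camera:

#!/usr/bin/env python
# Minimal sketch, assuming a test1/Size.msg roughly like:
#   int32 x      # left edge of the face box, in pixels
#   int32 width  # box width, in pixels
#   int32 high   # box height ("high" is the field name the code uses)
import rospy
from test1.msg import Size

rospy.init_node("size_probe")
pub = rospy.Publisher("Size", Size, queue_size=1)
rate = rospy.Rate(10)
while not rospy.is_shutdown():
    # A fixed fake face box; vary these numbers to trigger the
    # forward/backward/turn branches in test1.cpp.
    pub.publish(Size(x=100, width=80, high=90))
    rate.sleep()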
(2) test1.cpp
#include "ros/ros.h"
#include "test1/Size.h"
#include "geometry_msgs/Twist.h"
#include "std_msgs/Int32.h"
int flag = 0;
std_msgs::Int32 size_width, size_high, size_x;
class SubscribeAndPublish
{
public:
SubscribeAndPublish()
{
pub = n.advertise<geometry_msgs::Twist>("cmd_vel", 1);
sub = n.subscribe("Size", 1, &SubscribeAndPublish::callback, this);
}
void callback(const test1::Size::ConstPtr& size)
{
if(flag==0){
size_width.data = size->width;
size_high.data = size->high;
size_x.data = size->x;
flag = 1;
}
else{
if((size->width>size_width.data+50)&&(size->high>size_high.data+50)){
ROS_INFO("go forward");
//ROS_INFO("size->width:%d size_width:%d", size->width, size_width.data);
size_width.data = size->width;
size_high.data = size->high;
size_x.data = size->x;
geometry_msgs::Twist msg;
msg.linear.x = 0.3;
msg.angular.z = 0;
pub.publish(msg);
}
else if((size->width<size_width.data-50)&&(size->high<size_high.data-50)){
ROS_INFO("go back");
size_width.data = size->width;
size_high.data = size->high;
size_x.data = size->x;
geometry_msgs::Twist msg;
msg.linear.x = -0.3;
msg.angular.z = 0;
pub.publish(msg);
}
else if( (size->x + size->width/2) < (size_x.data + size_width.data/2 - 50) ){
ROS_INFO("turn right");
size_width.data = size->width;
size_high.data = size->high;
size_x.data = size->x;
geometry_msgs::Twist msg;
msg.linear.x = 0;
msg.angular.z = -0.3;
pub.publish(msg);
}
else if( (size->x + size->width/2) > (size_x.data + size_width.data/2 + 50) ){
ROS_INFO("turn left");
size_width.data = size->width;
size_high.data = size->high;
size_x.data = size->x;
geometry_msgs::Twist msg;
msg.linear.x = 0;
msg.angular.z = 0.3;
pub.publish(msg);
}
}
}
private:
ros::NodeHandle n;
ros::Publisher pub;
ros::Subscriber sub;
};
int main(int argc, char **argv)
{
ros::init(argc, argv, "test1");
SubscribeAndPublish SAPObject;
ros::spin();
return 0;
}
Design notes: this node subscribes to the face-detection output (the face box's position, width, and height). Changes in the box's width and height indicate that the face moved closer or farther away, while changes in its horizontal center indicate that it moved left or right. The node publishes messages from inside a subscriber callback; for details on that pattern, see my earlier post: Publishing messages from inside a callback.
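For reference, here is a minimal rospy sketch of the same publish-inside-a-callback structure, assuming the test1/Size fields described above. It mirrors the thresholds in test1.cpp but is only an illustration, not the author's code:

#!/usr/bin/env python
# Minimal rospy sketch of test1.cpp's publish-in-callback pattern.
import rospy
from geometry_msgs.msg import Twist
from test1.msg import Size

class FaceFollower:
    def __init__(self):
        self.last = None   # reference face box from the previous move
        self.pub = rospy.Publisher("cmd_vel", Twist, queue_size=1)
        rospy.Subscriber("Size", Size, self.callback, queue_size=1)

    def callback(self, size):
        if self.last is None:
            self.last = size          # first message: just record the reference
            return
        cmd = Twist()
        moved = True
        if size.width > self.last.width + 50 and size.high > self.last.high + 50:
            cmd.linear.x = 0.3        # face grew: it moved closer, drive forward
        elif size.width < self.last.width - 50 and size.high < self.last.high - 50:
            cmd.linear.x = -0.3       # face shrank: it moved away, back up
        elif size.x + size.width / 2 < self.last.x + self.last.width / 2 - 50:
            cmd.angular.z = -0.3      # center moved left in the image: turn right
        elif size.x + size.width / 2 > self.last.x + self.last.width / 2 + 50:
            cmd.angular.z = 0.3       # center moved right: turn left
        else:
            moved = False
        if moved:
            self.last = size          # re-baseline only after a detected move, like test1.cpp
            self.pub.publish(cmd)

if __name__ == "__main__":
    rospy.init_node("face_follower")
    FaceFollower()
    rospy.spin()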
(3) Results
$ roslaunch robot_vision usb_cam.launch
$ roslaunch robot_vision face_detector.launch
$ rqt_image_view
$ roslaunch robot_gazebo view_robot_kinetic_rplidar.launch
$ rosrun test1 test1
Face moves forward, the robot drives forward
Face moves backward, the robot backs up
Face moves left, the robot turns left
Face moves right, the robot turns right
Problem 2
Problem 2 uses TensorFlow for object detection.
(1) Installing TensorFlow
It is best not to go through Anaconda, which only complicates things; on Linux, TensorFlow can be installed directly (for example with pip). The installation steps themselves are not covered here.
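Since detect_ros.py below relies on the TensorFlow 1.x graph API (tf.GraphDef, tf.Session, tf.gfile), it is worth checking that the interpreter you will run the node with imports a 1.x release. A quick probe:

# Quick sanity check for the TF1 graph API used by detect_ros.py.
import tensorflow as tf
print(tf.__version__)   # detect_ros.py expects a 1.x release here
assert hasattr(tf, "Session"), "TensorFlow 2.x detected; detect_ros.py needs the TF1 API"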
(2) detect_ros.py
#!/usr/bin/env python
## Author: Rohit
## Date: July, 25, 2017
# Purpose: ROS node to detect objects using TensorFlow

import os
import sys
import cv2
import numpy as np
try:
    import tensorflow as tf
except ImportError:
    print("unable to import TensorFlow. Is it installed?")
    print("  sudo apt install python-pip")
    print("  sudo pip install tensorflow")
    sys.exit(1)

# ROS related imports
import rospy
from std_msgs.msg import String, Header
from sensor_msgs.msg import Image
from cv_bridge import CvBridge, CvBridgeError
from vision_msgs.msg import Detection2D, Detection2DArray, ObjectHypothesisWithPose

# Object detection module imports
import object_detection
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
from test2.msg import pose

# SET FRACTION OF GPU YOU WANT TO USE HERE
GPU_FRACTION = 0.4

######### Set model here ############
MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
# By default models are stored in data/models/
MODEL_PATH = os.path.join(os.path.dirname(sys.path[0]), 'data', 'models', MODEL_NAME)
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_PATH + '/frozen_inference_graph.pb'
######### Set the label map file here ###########
LABEL_NAME = 'mscoco_label_map.pbtxt'
# By default label maps are stored in data/labels/
PATH_TO_LABELS = os.path.join(os.path.dirname(sys.path[0]), 'data', 'labels', LABEL_NAME)
######### Set the number of classes here #########
NUM_CLASSES = 90

# Load the frozen TensorFlow graph
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

## Loading label map
# Label maps map indices to category names, so that when our convolution network predicts `5`,
# we know that this corresponds to `airplane`. Here we use internal utility functions,
# but anything that returns a dictionary mapping integers to appropriate string labels would be fine.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# Setting the GPU options to use the fraction of GPU memory configured above
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = GPU_FRACTION

# Detection
class Detector:
    def __init__(self):
        self.image_pub = rospy.Publisher("debug_image", Image, queue_size=1)
        self.object_pub = rospy.Publisher("objects", Detection2DArray, queue_size=1)
        self.bridge = CvBridge()
        self.image_sub = rospy.Subscriber("image", Image, self.image_cb, queue_size=1, buff_size=2**24)
        self.sess = tf.Session(graph=detection_graph, config=config)
        self.pose_pub = rospy.Publisher("pose", pose, queue_size=1)
        self.pose = pose()

    def image_cb(self, data):
        objArray = Detection2DArray()
        try:
            cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")
        except CvBridgeError as e:
            print(e)
            return
        image = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)

        # The array based representation of the image will be used later in order to prepare
        # the result image with the boxes and labels on it.
        image_np = np.asarray(image)
        # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
        image_np_expanded = np.expand_dims(image_np, axis=0)
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the confidence level for each of the objects.
        # The score is shown on the result image, together with the class label.
        scores = detection_graph.get_tensor_by_name('detection_scores:0')
        classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        (boxes, scores, classes, num_detections) = self.sess.run(
            [boxes, scores, classes, num_detections],
            feed_dict={image_tensor: image_np_expanded})

        # Note: here vis_util is expected to return the list of drawn detections
        # (the version bundled with the tensorflow_object_detector package does).
        objects = vis_util.visualize_boxes_and_labels_on_image_array(
            image,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=2)

        objArray.detections = []
        objArray.header = data.header
        object_count = 1
        for i in range(len(objects)):
            object_count += 1
            objArray.detections.append(self.object_predict(objects[i], data.header, image_np, cv_image))
        self.object_pub.publish(objArray)

        # Class id 47 is "cup" in the COCO label map: only publish the pose of a detected cup.
        if self.pose.id == "47":
            self.pose_pub.publish(self.pose)

        img = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
        image_out = Image()
        try:
            image_out = self.bridge.cv2_to_imgmsg(img, "bgr8")
        except CvBridgeError as e:
            print(e)
        image_out.header = data.header
        self.image_pub.publish(image_out)

    def object_predict(self, object_data, header, image_np, image):
        image_height, image_width, channels = image.shape
        obj = Detection2D()
        obj_hypothesis = ObjectHypothesisWithPose()

        object_id = object_data[0]
        object_score = object_data[1]
        dimensions = object_data[2]   # normalized [ymin, xmin, ymax, xmax]

        obj.header = header
        obj_hypothesis.id = str(object_id)
        obj_hypothesis.score = object_score
        obj.results.append(obj_hypothesis)
        # x coordinates scale by the image width, y coordinates by the image height.
        obj.bbox.size_y = int((dimensions[2] - dimensions[0]) * image_height)
        obj.bbox.size_x = int((dimensions[3] - dimensions[1]) * image_width)
        obj.bbox.center.x = int((dimensions[1] + dimensions[3]) * image_width / 2)
        obj.bbox.center.y = int((dimensions[0] + dimensions[2]) * image_height / 2)

        self.pose.pose_x = int((dimensions[1] + dimensions[3]) * image_width / 2)
        self.pose.pose_y = int((dimensions[0] + dimensions[2]) * image_height / 2)
        self.pose.size_x = int((dimensions[3] - dimensions[1]) * image_width)
        self.pose.size_y = int((dimensions[2] - dimensions[0]) * image_height)
        self.pose.id = str(object_id)

        return obj

def main(args):
    rospy.init_node('detector_node')
    obj = Detector()
    try:
        rospy.spin()
    except KeyboardInterrupt:
        print("ShutDown")
    cv2.destroyAllWindows()

if __name__ == '__main__':
    main(sys.argv)
The programming approach is the same as in Problem 1: the detector publishes the tracked object's box as a custom test2/pose message (fields pose_x, pose_y, size_x, size_y, and id, as used in the code), and the control node below converts changes in the box's size and horizontal center into velocity commands.
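The hard-coded "47" filter in detect_ros.py is the COCO class id for "cup", which is why the robot follows a cup specifically. You can confirm the id from the same label map the node loads (the path below assumes the node's default data/labels layout):

# Look up class 47 in the label map detect_ros.py uses.
from object_detection.utils import label_map_util

label_map = label_map_util.load_labelmap('data/labels/mscoco_label_map.pbtxt')
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=90, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
print(category_index[47])   # expected: {'id': 47, 'name': 'cup'}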
(3) test2.cpp
#include "ros/ros.h"
#include "std_msgs/Int64.h"
#include "std_msgs/String.h"
#include "geometry_msgs/Twist.h"
#include "test2/pose.h"
int flag = 0;
std_msgs::Int64 pose_width, pose_high, pose_x;
class SubscribeAndPublish
{
public:
SubscribeAndPublish()
{
pub = n.advertise<geometry_msgs::Twist>("cmd_vel", 1);
sub = n.subscribe("pose", 1, &SubscribeAndPublish::callback, this);
}
void callback(const test2::pose::ConstPtr& pose)
{
if(flag==0){
pose_width.data = pose->size_x;
pose_high.data = pose->size_y;
pose_x.data = pose->pose_x;
flag = 1;
}
else{
if((pose->size_x>pose_width.data+20)&&(pose->size_y>pose_high.data+20)){
ROS_INFO("go forward");
pose_width.data = pose->size_x;
pose_high.data = pose->size_y;
pose_x.data = pose->pose_x;
geometry_msgs::Twist msg;
msg.linear.x = 0.3;
msg.angular.z = 0;
pub.publish(msg);
}
else if((pose->size_x<pose_width.data-20)&&(pose->size_y<pose_high.data-20)){
ROS_INFO("go back");
pose_width.data = pose->size_x;
pose_high.data = pose->size_y;
pose_x.data = pose->pose_x;
geometry_msgs::Twist msg;
msg.linear.x = -0.3;
msg.angular.z = 0;
pub.publish(msg);
}
else if( (pose->pose_x + pose->size_x/2) < (pose_x.data + pose_width.data/2 - 30) ){
ROS_INFO("turn left");
pose_width.data = pose->size_x;
pose_high.data = pose->size_y;
pose_x.data = pose->pose_x;
geometry_msgs::Twist msg;
msg.linear.x = 0;
msg.angular.z = 0.3;
pub.publish(msg);
}
else if( (pose->pose_x + pose->size_x/2) > (pose_x.data + pose_width.data/2 + 30) ){
ROS_INFO("turn right");
pose_width.data = pose->size_x;
pose_high.data = pose->size_y;
pose_x.data = pose->pose_x;
geometry_msgs::Twist msg;
msg.linear.x = 0;
msg.angular.z = -0.3;
pub.publish(msg);
}
}
}
private:
ros::NodeHandle n;
ros::Publisher pub;
ros::Subscriber sub;
};
int main(int argc, char** argv){
ros::init(argc, argv, "test2");
SubscribeAndPublish SAPObject;
ros::spin();
return 0;
}
(4) Results
Before running the object-detection node, enter the TensorFlow virtual environment first: source tf/bin/activate
$ roslaunch tensorflow_object_detector usb_cam_detector.launch
$ rqt_image_view
$ rostopic echo /pose
$ roslaunch robot_gazebo view_robot_kinetic_rplidar.launch
$ rosrun test2 test2
Cup moves backward, the robot backs up
Cup moves forward, the robot drives forward
Cup moves left, the robot turns left
Cup moves right, the robot turns right