首先是正样本的制作,这里我用opencv的拍照功能进行本地保存,接下来是拍照的代码。
void TakePhotoForPos()
{
VideoCapture capture(0);
Mat src;
if (!capture.isOpened())
{
cout << "ERROR" << endl;
system("pause");
}
int PressKeyTime = 0;
while (true)
{
capture >> src;
imshow("src", src);
if (waitKey(30)==32)
{
ostringstream oss;
oss << PressKeyTime <<endl;
imwrite("D:\\traindatabase\\pos\\"+to_string(PressKeyTime)+".jpg", src);
cout << oss.str() << endl;
PressKeyTime++;
}
if (PressKeyTime>499)
{
break;
}
}
}
这里用空格键拍照,并保存到D:\traindatabase\pos目录下,一共500张。下面是需要包含的头文件和命名空间声明。
#include<opencv.hpp>
#include<iostream>
#include<highgui/highgui.hpp>
#include<ml/ml.hpp>
#include<string>
#include<fstream>
#include<vector>
using namespace std;
using namespace cv;
利用cmd将图片路径写入txt文件中,这与Linux命令类似:先用“cd 文件目录”切换到图片所在的目录,再输入 dir /s/b > path.txt,即可将图片路径写入txt文件中。
当要进入其他盘时,可以win+R,打开运行窗口,再输入cmd,进去之后输入d:,这样就进入了D盘了,就可以对D盘中的文件进行读取了。
制作样本时,将路径列表复制到Word中制作标签:按Ctrl+H打开替换窗口,把“jpg”全部替换为“jpg^p1”。其中“^”(小三角)是转义符号,“^p”表示换行,后面的“1”表示在新的一行追加标签1(负样本则把1换成0)。这样就成功地添加了标签。
路径文件为这种格式的:
D:\traindatabase\neg\1.jpg
0
D:\traindatabase\neg\10.jpg
0
D:\traindatabase\neg\100.jpg
0
D:\traindatabase\neg\1000.jpg
0
D:\traindatabase\neg\1001.jpg
0
D:\traindatabase\neg\1002.jpg
0
D:\traindatabase\pos\1003.jpg
1
D:\traindatabase\pos\1004.jpg
1
接下来我们使用HOG提取样本特征,并用SVM进行分类模型的训练。data_path是保存样本路径及其标签的txt文件,save_path是训练出的xml文件的保存路径。
void Train_SVMmodel(const string& data_path,const string& save_path)
{
int ImgWidght = 64;
int ImgHeight = 128;
vector<string> img_path;
vector<int> img_catg;
int nLine = 0;
string buf;
ifstream svm_data(data_path);
unsigned long n;
while (svm_data)
{
if (getline(svm_data, buf))
{
nLine++;
if (nLine % 2 == 0)
{
img_catg.push_back(atoi(buf.c_str())); //atoi将字符串转化为整型,标志(0,1)
}
else
{
img_path.push_back(buf); //图像路径
}
}
} //对文件进行解析
svm_data.close();
Mat data_mat, res_mat;
int nImgNum = nLine / 2;
data_mat = Mat::zeros(nImgNum, 3780, CV_32FC1);
res_mat = Mat::zeros(nImgNum, 1, CV_32SC1);
Mat src;
Mat small;
Mat trainImg = Mat(Size(ImgWidght, ImgHeight), 8, 3);
for (string::size_type i = 0; i != img_path.size(); i++)
{
src = imread(img_path[i].c_str());
if (src.empty())
{
cout << "can not load the image" << img_path[i] << endl;
continue;
}
cout << "processing" << img_path[i].c_str() << endl;
resize(src, small, Size(ImgWidght, ImgHeight), (0, 0), (0, 0), 1);
//cvtColor(small, small, COLOR_RGB2GRAY);
HOGDescriptor *hog = new HOGDescriptor(Size(ImgWidght, ImgHeight), Size(16, 16), Size(8, 8), Size(8, 8), 9);
vector<float>descriptors;
hog->compute(small, descriptors, Size(1, 1), Size(0, 0));
cout << "HOG dimision is" << descriptors.size() << endl;
n = 0;
for (vector<float>::iterator iter = descriptors.begin(); iter != descriptors.end(); iter++)
{
data_mat.at<float>(i, n) = *iter;
n++;
}
res_mat.at<float>(i, 0) = img_catg[i];
cout << "end processing " << img_path[i].c_str() << img_catg[i] << endl;
}
//cout << data_mat << endl;
//cout << res_mat << endl;
Ptr<ml::SVM>svm = ml::SVM::create();
cout << "training" << endl;
svm->setType(ml::SVM::C_SVC);
svm->setKernel(ml::SVM::RBF);
svm->setDegree(10.0);
svm->setGamma(8.0);
svm->setCoef0(1.0);
svm->setC(10.0);
svm->setNu(0.5);
svm->setP(0);
svm->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 0.01));
svm->train(data_mat, ml::ROW_SAMPLE, res_mat);
cout << "End of training" << endl;
svm->save(save_path);
}
因为这个样本是第一行为路径,第二行为标签,因此通过这样的方法可以将路径转化为数组,将对应的每个标签储存到数组中,实现对饮料瓶的二分类。接下来就是对SVM的参数的设置,这些参数的说明可以查看官方文档进行设置,最后将训练好的xml文件保存到当前文件夹,并通过此模型来判断物体是不是属于饮料瓶。
接下来是从训练好的SVM模型中提取检测子。filename是xml文件的路径。
void Detect_Unit(const string& filename)
{
//HOGDescriptor hog(Size(32, 64), Size(4, 4), Size(8, 8), Size(8, 8), 9);
int DescriptorDim;
Ptr<ml::SVM>svm = ml::SVM::load(filename);
DescriptorDim = svm->getVarCount();
Mat supportVector = svm->getSupportVectors();
int supportVectorNum = supportVector.rows;
cout << "支持向量个数为" << supportVectorNum << endl;
vector<float>svm_alpha;
vector<float>svm_svidx;
float svm_rho;
svm_rho = svm->getDecisionFunction(0, svm_alpha, svm_svidx);
Mat alphaMat = Mat::zeros(1, supportVectorNum, CV_32FC1);
Mat supportVectorMat = Mat::zeros(supportVectorNum, DescriptorDim, CV_32FC1);
Mat resultMat = Mat::zeros(1, DescriptorDim, CV_32FC1);
supportVectorMat = supportVector;
//将alpha向量的数据复制到alphaMat中,返回SVM决策函数中的alpha向量
for (int i = 0; i < supportVectorNum; i++)
{
alphaMat.at<float>(0, i) = svm_alpha[i];
}
//计算-(alphaMat*supportVectorMat),结果放在resultMat中。
resultMat = -1 * alphaMat * supportVectorMat;
vector<float>myDetector;
//将resultMat中的数据复制到数组myDetector;
for (int i = 0; i < DescriptorDim; i++)
{
myDetector.push_back(resultMat.at<float>(0, i));
}
//最后添加偏移量rho,得到检测子
myDetector.push_back(svm_rho);
HOGDescriptor myHOG;
Size s1(16,16);
Size s2(8, 8);
myHOG.winSize = Size(64,128);
myHOG.blockSize = s1;
myHOG.blockStride = s2;
myHOG.cellSize = s2;
myHOG.nbins = 9;
if (_access("D:\\svm\\svm\\HOGDetectorForOpenCv.txt",0)== -1)
{
ofstream fout("HOGDetectorForOpenCv.txt");
for (unsigned int i = 0; i < myDetector.size(); i++)
{
fout << myDetector[i] << endl;
}
}
}
使用训练好的模型进行判断,可以预测结果。
下面对检测子进行保存并初始化:
/*加载检测子*/
/*如果有检测子的话跳过此步骤*/
void Load_Detect_Vector(const string& filename)
{
/*当myDetector为空时加载*/
ifstream finPos(filename);
string buf;
int nLine = 0;
while (finPos)
{
if (getline(finPos, buf))
{
myDetector.push_back(atof(buf.c_str()));
nLine++;
}
}
}
接下来是对结果的预测:
void Detect_Final()
{
/*VideoCapture detect(0);
if (!detect.isOpened())
{
cout << "ERROR" << endl;
system("pause");
}*/
//Mat frame;
//Mat src;D:\traindatabase\pos\0.jpg
Mat inter = imread("D:\\traindatabase\\pos\\4.jpg");
//detect >> inter;
if (inter.empty())
{
cout << "the picture is empty" << endl;
system("pause");
};
//resize(inter, frame, Size(64, 128));
//cvtColor(src, frame, COLOR_BGR2GRAY);
vector<Rect>found, found_filtered;
myHOG.detectMultiScale(inter, found, 0, Size(8, 8), Size(32, 32), 1.05, 2);
//cout << "找到的矩形框个数为" << found.size() << endl;
cout << found.size();
//找出所有没有嵌套的矩形框,并放入found_filtered中,如果有嵌套,取最外面的一个
for (unsigned int i = 0; i < found.size(); i++)
{
Rect r = found[i];
unsigned int j = 0;
for (; j < found.size(); j++)
if (j != i && (r & found[j]) == r)
break;
if (j == found.size())
{
found_filtered.push_back(r);
}
}
//画矩形框
for (unsigned int i = 0; i < found_filtered.size(); i++)
{
Rect r = found_filtered[i];
r.x += cvRound(r.width*0.1);
r.width = cvRound(r.width*0.8);
r.y += cvRound(r.height*0.07);
r.height = cvRound(r.height*0.8);
rectangle(inter, r.tl(), r.br(), Scalar(0, 255, 0), 3);
putText(inter, "Drinks", Point2f(r.x + 5, r.y + 10), cv::FONT_HERSHEY_PLAIN, 0.4, Scalar(0, 255, 0), 1, 8, false);
}
imshow("output", inter);
//resize(inter, inter, Size(4 * inter.cols, 4 * inter.rows));
//imshow("frame", inter);
//Mat output;
//resize(src, output, Size(4 * src.cols, 4 * src.rows));
//imshow("src", output);
waitKey(0);
}
这就是基于HOG+SVM的简单识别。这段代码仍然存在不少问题,但可以实现一些简单的识别。此外还存在精度和速度上的问题:没有使用GPU加速,而且HOG检测时计算量很大,特征向量有2000个,由此带来的计算开销还需要优化。特征向量矩阵和分类使用的xml文件如果有需要可以私信我,因为文件比较大,在这里就不方便发了。
接下来附上结果图片:
调用顺序为:先用Train_SVMmodel训练并保存xml模型,再用Detect_Unit导出并保存检测子,然后用Load_Detect_Vector加载检测子,最后用Detect_Final进行检测。
接下来是完整代码:
#include<opencv.hpp>
#include<iostream>
#include<highgui/highgui.hpp>
#include<ml/ml.hpp>
#include<string>
#include<fstream>
#include<vector>
#include<objdetect.hpp>
#include<stdlib.h>
#include<algorithm>
#include<io.h>
using namespace std;
using namespace cv;
vector<float>myDetector; // detector coefficients: filled by Load_Detect_Vector, passed to setSVMDetector in Detect_Final
vector<string>FILENAME; // positive-sample file names, appended to by GenerateFileName
HOGDescriptor myHOG; // global HOG descriptor; NOTE(review): Detect_Final builds its own local descriptor, so this global looks unused on main's call path - confirm before removing
string Forward = "D:\\traindatabase\\pos\\"; // directory of the positive samples
string Back = ".jpg"; // file extension appended to each generated file name
int SumPicture = 500; // number of positive samples
/*训练自己的SVM模型,并以此得到检测子*/
void Train_SVMmodel(const string& data_path,const string& save_path)
{
int ImgWidght = 64;
int ImgHeight = 128;
vector<string> img_path;
vector<int> img_catg;
int nLine = 0;
string buf;
ifstream svm_data(data_path);
unsigned long n;
while (svm_data)
{
if (getline(svm_data, buf))
{
nLine++;
if (nLine % 2 == 0)
{
img_catg.push_back(atoi(buf.c_str())); //atoi将字符串转化为整型,标志(0,1)
}
else
{
img_path.push_back(buf); //图像路径
}
}
}
svm_data.close();
Mat data_mat, res_mat;
int nImgNum = nLine / 2;
data_mat = Mat::zeros(nImgNum, 3780, CV_32FC1);
res_mat = Mat::zeros(nImgNum, 1, CV_32SC1);
Mat src;
Mat small;
Mat trainImg = Mat(Size(ImgWidght, ImgHeight), 8, 3);
for (string::size_type i = 0; i != img_path.size(); i++)
{
src = imread(img_path[i].c_str());
if (src.empty())
{
cout << "can not load the image" << img_path[i] << endl;
continue;
}
cout << "processing" << img_path[i].c_str() << endl;
resize(src, small, Size(ImgWidght, ImgHeight), (0, 0), (0, 0), 1);
//cvtColor(small, small, COLOR_RGB2GRAY);
HOGDescriptor *hog = new HOGDescriptor(Size(ImgWidght, ImgHeight), Size(16, 16), Size(8, 8), Size(8, 8), 9);
vector<float>descriptors;
hog->compute(small, descriptors, Size(1, 1), Size(0, 0));
cout << "HOG dimision is" << descriptors.size() << endl;
n = 0;
for (vector<float>::iterator iter = descriptors.begin(); iter != descriptors.end(); iter++)
{
data_mat.at<float>(i, n) = *iter;
n++;
}
res_mat.at<float>(i, 0) = img_catg[i];
cout << "end processing " << img_path[i].c_str() << img_catg[i] << endl;
}
//cout << data_mat << endl;
//cout << res_mat << endl;
Ptr<ml::SVM>svm = ml::SVM::create();
cout << "training" << endl;
svm->setType(ml::SVM::C_SVC);
svm->setKernel(ml::SVM::LINEAR);
svm->setDegree(10.0);
svm->setGamma(8.0);
svm->setCoef0(1.0);
svm->setC(10.0);
svm->setNu(0.5);
svm->setP(0);
svm->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 3000, 1e-6));
svm->train(data_mat, ml::ROW_SAMPLE, res_mat);
cout << "End of training" << endl;
svm->save(save_path);
}
/*得到自己的检测子*/
void Detect_Unit(const string& filename)
{
//HOGDescriptor hog(Size(32, 64), Size(4, 4), Size(8, 8), Size(8, 8), 9);
int DescriptorDim;
Ptr<ml::SVM>svm = ml::SVM::load(filename);
DescriptorDim = svm->getVarCount();
Mat supportVector = svm->getSupportVectors();
int supportVectorNum = supportVector.rows;
cout << "支持向量个数为" << supportVectorNum << endl;
vector<float>svm_alpha;
vector<float>svm_svidx;
float svm_rho;
svm_rho = svm->getDecisionFunction(0, svm_alpha, svm_svidx);
Mat alphaMat = Mat::zeros(1, supportVectorNum, CV_32FC1);
Mat supportVectorMat = Mat::zeros(supportVectorNum, DescriptorDim, CV_32FC1);
Mat resultMat = Mat::zeros(1, DescriptorDim, CV_32FC1);
supportVectorMat = supportVector;
//将alpha向量的数据复制到alphaMat中,返回SVM决策函数中的alpha向量
for (int i = 0; i < supportVectorNum; i++)
{
alphaMat.at<float>(0, i) = svm_alpha[i];
}
//计算-(alphaMat*supportVectorMat),结果放在resultMat中。
resultMat = -1 * alphaMat * supportVectorMat;
vector<float>myDetector;
//将resultMat中的数据复制到数组myDetector;
for (int i = 0; i < DescriptorDim; i++)
{
myDetector.push_back(resultMat.at<float>(0, i));
}
//最后添加偏移量rho,得到检测子
myDetector.push_back(svm_rho);
HOGDescriptor myHOG;
Size s1(16,16);
Size s2(8, 8);
myHOG.winSize = Size(64,128);
myHOG.blockSize = s1;
myHOG.blockStride = s2;
myHOG.cellSize = s2;
myHOG.nbins = 9;
if (_access("D:\\svm\\svm\\HOGDetectorForOpenCv.txt",0)== -1)
{
ofstream fout("HOGDetectorForOpenCv.txt");
for (unsigned int i = 0; i < myDetector.size(); i++)
{
fout << myDetector[i] << endl;
}
}
}
/*加载检测子*/
/*如果有检测子的话跳过此步骤*/
void Load_Detect_Vector()
{
/*当myDetector为空时加载*/
ifstream finPos("D;\\svm\\svm\\HOGDetectorForOpenCv.txt");
string buf;
int nLine = 0;
while (finPos)
{
if (getline(finPos, buf))
{
myDetector.push_back(atof(buf.c_str()));
nLine++;
}
}
}
/*最终的检测*/
void Detect_Final()
{
/*VideoCapture detect(0);
if (!detect.isOpened())
{
cout << "ERROR" << endl;
system("pause");
}*/
//Mat frame;
//Mat src;D:\traindatabase\pos\0.jpg
HOGDescriptor myHOG(Size(64,128),Size(16,16),Size(8,8),Size(8,8),9); //得到自己的HOG特征向量
myHOG.setSVMDetector(myDetector);
Mat inter = imread("D:\\traindatabase\\pos\\4.jpg");
//detect >> inter;
if (inter.empty())
{
cout << "the picture is empty" << endl;
system("pause");
};
//resize(inter, frame, Size(64, 128));
//cvtColor(src, frame, COLOR_BGR2GRAY);
vector<Rect>found, found_filtered;
myHOG.detectMultiScale(inter, found, 0, Size(8, 8), Size(32, 32), 1.05, 2);
//cout << "找到的矩形框个数为" << found.size() << endl;
cout << found.size();
//找出所有没有嵌套的矩形框,并放入found_filtered中,如果有嵌套,取最外面的一个
for (unsigned int i = 0; i < found.size(); i++)
{
Rect r = found[i];
unsigned int j = 0;
for (; j < found.size(); j++)
if (j != i && (r & found[j]) == r)
break;
if (j == found.size())
{
found_filtered.push_back(r);
}
}
//画矩形框
for (unsigned int i = 0; i < found_filtered.size(); i++)
{
Rect r = found_filtered[i];
r.x += cvRound(r.width*0.1);
r.width = cvRound(r.width*0.8);
r.y += cvRound(r.height*0.07);
r.height = cvRound(r.height*0.8);
rectangle(inter, r.tl(), r.br(), Scalar(0, 255, 0), 3);
putText(inter, "Drinks", Point2f(r.x + 5, r.y + 10), cv::FONT_HERSHEY_PLAIN, 0.4, Scalar(0, 255, 0), 1, 8, false);
}
imshow("output", inter);
//resize(inter, inter, Size(4 * inter.cols, 4 * inter.rows));
//imshow("frame", inter);
//Mat output;
//resize(src, output, Size(4 * src.cols, 4 * src.rows));
//imshow("src", output);
waitKey(0);
}
/*利用opencv来对正样本进行拍照*/
void GenerateFileName()
{
for (size_t i = 0; i < SumPicture; i++)
{
ostringstream filename;
/*stringstream ss;
string res;
ss << i;
ss >> res;*/
filename << i << Back << endl;
FILENAME.push_back(filename.str());
}
}
/*利用opencv拍照得到正样本*/
void TakePhotoForPos()
{
VideoCapture capture(0);
Mat src;
if (!capture.isOpened())
{
cout << "ERROR" << endl;
system("pause");
}
int PressKeyTime = 0;
while (true)
{
capture >> src;
imshow("src", src);
if (waitKey(30)==32)
{
ostringstream oss;
oss << PressKeyTime <<endl;
imwrite("D:\\traindatabase\\pos\\"+to_string(PressKeyTime)+".jpg", src);
cout << oss.str() << endl;
PressKeyTime++;
}
if (PressKeyTime>499)
{
break;
}
}
}
/**
 * Program entry point: loads the stored detector vector and runs the final
 * detection. Command-line arguments are not used.
 */
int main(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    // One-off preparation steps for HOG + SVM training; enable them in this
    // order when the samples, the model or the detector file must be rebuilt:
    //   TakePhotoForPos();
    //   Train_SVMmodel("D:\\traindatabase\\path.txt", "SVM_HOG.xml");
    //   Detect_Unit("SVM_HOG.xml");

    Load_Detect_Vector();
    Detect_Final();

    return 0;
}