一、数字识别基础
FPGA具有全并行运行的特点,特别适合用于图像处理。数字识别是一项关键技术,是图像识别等的基础,数字识别主要有以下几种方法。
基于模板分类匹配的数字识别算法
模板匹配法是一种传统的、较早应用在计算机软件上的数字识别算法,该算法的重点是先对要识别的阿拉伯数字 0-9 构建模板,然后将图像中待识别的数字与阿拉伯数字 0-9 的模板逐一进行匹配对比,计算出待识别数字与每个模板的匹配程度,最后根据计算得到的相似度进行识别,相似度最高的阿拉伯数字模板即是我们所要输出的结果。
基于神经网络的数字识别方法
神经网络识别算法是一种比较新颖的算法,它通过模仿生物神经网络的工作方式来进行识别。神经网络识别算法对外界的干扰有一定的抵抗能力,但在神经网络的训练过程中,需要较大的数据量和运算量才能保证数字识别的准确性。
基于数字几何特征的数字识别方法
基于数字特征的识别算法的关键是对阿拉伯数字 0-9 的形状、构造等几何特征进行分析计数,构造特征模板,通过算法获取图像数字的几何特征数,与阿拉伯数字 0-9 模板的几何特征数进行比对,从而识别出该数字。在选择要识别的几何特征时,通常选择比较直观、跟数字大小无关、且阿拉伯数字 0-9 的特征数各不相同的特征。数字的几何特征一般有两种:一种是数字的结构特征,即获取字符的笔画、轮廓等几何特征,该结构特征计算量大,并且获取过程复杂,特征不稳定,容易受待识别字体的大小影响;另一种是统计特征,即获取像素个数以及交点个数等统计量。
对这几种方法做一个总结,可以发现模板匹配的实现难度和准确度都还算可以,因此这个项目中主要采用模板匹配来实现印刷体数字的识别,之后会考虑用神经网络来实现手写体识别。
二、模板匹配数字识别的matlab仿真
2.1 读取图像和加噪
% Section 2.1 - load the test picture and create a noisy copy of it.
% Outputs used by the following sections:
%   img_rgb  - original picture, uint8, h x w x 3
%   img_salt - picture with salt & pepper noise, uint8, h x w x 3
%   h, w     - picture height and width in pixels
clc;
clear;   % plain 'clear' is enough to reset the variables; 'clear all' is not needed
tic;     % start the timer after the workspace reset (read by toc at the end of the script)

%read picture
img_rgb=imread('./pic/number01234.bmp');
if(size(img_rgb,3)~=3)
    % every later section indexes planes 1..3, so fail early with a clear message
    error('./pic/number01234.bmp must be an RGB picture with 3 colour planes.');
end

%height and width
h=size(img_rgb,1);
w=size(img_rgb,2);

%rgb picture
figure(1);
subplot(121);
imshow(img_rgb);
title('rgb picture');

%noise picture
% the noise is added to a [0,1] double copy; img_rgb itself is left untouched
img_salt=imnoise(double(img_rgb)/255,'salt & pepper',0.001);
img_salt=uint8(img_salt*255);
subplot(122);
imshow(img_salt);
title('salt picture');
先读取图像,然后为了模拟实际的图片,对读取的图片加上噪声。
2.2 RGB转YUV
% Section 2.2 - RGB -> YCbCr with 8-bit fixed-point coefficients
% (multiply, add, then shift right by 8 bits):
%   Y  = ( R*77  + G*150 + B*29         ) >> 8
%   Cb = (-R*44  - G*84  + B*128 + 32768) >> 8
%   Cr = ( R*128 - G*108 - B*20  + 32768) >> 8
% 32768 = 128<<8 moves the signed chroma range so that "no colour" maps to 128.
% The constant used to be written as 32678 (two digits swapped), which biased
% both chroma planes slightly low.
% Inputs : img_salt (h x w x 3, values 0..255)
% Outputs: img_y, img_u (Cb), img_v (Cr) as uint8 h x w; img_salt stays uint8.
CHROMA_OFFSET=32768;
ch_r=double(img_salt(:,:,1));
ch_g=double(img_salt(:,:,2));
ch_b=double(img_salt(:,:,3));

% all three sums are non-negative integers for 0..255 inputs, so
% floor(x/256) gives the same value as a right shift by 8 bits
img_y=uint8(floor(( ch_r*77  + ch_g*150 + ch_b*29 )/256));
img_u=uint8(floor((-ch_r*44  - ch_g*84  + ch_b*128 + CHROMA_OFFSET)/256));
img_v=uint8(floor(( ch_r*128 - ch_g*108 - ch_b*20  + CHROMA_OFFSET)/256));
img_salt=uint8(img_salt);

figure(2);
subplot(131);
imshow(img_y);
title('gray picture');
subplot(132);
imshow(img_u);
title('Cb picture');
subplot(133);
imshow(img_v);
title('Cr picture');
对加入噪声的图片进行颜色空间的转换,RGB转为YUV格式。
2.3 图像中值滤波、二值化、腐蚀膨胀
% Section 2.3 (Cb plane) - 3x3 median filter, fixed threshold, then opening
% (3x3 erosion followed by 3x3 dilation).
% Each 3x3 pass drops a one-pixel frame, so the pictures shrink:
%   img_u h x w -> img_median_filter / img_thresh (h-2)x(w-2)
%               -> img_corrosion (h-4)x(w-4) -> img_expansion (h-6)x(w-6)
%
%median value filter, algorithm suitable for FPGA:
%   sort every window row        -> [min med max] per row
%   combine the three rows       -> [max_of_min med_of_med min_of_max]
%   median of these three values -> filter output
img_u=double(img_u);
img_median_filter=zeros(h-2,w-2);
for row=2:h-1
    for col=2:w-1
        % after sorting along the rows: column 1 = minima, 2 = medians, 3 = maxima
        win=sort(img_u(row-1:row+1,col-1:col+1),2);
        max_of_min=max(win(:,1));
        med_of_med=median(win(:,2));
        min_of_max=min(win(:,3));
        img_median_filter(row-1,col-1)=median([min_of_max,med_of_med,max_of_min]);
    end
end
img_u=uint8(img_u);

%median value filter show
img_median_filter=uint8(img_median_filter);
figure(3);
subplot(221);
imshow(img_median_filter);
title('median value filter');

% bin: pixels above the threshold become 255, all others 0
THRESH_HOLD=150;
img_thresh=uint8(255*(double(img_median_filter)>THRESH_HOLD));
subplot(222);
imshow(img_thresh);
title('bin picture');

%open operation:corrosion and expansion
% corrosion: a pixel stays white only when all 9 pixels of its window are non-zero
img_corrosion=zeros(h-4,w-4);
for row=2:h-3
    for col=2:w-3
        win=img_thresh(row-1:row+1,col-1:col+1);
        if(all(win(:)))
            img_corrosion(row-1,col-1)=255;
        end
    end
end
img_corrosion=uint8(img_corrosion);
subplot(223);
imshow(img_corrosion);
title('corrosion picture');

% expansion: a pixel becomes white when any of the 9 pixels of its window is non-zero
img_expansion=zeros(h-6,w-6);
for row=2:h-5
    for col=2:w-5
        win=img_corrosion(row-1:row+1,col-1:col+1);
        if(any(win(:)))
            img_expansion(row-1,col-1)=255;
        end
    end
end
img_expansion=uint8(img_expansion);
subplot(224);
imshow(img_expansion);
title('expansion picture');
对加噪图像的U分量先进行中值滤波,然后二值化,最后做腐蚀和膨胀。为什么是U分量呢?因为U分量是YUV中的蓝色色差分量,可以检测到蓝色边框。
2.4 水平投影与垂直投影
% Section 2.4 - projections of the opened Cb mask.
% vertical_value(j)   = number of white (255) pixels in column j, 1 x (w-6)
% horizontal_value(i) = number of white (255) pixels in row i,    1 x (h-6)
white_px=(img_expansion(1:h-6,1:w-6)==255);

%Vertical projection
vertical_value=sum(white_px,1);
%Horizontal projection
horizontal_value=sum(white_px,2)';

figure(4);
subplot(121);
plot(vertical_value);
subplot(122);
plot(horizontal_value);
利用水平投影和竖直投影可以找到图像的边界,由此定位出蓝色区域。
2.5 原始图和识别的边框叠加输出
% Section 2.5 - locate the frame from the Cb projections and draw it in red
% on the original picture.
% border_v1 / border_v2 : first and last non-empty column (cropped coordinates)
% border_h1 / border_h2 : first and last non-empty row    (cropped coordinates)
% *_rgb                 : the same borders in img_rgb coordinates. The cropped
%                         picture lost 3 pixels per side in the three 3x3
%                         passes, hence the +3.
% Note: MATLAB has no variable declarations. The former "char border_v1;"
% lines only called char('border_v1') and, without a semicolon, printed
% "ans" to the command window, so they were removed.

%find border
% rising edge: 0 -> non-zero, falling edge: non-zero -> 0; the last edge of
% each kind is used
rise_v=find(vertical_value(1:end-1)==0 & vertical_value(2:end)~=0);
fall_v=find(vertical_value(1:end-1)~=0 & vertical_value(2:end)==0);
rise_h=find(horizontal_value(1:end-1)==0 & horizontal_value(2:end)~=0);
fall_h=find(horizontal_value(1:end-1)~=0 & horizontal_value(2:end)==0);
if(isempty(rise_v) || isempty(fall_v) || isempty(rise_h) || isempty(fall_h))
    error('Frame not found: the Cb projections contain no complete 0 -> non-zero -> 0 run.');
end
border_v1=rise_v(end)+1;
border_v2=fall_v(end);
border_h1=rise_h(end)+1;
border_h2=fall_h(end);
border_v1_rgb=border_v1+3;
border_v2_rgb=border_v2+3;
border_h1_rgb=border_h1+3;
border_h2_rgb=border_h2+3;

%overlap pic and border
% mark the four sides (corners excluded) in a mask, then paint the mask red
red_mask=false(h,w);
red_mask([border_h1_rgb border_h2_rgb],border_v1_rgb+1:border_v2_rgb-1)=true;
red_mask(border_h1_rgb+1:border_h2_rgb-1,[border_v1_rgb border_v2_rgb])=true;
plane=img_rgb(:,:,1); plane(red_mask)=255; img_rgb(:,:,1)=plane;
plane=img_rgb(:,:,2); plane(red_mask)=0;   img_rgb(:,:,2)=plane;
plane=img_rgb(:,:,3); plane(red_mask)=0;   img_rgb(:,:,3)=plane;
figure(5);
imshow(img_rgb);
2.6 灰度图像滤波二值化腐蚀膨胀
% Section 2.6 (Y plane) - 3x3 median filter, fixed threshold, then opening
% (3x3 erosion followed by 3x3 dilation).
% Each 3x3 pass drops a one-pixel frame, so the pictures shrink:
%   img_y h x w -> img_median_filter_y / img_thresh_y (h-2)x(w-2)
%               -> img_corrosion_y (h-4)x(w-4) -> img_expansion_y (h-6)x(w-6)
%
%median value filter, algorithm suitable for FPGA:
%   sort every window row        -> [min med max] per row
%   combine the three rows       -> [max_of_min med_of_med min_of_max]
%   median of these three values -> filter output
img_y=double(img_y);
img_median_filter_y=zeros(h-2,w-2);
for row=2:h-1
    for col=2:w-1
        % after sorting along the rows: column 1 = minima, 2 = medians, 3 = maxima
        win=sort(img_y(row-1:row+1,col-1:col+1),2);
        max_of_min=max(win(:,1));
        med_of_med=median(win(:,2));
        min_of_max=min(win(:,3));
        img_median_filter_y(row-1,col-1)=median([min_of_max,med_of_med,max_of_min]);
    end
end
img_y=uint8(img_y);

%median value filter show
img_median_filter_y=uint8(img_median_filter_y);
figure(6);
subplot(221);
imshow(img_median_filter_y);
title('median value filter y');

% bin: pixels above the threshold become 255, all others 0
THRESH_HOLD=150;
img_thresh_y=uint8(255*(double(img_median_filter_y)>THRESH_HOLD));
subplot(222);
imshow(img_thresh_y);
title('bin picture y');

%open operation:corrosion and expansion
% corrosion: a pixel stays white only when all 9 pixels of its window are non-zero
img_corrosion_y=zeros(h-4,w-4);
for row=2:h-3
    for col=2:w-3
        win=img_thresh_y(row-1:row+1,col-1:col+1);
        if(all(win(:)))
            img_corrosion_y(row-1,col-1)=255;
        end
    end
end
img_corrosion_y=uint8(img_corrosion_y);
subplot(223);
imshow(img_corrosion_y);
title('corrosion picture y');

% expansion: a pixel becomes white when any of the 9 pixels of its window is non-zero
img_expansion_y=zeros(h-6,w-6);
for row=2:h-5
    for col=2:w-5
        win=img_corrosion_y(row-1:row+1,col-1:col+1);
        if(any(win(:)))
            img_expansion_y(row-1,col-1)=255;
        end
    end
end
img_expansion_y=uint8(img_expansion_y);
subplot(224);
imshow(img_expansion_y);
title('expansion picture y');
在前面已经定位出了蓝色边框,之后我们对灰度图进行滤波和二值化,然后腐蚀和膨胀。
2.7 字符区域水平和垂直投影
% Section 2.7 - projections of the opened Y mask, restricted to the inside
% of the frame (3 pixels away from each border found in section 2.5).
% vertical_value_y(j)   = white (255) pixels of column j inside the region, 1 x (w-6)
% horizontal_value_y(i) = white (255) pixels of row i inside the region,    1 x (h-6)
row_idx=1:h-6;
col_idx=1:w-6;
roi_rows=(row_idx>=border_h1+3) & (row_idx<=border_h2-3);
roi_cols=(col_idx>=border_v1+3) & (col_idx<=border_v2-3);

% white pixels inside the region; everything outside stays false
roi_white=false(h-6,w-6);
roi_white(roi_rows,roi_cols)=(img_expansion_y(roi_rows,roi_cols)==255);

%Vertical projection
vertical_value_y=sum(roi_white,1);
%Horizontal projection
horizontal_value_y=sum(roi_white,2)';

figure(7);
subplot(121);
plot(vertical_value_y);
subplot(122);
plot(horizontal_value_y);
对字符区域也就是蓝色区域的部分进行竖直和水平投影,来确定每个数字的边框区域。
2.8 原始图和每个字符边框叠加输出
% Section 2.8 - find the bounding box of every digit from the region
% projections and draw the frame plus all digit boxes in red.
% border_v1_num(k) / border_v2_num(k) : left / right column of digit k
% border_h1_num    / border_h2_num    : top / bottom row shared by all digits
% *_rgb                               : same values in img_rgb coordinates (+3)
% cnt                                 : number of complete digits + 1
% Note: the former "char xxx;" lines were not declarations (they only called
% char('xxx')), so the variables are now initialised explicitly.

%find border of number
border_v1_num=[];
border_v2_num=[];
cnt=1;
for i=1:w-7
    if(vertical_value_y(i)==0 && vertical_value_y(i+1)~=0)
        border_v1_num(1,cnt)=i+1;   % rising edge: digit cnt starts
    end
    if(vertical_value_y(i)~=0 && vertical_value_y(i+1)==0)
        border_v2_num(1,cnt)=i;     % falling edge: digit cnt ends
        cnt=cnt+1;
    end
end

border_h1_num=[];
border_h2_num=[];
for i=1:h-7
    if(horizontal_value_y(i)==0 && horizontal_value_y(i+1)~=0)
        border_h1_num=i+1;
    end
    if(horizontal_value_y(i)~=0 && horizontal_value_y(i+1)==0)
        border_h2_num=i;
    end
end
if(cnt==1 || isempty(border_h1_num) || isempty(border_h2_num))
    error('No digit found inside the frame: the region projections are empty.');
end
border_v1_num_rgb=border_v1_num+3;
border_v2_num_rgb=border_v2_num+3;
border_h1_num_rgb=border_h1_num+3;
border_h2_num_rgb=border_h2_num+3;

%overlap pic and border
% collect every line in one mask (corners excluded), then paint it red once
red_mask=false(h,w);
red_mask([border_h1_rgb border_h2_rgb],border_v1_rgb+1:border_v2_rgb-1)=true;
red_mask(border_h1_rgb+1:border_h2_rgb-1,[border_v1_rgb border_v2_rgb])=true;
for k=1:cnt-1
    red_mask([border_h1_num_rgb border_h2_num_rgb],border_v1_num_rgb(1,k)+1:border_v2_num_rgb(1,k)-1)=true;
    red_mask(border_h1_num_rgb+1:border_h2_num_rgb-1,[border_v1_num_rgb(1,k) border_v2_num_rgb(1,k)])=true;
end
plane=img_rgb(:,:,1); plane(red_mask)=255; img_rgb(:,:,1)=plane;
plane=img_rgb(:,:,2); plane(red_mask)=0;   img_rgb(:,:,2)=plane;
plane=img_rgb(:,:,3); plane(red_mask)=0;   img_rgb(:,:,3)=plane;
figure(8);
imshow(img_rgb);
将投影得到的边框叠加原始图像进行输出。
2.9 字符区域分割
% Section 2.9 - split every digit box into 8 rows x 5 columns of cells and
% draw the frame, the digit boxes and the cell grid.
% cut_h    : cell height in pixels (may be fractional), same for all digits
% cut_v(k) : cell width in pixels (may be fractional) of digit k
% The grid is drawn in red on img_rgb (figure 9) and with gray value 120 on
% img_expansion_y1, a copy of the opened Y mask (figure 10).
% Note: the former "char cut_v;" / "char cut_h;" lines were not declarations
% (they only called char('cut_v')), so they were removed.

%Character region segmentation
cut_h=(border_h2_num_rgb-border_h1_num_rgb)/8;
cut_v=(border_v2_num_rgb(1,1:cnt-1)-border_v1_num_rgb(1,1:cnt-1))/5;

%overlap pic_rgb and border and cut num
% all lines are collected in one mask in img_rgb coordinates; horizontal
% lines span the columns strictly between the side borders and vertical
% lines the rows strictly between top and bottom, so corners stay unpainted
grid_mask=false(h,w);
grid_mask([border_h1_rgb border_h2_rgb],border_v1_rgb+1:border_v2_rgb-1)=true;
grid_mask(border_h1_rgb+1:border_h2_rgb-1,[border_v1_rgb border_v2_rgb])=true;
inner_rows=border_h1_num_rgb+1:border_h2_num_rgb-1;
row_lines=[border_h1_num_rgb, round(border_h1_num_rgb+(1:7)*cut_h), border_h2_num_rgb];
for k=1:cnt-1
    left=border_v1_num_rgb(1,k);
    right=border_v2_num_rgb(1,k);
    col_lines=[left, round(left+(1:4)*cut_v(1,k)), right];
    grid_mask(row_lines,left+1:right-1)=true;
    grid_mask(inner_rows,col_lines)=true;
end
plane=img_rgb(:,:,1); plane(grid_mask)=255; img_rgb(:,:,1)=plane;
plane=img_rgb(:,:,2); plane(grid_mask)=0;   img_rgb(:,:,2)=plane;
plane=img_rgb(:,:,3); plane(grid_mask)=0;   img_rgb(:,:,3)=plane;
figure(9);
imshow(img_rgb);

%overlap pic_expan_y and border and cut num
% the opened mask is img_rgb cropped by 3 pixels on every side, so the same
% lines are found at (row-3, col-3): take the matching window of the mask
img_expansion_y1=img_expansion_y;
img_expansion_y1(grid_mask(4:h-3,4:w-3))=120;
figure(10);
imshow(img_expansion_y1);
对每个字符区域进行分割,这里分割成5*8总共40个小方块来进行模板匹配。
2.10 字符识别
%get num
% Section 2.10a - turn every digit into an 8 x 5 binary feature matrix.
% Cell (i,j) of digit k covers the rows/columns between two neighbouring
% grid lines (both lines included). It is set to 1 when the number of white
% pixels exceeds 0.8 * (cell area / 2), i.e. 40% of the cell.
% num is 8 x 5*(cnt-1): digit k occupies columns (k-1)*5+1 .. (k-1)*5+5.
num=zeros(8,5*(cnt-1));   % allocate once so no stale or missing columns remain
for k=1:cnt-1
    for i=0:7
        cell_rows=round(border_h1_num+i*cut_h):round(border_h1_num+(i+1)*cut_h);
        for j=0:4
            cell_cols=round(border_v1_num(1,k)+j*cut_v(1,k)):round(border_v1_num(1,k)+(j+1)*cut_v(1,k));
            cell_px=img_expansion_y(cell_rows,cell_cols);
            white_cnt=nnz(cell_px==255);
            fill_limit=numel(cell_px)/2*0.8;
            if(white_cnt>fill_limit)
                num(i+1,j+1+(k-1)*5)=1;
            end
        end
    end
end
%num template
% One 8 x 5 binary matrix per digit 0..9 (rows = top to bottom, columns =
% left to right). They are concatenated side by side into num_template
% (8 x 50), so digit d occupies columns d*5+1 .. d*5+5.
% NOTE(review): the patterns look hand-tuned to the test picture's font -
% confirm before using them with other fonts.
num0=[0 1 1 1 0;
1 1 0 1 1;
1 0 0 0 1;
1 0 0 0 1;
1 0 0 0 1;
1 0 0 0 1;
1 1 0 1 1;
0 1 1 1 0;
];
num1=[0 1 1 0 0;
0 0 1 0 0;
0 0 1 0 0;
0 0 1 0 0;
0 0 1 0 0;
0 0 1 0 0;
0 0 1 0 0;
0 1 1 1 0;
];
num2=[0 1 1 1 0;
1 0 0 1 1;
0 0 0 0 1;
0 0 0 1 0;
0 0 0 1 0;
0 0 1 0 0;
0 1 0 0 0;
1 1 1 1 1;
];
num3=[0 1 1 1 0;
0 0 0 1 1;
0 0 0 1 0;
0 0 1 1 0;
0 0 0 1 1;
0 0 0 0 1;
0 0 0 0 1;
1 1 1 1 0;
];
num4=[0 0 0 1 0;
0 0 1 1 0;
0 1 0 1 0;
0 1 0 1 0;
1 0 0 1 0;
1 1 1 1 1;
0 0 0 1 0;
0 0 0 1 0;
];
num5=[0 0 1 1 1;
0 1 0 0 0;
0 1 1 0 0;
0 0 1 1 0;
0 0 0 0 1;
0 0 0 0 1;
0 0 0 0 1;
1 1 1 0 0;
];
num6=[0 0 1 1 0;
0 1 1 0 0;
1 1 0 0 0;
1 1 1 1 0;
1 0 0 0 1;
1 0 0 0 1;
1 0 0 0 1;
0 1 1 1 0;
];
num7=[1 1 1 1 1;
1 0 0 0 1;
0 0 0 1 0;
0 0 0 1 0;
0 0 0 1 0;
0 0 1 0 0;
0 0 1 0 0;
0 0 1 0 0;
];
num8=[0 1 1 1 1;
1 0 0 0 1;
1 1 0 0 1;
0 1 1 0 0;
0 1 1 1 0;
1 0 0 0 1;
1 0 0 0 1;
0 1 0 1 0;
];
num9=[0 1 0 1 0;
1 0 0 0 1;
1 0 0 0 1;
1 0 0 0 1;
0 1 1 1 1;
0 0 0 1 1;
0 0 0 1 0;
0 0 1 0 0;
];
% horizontal concatenation: 10 templates x 5 columns = 50 columns
num_template=[num0,num1,num2,num3,num4,num5,num6,num7,num8,num9];
%num recog
% Section 2.10b - compare every digit's 8 x 5 feature matrix with the ten
% templates. The score is the number of equal cells (0..40); the template
% with the highest score wins, and on a tie the lower digit is kept.
% result(k) is the recognised value (0..9) of the k-th digit from the left.
result=zeros(1,cnt-1);   % initialised so it always has one entry per digit
for k=1:cnt-1
    feature=num(1:8,(k-1)*5+1:(k-1)*5+5);
    score=zeros(1,10);
    for d=1:10
        template=num_template(1:8,(d-1)*5+1:(d-1)*5+5);
        score(d)=sum(sum(~xor(feature,template)));   % ~xor is 1 where both cells agree
    end
    [t_max,best]=max(score);   % max returns the first index of the largest score
    result(k)=best-1;          % template index 1..10 -> digit 0..9
end
disp(result);
toc;
最后提取每个字符的特征值,与模板进行匹配,得到每个字符的识别结果,如图所示,识别结果是正确的。再用其他的图片测试一下,可以发现,检测的结果也是完全正确的。
还注意到,完成这一张图像的处理和识别用了40s时间,这对于实时图像处理是无法接受的,因此接下来需要将其移植到ZYNQ来实现。