图片去黑边(只考虑了去水平方向上的黑边)的核心算法是要找到图片顶部和底部的黑边位置,即两个纵坐标值, 主要用到了canny边缘计算、
houghlines直线检测、easyocr识别等算法。
给图片去黑边的实现逻辑为:
- 先进行canny边缘计算,再进行houghlines直线检测,取出图片的水平边缘 如果没有找到水平边缘,那么不做处理
- 对目标水平边缘进行过滤和分类
过滤逻辑是: 一侧为黑色,另一侧非黑色
分类逻辑是:
上边是黑色,下边是非黑色的,且位于图片水平中线以上,作为候选上边缘;
上边是非黑色,下边是黑色的,且位于图片水平中线以下,作为候选下边缘 - 对候选的上下边缘从外向内逐一校验,校验标准是边缘之外不应存在文字(因为图片上的文字对于图片也是有意义的) 也不应存在高度超过一定阈值的元素, 从而得出符合条件且最靠内侧的上下边缘
如果找不到符合条件的上边缘,那么上边缘就是0
如果找不到符合条件的下边缘,那么下边缘就是图片高度-1 - 根据找出的上下边缘对原图进行裁剪
import cv2
import numpy as np
import easyocr
def isPixelBlack(pixel):
    """Return True when the first three channels of ``pixel`` are all <= 10.

    Args:
        pixel: Indexable value holding at least three numeric channel values
            (presumably one BGR pixel of an OpenCV image -- confirm against
            the callers).

    Returns:
        True if each of the first three channels is at most 10, i.e. the
        pixel is treated as (near-)black; False otherwise.
    """
    # Channels are inspected in order and the scan stops at the first one
    # that is too bright.
    for channel in range(3):
        if pixel[channel] > 10:
            return False
    return True
def checkLineIsBlack(img, width, y):
    """Return True when every pixel of row ``y`` in columns ``[0, width)`` is black.

    A pixel counts as black when each of its first three channels is <= 10.
    This threshold must stay in sync with ``isPixelBlack``, which applies the
    same rule to a single pixel.

    The earlier centre-outward scan only visited columns ``0 .. 2 * midX``
    with ``midX = int((width - 1) / 2)``, so for an even ``width`` the last
    column was never inspected. Checking the whole row at once covers every
    column and avoids a Python-level loop per pixel.

    Args:
        img: Image array indexed as ``img[row, column]`` whose pixels carry
            at least three channels.
        width: Number of columns of the row to inspect.
        y: Row index to inspect.

    Returns:
        True if all inspected pixels are black, False otherwise.
    """
    # Restrict the row to the requested columns and the first three channels.
    row = np.asarray(img[y])[:width, :3]
    return bool((row <= 10).all())
def computeBlackPixelNum(img, fromY, toY, x):
    """Count the black pixels of column ``x`` over rows ``[fromY, toY)``.

    Args:
        img: Image indexed as ``img[row, column]``.
        fromY: First row to inspect (inclusive).
        toY: Row at which to stop (exclusive).
        x: Column whose pixels are inspected.

    Returns:
        The number of inspected pixels for which ``isPixelBlack`` is true;
        0 when the row range is empty.
    """
    return sum(1 for row in range(fromY, toY) if isPixelBlack(img[row, x]))
# Ignore edges close to the top or bottom of the image, and edges near the
# horizontal centre line.
def isLevelLineNeedIgnore(height, y):
    """Tell whether a horizontal line at row ``y`` should be discarded.

    Args:
        height: Image height in rows.
        y: Row of the horizontal line.

    Returns:
        True when the line lies within 50 rows of the first or last row, or
        inside the band between 40% and 60% of the image height (both ends
        inclusive); False otherwise.
    """
    border_margin = 50
    near_border = y <= border_margin or (height - 1 - y) <= border_margin
    # Middle band: rows from int(0.4 * height) to int(0.6 * height).
    in_middle_band = int(0.4 * height) <= y <= int(0.6 * height)
    return bool(near_border or in_middle_band)
# Treat one tenth of the image width as the minimum line-segment length.
def getMinLineLength(width):
    """Return the minimum line length used for line detection.

    Args:
        width: Image width in pixels.

    Returns:
        ``width / 10`` truncated to an integer.
    """
    tenth_of_width = width / 10
    return int(tenth_of_width)
def computeValidFlag(valid_flag_list, left, right):
    """Count the flagged rows in ``[left, right)``, ignoring small counts.

    Args:
        valid_flag_list: Per-row flags; a value greater than 0 marks the row.
        left: First index to inspect (inclusive).
        right: Index at which to stop (exclusive).

    Returns:
        The number of flagged rows in the range, or 0 when that number is
        5 or less (so a handful of flagged rows is treated as none).
    """
    flagged = sum(1 for row in range(left, right) if valid_flag_list[row] > 0)
    return flagged if flagged > 5 else 0
# Classify a horizontal line: 0 = invalid, 1 = potential top edge,
# 2 = potential bottom edge, 3 = potential edge.
def checkEdgeType(valid_flag_list, y, height, init):
    """Classify the horizontal line at row ``y`` by the content around it.

    Two windows of up to ``init`` rows are examined with ``computeValidFlag``:
    one ending 10 rows above the line and one starting 10 rows below it
    (clamped to row 0 and to ``height - 1`` respectively).

    Args:
        valid_flag_list: Per-row flags consumed by ``computeValidFlag``.
        y: Row of the horizontal line.
        height: Image height in rows.
        init: Size of each inspection window in rows.

    Returns:
        3 when neither window has content;
        2 when only the upper window has content and the line lies below the
        vertical midpoint;
        1 when only the lower window has content and the line lies above the
        vertical midpoint;
        0 in every other case.
    """
    half_height = int(height / 2)
    upper = computeValidFlag(valid_flag_list, max(0, y - 10 - init), y - 10)
    lower = computeValidFlag(valid_flag_list, y + 10, min(y + 10 + init, height - 1))

    upper_empty = upper == 0
    lower_empty = lower == 0

    # The three accepting cases are mutually exclusive, so their order does
    # not matter; anything else is rejected.
    if upper_empty and lower_empty:
        return 3
    if upper > 0 and lower_empty and y > half_height:
        return 2
    if upper_empty and lower > 0 and y < half_height:
        return 1
    return 0
# Pick the most suitable top edge.
def pickOutFinalTopY(img, height, width, valid_topY_array, valid_flag_list, reader):
    """Choose the innermost acceptable top edge among the candidates.

    Candidates with at most 20 flagged rows above them (as reported by
    ``computeValidFlag`` over rows ``[0, y - 2)``) are accepted outright.
    The remaining candidates are then tried from the outside inwards and
    each must pass three checks on the region above it; the first failure
    stops the search.

    Changes from the previous version:
      * When every candidate was accepted outright, the first one in input
        order was returned, so the result depended on the order in which the
        lines were detected. The largest (innermost) one is returned now,
        matching what the slower path already used as its starting point.
      * The caller's candidate list is no longer sorted in place.
      * An empty candidate list returns 0 instead of raising IndexError.

    Args:
        img: Image indexed as ``img[row, column]``.
        height: Image height in rows (unused here; kept for a signature
            parallel to ``pickOutFinalEndY``).
        width: Image width in columns.
        valid_topY_array: Candidate top-edge rows.
        valid_flag_list: Per-row flags consumed by ``computeValidFlag``.
        reader: OCR reader exposing ``readtext(image)``.

    Returns:
        The selected top-edge row, or 0 when no candidate qualifies.
    """
    if not valid_topY_array:
        return 0

    # Split candidates: those with (almost) nothing above them need no
    # further validation.
    accepted = []
    needs_validation = False
    for candidate in valid_topY_array:
        if computeValidFlag(valid_flag_list, 0, candidate - 2) <= 20:
            accepted.append(candidate)
        else:
            needs_validation = True

    best_top = max(accepted) if accepted else 0
    if not needs_validation:
        return best_top

    mid_x = int(width / 2)
    # Walk candidates from the outermost inwards, validating each in turn.
    for candidate in sorted(valid_topY_array):
        if candidate < best_top:
            continue
        # Too many flagged rows above the candidate.
        if computeValidFlag(valid_flag_list, 0, candidate) > 100:
            break
        # Too many black pixels in the centre column above the candidate.
        if computeBlackPixelNum(img, 0, candidate, mid_x) > 100:
            break
        # OCR the region above the candidate; any detected text means the
        # region carries meaningful content and must not be cut off.
        region = img[0:candidate, 0:width]
        if len(reader.readtext(region)) > 0:
            break
        best_top = candidate
    return best_top
def pickOutFinalEndY(img, height, width, valid_endY_array, valid_flag_list, reader):
    """Choose the innermost acceptable bottom edge among the candidates.

    Candidates with at most 20 flagged rows below them (as reported by
    ``computeValidFlag`` over rows ``[y + 2, height)``) are accepted
    outright. The remaining candidates are then tried from the outside
    inwards and each must pass three checks on the region below it; the
    first failure stops the search.

    Changes from the previous version:
      * When every candidate was accepted outright, the first one in input
        order was returned, so the result depended on the order in which the
        lines were detected. The smallest (innermost) one is returned now,
        matching what the slower path already used as its starting point.
      * The caller's candidate list is no longer sorted in place.
      * An empty candidate list returns ``height - 1`` instead of raising
        IndexError.
      * The local that shadowed the builtin ``sum`` is gone.

    Args:
        img: Image indexed as ``img[row, column]``.
        height: Image height in rows.
        width: Image width in columns.
        valid_endY_array: Candidate bottom-edge rows.
        valid_flag_list: Per-row flags consumed by ``computeValidFlag``.
        reader: OCR reader exposing ``readtext(image)``.

    Returns:
        The selected bottom-edge row, or ``height - 1`` when no candidate
        qualifies.
    """
    if not valid_endY_array:
        return height - 1

    # Split candidates: those with (almost) nothing below them need no
    # further validation.
    accepted = []
    needs_validation = False
    for candidate in valid_endY_array:
        if computeValidFlag(valid_flag_list, candidate + 2, height) <= 20:
            accepted.append(candidate)
        else:
            needs_validation = True

    best_end = min(accepted) if accepted else height - 1
    if not needs_validation:
        return best_end

    mid_x = int(width / 2)
    # Walk candidates from the outermost (largest row) inwards.
    for candidate in sorted(valid_endY_array, reverse=True):
        if candidate > best_end:
            continue
        # Too many flagged rows below the candidate.
        if computeValidFlag(valid_flag_list, candidate, height) > 100:
            break
        # Too many black pixels in the centre column below the candidate.
        if computeBlackPixelNum(img, candidate, height, mid_x) > 100:
            break
        # OCR the region below the candidate; any detected text means the
        # region carries meaningful content and must not be cut off.
        region = img[candidate:height, 0:width]
        if len(reader.readtext(region)) > 0:
            break
        best_end = candidate
    return best_end
def computeTopAndEnd(img, height, width, valid_flag_list, level_lines, reader):
    """Derive the top and bottom crop rows from the detected horizontal lines.

    Args:
        img: Image indexed as ``img[row, column]``.
        height: Image height in rows.
        width: Image width in columns.
        valid_flag_list: Per-row flags consumed by the edge checks.
        level_lines: Horizontal line segments; each item's first element
            unpacks to ``(x1, y1, x2, y2)``.
        reader: OCR reader forwarded to the edge pickers.

    Returns:
        ``(top_y, bottom_y)``. ``top_y`` is 0 when there is no top candidate
        and ``bottom_y`` is ``height - 1`` when there is no bottom candidate.
    """
    half_height = int(height / 2)
    top_candidates = []
    bottom_candidates = []

    # Step 1: classify every line and collect it as a top or bottom candidate.
    for segment in level_lines:
        _, y, _, _ = segment[0]
        kind = checkEdgeType(valid_flag_list, y, height, 50)
        # Kind 3 is assigned to a side by its position relative to the
        # vertical midpoint; a line exactly on the midpoint is dropped.
        if kind == 1 or (kind == 3 and y < half_height):
            top_candidates.append(y)
        elif kind == 2 or (kind == 3 and y > half_height):
            bottom_candidates.append(y)

    # Step 2: validate the candidates on each side (this may run OCR).
    top_y = 0
    bottom_y = height - 1
    if top_candidates:
        top_y = pickOutFinalTopY(img, height, width, top_candidates, valid_flag_list, reader)
    if bottom_candidates:
        bottom_y = pickOutFinalEndY(img, height, width, bottom_candidates, valid_flag_list, reader)

    # Step 3: hand back both rows.
    return top_y, bottom_y
# For rows without a flag, re-check whether the row holds any non-black pixel.
def recomputeValidFlagList(img, height, width, valid_flag_list):
    """Flag, in place, the unflagged rows that are not entirely black.

    Args:
        img: Image forwarded to ``checkLineIsBlack``.
        height: Number of rows to visit.
        width: Image width forwarded to ``checkLineIsBlack``.
        valid_flag_list: Per-row flags; entries equal to 0 are re-evaluated
            and set to 1 when ``checkLineIsBlack`` reports the row as not
            black. Other entries are left untouched.

    Returns:
        None. ``valid_flag_list`` is modified in place.
    """
    for row in range(height):
        if valid_flag_list[row] != 0:
            continue
        if not checkLineIsBlack(img, width, row):
            valid_flag_list[row] = 1
def recognizeImageValidZone(imagePath, reader):
    """Locate the rows that bound the non-border content of an image.

    The image is run through Canny edge detection and a probabilistic Hough
    transform. Exactly horizontal segments that ``isLevelLineNeedIgnore``
    does not reject become edge candidates, and ``computeTopAndEnd`` selects
    the final top and bottom rows from them.

    Changes from the previous version:
      * A failed ``cv2.imread`` (which yields ``None``) raises ``OSError``
        with the offending path instead of an ``AttributeError`` on
        ``img.shape``.
      * The per-row edge flags are computed with one vectorised reduction
        instead of a Python loop over every pixel; the resulting flags are
        the same.

    Args:
        imagePath: Path of the image file to analyse.
        reader: OCR reader forwarded to ``computeTopAndEnd``.

    Returns:
        ``(top_y, bottom_y)``; ``(0, height - 1)`` when no usable horizontal
        line is found.

    Raises:
        OSError: If the image cannot be read.
    """
    img = cv2.imread(imagePath)
    if img is None:
        raise OSError(f"cannot read image: {imagePath}")
    height, width = img.shape[:2]

    edges = cv2.Canny(img, 100, 200)
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 100, minLineLength=getMinLineLength(width), maxLineGap=10)
    if lines is None:
        print(imagePath + "不存在直线")
        return 0, height - 1

    # Keep only exactly horizontal segments outside the ignored zones.
    levelLines = []
    for line in lines:
        x1, y1, x2, y2 = line[0]
        if y1 != y2:
            continue
        if isLevelLineNeedIgnore(height, y1):
            continue
        levelLines.append(line)
    if len(levelLines) == 0:
        print(imagePath + "-----不存在水平直线")
        return 0, height - 1

    # One flag per row: 1 when the edge map has at least one edge pixel in
    # that row, 0 otherwise.
    valid_flag_list = [int(has_edge) for has_edge in (edges != 0).any(axis=1)]
    # Rows without edge pixels may still hold non-black content; flag those.
    recomputeValidFlagList(img, height, width, valid_flag_list)
    return computeTopAndEnd(img, height, width, valid_flag_list, levelLines, reader)
def doDropForImage(srcDir, srcFile, targetDir, reader):
    """Crop one image to its detected valid zone and save the result.

    Args:
        srcDir: Directory prefix of the source image; it is concatenated
            directly with ``srcFile``.
        srcFile: File name of the source image.
        targetDir: Directory prefix for the output; the file is written as
            ``targetDir + srcFile + "_dealed.jpg"``.
        reader: OCR reader forwarded to ``recognizeImageValidZone``.
    """
    source_path = srcDir + srcFile
    image = cv2.imread(source_path)
    _, image_width = image.shape[:2]

    # Rows bounding the content; both ends are kept in the crop.
    top_y, bottom_y = recognizeImageValidZone(source_path, reader)
    cropped = image[top_y:bottom_y + 1, 0:image_width]
    cv2.imwrite(targetDir + srcFile + "_dealed.jpg", cropped)
def preDropForImage(srcDir, srcFile, targetDir, reader):
    """Mark the detected crop boundaries on an image and save it for review.

    A green line, 2 pixels thick, is drawn at each detected boundary that
    differs from the image's own first or last row. Images with at least one
    such boundary are written to ``targetDir + 'changed/' + srcFile``; the
    others go to ``targetDir + 'unchanged/' + srcFile + "_dealed.jpg"``.

    Args:
        srcDir: Directory prefix of the source image; it is concatenated
            directly with ``srcFile``.
        srcFile: File name of the source image.
        targetDir: Directory prefix under which the output is written.
        reader: OCR reader forwarded to ``recognizeImageValidZone``.
    """
    source_path = srcDir + srcFile
    image = cv2.imread(source_path)
    image_height, image_width = image.shape[:2]

    top_y, bottom_y = recognizeImageValidZone(source_path, reader)
    top_moved = top_y != 0
    bottom_moved = bottom_y != image_height - 1

    green = (0, 255, 0)
    if top_moved:
        cv2.line(image, (0, top_y), (image_width - 1, top_y), green, 2)
    if bottom_moved:
        cv2.line(image, (0, bottom_y), (image_width - 1, bottom_y), green, 2)

    if top_moved or bottom_moved:
        cv2.imwrite(targetDir + 'changed/' + srcFile, image)
    else:
        cv2.imwrite(targetDir + 'unchanged/' + srcFile + "_dealed.jpg", image)
# Script entry: build an EasyOCR reader for the 'ch_sim' and 'en' language
# models with the GPU disabled, then run the preview pass on one sample image.
# NOTE(review): both statements execute at import time and the paths are
# hard-coded; consider moving them under an `if __name__ == "__main__":` guard.
reader = easyocr.Reader(['ch_sim', 'en'], gpu=False)
preDropForImage('E:/black/sample_images_black/', "1.jpg", 'E:/black/success_dealed/', reader)