图片去黑边(只考虑了去水平方向上的黑边)的核心算法是要找到图片顶部和底部的黑边位置,即两个纵坐标值, 主要用到了canny边缘计算、
houghlines直线检测、easyocr识别等算法。

给图片去黑边的实现逻辑为:

  1. 先进行canny边缘计算,再进行houghlines直线检测,取出图片的水平边缘;如果没有找到水平边缘,那么不做处理
  2. 对目标水平边缘进行过滤和分类
    过滤逻辑是: 一侧为黑色,另一侧非黑色
    分类逻辑是:
    上边是黑色,下边是非黑色的,且位于图片水平中线以上,作为候选上边缘;
    上边是非黑色,下边是黑色的,且位于图片水平中线以下,作为候选下边缘
  3. 对候选的上下边缘从外向内逐一校验,校验标准是边缘之外不应存在文字(因为图片上的文字对于图片也是有意义的) 也不应存在高度超过一定阈值的元素, 从而得出符合条件且最靠内侧的上下边缘
    如果找不到符合条件的上边缘,那么上边缘就是0
    如果找不到符合条件的下边缘,那么下边缘就是图片高度-1
  4. 根据找出的上下边缘对原图进行裁剪
import cv2
import numpy as np
import easyocr


def isPixelBlack(pixel):
    """Return True when the first three channels of ``pixel`` are all <= 10.

    Channels 0, 1 and 2 are tested in that order and evaluation stops at the
    first channel above the threshold.
    """
    return all(pixel[channel] <= 10 for channel in range(3))


def checkLineIsBlack(img, width, y):
    """Return True when every pixel of row ``y`` in columns [0, width) is black.

    A pixel counts as black when each of its first three channels is <= 10,
    the same threshold ``isPixelBlack`` applies. The row is tested with one
    vectorised comparison so that every column is covered, including the
    rightmost column when ``width`` is even (a centre-outwards scan of
    ``(width - 1) / 2`` steps stops one column short in that case).

    Args:
        img: NumPy image array indexed as ``img[row, column, channel]``.
        width: number of columns to inspect, starting at column 0.
        y: index of the row to inspect.

    Returns:
        bool: True if no inspected channel value exceeds the threshold.
    """
    # Keep only the first three channels so an extra (e.g. alpha) channel
    # does not take part in the blackness test.
    row = img[y, :width, :3]
    return bool(np.all(row <= 10))


def computeBlackPixelNum(img, fromY, toY, x):
    """Count the black pixels of column ``x`` in rows ``fromY`` .. ``toY - 1``.

    Blackness is decided by ``isPixelBlack``. An empty row range yields 0.
    """
    return sum(1 for row in range(fromY, toY) if isPixelBlack(img[row, x]))


# Edges close to the top or bottom of the image, or close to the horizontal
# centre line, are ignored.
def isLevelLineNeedIgnore(height, y):
    """Tell whether a horizontal line at row ``y`` should be discarded.

    A line is discarded when it lies within 50 rows of the first or last row
    of the image, or when it falls inside the central band that runs from
    ``int(0.4 * height)`` to ``int(0.6 * height)`` (both inclusive).
    """
    nearBorder = y <= 50 or height - 1 - y <= 50
    inCentralBand = int(0.4 * height) <= y <= int(0.6 * height)
    return bool(nearBorder or inCentralBand)


# One tenth of the image width is used as the minimum line-segment length.
def getMinLineLength(width):
    """Return ``width / 10`` truncated to an integer."""
    tenth = width / 10
    return int(tenth)


def computeValidFlag(valid_flag_list, left, right):
    """Count the flagged rows in ``[left, right)``, ignoring small counts.

    A row is flagged when its entry in ``valid_flag_list`` is greater than 0.
    Counts of 5 or fewer are treated as noise and reported as 0; otherwise
    the count itself is returned.
    """
    flagged = sum(1 for row in range(left, right) if valid_flag_list[row] > 0)
    return flagged if flagged > 5 else 0


# Edge type of a horizontal line:
# 0 = invalid, 1 = potential top edge, 2 = potential bottom edge,
# 3 = potential edge with no content on either side.
def checkEdgeType(valid_flag_list, y, height, init):
    """Classify the horizontal line at row ``y`` by the content around it.

    Two windows of up to ``init`` rows are examined with ``computeValidFlag``:
    one ending 10 rows above ``y`` and one starting 10 rows below ``y``
    (clamped to row 0 and to ``height - 1`` respectively).

    Returns:
        0 when both windows have content, or when the side holding content
        does not agree with the half of the image the line lies in;
        1 when only the window below has content and ``y`` is above the
        middle row; 2 when only the window above has content and ``y`` is
        below the middle row; 3 when neither window has content.
    """
    above = computeValidFlag(valid_flag_list, max(0, y - 10 - init), y - 10)
    below = computeValidFlag(valid_flag_list, y + 10, min(y + 10 + init, height - 1))

    if above > 0 and below > 0:
        return 0
    if above == 0 and below == 0:
        return 3

    middleRow = int(height / 2)
    if above > 0 and below == 0:
        return 2 if y > middleRow else 0
    if above == 0 and below > 0:
        return 1 if y < middleRow else 0
    return 0


# Pick the most suitable top edge.
def pickOutFinalTopY(img, height, width, valid_topY_array, valid_flag_list, reader):
    """Choose the innermost acceptable top edge among the candidates.

    Candidates with at most 20 flagged rows above them (per
    ``computeValidFlag``) are "matched": almost nothing lies outside them, so
    the innermost matched candidate is accepted without further checks.
    If any unmatched candidates exist, the candidates at or below that
    starting point are then verified from the outside inwards; verification
    stops at the first candidate that has more than 100 flagged rows above
    it, more than 100 black pixels above it in the centre column, or any text
    detected by ``reader.readtext`` in the strip above it.

    Args:
        img: image array, sliced as ``img[rows, columns]``.
        height: image height (not used by this function).
        width: image width.
        valid_topY_array: candidate row indices; not modified.
        valid_flag_list: per-row flags consumed by ``computeValidFlag``.
        reader: object exposing ``readtext(image)`` that returns a sequence
            of detections.

    Returns:
        The chosen row index, or 0 when there is no acceptable candidate
        (including when ``valid_topY_array`` is empty).
    """
    if len(valid_topY_array) == 0:
        return 0

    matchedTopY = []
    hasUnmatched = False
    for currentY in valid_topY_array:
        if computeValidFlag(valid_flag_list, 0, currentY - 2) <= 20:
            matchedTopY.append(currentY)
        else:
            hasUnmatched = True

    # Use the innermost matched candidate in every case, so the result does
    # not depend on the order in which the candidates were detected.
    bestTopY = max(matchedTopY) if matchedTopY else 0
    if not hasUnmatched:
        return bestTopY

    midX = int(width / 2)
    # Walk the candidates top-down on a sorted copy; the caller's list is
    # left untouched.
    for candidateY in sorted(valid_topY_array):
        if candidateY < bestTopY:
            continue

        if computeValidFlag(valid_flag_list, 0, candidateY) > 100:
            break

        if computeBlackPixelNum(img, 0, candidateY, midX) > 100:
            break

        # Text above the candidate is meaningful image content, so an edge
        # that would cut it away is rejected.
        roi = img[0:candidateY, 0:width]
        if len(reader.readtext(roi)) > 0:
            break

        bestTopY = candidateY

    return bestTopY


def pickOutFinalEndY(img, height, width, valid_endY_array, valid_flag_list, reader):
    """Choose the innermost acceptable bottom edge among the candidates.

    This mirrors ``pickOutFinalTopY``. Candidates with at most 20 flagged
    rows below them (per ``computeValidFlag``) are "matched": almost nothing
    lies outside them, so the innermost matched candidate (the smallest row
    index) is accepted without further checks. If any unmatched candidates
    exist, the candidates at or above that starting point are then verified
    from the outside inwards; verification stops at the first candidate that
    has more than 100 flagged rows below it, more than 100 black pixels below
    it in the centre column, or any text detected by ``reader.readtext`` in
    the strip below it.

    Args:
        img: image array, sliced as ``img[rows, columns]``.
        height: image height.
        width: image width.
        valid_endY_array: candidate row indices; not modified.
        valid_flag_list: per-row flags consumed by ``computeValidFlag``.
        reader: object exposing ``readtext(image)`` that returns a sequence
            of detections.

    Returns:
        The chosen row index, or ``height - 1`` when there is no acceptable
        candidate (including when ``valid_endY_array`` is empty).
    """
    if len(valid_endY_array) == 0:
        return height - 1

    matchedEndY = []
    hasUnmatched = False
    for currentY in valid_endY_array:
        if computeValidFlag(valid_flag_list, currentY + 2, height) <= 20:
            matchedEndY.append(currentY)
        else:
            hasUnmatched = True

    # For a bottom edge the innermost candidate is the one with the smallest
    # row index; using it keeps the result independent of detection order
    # and symmetric with the top-edge selection.
    bestEndY = min(matchedEndY) if matchedEndY else height - 1
    if not hasUnmatched:
        return bestEndY

    midX = int(width / 2)
    # Walk the candidates bottom-up on a sorted copy; the caller's list is
    # left untouched.
    for candidateY in sorted(valid_endY_array, reverse=True):
        if candidateY > bestEndY:
            continue

        if computeValidFlag(valid_flag_list, candidateY, height) > 100:
            break

        if computeBlackPixelNum(img, candidateY, height, midX) > 100:
            break

        # Text below the candidate is meaningful image content, so an edge
        # that would cut it away is rejected.
        roi = img[candidateY:height, 0:width]
        if len(reader.readtext(roi)) > 0:
            break

        bestEndY = candidateY

    return bestEndY


def computeTopAndEnd(img, height, width, valid_flag_list, level_lines, reader):
    """Derive the top and bottom crop rows from the detected horizontal lines.

    Each line is classified with ``checkEdgeType`` (window size 50). Type 1
    lines become top candidates and type 2 lines bottom candidates; type 3
    lines are assigned by which side of the middle row they lie on, and are
    dropped when exactly on it. The candidate lists are then narrowed down by
    ``pickOutFinalTopY`` / ``pickOutFinalEndY``.

    Returns:
        ``(topY, endY)``; a side without candidates keeps its default of 0
        (top) or ``height - 1`` (bottom).
    """
    middleRow = int(height / 2)
    topCandidates = []
    endCandidates = []

    # Step 1: keep only the lines that qualify as edges.
    for segment in level_lines:
        _, lineY, _, _ = segment[0]
        kind = checkEdgeType(valid_flag_list, lineY, height, 50)

        isTop = kind == 1 or (kind == 3 and lineY < middleRow)
        isEnd = kind == 2 or (kind == 3 and lineY > middleRow)
        if isTop:
            topCandidates.append(lineY)
        elif isEnd:
            endCandidates.append(lineY)

    # Step 2: decide which candidate (if any) is used on each side; this
    # step may run OCR inside the pick-out helpers.
    topY = 0
    if len(topCandidates) > 0:
        topY = pickOutFinalTopY(img, height, width, topCandidates, valid_flag_list, reader)

    endY = height - 1
    if len(endCandidates) > 0:
        endY = pickOutFinalEndY(img, height, width, endCandidates, valid_flag_list, reader)

    # Step 3: hand back the two row indices.
    return topY, endY


# For rows without any detected edge, re-check whether the row contains
# non-black pixels.
def recomputeValidFlagList(img, height, width, valid_flag_list):
    """Flag, in place, the unflagged rows that are not entirely black.

    Rows whose entry in ``valid_flag_list`` is already non-zero are skipped.
    Every other row in ``[0, height)`` is tested with ``checkLineIsBlack``
    and its entry is set to 1 when the row is not black. Returns None.
    """
    for row in range(height):
        if valid_flag_list[row] != 0:
            continue
        if not checkLineIsBlack(img, width, row):
            valid_flag_list[row] = 1


def recognizeImageValidZone(imagePath, reader):
    """Find the first and last rows of the image area that should be kept.

    The image is run through Canny edge detection and a probabilistic Hough
    transform. Exactly horizontal segments that ``isLevelLineNeedIgnore``
    does not reject are passed, together with a per-row "has content" flag
    list, to ``computeTopAndEnd``.

    Args:
        imagePath: path of the image file to analyse.
        reader: OCR reader forwarded to ``computeTopAndEnd``.

    Returns:
        ``(topY, endY)`` row indices. ``(0, height - 1)`` is returned when
        no line, or no usable horizontal line, is detected.

    Raises:
        ValueError: if ``cv2.imread`` cannot load ``imagePath``.
    """
    img = cv2.imread(imagePath)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising, so fail here with a clear message.
        raise ValueError("cannot read image: " + str(imagePath))
    height, width = img.shape[:2]

    edges = cv2.Canny(img, 100, 200)
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 100, minLineLength=getMinLineLength(width), maxLineGap=10)

    if lines is None:
        print(imagePath + "不存在直线")
        return 0, height - 1

    # Keep only exactly horizontal segments that are not in an ignored zone.
    levelLines = []
    for line in lines:
        x1, y1, x2, y2 = line[0]
        if y1 != y2:
            continue
        if isLevelLineNeedIgnore(height, y1):
            continue
        levelLines.append(line)

    if len(levelLines) == 0:
        print(imagePath + "-----不存在水平直线")
        return 0, height - 1

    # One flag per row: 1 when the edge map has at least one non-zero pixel
    # in that row, else 0. Computed in one vectorised pass over the edge map.
    valid_flag_list = np.any(edges != 0, axis=1).astype(int).tolist()

    # Rows without edges may still hold non-black pixels; flag those too.
    recomputeValidFlagList(img, height, width, valid_flag_list)

    return computeTopAndEnd(img, height, width, valid_flag_list, levelLines, reader)


def doDropForImage(srcDir, srcFile, targetDir, reader):
    """Crop the detected black borders off one image and save the result.

    The source path is ``srcDir + srcFile`` and the output path is
    ``targetDir + srcFile + "_dealed.jpg"``; both are plain string
    concatenations, so the directory arguments must carry their own trailing
    separator. Rows ``startY`` through ``overY`` (inclusive), as reported by
    ``recognizeImageValidZone``, are kept.
    """
    srcPath = srcDir + srcFile
    img = cv2.imread(srcPath)
    _, width = img.shape[:2]

    startY, overY = recognizeImageValidZone(srcPath, reader)

    cropped = img[startY:overY + 1, 0:width]
    cv2.imwrite(targetDir + srcFile + "_dealed.jpg", cropped)


def preDropForImage(srcDir, srcFile, targetDir, reader):
    """Draw the detected border rows on one image instead of cropping it.

    The source path is ``srcDir + srcFile``. A green line of thickness 2 is
    drawn at each edge reported by ``recognizeImageValidZone`` that differs
    from its default (0 for the top, ``height - 1`` for the bottom). Images
    with at least one such edge are written to ``targetDir + 'changed/'``
    under their original name; all others go to ``targetDir + 'unchanged/'``
    with ``"_dealed.jpg"`` appended.
    """
    srcPath = srcDir + srcFile
    img = cv2.imread(srcPath)
    height, width = img.shape[:2]

    startY, overY = recognizeImageValidZone(srcPath, reader)

    topMoved = startY != 0
    bottomMoved = overY != height - 1

    # Mark whichever edges were actually detected.
    lineColor = (0, 255, 0)
    for moved, edgeY in ((topMoved, startY), (bottomMoved, overY)):
        if moved:
            cv2.line(img, (0, edgeY), (width - 1, edgeY), lineColor, 2)

    if topMoved or bottomMoved:
        cv2.imwrite(targetDir + 'changed/' + srcFile, img)
    else:
        cv2.imwrite(targetDir + 'unchanged/' + srcFile + "_dealed.jpg", img)


# Script entry. These statements are not wrapped in a ``__main__`` guard, so
# they run whenever this module is executed or imported.
# Build an EasyOCR reader for the 'ch_sim' and 'en' language models with the
# GPU disabled.
reader = easyocr.Reader(['ch_sim', 'en'], gpu=False)
# Mark the detected borders on a single hard-coded sample image; the result is
# written under the 'changed/' or 'unchanged/' sub-folder of the target path.
preDropForImage('E:/black/sample_images_black/', "1.jpg", 'E:/black/success_dealed/', reader)