from PIL import Image
import os.path
import re
import pytesseract
# OCR one sample image using the 'chi_sim' Tesseract language data, echo the
# recognised text, and save it to a text file.
# The with-block releases the image file handle as soon as OCR is done.
with Image.open(r'D:\IMG\1.png') as img:
    pic_content = pytesseract.image_to_string(img, lang='chi_sim')
print(pic_content)
# Explicit UTF-8 so the recognised text is written the same way regardless of
# the platform's default encoding; the with-block closes the file even if the
# write fails.
with open('D:/testtxt/test.txt', 'w', encoding='utf-8') as f:
    f.write(pic_content)
# Paths of the regular files found by eachFile(); extended in place.
allFile = []


def eachFile(filepath):
    """Append the path of every regular file directly inside *filepath* to allFile.

    Only the top level of the directory is scanned: sub-directories are
    skipped rather than descended into. Entries are visited in sorted name
    order so the collected list is deterministic.

    Args:
        filepath: Directory to scan.

    Raises:
        OSError: If *filepath* cannot be listed (propagated from os.listdir).
    """
    for entry in sorted(os.listdir(filepath)):
        # Let os.path.join insert the separator. Gluing the two strings
        # together directly yields paths such as 'D:\\IMG1.png', which never
        # exist, so no file would ever be collected.
        child = os.path.join(filepath, entry)
        if os.path.isfile(child):
            allFile.append(child)
# Collect the files in the image directory, then OCR each of them.
# Raw string: '\I' is not a valid escape sequence in a normal string literal.
eachFile(r'D:\IMG')

# Open the combined output once in append mode instead of reopening it for
# every image; each result is preceded by a separator line.
with open('D:/testtxt/1allFile.txt', 'a', encoding='utf-8') as combined:
    for one in allFile:
        # Close each image handle as soon as its text has been extracted.
        with Image.open(one) as img:
            pic_content = pytesseract.image_to_string(img, lang='chi_sim')
        combined.write('\n-----------------------------------------------------------------\n')
        combined.write(pic_content)

# test.txt is overwritten with the most recent OCR result held in pic_content
# (the last image processed above, or the earlier value if no file was found).
with open('D:/testtxt/test.txt', 'w', encoding='utf-8') as f:
    f.write(pic_content)