目录
一、使用场景
二、语言背景
三、环境搭建
四、程序结构
五、代码
一、使用场景
公司一直以点阵屏显示为业务。最近希望替换原有的点阵字库,转用FreeType渲染矢量字形,且需要支持阿拉伯语。在可行性验证阶段,因为Python的各种库用起来相当顺手,所以先用Python进行验证,通过后再转为C++实现。
二、语言背景
阿拉伯语与常规语言不同,它属于复杂文本语言。它有以下3个特点:
1.阅读顺序从右往左
2.字符在词前中后有不同写法
3.带有修饰符号
因为阿拉伯文有以上特点,所以不能单纯地逐个字符读取并渲染;在渲染字符串之前需要先经过一次特殊处理,转成正确的Unicode码串后,再使用FreeType对转换后的Unicode码串进行渲染。
三、环境搭建
这里需要用到3个Python库
1.numpy
引用原因:方便对点阵数组的操作, 而且Freetype好像也用到了
安装命令:pip install numpy
2.freetype-py
引用原因:矢量字体渲染模块
安装命令:pip install freetype-py
3.matplotlib
引用原因:显示渲染结果,如果需要在别的地方显示,可以不需要
安装命令:pip install matplotlib
四、程序结构
首先,对多语言的渲染大致分为两个模块,解析模块和渲染模块。解析模块用于处理原始Unicode码;渲染模块根据Unicode码取字模并合成点阵数组。
流程大致如下:原始字符串 -> 解析模块 -> 处理后Unicode码串 -> 渲染模块 -> 点阵数组 -> 显示。
这里的解析模块是本人自己写的,但是推荐使用HarfBuzz。
这里放个用matplotlib显示的效果
五、代码
解析模块
class ArabicText(object):
    """Minimal contextual shaper for Arabic text.

    ``Translate`` turns a string into a list of code points in display
    (left-to-right) order: letters U+0621..U+064A are replaced by the
    presentation form selected from their neighbours, lam + alef pairs are
    merged into one ligature code point, and runs of non-Arabic characters
    keep their original reading order.

    Neighbours are taken directly from the adjacent characters of the input;
    combining marks are not skipped when deciding how a letter joins.
    """

    # Presentation forms for U+0621..U+064A, one row per code point
    # (row index = code point - 0x621).  Column meaning, as selected by
    # __GetTransform:
    #   0: joined to the previous letter only
    #   1: joined to the next letter only
    #   2: joined on both sides
    #   3: not joined at all
    __arabic_Positions = [
        [0xfe80, 0xfe80, 0xfe80, 0xfe80],  # 0x621
        [0xfe82, 0xfe81, 0xfe82, 0xfe81],
        [0xfe84, 0xfe83, 0xfe84, 0xfe83],
        [0xfe86, 0xfe85, 0xfe86, 0xfe85],
        [0xfe88, 0xfe87, 0xfe88, 0xfe87],
        [0xfe8a, 0xfe8b, 0xfe8c, 0xfe89],
        [0xfe8e, 0xfe8d, 0xfe8e, 0xfe8d],
        [0xfe90, 0xfe91, 0xfe92, 0xfe8f],
        [0xfe94, 0xfe93, 0xfe94, 0xfe93],
        [0xfe96, 0xfe97, 0xfe98, 0xfe95],
        [0xfe9a, 0xfe9b, 0xfe9c, 0xfe99],
        [0xfe9e, 0xfe9f, 0xfea0, 0xfe9d],
        [0xfea2, 0xfea3, 0xfea4, 0xfea1],
        [0xfea6, 0xfea7, 0xfea8, 0xfea5],
        [0xfeaa, 0xfea9, 0xfeaa, 0xfea9],
        [0xfeac, 0xfeab, 0xfeac, 0xfeab],
        [0xfeae, 0xfead, 0xfeae, 0xfead],
        [0xfeb0, 0xfeaf, 0xfeb0, 0xfeaf],
        [0xfeb2, 0xfeb3, 0xfeb4, 0xfeb1],
        [0xfeb6, 0xfeb7, 0xfeb8, 0xfeb5],
        [0xfeba, 0xfebb, 0xfebc, 0xfeb9],
        [0xfebe, 0xfebf, 0xfec0, 0xfebd],
        [0xfec2, 0xfec3, 0xfec4, 0xfec1],
        [0xfec6, 0xfec7, 0xfec8, 0xfec5],
        [0xfeca, 0xfecb, 0xfecc, 0xfec9],
        [0xfece, 0xfecf, 0xfed0, 0xfecd],
        [0x63b, 0x63b, 0x63b, 0x63b],
        [0x63c, 0x63c, 0x63c, 0x63c],
        [0x63d, 0x63d, 0x63d, 0x63d],
        [0x63e, 0x63e, 0x63e, 0x63e],
        [0x63f, 0x63f, 0x63f, 0x63f],
        [0x640, 0x640, 0x640, 0x640],
        [0xfed2, 0xfed3, 0xfed4, 0xfed1],
        [0xfed6, 0xfed7, 0xfed8, 0xfed5],
        [0xfeda, 0xfedb, 0xfedc, 0xfed9],
        [0xfede, 0xfedf, 0xfee0, 0xfedd],
        [0xfee2, 0xfee3, 0xfee4, 0xfee1],
        [0xfee6, 0xfee7, 0xfee8, 0xfee5],
        [0xfeea, 0xfeeb, 0xfeec, 0xfee9],
        [0xfeee, 0xfeed, 0xfeee, 0xfeed],
        [0xfef0, 0xfef3, 0xfef4, 0xfeef],
        [0xfef2, 0xfef3, 0xfef4, 0xfef1],
    ]

    # A letter joins to the character before it when that character is in
    # this set.
    __preSet = frozenset([
        0x62c, 0x62d, 0x62e, 0x647, 0x639, 0x63a, 0x641, 0x642,
        0x62b, 0x635, 0x636, 0x637, 0x643, 0x645, 0x646, 0x62a,
        0x644, 0x628, 0x64a, 0x633, 0x634, 0x638, 0x626, 0x640,
    ])

    # A letter joins to the character after it when that character is in
    # this set.
    __nextSet = frozenset([
        0x62c, 0x62d, 0x62e, 0x647, 0x639, 0x63a, 0x641, 0x642,
        0x62b, 0x635, 0x636, 0x637, 0x643, 0x645, 0x646, 0x62a,
        0x644, 0x628, 0x64a, 0x633, 0x634, 0x638, 0x626,
        0x627, 0x623, 0x625, 0x622, 0x62f, 0x630, 0x631, 0x632,
        0x648, 0x624, 0x629, 0x649, 0x640,
    ])

    # Lam-alef ligatures, one row per alef variant in the order
    # 0x622, 0x623, 0x625, 0x627.  Column 0 is used when the lam is not
    # joined to the previous character, column 1 when it is.
    __replaceSet = [[0xFEF5, 0xFEF6], [0xFEF7, 0xFEF8],
                    [0xFEF9, 0xFEFA], [0xFEFB, 0xFEFC]]

    @staticmethod
    def Translate(text):
        """Convert *text* into a list of code points ready for rendering.

        The returned list can be fed to FreeType one value at a time, laying
        glyphs out left to right.  The whole sequence is reversed (Arabic is
        written right to left) and every run of non-Arabic characters is then
        reversed back so that it still reads left to right.

        Args:
            text: the input string.

        Returns:
            A list of integer code points; empty for empty input.
        """
        retArr = []
        begIdxs = []   # first index in retArr of each non-Arabic run
        endIdxs = []   # last index in retArr of each non-Arabic run
        runStart = -1  # start of the currently open non-Arabic run, or -1
        textLen = len(text)

        # A while loop is required: a lam-alef ligature consumes two input
        # characters, which a ``for ... in range`` loop cannot skip.
        i = 0
        while i < textLen:
            charCode = ord(text[i])

            # Non-Arabic characters are copied through unchanged.
            if charCode not in range(0x621, 0x6ff):
                if runStart < 0:
                    runStart = len(retArr)
                retArr.append(charCode)
                i += 1
                continue

            # An Arabic character closes the non-Arabic run before it.
            if runStart >= 0:
                begIdxs.append(runStart)
                endIdxs.append(len(retArr) - 1)
                runStart = -1

            preCh = ord(text[i - 1]) if i > 0 else 0
            nextCh = ord(text[i + 1]) if i + 1 < textLen else 0

            # Rule 2 first: lam + alef collapses into a single ligature that
            # replaces both characters.
            replace = ArabicText.__GetContinuousWriting(preCh, charCode, nextCh)
            if replace > 0:
                retArr.append(replace)
                i += 2
                continue

            # Rule 1: pick the contextual form of a single letter.
            retArr.append(ArabicText.__GetTransform(preCh, charCode, nextCh))
            i += 1

        # The text ended inside a non-Arabic run.
        if runStart >= 0:
            begIdxs.append(runStart)
            endIdxs.append(len(retArr) - 1)

        # Arabic is displayed right to left, so flip the whole result ...
        retArr.reverse()
        # ... and flip the non-Arabic runs back.
        ArabicText.__NonArabicReverse(retArr, begIdxs, endIdxs)
        return retArr

    @classmethod
    def __NonArabicReverse(cls, charArr=None, begIdxs=None, endIdxs=None):
        """Reverse, in place, each non-Arabic run of the reversed *charArr*.

        *begIdxs* / *endIdxs* hold the inclusive bounds of every run as they
        were before *charArr* was reversed; they are mirrored here.
        """
        if charArr is None:
            return
        lastIdx = len(charArr) - 1
        for runBeg, runEnd in zip(begIdxs or (), endIdxs or ()):
            beg = lastIdx - runEnd
            end = lastIdx - runBeg
            charArr[beg:end + 1] = charArr[beg:end + 1][::-1]

    @classmethod
    def __GetContinuousWriting(cls, preCh=0, ch=0, nextCh=0):
        """Return the lam-alef ligature for *ch* + *nextCh*, or 0 if none.

        A ligature exists when *ch* is lam (0x644) and *nextCh* is one of
        0x622, 0x623, 0x625, 0x627.
        """
        nextChArr = [0x622, 0x623, 0x625, 0x627]
        if ch != 0x644 or nextCh not in nextChArr:
            return 0
        positionIdx = 1 if preCh in cls.__preSet else 0
        return cls.__replaceSet[nextChArr.index(nextCh)][positionIdx]

    @classmethod
    def __GetTransform(cls, preCh=0, ch=0, nextCh=0):
        """Return the presentation form of *ch* given its two neighbours.

        Code points without a row in the table (for example U+064B and
        above) are returned unchanged.
        """
        charIdx = ch - 0x621
        if not 0 <= charIdx < len(cls.__arabic_Positions):
            return ch

        preConnect = preCh in cls.__preSet
        nextConnect = nextCh in cls.__nextSet
        if preConnect and nextConnect:
            positionIdx = 2   # joined on both sides
        elif preConnect:
            positionIdx = 0   # joined to the previous letter only
        elif nextConnect:
            positionIdx = 1   # joined to the next letter only
        else:
            positionIdx = 3   # stands alone
        return cls.__arabic_Positions[charIdx][positionIdx]
入口和渲染模块
# -*- coding: utf-8 -*-
import freetype
import numpy
import matplotlib.pyplot as plt
import ArabicTextHelper as Arabic
def main():
    """Shape a sample Arabic sentence and show it rendered with FreeType."""
    sample = u'شبح ، شبح الشيوعية ، يتجول في جميع أنحاء القارة الأوروبية'
    # Convert the raw string into the list of code points to draw.
    shaped = Arabic.ArabicText.Translate(text=sample)
    # Render and display the converted code points in the chosen colour.
    FreeTypeDisplay(shaped, 0x33, 0xe4, 0xff)
def FreeTypeDisplay(textArr=None, R=255, G=255, B=255):
    """Render a sequence of code points on one line and show it.

    Glyphs are loaded from 'Fonts/ARIALUNI.TTF' with FreeType, composed left
    to right into a grayscale buffer, coloured with (R, G, B) wherever the
    buffer is non-zero, and displayed with matplotlib.

    Args:
        textArr: code points in display order.  ``None`` or an empty
            sequence renders nothing and returns immediately.
        R, G, B: colour components (0-255) used for the glyph pixels.
    """
    # Nothing to draw: a zero-width image would otherwise lead to a division
    # by zero when the figure size is computed.
    if textArr is None or len(textArr) == 0:
        return

    RGB = [('R', numpy.uint8), ('G', numpy.uint8), ('B', numpy.uint8)]
    face = freetype.Face('Fonts/ARIALUNI.TTF')
    face.set_char_size(48 * 64)  # FreeType char sizes are in 1/64 pt
    slot = face.glyph

    # Font-wide metrics are 26.6 fixed point; ">> 6" converts to pixels.
    height = max(0, face.size.height >> 6)
    # Distance from the baseline down to the bottom of the line box.
    descender = (-face.size.descender) >> 6

    # First pass: total width from the advances and kerning.
    width = 0
    previous = 0
    for c in textArr:
        face.load_char(c)
        kerning = face.get_kerning(previous, c)
        width += (slot.advance.x >> 6) + (kerning.x >> 6)
        previous = c

    if width <= 0 or height <= 0:
        return

    imgBuf = numpy.zeros((height, width), dtype=numpy.ubyte)
    colorBuf = numpy.zeros((height, width), dtype=RGB)

    # Second pass: copy every glyph bitmap into imgBuf.
    xBeg = 0
    previous = 0
    for c in textArr:
        face.load_char(c)
        bitmap = slot.bitmap
        # bitmap_top is the distance from the baseline to the bitmap's top.
        top = slot.bitmap_top
        w = bitmap.width
        h = bitmap.rows
        yBeg = height - top - descender
        # Kerning against the previous glyph.
        kerning = face.get_kerning(previous, c)
        xBeg += (kerning.x >> 6)
        newChar = numpy.array(bitmap.buffer, dtype='ubyte').reshape(h, w)
        yEnd = yBeg + h
        xEnd = xBeg + w
        # NOTE(review): no clipping is done here; a bitmap that extends past
        # the canvas would make this assignment fail - confirm with the
        # fonts in use.
        imgBuf[yBeg:yEnd, xBeg:xEnd] += newChar
        xBeg += (slot.advance.x >> 6)
        previous = c

    FillColor(imgBuf, colorBuf, R, G, B)

    # Show the result, keeping the image's aspect ratio.
    plt.figure(figsize=(10, 10 * imgBuf.shape[0] / float(imgBuf.shape[1])))
    showing = colorBuf.view(dtype=numpy.uint8).reshape(
        colorBuf.shape[0], colorBuf.shape[1], 3)
    plt.imshow(showing, interpolation='nearest', origin='upper')
    plt.xticks([]), plt.yticks([])
    plt.show()
def FillColor(srcBuf, colorBuf, R, G, B):
    """Paint (R, G, B) into *colorBuf* wherever *srcBuf* is greater than 0.

    Args:
        srcBuf: 2-D numpy array used as the mask source.
        colorBuf: numpy array covering at least srcBuf's rows and columns;
            modified in place.
        R, G, B: colour components written to each selected element.
    """
    rows = srcBuf.shape[0]
    columns = srcBuf.shape[1]
    # Slicing yields a view, so the masked assignment writes into colorBuf.
    region = colorBuf[:rows, :columns]
    region[srcBuf > 0] = (R, G, B)
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()