这个版本已有使用价值。如果要求不高,基本满足需要。一并贴出来,也对比一下。
这个版本的汉字库已纳入程序资源里头,大小为324K。
主要代码如下:
Namespace
Businness.PinYin
Public
Class
SpellInformation
Private
gTone
As
String
Private
gSpellInput
As
String
Private
gSpellExpress
As
String
'
'' <summary>
'
'' 声调
'
'' </summary>
Public
Property
Tone()
As
String
Get
Return
gTone
End
Get
Set
(
ByVal
value
As
String
)
gTone
=
value.Trim
End
Set
End Property
'
'' <summary>
'
'' 拼音输入码
'
'' </summary>
Public
Property
SpellInput()
As
String
Get
Return
gSpellInput
End
Get
Set
(
ByVal
value
As
String
)
gSpellInput
=
value.Trim
End
Set
End Property
'
'' <summary>
'
'' 拼音
'
'' </summary>
Public
Property
SpellExpress()
As
String
Get
Return
gSpellExpress
End
Get
Set
(
ByVal
value
As
String
)
gSpellExpress
=
value.Trim
End
Set
End Property
Sub
New
()
End Sub
'
'' <param name="tone">声调</param>
'
'' <param name="spellinput">拼音输入码</param>
'
'' <param name="spellexpress">拼音</param>
Sub
New
(
ByVal
tone
As
String
,
ByVal
spellinput
As
String
,
ByVal
spellexpress
As
String
)
Me
.Tone
=
tone
Me
.SpellInput
=
spellinput
Me
.SpellExpress
=
spellexpress
End Sub
'
'' <param name="spellexpress">拼音</param>
Public
Overrides
Function
Equals(
ByVal
spellexpress
As
Object
)
As
Boolean
Return
Me
.SpellExpress.Equals(spellexpress)
End Function
Public
Overrides
Function
ToString()
As
String
Return
String
.Format(
"
拼音码 {0,-6} 声调 {1} 拼音 {2}
"
,
Me
.SpellInput,
Me
.Tone,
Me
.SpellExpress)
End Function
End Class
End Namespace
Namespace
Businness.PinYin
Public
Class
Word
Private
gValue
As
String
'
'' <param name="word">单个汉字</param>
Sub
New
(
ByVal
word
As
String
)
Me
.gValue
=
word
End Sub
'
'' <summary>
'
'' 汉字
'
'' </summary>
Public
ReadOnly
Property
Value()
As
String
Get
Return
gValue
End
Get
End Property
Public
ReadOnly
Property
Code()
As
String
Get
Return
Common.Code(gValue)
End
Get
End Property
Private
gSpellList
As
New
Dictionary(
Of
String
, SpellInformation)
'
'' <summary>
'
'' 拼音集
'
'' </summary>
Public
ReadOnly
Property
Spells()
As
SpellInformation()
Get
Dim
tmp(gSpellList.Count
-
1
)
As
SpellInformation
gSpellList.Values.CopyTo(tmp,
0
)
Return
tmp
End
Get
End Property
'
'' <summary>
'
'' 是否多音字
'
'' </summary>
Public
ReadOnly
Property
IsMutiSpell()
As
Boolean
Get
Return
gSpellList.Count
>
1
End
Get
End Property
Public
Sub
AddSpell(
ByVal
spell
As
SpellInformation)
If
String
.IsNullOrEmpty(spell.SpellExpress)
Then
Exit Sub
If
Me
.gSpellList.ContainsKey(spell.SpellExpress)
Then
Exit Sub
Me
.gSpellList.Add(spell.SpellExpress, spell)
End Sub
Public
Sub
AddSpell(
ByVal
tone
As
String
,
ByVal
spellinput
As
String
,
ByVal
spellexpress
As
String
)
Dim
tmp
As
New
SpellInformation(tone, spellinput, spellexpress)
AddSpell(tmp)
End Sub
Public
ReadOnly
Property
DefaultSpell()
As
SpellInformation
Get
If
Me
.gSpellList.Count
=
0
Then
Return
Nothing
Return
Me
.Spells(
0
)
End
Get
End Property
Public
ReadOnly
Property
DefaultSpellExpress()
As
String
Get
If
DefaultSpell
Is
Nothing
Then
Return
Nothing
Return
Me
.DefaultSpell.SpellExpress
End
Get
End Property
Public
ReadOnly
Property
DefaultSpellInput()
As
String
Get
If
DefaultSpell
Is
Nothing
Then
Return
Nothing
Return
Me
.DefaultSpell.SpellInput
End
Get
End Property
Public
ReadOnly
Property
DefaultTone()
As
String
Get
If
DefaultSpell
Is
Nothing
Then
Return
Nothing
Return
Me
.DefaultSpell.Tone
End
Get
End Property
Public
ReadOnly
Property
AllSpellExpress()
As
String
Get
If
Me
.gSpellList.Count
=
0
Then
Return
Nothing
If
Not
Me
.IsMutiSpell
Then
Return
Me
.DefaultSpellExpress
Dim
tmp(gSpellList.Count
-
1
)
As
String
For
i
As
Integer
=
0
To
gSpellList.Count
-
1
tmp(i)
=
Me
.Spells(i).SpellExpress
Next
Return
String
.Join(
"
"
, tmp)
End
Get
End Property
Public
ReadOnly
Property
AllSpellInput()
As
String
Get
If
Me
.gSpellList.Count
=
0
Then
Return
Nothing
If
Not
Me
.IsMutiSpell
Then
Return
Me
.DefaultSpellInput
Dim
tmp(gSpellList.Count
-
1
)
As
String
For
i
As
Integer
=
0
To
gSpellList.Count
-
1
tmp(i)
=
Me
.Spells(i).SpellInput
Next
Return
String
.Join(
"
"
, tmp)
End
Get
End Property
Public
ReadOnly
Property
AllTone()
As
String
Get
If
Me
.gSpellList.Count
=
0
Then
Return
Nothing
If
Not
Me
.IsMutiSpell
Then
Return
Me
.DefaultTone
Dim
tmp(gSpellList.Count
-
1
)
As
String
For
i
As
Integer
=
0
To
gSpellList.Count
-
1
tmp(i)
=
Me
.Spells(i).Tone
Next
Return
String
.Join(
"
"
, tmp)
End
Get
End Property
Public
Overrides
Function
ToString()
As
String
Dim
mBuilder
As
New
System.Text.StringBuilder
For
Each
spell
As
SpellInformation
In
Me
.Spells
mBuilder.AppendLine(
String
.Concat(
Me
.Value,
"
"
, spell.ToString))
Next
Return
mBuilder.ToString
End Function
End Class
End Namespace
Imports
System.IO
Imports
System.Text.RegularExpressions
Namespace
Businness.PinYin
Public
Class
PYService
Private
gDataSet
As
dsPinYin
'
'' <summary>
'
'' 汉字表
'
'' </summary>
Public
ReadOnly
Property
PinYinTable()
As
dsPinYin.PinYinDataTable
Get
Return
gDataSet.PinYin
End
Get
End Property
'
'' <summary>
'
'' 单个汉字信息
'
'' </summary>
'
'' <param name="word">单个汉字</param>
Public
Function
GetWord(
ByVal
word
As
String
)
As
Word
Dim
mRow
As
dsPinYin.PinYinRow
=
Me
.gDataSet.PinYin.FindBy代码(GetCode(word))
Return
RowConverter(mRow)
End Function
Private
Function
RowConverter(
ByVal
row
As
dsPinYin.PinYinRow)
As
Word
If
row
Is
Nothing
Then
Return
Nothing
Dim
mWord
As
New
Word(row.汉字)
Dim
mSpellExpressArray()
As
String
=
row.拼音.Split(
"
"
c)
Dim
mToneArray()
As
String
=
row.声调.Split(
"
"
c)
Dim
mSpellInputArray()
As
String
=
row.拼音码.Split(
"
"
c)
For
i
As
Integer
=
0
To
mSpellExpressArray.Length
-
1
mWord.AddSpell(mToneArray(i), mSpellInputArray(i), mSpellExpressArray(i))
Next
Return
mWord
End Function
Private
Sub
RowUpdate(
ByVal
word
As
Word)
Dim
mWord
As
Word
=
GetWord(word.Value)
If
mWord
Is
Nothing
Then
Dim
tmpRow
As
dsPinYin.PinYinRow
=
Me
.gDataSet.PinYin.AddPinYinRow(word.Value, word.Code,
""
,
""
,
""
,
True
)
mWord
=
RowConverter(tmpRow)
End
If
For
Each
spell
As
SpellInformation
In
word.Spells
mWord.AddSpell(spell)
Next
Dim
mRow
As
dsPinYin.PinYinRow
=
Me
.gDataSet.PinYin.FindBy代码(mWord.Code)
With
mRow
.拼音码
=
mWord.AllSpellInput
.拼音
=
mWord.AllSpellExpress
.声调
=
mWord.AllTone
.单音
=
Not
mWord.IsMutiSpell
End
With
End Sub
Public
Sub
Load()
UpdateFromTxt()
End Sub
'
文件存放的格式是:汉字,拼音,音调,拼音码
Private
Sub
UpdateFromTxt()
Me
.gDataSet
=
New
dsPinYin
LzmTW.uSystem.uCollections.CommonServices.MoveNext(
Of
String
)(My.Resources.pinyin.Split(
CChar
(vbCrLf)),
AddressOf
Action)
Me
.gDataSet.AcceptChanges()
End Sub
'
文件存放的格式是:汉字,拼音,音调,拼音码
Private
Sub
Action(
ByVal
line
As
String
)
Dim
mArray
As
String
()
mArray
=
line.Split(
"
,
"
c)
If
mArray.Length
<>
4
Then
Exit Sub
Dim
mWord
As
String
=
mArray(
0
).Trim
Dim
mSpellExpress
As
String
=
mArray(
1
).Trim
Dim
mTone
As
String
=
mArray(
2
).Trim
Dim
mSpellInput
As
String
=
mArray(
3
).Trim
Dim
mWordInformation
As
New
Word(mWord)
mWordInformation.AddSpell(mTone, mSpellInput, mSpellExpress)
RowUpdate(mWordInformation)
End Sub
'
'' <summary>
'
'' 将字符串转为拼音
'
'' </summary>
'
'' <param name="line">字符串</param>
'
'' <param name="isgetfirst">如是多音字,取第一个拼音</param>
'
'' <param name="forInput">是则查拼音码,否则查拼音</param>
Public
Function
ToPinyin(
ByVal
line
As
String
,
ByVal
isgetfirst
As
Boolean
,
ByVal
forInput
As
Boolean
)
As
String
Dim
mBuilder
As
New
Text.StringBuilder
For
Each
s
As
Char
In
line.ToCharArray
If
Common.IsSingleWord(s)
Then
mBuilder.Append(GetPinyin(s, isgetfirst, forInput))
Else
mBuilder.Append(s)
End
If
Next
Return
mBuilder.ToString
End Function
Private
Function
GetPinyin(
ByVal
word
As
String
,
ByVal
isgetfirst
As
Boolean
,
ByVal
forInput
As
Boolean
)
As
String
Dim
mResult
As
String
Dim
mWord
As
Word
=
GetWord(word)
If
isgetfirst
Or
Not
mWord.IsMutiSpell
Then
If
forInput
Then
mResult
=
mWord.DefaultSpellInput
Else
mResult
=
mWord.DefaultSpellExpress
End
If
Else
If
forInput
Then
Dim
tmpList(
-
1
)
As
String
For
Each
spell
As
SpellInformation
In
mWord.Spells
If
Array.IndexOf(tmpList, spell.SpellInput)
=
-
1
Then
LzmTW.uSystem.uCollections.CommonServices.Append(tmpList, spell.SpellInput)
End
If
Next
If
tmpList.Length
=
1
Then
mResult
=
mWord.DefaultSpellInput
Else
mResult
=
String
.Format(
"
({0})
"
,
String
.Join(
"
"
, tmpList))
End
If
Else
mResult
=
String
.Format(
"
({0})
"
, mWord.AllSpellExpress)
End
If
End
If
Return
mResult
End Function
'
'' <summary>
'
'' 按拼音查字
'
'' </summary>
'
'' <param name="pinyin">拼音</param>
Public
Function
WordArray(
ByVal
pinyin
As
String
)
As
String
()
Dim
mRows
As
dsPinYin.PinYinRow()
=
CType
(
Me
.gDataSet.PinYin.Select(
String
.Format(
"
拼音码 LIKE '%{0}%'
"
, pinyin)), dsPinYin.PinYinRow())
Dim
mResult(
-
1
)
As
String
For
i
As
Integer
=
0
To
mRows.Length
-
1
If
Array.IndexOf(mRows(i).拼音码.Split(
"
"
c), pinyin)
<>
-
1
Then
LzmTW.uSystem.uCollections.CommonServices.Append(mResult, mRows(i).汉字)
End
If
Next
Return
mResult
End Function
'
'' <summary>
'
'' 按拼音查字
'
'' </summary>
'
'' <param name="pinyin">拼音</param>
Public
Function
Words(
ByVal
pinyin
As
String
)
As
String
Return
String
.Concat(WordArray(pinyin))
End Function
Public
Function
GetCode(
ByVal
word
As
String
)
As
String
Return
Common.Code(word)
End Function
End Class
End Namespace
调用的代码
Public
Class
Form1
Dim
gPinyinService
As
New
LzmTW.Businness.PinYin.PYService
Private
Sub
ButtonLoad_Click(
ByVal
sender
As
System.Object,
ByVal
e
As
System.EventArgs)
Handles
ButtonLoad.Click
gPinyinService.Load()
Me
.DataGridView1.DataSource
=
gPinyinService.PinYinTable
End Sub
Private
Sub
ButtonTran_Click(
ByVal
sender
As
System.Object,
ByVal
e
As
System.EventArgs)
Handles
ButtonTran.Click
Me
.RichTextBox2.Text
=
gPinyinService.ToPinyin(
Me
.RichTextBox1.Text,
Me
.CheckBox1.Checked,
Me
.CheckBox2.Checked)
End Sub
Private
Sub
ButtonWord_Click(
ByVal
sender
As
System.Object,
ByVal
e
As
System.EventArgs)
Handles
ButtonWord.Click
Me
.RichTextBox3.Text
=
gPinyinService.Words(
Me
.TextBoxPinyin.Text)
End Sub
Private
Sub
Button1_Click(
ByVal
sender
As
System.Object,
ByVal
e
As
System.EventArgs)
Handles
Button1.Click
Me
.RichTextBox3.Text
=
gPinyinService.GetWord(
Me
.TextBox1.Text).ToString
End Sub
End Class
效果图:
下载方案:代码
Trackback:
平常中,经常用到汉字转拼音,比如批量生成姓名->拼音作为登录帐号。
这个方法只是简单的利用汉字拼音库。至于怎么找这个库,网上多有介绍。在最后提供下载的方案中也提供了这个库文本文件。
主要代码如下:
Imports
System.IO
Imports
System.Text.RegularExpressions
Namespace
Businness.PinYin
Public
Class
PYService
Private
gDataSet
As
New
dsPinYin
'
'' <summary>
'
'' 汉字表
'
'' </summary>
Public
ReadOnly
Property
PinYinTable()
As
dsPinYin.PinYinDataTable
Get
Return
gDataSet.PinYin
End
Get
End Property
Private
gTxtFile
As
String
=
AppDomain.CurrentDomain.SetupInformation.ApplicationBase
&
"
pinyin.txt
"
Private
gxmlFile
As
String
=
AppDomain.CurrentDomain.SetupInformation.ApplicationBase
&
"
pinyin.xml
"
Private
gRegex
As
New
Regex("(?<Word>^[/u4e00-/u9fa5]+)(?<PingYin>.*)")
'
'' <summary>
'
'' 加载汉字库,文件名为pinyin.xml,在程序当前目录下
'
'' </summary>
Public
Sub
Load()
If
Not
IO.File.Exists(gxmlFile)
Then
Throw
New
Exception(
String
.Format(
"
文件{0}不存在
"
, gxmlFile))
End
If
DataSetInitialize()
gDataSet.ReadXml(gxmlFile)
End Sub
'
'' <summary>
'
'' 从汉字文件中更新,文件名为pinyin.txt,在程序当前目录下
'
'' </summary>
'
'' <remarks></remarks>
Public
Sub
Update()
If
Not
IO.File.Exists(gTxtFile)
Then
Throw
New
Exception(
String
.Format(
"
文件{0}不存在
"
, gTxtFile))
End
If
UpdateFromTxt(gTxtFile)
End Sub
'
'' <summary>
'
'' 保存汉字库,文件为pingyin.xml,在程序当前目录下
'
'' </summary>
'
'' <remarks></remarks>
Public
Sub
Save()
gDataSet.WriteXml(gxmlFile)
End Sub
Private
Sub
DataSetInitialize()
'
在更新或读入时,清除
Me
.gDataSet.Clear()
Me
.gDataSet.AcceptChanges()
End Sub
Private
Sub
UpdateFromTxt(
ByVal
file
As
String
)
DataSetInitialize()
Dim
mLine
As
String
Dim
mBuilder
As
New
System.Text.StringBuilder
Dim
mReader
As
New
IO.StreamReader(file, System.Text.Encoding.Default)
Do
mLine
=
mReader.ReadLine
Add(mLine)
Loop
Until
String
.IsNullOrEmpty(mLine)
mReader.Close()
mReader.Dispose()
Me
.gDataSet.PinYin.AcceptChanges()
End Sub
Private
Sub
Add(
ByVal
line
As
String
)
If
line
Is
Nothing
Then
Exit Sub
With
gRegex.Match(line)
If
.Success
Then
'
只取单字,不取词组
If
.Groups(
"
Word
"
).Value.Length
=
1
Then
Add(.Groups(
"
Word
"
).Value, .Groups(
"
PingYin
"
).Value)
End
If
End
If
End
With
End Sub
Private
Sub
Add(
ByVal
word
As
String
,
ByVal
py
As
String
)
'
多音的,拼音间用单个空枨符隔开
py
=
py.Trim.Replace(
"
"
,
"
"
)
Dim
mCode
As
String
=
ChineseCode(word)
Dim
mRow
As
dsPinYin.PinYinRow
=
Me
.gDataSet.PinYin.FindBy代码(mCode)
If
mRow
Is
Nothing
Then
Me
.gDataSet.PinYin.AddPinYinRow(word, mCode, py)
Else
Dim
pyArray()
As
String
=
py.Split(
"
"
c)
For
Each
s
As
String
In
pyArray
If
Not
mRow.拼音.Contains(s)
Then
mRow.拼音
=
String
.Concat(mRow.拼音,
"
"
&
s)
End
If
Next
End
If
End Sub
'
'' <summary>
'
'' 将字符串转为拼音
'
'' </summary>
'
'' <param name="line">字符串</param>
'
'' <param name="isgetfirst">如是多音字,取第一个拼音</param>
Public
Function
ToPinyin(
ByVal
line
As
String
,
ByVal
isgetfirst
As
Boolean
)
As
String
Dim
mBuilder
As
New
Text.StringBuilder
For
Each
s
As
Char
In
line.ToCharArray
If
IsTrue(s)
Then
mBuilder.Append(GetPinyin(s, isgetfirst))
Else
mBuilder.Append(s)
End
If
Next
Return
mBuilder.ToString
End Function
Private
Function
GetPinyin(
ByVal
word
As
String
,
ByVal
isgetfirst
As
Boolean
)
As
String
Dim
mResult
As
String
=
word
Dim
mArray
As
String
()
=
PinYinArray(ChineseCode(word))
'
取拼音组
If
Not
mArray
Is
Nothing
Then
If
mArray.Length
=
1
Or
isgetfirst
Then
mResult
=
mArray(
0
)
'
单音的
Else
mResult
=
String
.Format(
"
({0})
"
,
String
.Join(
"
,
"
, mArray))
'
多音的用括号括住,拼音间用逗号隔开
End
If
End
If
Return
mResult
End Function
'
取拼音组
Private
Function
PinYinArray(
ByVal
code
As
String
)
As
String
()
Dim
mRow
As
dsPinYin.PinYinRow
=
Me
.gDataSet.PinYin.FindBy代码(code)
If
mRow
Is
Nothing
Then
Return
Nothing
Return
mRow.拼音.Split(
"
"
c)
End Function
'
'' <summary>
'
'' 按拼音查字
'
'' </summary>
'
'' <param name="pinyin">拼音</param>
Public
Function
WordArray(
ByVal
pinyin
As
String
)
As
String
()
Dim
mRows
As
dsPinYin.PinYinRow()
=
CType
(
Me
.gDataSet.PinYin.Select(
String
.Format(
"
拼音 LIKE '%{0}%'
"
, pinyin)), dsPinYin.PinYinRow())
Dim
mResult(
-
1
)
As
String
For
i
As
Integer
=
0
To
mRows.Length
-
1
If
Array.IndexOf(mRows(i).拼音.Split(
"
"
c), pinyin)
<>
-
1
Then
Me
.Append(mResult, mRows(i).汉字)
End
If
Next
Return
mResult
End Function
'
'' <summary>
'
'' 按拼音查字
'
'' </summary>
'
'' <param name="pinyin">拼音</param>
Public
Function
Words(
ByVal
pinyin
As
String
)
As
String
Return
String
.Concat(WordArray(pinyin))
End Function
'
'' <summary>
'
'' 汉字代码
'
'' </summary>
'
'' <param name="word">单个汉字</param>
Public
Shared
Function
ChineseCode(
ByVal
word
As
String
)
As
String
If
Not
IsTrue(word)
Then
Return
Nothing
Dim
bytes()
As
Byte
=
System.Text.Encoding.Default.GetBytes(word)
Return
String
.Concat(
Hex
(bytes(
0
)),
Hex
(bytes(
1
)))
End Function
'
'' <summary>
'
'' 是否是单个汉字
'
'' </summary>
'
'' <param name="word">字符</param>
Public
Shared
Function
IsTrue(
ByVal
word
As
String
)
As
Boolean
If
word
Is
Nothing
Then
Return
False
Return
System.Text.RegularExpressions.Regex.IsMatch(word,
"^[/u4e00-/u9fa5]$"
)
End Function
Private
Sub
Append(
ByRef
collection
As
String
(),
ByVal
value
As
String
)
ReDim
Preserve
collection(collection.Length)
collection(collection.Length
-
1
)
=
value
End Sub
End Class
End Namespace
效果图: