import com.github.promeg.pinyinhelper.Pinyin;
import com.hankcs.hanlp.HanLP;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
/**
 * Rough timing comparison of three Chinese-to-pinyin libraries (TinyPinyin, HanLP and
 * pinyin4j) converting the same sample sentence a fixed number of times.
 *
 * <p>Each library is timed once, in sequence, with no warm-up phase, so every figure
 * includes whatever one-time initialization the library performs on first use as well as
 * JIT warm-up. Treat the printed numbers as indicative only.
 */
public class PinyinBenchmark {

    /** Number of conversions timed per library. */
    private static final int ITERATIONS = 10000;

    /** Nanoseconds in one millisecond, used to report elapsed time in ms. */
    private static final long NANOS_PER_MILLI = 1000000L;

    /** First character of the range treated as Chinese (U+4E00). */
    private static final char CJK_FIRST = 0x4E00;

    /** Last character of the range treated as Chinese (U+9FA5). */
    private static final char CJK_LAST = 0x9FA5;

    /**
     * Output format shared by every pinyin4j conversion: lowercase, no tone marks.
     * Built once because it never changes after construction.
     */
    private static final HanyuPinyinOutputFormat PINYIN4J_FORMAT = newPinyin4jFormat();

    /**
     * Runs the three timed loops and prints one {@code "<library>: <elapsed> ms"} line each.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String chinese = "中华人民共和国,欢迎您!";
        long startNanos;

        // TinyPinyin
        startNanos = System.nanoTime();
        for (int i = 0; i < ITERATIONS; i++) {
            Pinyin.toPinyin(chinese, "");
        }
        printElapsed("TinyPinyin", startNanos);

        // HanLP
        startNanos = System.nanoTime();
        for (int i = 0; i < ITERATIONS; i++) {
            HanLP.convertToPinyinString(chinese, " ", false);
        }
        printElapsed("HanLP", startNanos);

        // Pinyin4j
        startNanos = System.nanoTime();
        for (int i = 0; i < ITERATIONS; i++) {
            convertUsingPinyin4j(chinese);
        }
        printElapsed("Pinyin4j", startNanos);
    }

    /**
     * Converts a string to pinyin with pinyin4j, one {@code char} at a time.
     *
     * <p>Characters in U+4E00..U+9FA5 are replaced by the first reading pinyin4j returns
     * for them (lowercase, without tone); if pinyin4j returns no reading, the character is
     * omitted from the result. Every other character is copied through unchanged.
     *
     * @param chinese the text to convert; must not be {@code null}
     * @return the converted text
     * @throws IllegalStateException if pinyin4j rejects the fixed output format
     */
    public static String convertUsingPinyin4j(String chinese) {
        StringBuilder pinyin = new StringBuilder();
        for (int i = 0; i < chinese.length(); i++) {
            char c = chinese.charAt(i);

            // A plain range comparison avoids compiling a regex for every character.
            if (c < CJK_FIRST || c > CJK_LAST) {
                pinyin.append(c);
                continue;
            }

            String[] readings;
            try {
                readings = PinyinHelper.toHanyuPinyinStringArray(c, PINYIN4J_FORMAT);
            } catch (BadHanyuPinyinOutputFormatCombination e) {
                // The format is a fixed constant of this class, so a rejection points at a
                // configuration error here rather than at the input; surface it with its cause.
                throw new IllegalStateException(
                        "pinyin4j rejected the configured output format", e);
            }

            if (readings != null && readings.length > 0) {
                pinyin.append(readings[0]);
            }
        }
        return pinyin.toString();
    }

    /** Builds the lowercase, tone-less pinyin4j output format. */
    private static HanyuPinyinOutputFormat newPinyin4jFormat() {
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        return format;
    }

    /** Prints the whole milliseconds elapsed since {@code startNanos} as "label: N ms". */
    private static void printElapsed(String label, long startNanos) {
        long elapsedMillis = (System.nanoTime() - startNanos) / NANOS_PER_MILLI;
        System.out.println(label + ": " + elapsedMillis + " ms");
    }
}
Java 将中文转换为拼音
原创
© 著作权归作者所有：来自 51CTO 博客作者 mb650996349c158 的原创作品，请联系作者获取转载授权，否则将追究法律责任。
提问和评论都可以,用心的回复会被更多人看到
评论
发布评论
相关文章
-
Java 将中文 name 转换为拼音的姓和名
将获取到的中文名拆分成拼音格式的姓氏和名称
System 首字母 字符串转换 中文名获取拼音的姓氏名称