写这个工具类的起因是个人业务需求:需要根据中文汉字的拼音进行排序。博主对网上已有的回答不太满意,于是根据相关资料写了该工具类,并写入博客以备不时之需。直接上代码:
Java 汉字转汉语拼音工具类需要用到 pinyin4j.jar 包。博主使用的是 Maven 项目,因此在 pom.xml 中引入如下依赖:
<dependency> <groupId>com.belerweb</groupId> <artifactId>pinyin4j</artifactId> <version>2.5.0</version> </dependency>
工具类:
package cn.stephen.study.demoproject.util; import net.sourceforge.pinyin4j.PinyinHelper; import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat; import net.sourceforge.pinyin4j.format.HanyuPinyinToneType; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * @program: demo * @description: * @author: Wang.zw * @create: 2020-05-09 16:25 **/ public class ChineseTextToPinyin { /*** * 将汉字转成拼音(取首字母或全拼) * * @param hanzi * @param flag 是否全拼 * @return */ public static String convertHanzi2Pinyin(String hanzi, boolean flag) { /*** * ^[\u2E80-\u9FFF]+$ 匹配所有东亚区的语言 * ^[\u4E00-\u9FFF]+$ 匹配简体和繁体 * ^[\u4E00-\u9FA5]+$ 匹配简体 */ String regExp = "^[\u4E00-\u9FFF]+$"; StringBuilder sb = new StringBuilder(); if (hanzi == null || "".equals(hanzi.trim())) { return ""; } String pinyin = ""; for (int i = 0; i < hanzi.length(); i++) { char unit = hanzi.charAt(i); if (match(String.valueOf(unit), regExp)) { //是汉字,则转拼音 pinyin = convertSingleHanzi2Pinyin(unit); if (flag) { sb.append(pinyin); } else { sb.append(pinyin.charAt(0)); } } else { sb.append(unit); } } return sb.toString(); } /*** * 将单个汉字转成拼音 * * @param hanzi * @return */ private static String convertSingleHanzi2Pinyin(char hanzi) { HanyuPinyinOutputFormat outputFormat = new HanyuPinyinOutputFormat(); outputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE); String[] res; StringBuilder sb = new StringBuilder(); try { res = PinyinHelper.toHanyuPinyinStringArray(hanzi, outputFormat); sb.append(res[0]);//对于多音字,只用第一个拼音 } catch (Exception e) { return ""; } return sb.toString(); } /*** * @param str 源字符串 * @param regex 正则表达式 * @return 是否匹配 */ public static boolean match(String str, String regex) { Pattern pattern = Pattern.compile(regex); Matcher matcher = pattern.matcher(str); return matcher.find(); } }