功能
1.支持简体中文和繁体中文字符。
2.支持转换到汉语拼音,通用拼音, 威妥玛拼音(威玛拼法), 注音符号第二式, 耶鲁拼法和国语罗马字。
3.支持多音字,即可以获取一个中文字符的多种发音。
4.支持多种字符串输出格式,比如支持Unicode格式的字符ü和声调符号(阴平 "ˉ",阳平"ˊ",上声"ˇ",去声"ˋ")的输出。
5.能够根据中文字符获取其对应的拼音,而且拼音的格式可以定制。(pinyin4j 是一个支持将中文转换为拼音的 Java 开源类库。)
代码
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
/**
 * Helpers for converting Chinese text to Hanyu Pinyin with pinyin4j.
 *
 * <p>Three kinds of conversion are offered: pinyin initials (one letter per
 * Chinese character), full toneless pinyin using the first reading of each
 * character, and every combination of readings for polyphonic characters.
 */
public class Pinyin4jUtil {

    /**
     * Chars with a value up to and including this limit are copied to the
     * output as-is and never looked up in pinyin4j.
     */
    private static final int PASS_THROUGH_LIMIT = 128;

    /**
     * Letter case requested from {@link #toPinYin(String, String, Type)}.
     * Declared public so that callers outside this package can actually use
     * the public method that takes it.
     */
    public enum Type {
        UPPERCASE,
        LOWERCASE,
        FIRSTUPPER
    }

    /**
     * Returns the upper-case pinyin initials of {@code str} with no separator.
     *
     * @param str text to convert
     * @return e.g. "YHY" for "宇航员"
     * @throws BadHanyuPinyinOutputFormatCombination if pinyin4j rejects the output format
     */
    public static String toPinYinUppercase(String str) throws BadHanyuPinyinOutputFormatCombination {
        return toPinYin(str, "", Type.UPPERCASE);
    }

    /**
     * Returns the upper-case pinyin initials of {@code str}, separated by {@code spera}.
     *
     * @param str   text to convert
     * @param spera separator inserted between initials; pass "" for none
     * @return e.g. "Y**H**Y" for "宇航员" with separator "**"
     * @throws BadHanyuPinyinOutputFormatCombination if pinyin4j rejects the output format
     */
    public static String toPinYinUppercase(String str, String spera) throws BadHanyuPinyinOutputFormatCombination {
        return toPinYin(str, spera, Type.UPPERCASE);
    }

    /**
     * Returns the lower-case pinyin initials of {@code str} with no separator.
     *
     * @param str text to convert
     * @return e.g. "yhy" for "宇航员"
     * @throws BadHanyuPinyinOutputFormatCombination if pinyin4j rejects the output format
     */
    public static String toPinYinLowercase(String str) throws BadHanyuPinyinOutputFormatCombination {
        return toPinYin(str, "", Type.LOWERCASE);
    }

    /**
     * Returns the lower-case pinyin initials of {@code str}, separated by {@code spera}.
     *
     * @param str   text to convert
     * @param spera separator inserted between initials; pass "" for none
     * @return e.g. "y**h**y" for "宇航员" with separator "**"
     * @throws BadHanyuPinyinOutputFormatCombination if pinyin4j rejects the output format
     */
    public static String toPinYinLowercase(String str, String spera) throws BadHanyuPinyinOutputFormatCombination {
        return toPinYin(str, spera, Type.LOWERCASE);
    }

    /**
     * Returns the first character of the upper-case initials of {@code str}.
     *
     * @param str text to convert
     * @return e.g. "Y" for "宇航员"; "" when {@code str} is null or blank
     * @throws BadHanyuPinyinOutputFormatCombination if pinyin4j rejects the output format
     */
    public static String toPinYinUppercaseInitials(String str) throws BadHanyuPinyinOutputFormatCombination {
        return leadingCharacter(toPinYinUppercase(str));
    }

    /**
     * Returns the first character of the lower-case initials of {@code str}.
     *
     * @param str text to convert
     * @return e.g. "y" for "宇航员"; "" when {@code str} is null or blank
     * @throws BadHanyuPinyinOutputFormatCombination if pinyin4j rejects the output format
     */
    public static String toPinYinLowercaseInitials(String str) throws BadHanyuPinyinOutputFormatCombination {
        return leadingCharacter(toPinYinLowercase(str));
    }

    /**
     * Converts each Chinese character of {@code str} to the initial letter of
     * its first pinyin reading. Characters that are not Chinese, or for which
     * pinyin4j has no reading, are copied unchanged.
     *
     * <p>The separator is appended after every converted character except when
     * that character is the last one of {@code str}. The result is trimmed.
     *
     * @param str   text to convert; null or blank yields ""
     * @param spera separator appended after converted characters; null is treated as ""
     * @param type  requested case; only the initial letter is kept, so
     *              {@code FIRSTUPPER} yields an upper-case initial
     * @return the converted text
     * @throws BadHanyuPinyinOutputFormatCombination if pinyin4j rejects the output format
     */
    public static String toPinYin(String str, String spera, Type type) throws BadHanyuPinyinOutputFormatCombination {
        if (str == null || str.trim().length() == 0) {
            return "";
        }
        // A null separator used to be concatenated as the literal text "null".
        String separator = (spera == null) ? "" : spera;

        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        format.setCaseType(type == Type.UPPERCASE
                ? HanyuPinyinCaseType.UPPERCASE
                : HanyuPinyinCaseType.LOWERCASE);

        StringBuilder result = new StringBuilder(str.length());
        int lastIndex = str.length() - 1;
        for (int i = 0; i <= lastIndex; i++) {
            char c = str.charAt(i);
            String[] readings = (c <= PASS_THROUGH_LIMIT)
                    ? null
                    : PinyinHelper.toHanyuPinyinStringArray(c, format);
            if (readings == null || readings.length == 0 || readings[0].length() == 0) {
                // No usable reading: keep the original character.
                result.append(c);
                continue;
            }
            char initial = readings[0].charAt(0);
            if (type == Type.FIRSTUPPER) {
                // Character.toUpperCase is locale-independent, unlike String.toUpperCase().
                initial = Character.toUpperCase(initial);
            }
            result.append(initial);
            if (i != lastIndex) {
                result.append(separator);
            }
        }
        return result.toString().trim();
    }

    /**
     * Converts a Chinese string to full lower-case, toneless pinyin using the
     * first reading of each character. Characters that are not Chinese, or for
     * which pinyin4j has no reading (for example full-width punctuation), are
     * copied unchanged.
     *
     * @param chinese text to convert; null yields ""
     * @return the pinyin spelling
     */
    public static String toPinYinSpell(String chinese) {
        if (chinese == null) {
            return "";
        }
        HanyuPinyinOutputFormat format = lowercaseTonelessFormat();
        StringBuilder spelled = new StringBuilder();
        for (int i = 0; i < chinese.length(); i++) {
            char c = chinese.charAt(i);
            String[] readings = (c > PASS_THROUGH_LIMIT) ? lookupReadings(c, format) : null;
            if (readings == null || readings.length == 0) {
                // Previously a missing reading caused a NullPointerException here.
                spelled.append(c);
            } else {
                spelled.append(readings[0]);
            }
        }
        return spelled.toString();
    }

    /**
     * Converts a Chinese string to full lower-case, toneless pinyin, expanding
     * polyphonic characters into every combination of their readings. The
     * combinations are joined with ",". Chars up to {@value #PASS_THROUGH_LIMIT}
     * are kept as they are; other characters without a pinyin reading are dropped.
     *
     * @param chines text to convert; null yields ""
     * @return comma-separated pinyin combinations
     */
    public static String toPinYinMultiple(String chines) {
        if (chines == null) {
            return "";
        }
        HanyuPinyinOutputFormat format = lowercaseTonelessFormat();
        // Intermediate encoding consumed by discountTheChinese: readings of one
        // character are joined by "," and every character is followed by " ".
        StringBuilder encoded = new StringBuilder();
        for (int i = 0; i < chines.length(); i++) {
            char c = chines.charAt(i);
            if (c > PASS_THROUGH_LIMIT) {
                String[] readings = lookupReadings(c, format);
                if (readings != null) {
                    for (int j = 0; j < readings.length; j++) {
                        if (j > 0) {
                            encoded.append(",");
                        }
                        encoded.append(readings[j]);
                    }
                }
            } else {
                encoded.append(c);
            }
            encoded.append(" ");
        }
        return parseTheChineseByObject(discountTheChinese(encoded.toString()));
    }

    /** Builds the lower-case, toneless output format shared by the full-pinyin methods. */
    private static HanyuPinyinOutputFormat lowercaseTonelessFormat() {
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        return format;
    }

    /**
     * Looks up the readings of {@code c}, converting the checked format
     * exception into an unchecked one so the cause is not silently lost.
     * NOTE(review): pinyin4j is understood to raise this exception only for
     * invalid tone-mark/ü combinations, which the toneless format used by the
     * callers should never produce - confirm against the pinyin4j docs.
     */
    private static String[] lookupReadings(char c, HanyuPinyinOutputFormat format) {
        try {
            return PinyinHelper.toHanyuPinyinStringArray(c, format);
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            throw new IllegalStateException("pinyin4j rejected the lowercase/toneless output format", e);
        }
    }

    /**
     * Returns the first character of {@code py}, trimmed. The cut is made on a
     * code-point boundary so a leading surrogate pair is not split in half.
     */
    private static String leadingCharacter(String py) {
        if (py.length() <= 1) {
            return py.trim();
        }
        return py.substring(0, py.offsetByCodePoints(0, 1)).trim();
    }

    /**
     * Splits the encoded readings (see {@link #toPinYinMultiple(String)}) into
     * one map per character, collapsing duplicate readings of the same character.
     *
     * @param theStr readings joined by "," per character, characters separated by " "
     * @return one map per character; keys are the distinct readings, values their counts
     */
    private static List<Map<String, Integer>> discountTheChinese(String theStr) {
        List<Map<String, Integer>> perCharacter = new ArrayList<Map<String, Integer>>();
        for (String encodedReadings : theStr.split(" ")) {
            Map<String, Integer> distinct = new Hashtable<String, Integer>();
            for (String reading : encodedReadings.split(",")) {
                Integer seen = distinct.get(reading);
                if (seen == null) {
                    distinct.put(reading, Integer.valueOf(1));
                } else {
                    // Remove-then-put is kept from the original on purpose: an
                    // in-place update could alter Hashtable iteration order and
                    // with it the order of the combinations returned to callers.
                    distinct.remove(reading);
                    distinct.put(reading, Integer.valueOf(seen.intValue() + 1));
                }
            }
            perCharacter.add(distinct);
        }
        return perCharacter;
    }

    /**
     * Builds every combination of readings, taking one reading per character
     * in order, and joins the combinations with ",".
     *
     * @param list one map of distinct readings per character
     * @return comma-separated combinations, or "" when there are none
     */
    private static String parseTheChineseByObject(List<Map<String, Integer>> list) {
        // Combinations built from the characters processed so far.
        Map<String, Integer> combined = null;
        for (Map<String, Integer> current : list) {
            Map<String, Integer> next = new Hashtable<String, Integer>();
            if (combined == null) {
                for (String reading : current.keySet()) {
                    next.put(reading, 1);
                }
            } else {
                for (String prefix : combined.keySet()) {
                    for (String reading : current.keySet()) {
                        next.put(prefix + reading, 1);
                    }
                }
            }
            // A character without readings contributes nothing; keep the previous set.
            if (!next.isEmpty()) {
                combined = next;
            }
        }
        if (combined == null) {
            return "";
        }
        StringBuilder joined = new StringBuilder();
        boolean firstKey = true;
        for (String combination : combined.keySet()) {
            if (!firstKey) {
                joined.append(",");
            }
            joined.append(combination);
            firstKey = false;
        }
        return joined.toString();
    }
}
测试
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
import org.junit.Test;
/**
 * Smoke test for Pinyin4jUtil: runs each public conversion once and prints
 * the results for manual inspection. It makes no assertions.
 */
public class Pinyin4jUtilTest {

    /**
     * Calls every conversion method first, then prints the results one per
     * line in the order the calls were made.
     *
     * @throws BadHanyuPinyinOutputFormatCombination if pinyin4j rejects the output format
     */
    @Test
    public void testPinyin() throws BadHanyuPinyinOutputFormatCombination {
        String[] outputs = {
            Pinyin4jUtil.toPinYinUppercase("手动!"),
            Pinyin4jUtil.toPinYinUppercase("更丰富", "**"),
            Pinyin4jUtil.toPinYinLowercase("1撒大声地"),
            Pinyin4jUtil.toPinYinLowercase("对对对", "**"),
            Pinyin4jUtil.toPinYinUppercaseInitials("是是是"),
            Pinyin4jUtil.toPinYinLowercaseInitials("啊啊啊"),
            Pinyin4jUtil.toPinYinSpell("2多岁的s"),
            Pinyin4jUtil.toPinYinMultiple("当参长")
        };
        for (String output : outputs) {
            System.out.println(output);
        }
    }
}
参考:
https://blog.csdn.net/cl11992/article/details/77145962
https://www.cnblogs.com/liaojie970/p/5490180.html