pinyin4j 是一个支持将简体和繁体中文转换成拼音的Java开源类库。
<!-- pinyin4j 汉语转拼音 -->
<dependency>
<groupId>com.belerweb</groupId>
<artifactId>pinyin4j</artifactId>
<version>2.5.1</version>
</dependency>
支持同一汉字有多个发音
还支持拼音的格式化输出,比如第几声之类的,
同时支持将简体中文、繁体中文转换为拼音,使用起来也非常简单。
有几个常用的类:
net.sourceforge.pinyin4j.PinyinHelper;
net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
PinyinHelper:提供了几个实用程序函数,用于将中文字符(简体和繁体)转换为各种中文罗马化(汉语拼音已经被官方确认为罗马化汉语的标准方案)表示。
HanyuPinyinOutputFormat:这个类定义了如何输出汉语拼音。
HanyuPinyinCaseType:为汉语拼音字符串输出的大小写提供了几种选项。
HanyuPinyinToneType:该类提供了几种输出中文音调的选项。
HanyuPinyinVCharType:这个类为“ü”的输出提供了几个选项。
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
/**
 * Pinyin helpers built on pinyin4j.
 *
 * <p>A character counts as "Chinese" when it lies in U+4E00..U+9FA5. For a
 * character with several readings only the first reading returned by pinyin4j
 * is used. All methods trim the input first and return an empty string for
 * {@code null} input.
 */
public class PinYinUtil {

    /** First character of the range treated as Chinese. */
    private static final char CHINESE_FIRST = '\u4e00';

    /** Last character of the range treated as Chinese. */
    private static final char CHINESE_LAST = '\u9fa5';

    /**
     * Converts text to lower-case, tone-less pinyin, writing "ü" as "v".
     * Non-Chinese characters are copied through unchanged.
     *
     * @param chineseLanguage the text to convert; may be {@code null}
     * @return the converted text, or an empty string for {@code null} input
     */
    public String toHanyuPinyin(String chineseLanguage) {
        HanyuPinyinOutputFormat format = newToneLessFormat(HanyuPinyinCaseType.LOWERCASE);
        format.setVCharType(HanyuPinyinVCharType.WITH_V);
        return convert(trimOrEmpty(chineseLanguage), format, false, true);
    }

    /**
     * Returns the upper-case pinyin initial of every Chinese character;
     * all other characters are copied through unchanged.
     *
     * @param ChineseLanguage the text to convert; may be {@code null}
     * @return the initials, or an empty string for {@code null} input
     */
    public static String getFirstLettersUp(String ChineseLanguage) {
        return getFirstLetters(ChineseLanguage, HanyuPinyinCaseType.UPPERCASE);
    }

    /**
     * Returns the lower-case pinyin initial of every Chinese character;
     * all other characters are copied through unchanged.
     *
     * @param ChineseLanguage the text to convert; may be {@code null}
     * @return the initials, or an empty string for {@code null} input
     */
    public static String getFirstLettersLo(String ChineseLanguage) {
        return getFirstLetters(ChineseLanguage, HanyuPinyinCaseType.LOWERCASE);
    }

    /**
     * Returns the pinyin initial of every Chinese character in the requested
     * case. Digits, letters, punctuation and any other character are copied
     * through unchanged (their case is not altered).
     *
     * @param ChineseLanguage the text to convert; may be {@code null}
     * @param caseType        the case applied to the generated initials
     * @return the initials, or an empty string for {@code null} input
     */
    public static String getFirstLetters(String ChineseLanguage, HanyuPinyinCaseType caseType) {
        return convert(trimOrEmpty(ChineseLanguage), newToneLessFormat(caseType), true, true);
    }

    /**
     * Converts text to lower-case, tone-less pinyin and keeps ASCII letters
     * and digits; every other character (punctuation, whitespace, ...) is
     * dropped. The rendering of "ü" is left at the pinyin4j default.
     *
     * @param ChineseLanguage the text to convert; may be {@code null}
     * @return the converted text, or an empty string for {@code null} input
     */
    public static String getPinyinString(String ChineseLanguage) {
        HanyuPinyinOutputFormat format = newToneLessFormat(HanyuPinyinCaseType.LOWERCASE);
        return convert(trimOrEmpty(ChineseLanguage), format, false, false);
    }

    /**
     * Returns the upper-case pinyin initial of the first character of the
     * trimmed input. If that character is an ASCII letter or digit it is
     * returned as-is; for any other character the result is empty.
     *
     * @param ChineseLanguage the text to inspect; may be {@code null}
     * @return a string of length 0 or 1; empty for {@code null}, empty or
     *         blank input (previously this threw an exception)
     */
    public static String getFirstLetter(String ChineseLanguage) {
        String trimmed = trimOrEmpty(ChineseLanguage);
        if (trimmed.isEmpty()) {
            // Nothing to look at; the old code indexed element 0 unconditionally.
            return "";
        }
        HanyuPinyinOutputFormat format = newToneLessFormat(HanyuPinyinCaseType.UPPERCASE);
        return convert(trimmed.substring(0, 1), format, true, false);
    }

    /** Returns the trimmed text, or an empty string when the text is null. */
    private static String trimOrEmpty(String text) {
        return text == null ? "" : text.trim();
    }

    /** Builds an output format without tone marks in the given case. */
    private static HanyuPinyinOutputFormat newToneLessFormat(HanyuPinyinCaseType caseType) {
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(caseType);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        return format;
    }

    /** Tells whether the character lies in the range treated as Chinese. */
    private static boolean isChinese(char c) {
        return c >= CHINESE_FIRST && c <= CHINESE_LAST;
    }

    /** Tells whether the character is an ASCII letter or digit. */
    private static boolean isAsciiAlphanumeric(char c) {
        return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /** Returns the first non-empty reading of the character, or null when there is none. */
    private static String firstReading(char c, HanyuPinyinOutputFormat format)
            throws BadHanyuPinyinOutputFormatCombination {
        String[] readings = PinyinHelper.toHanyuPinyinStringArray(c, format);
        if (readings == null || readings.length == 0) {
            return null;
        }
        String first = readings[0];
        return (first == null || first.isEmpty()) ? null : first;
    }

    /**
     * Shared conversion loop.
     *
     * @param text          already-trimmed, non-null input
     * @param format        pinyin output format
     * @param initialOnly   emit only the first letter of each reading
     * @param keepAllOthers when true every unconverted character is copied
     *                      through; when false only ASCII letters and digits are
     * @return the converted text; if pinyin4j rejects the format, whatever
     *         was converted before the failure
     */
    private static String convert(String text, HanyuPinyinOutputFormat format,
            boolean initialOnly, boolean keepAllOthers) {
        StringBuilder out = new StringBuilder(text.length());
        try {
            for (int i = 0; i < text.length(); i++) {
                char c = text.charAt(i);
                // A Chinese character without a known reading is handled like
                // any other unconverted character instead of crashing.
                String pinyin = isChinese(c) ? firstReading(c, format) : null;
                if (pinyin != null) {
                    out.append(initialOnly ? pinyin.substring(0, 1) : pinyin);
                } else if (keepAllOthers || isAsciiAlphanumeric(c)) {
                    out.append(c);
                }
            }
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            // Best effort, as before: report and return the partial result.
            System.out.println("字符不能转成汉语拼音");
        }
        return out.toString();
    }
}
/**
 * Chinese-to-pinyin helpers built on pinyin4j, plus a small hex-dump utility.
 *
 * <p>A character counts as "Chinese" when it lies in U+4E00..U+9FA5. For a
 * character with several readings only the first one returned by pinyin4j
 * is used.
 */
public class HanZi2PinYinUtil {

    /**
     * Converts text to lower-case, tone-less pinyin with "ü" written as the
     * Unicode character. Every input character (converted or not) is followed
     * by a single space, so a non-empty result always ends with a space.
     *
     * @param hanzi the text to convert; trimmed first; may be {@code null}
     * @return the space-separated result, or an empty string for {@code null}
     */
    public static String getAllPinyin(String hanzi) {
        if (hanzi == null) {
            return "";
        }
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        // Case: LOWERCASE or UPPERCASE.
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        // Tone: WITH_TONE_MARK (requires WITH_U_UNICODE, otherwise pinyin4j
        // throws), WITH_TONE_NUMBER (digits 1-4) or WITHOUT_TONE.
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        // "ü": WITH_V ("v"), WITH_U_AND_COLON ("u:") or WITH_U_UNICODE ("ü").
        format.setVCharType(HanyuPinyinVCharType.WITH_U_UNICODE);

        StringBuilder pinYin = new StringBuilder();
        try {
            for (char c : hanzi.trim().toCharArray()) {
                String reading = isChinese(c) ? firstReading(c, format) : null;
                if (reading != null) {
                    pinYin.append(reading);
                } else {
                    // Non-Chinese, or no reading known: keep the character.
                    pinYin.append(c);
                }
                pinYin.append(" ");
            }
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            // Best effort, as before: report and return the partial result.
            e.printStackTrace();
        }
        return pinYin.toString();
    }

    /**
     * Returns the upper-case pinyin initial of every Chinese character;
     * all other characters are copied through unchanged.
     *
     * @param hanyu the text to convert; trimmed first; may be {@code null}
     * @return the initials, or an empty string for {@code null}
     */
    public static String getFirstPinYin(String hanyu) {
        if (hanyu == null) {
            return "";
        }
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.UPPERCASE);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

        StringBuilder firstPinyin = new StringBuilder();
        try {
            for (char c : hanyu.trim().toCharArray()) {
                String reading = isChinese(c) ? firstReading(c, format) : null;
                if (reading != null) {
                    firstPinyin.append(reading.charAt(0));
                } else {
                    firstPinyin.append(c);
                }
            }
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            // Best effort, as before: report and return the partial result.
            e.printStackTrace();
        }
        return firstPinyin.toString();
    }

    /**
     * Returns the lower-case hexadecimal form of the string's bytes, two
     * digits per byte.
     *
     * <p>Each byte is zero-padded to two digits so the output can be decoded
     * unambiguously (unpadded, the byte pairs 0x01,0x10 and 0x11,0x00 would
     * both print as "110").
     *
     * <p>NOTE(review): the bytes come from the platform default charset, as
     * in the original code, so the result can differ between machines.
     * Consider pinning a charset once callers are checked.
     *
     * @param characters the text to encode; may be {@code null}
     * @return the hex string, or an empty string for {@code null}
     */
    public static String getCharactersASCII(String characters) {
        if (characters == null) {
            return "";
        }
        byte[] bytes = characters.getBytes();
        StringBuilder hex = new StringBuilder(bytes.length * 2);
        for (byte b : bytes) {
            int unsigned = b & 0xff;
            if (unsigned < 0x10) {
                hex.append('0');
            }
            hex.append(Integer.toHexString(unsigned));
        }
        return hex.toString();
    }

    /** Tells whether the character lies in the range treated as Chinese. */
    private static boolean isChinese(char c) {
        return c >= 0x4E00 && c <= 0x9FA5;
    }

    /** Returns the first non-empty reading of the character, or null when there is none. */
    private static String firstReading(char c, HanyuPinyinOutputFormat format)
            throws BadHanyuPinyinOutputFormatCombination {
        String[] readings = PinyinHelper.toHanyuPinyinStringArray(c, format);
        if (readings == null || readings.length == 0) {
            return null;
        }
        String first = readings[0];
        return (first == null || first.isEmpty()) ? null : first;
    }
}
/**
 * Returns the upper-case pinyin initial of every convertible character.
 *
 * <p>Characters with a code above 128 are looked up in pinyin4j. When the
 * lookup yields no reading (accented Latin letters, full-width punctuation
 * and so on) the character is copied through unchanged instead of failing
 * with a {@code NullPointerException}. Characters up to 128 are always
 * copied through unchanged.
 *
 * @param chines the text to convert; not trimmed; may be {@code null}
 * @return the initials, or an empty string for {@code null} input
 */
public static String getAlpha(String chines) {
    if (chines == null) {
        return "";
    }
    HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
    defaultFormat.setCaseType(HanyuPinyinCaseType.UPPERCASE);
    defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    StringBuilder pinyinName = new StringBuilder(chines.length());
    for (char c : chines.toCharArray()) {
        if (c <= 128) {
            pinyinName.append(c);
            continue;
        }
        try {
            String[] readings = PinyinHelper.toHanyuPinyinStringArray(c, defaultFormat);
            boolean hasReading = readings != null && readings.length > 0
                    && readings[0] != null && !readings[0].isEmpty();
            if (hasReading) {
                pinyinName.append(readings[0].charAt(0));
            } else {
                // pinyin4j has no reading for this character: keep it as-is.
                pinyinName.append(c);
            }
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            // Best effort, as before: report and skip this character.
            e.printStackTrace();
        }
    }
    return pinyinName.toString();
}
/**
 * Converts the Chinese characters (U+4E00..U+9FA5) of a string to
 * lower-case, tone-less pinyin with "ü" written as "v"; every other
 * character is copied through unchanged.
 *
 * <p>For a character with several readings only the first one returned by
 * pinyin4j is used. A Chinese character for which pinyin4j has no reading is
 * copied through unchanged.
 *
 * @param inputString the text to convert; trimmed before conversion
 * @return the converted text, or {@code "*"} when the input is {@code null},
 *         empty, or the literal string {@code "null"}
 */
public static String getPingYin(String inputString) {
    if (inputString == null || inputString.length() == 0 || "null".equals(inputString)) {
        return "*";
    }
    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
    format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
    format.setVCharType(HanyuPinyinVCharType.WITH_V);

    StringBuilder output = new StringBuilder();
    try {
        for (char c : inputString.trim().toCharArray()) {
            String[] readings = null;
            if (c >= 0x4E00 && c <= 0x9FA5) {
                readings = PinyinHelper.toHanyuPinyinStringArray(c, format);
            }
            if (readings != null && readings.length > 0 && readings[0] != null) {
                output.append(readings[0]);
            } else {
                // Non-Chinese, or no reading known: keep the character.
                output.append(c);
            }
        }
    } catch (BadHanyuPinyinOutputFormatCombination e) {
        // Best effort, as before: report and return the partial result.
        e.printStackTrace();
    }
    return output.toString();
}
/**
 * Replaces every convertible character with its upper-case pinyin initial;
 * ASCII characters are left unchanged.
 *
 * <p>Characters with a code above 128 are looked up in pinyin4j and the
 * first letter of the first reading is used. When pinyin4j has no reading
 * for such a character it is copied through unchanged rather than causing a
 * {@code NullPointerException}.
 *
 * @param chines the text to convert; not trimmed; may be {@code null}
 * @return the converted text, or an empty string for {@code null} input
 */
public static String converterToFirstSpell(String chines) {
    if (chines == null) {
        return "";
    }
    HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
    defaultFormat.setCaseType(HanyuPinyinCaseType.UPPERCASE);
    defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    char[] nameChar = chines.toCharArray();
    StringBuilder pinyinName = new StringBuilder(nameChar.length);
    for (int i = 0; i < nameChar.length; i++) {
        char current = nameChar[i];
        if (current > 128) {
            try {
                String[] readings =
                        PinyinHelper.toHanyuPinyinStringArray(current, defaultFormat);
                if (readings == null || readings.length == 0
                        || readings[0] == null || readings[0].isEmpty()) {
                    // No reading available: keep the original character.
                    pinyinName.append(current);
                } else {
                    pinyinName.append(readings[0].charAt(0));
                }
            } catch (BadHanyuPinyinOutputFormatCombination e) {
                // Best effort, as before: report and skip this character.
                e.printStackTrace();
            }
        } else {
            pinyinName.append(current);
        }
    }
    return pinyinName.toString();
}