package com.zhyea.util;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;

import org.mozilla.intl.chardet.nsDetector;
import org.mozilla.intl.chardet.nsICharsetDetectionObserver;

/**
 * Detects the character set of a file with the help of JCharDet.
 *
 * @author robin
 */
public classFileCharsetDetector {/*** 字符集名称*/
private staticString encoding;/*** 字符集是否已检测到*/
private static booleanfound;private staticnsDetector detector;private staticnsICharsetDetectionObserver observer;/*** 适应语言枚举
*@authorrobin
**/
enumLanguage{
Japanese(1),
Chinese(2),
SimplifiedChinese(3),
TraditionalChinese(4),
Korean(5),
DontKnow(6);private inthint;
Language(inthint){this.hint =hint;
}public intgetHint(){return this.hint;
}
}/*** 传入一个文件(File)对象,检查文件编码
*
*@paramfile
* File对象实例
*@return文件编码,若无,则返回null
*@throwsFileNotFoundException
*@throwsIOException*/
public static String checkEncoding(File file) throwsFileNotFoundException,
IOException {returncheckEncoding(file, getNsdetector());
}/*** 获取文件的编码
*
*@paramfile
* File对象实例
*@paramlanguage
* 语言
*@return文件编码
*@throwsFileNotFoundException
*@throwsIOException*/
public staticString checkEncoding(File file, Language lang)throwsFileNotFoundException, IOException {return checkEncoding(file, newnsDetector(lang.getHint()));
}/*** 获取文件的编码
*
*@parampath
* 文件路径
*@return文件编码,eg:UTF-8,GBK,GB2312形式,若无,则返回null
*@throwsFileNotFoundException
*@throwsIOException*/
public static String checkEncoding(String path) throwsFileNotFoundException,
IOException {return checkEncoding(newFile(path));
}/*** 获取文件的编码
*
*@parampath
* 文件路径
*@paramlanguage
* 语言
*@return*@throwsFileNotFoundException
*@throwsIOException*/
public staticString checkEncoding(String path, Language lang)throwsFileNotFoundException, IOException {return checkEncoding(newFile(path), lang);
}/*** 获取文件的编码
*
*@paramfile
*@paramdet
*@return*@throwsFileNotFoundException
*@throwsIOException*/
private staticString checkEncoding(File file, nsDetector detector)throwsFileNotFoundException, IOException {
detector.Init(getCharsetDetectionObserver());if(isAscii(file, detector)) {
encoding= "ASCII";
found= true;
}if (!found) {
String prob[]=detector.getProbableCharsets();if (prob.length > 0) {
encoding= prob[0];
}else{return null;
}
}returnencoding;
}/*** 检查文件编码类型是否是ASCII型
*@paramfile
* 要检查编码的文件
*@paramdetector
*@return*@throwsIOException*/
private static boolean isAscii(File file, nsDetector detector) throwsIOException{
BufferedInputStream input= null;try{
input= new BufferedInputStream(newFileInputStream(file));byte[] buffer = new byte[1024];inthasRead;boolean done = false;boolean isAscii = true;while ((hasRead=input.read(buffer)) != -1) {if(isAscii)
isAscii=detector.isAscii(buffer, hasRead);if (!isAscii && !done)
done= detector.DoIt(buffer, hasRead, false);
}returnisAscii;
}finally{
detector.DataEnd();if(null!=input)input.close();
}
}/*** nsDetector单例创建
*@return
*/
private staticnsDetector getNsdetector(){if(null ==detector){
detector= newnsDetector();
}returndetector;
}/*** nsICharsetDetectionObserver 单例创建
*@return
*/
private staticnsICharsetDetectionObserver getCharsetDetectionObserver(){if(null==observer){
observer= newnsICharsetDetectionObserver() {public voidNotify(String charset) {
found= true;
encoding=charset;
}
};
}returnobserver;
}
}