当前位置: 首页 > 工具软件 > jchardet > 使用案例 >

java jchardet_借助JCharDet获取文件字符集

墨星鹏
2023-12-01

package com.zhyea.util;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;

import org.mozilla.intl.chardet.nsDetector;
import org.mozilla.intl.chardet.nsICharsetDetectionObserver;

/**
 * Detects the character set of a file with the help of JCharDet.
 *
 * @author robin
 */

public classFileCharsetDetector {/*** 字符集名称*/

private staticString encoding;/*** 字符集是否已检测到*/

private static booleanfound;private staticnsDetector detector;private staticnsICharsetDetectionObserver observer;/*** 适应语言枚举

*@authorrobin

**/

enumLanguage{

Japanese(1),

Chinese(2),

SimplifiedChinese(3),

TraditionalChinese(4),

Korean(5),

DontKnow(6);private inthint;

Language(inthint){this.hint =hint;

}public intgetHint(){return this.hint;

}

}/*** 传入一个文件(File)对象,检查文件编码

*

*@paramfile

* File对象实例

*@return文件编码,若无,则返回null

*@throwsFileNotFoundException

*@throwsIOException*/

public static String checkEncoding(File file) throwsFileNotFoundException,

IOException {returncheckEncoding(file, getNsdetector());

}/*** 获取文件的编码

*

*@paramfile

* File对象实例

*@paramlanguage

* 语言

*@return文件编码

*@throwsFileNotFoundException

*@throwsIOException*/

public staticString checkEncoding(File file, Language lang)throwsFileNotFoundException, IOException {return checkEncoding(file, newnsDetector(lang.getHint()));

}/*** 获取文件的编码

*

*@parampath

* 文件路径

*@return文件编码,eg:UTF-8,GBK,GB2312形式,若无,则返回null

*@throwsFileNotFoundException

*@throwsIOException*/

public static String checkEncoding(String path) throwsFileNotFoundException,

IOException {return checkEncoding(newFile(path));

}/*** 获取文件的编码

*

*@parampath

* 文件路径

*@paramlanguage

* 语言

*@return*@throwsFileNotFoundException

*@throwsIOException*/

public staticString checkEncoding(String path, Language lang)throwsFileNotFoundException, IOException {return checkEncoding(newFile(path), lang);

}/*** 获取文件的编码

*

*@paramfile

*@paramdet

*@return*@throwsFileNotFoundException

*@throwsIOException*/

private staticString checkEncoding(File file, nsDetector detector)throwsFileNotFoundException, IOException {

detector.Init(getCharsetDetectionObserver());if(isAscii(file, detector)) {

encoding= "ASCII";

found= true;

}if (!found) {

String prob[]=detector.getProbableCharsets();if (prob.length > 0) {

encoding= prob[0];

}else{return null;

}

}returnencoding;

}/*** 检查文件编码类型是否是ASCII型

*@paramfile

* 要检查编码的文件

*@paramdetector

*@return*@throwsIOException*/

private static boolean isAscii(File file, nsDetector detector) throwsIOException{

BufferedInputStream input= null;try{

input= new BufferedInputStream(newFileInputStream(file));byte[] buffer = new byte[1024];inthasRead;boolean done = false;boolean isAscii = true;while ((hasRead=input.read(buffer)) != -1) {if(isAscii)

isAscii=detector.isAscii(buffer, hasRead);if (!isAscii && !done)

done= detector.DoIt(buffer, hasRead, false);

}returnisAscii;

}finally{

detector.DataEnd();if(null!=input)input.close();

}

}/*** nsDetector单例创建

*@return

*/

private staticnsDetector getNsdetector(){if(null ==detector){

detector= newnsDetector();

}returndetector;

}/*** nsICharsetDetectionObserver 单例创建

*@return

*/

private staticnsICharsetDetectionObserver getCharsetDetectionObserver(){if(null==observer){

observer= newnsICharsetDetectionObserver() {public voidNotify(String charset) {

found= true;

encoding=charset;

}

};

}returnobserver;

}

}

 类似资料: