import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import org.mozilla.intl.chardet.HtmlCharsetDetector;
import org.mozilla.intl.chardet.nsDetector;
import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
import org.mozilla.intl.chardet.nsPSMDetector;
// Java charset detector demo using jchardet: http://jchardet.sourceforge.net/
public class JCharset {
public static void main(String[] args) throws Exception {
// Initalize the nsDetector() ;
int lang = (args.length == 2) ? Integer.parseInt(args[1])
: nsPSMDetector.ALL;
//初始化字符串探测器
nsDetector det = new nsDetector(lang);
det.Init(new nsICharsetDetectionObserver() {
public void Notify(String charset) {
HtmlCharsetDetector.found = true;
System.out.println("CHARSET = " + charset);
}
});
if(args.length<1)
{
System.err.println("args.length<1");
detectorString("This is a 涓枃鐨�String�");
return ;
}
//url网址 ,file:///D:/test.txt http://www.baidu.com/
URL url = new URL(args[0]);
BufferedInputStream imp = new BufferedInputStream(url.openStream());
byte[] buf = new byte[1024];
int len;
boolean done = false;
boolean isAscii = true;
while ((len = imp.read(buf, 0, buf.length)) != -1) {
// Check if the stream is only ascii.
if (isAscii)
isAscii = det.isAscii(buf, len);
// DoIt if non-ascii and not done yet.
if (!isAscii && !done)
done = det.DoIt(buf, len, false);
}
det.DataEnd();
if (isAscii) {
System.out.println("CHARSET = ASCII");
} else {
System.out.println("CHARSET != ASCII");
}
}
private static void detectorString(String str) {
nsDetector det = new nsDetector( nsPSMDetector.ALL);
det.Init(new nsICharsetDetectionObserver() {
public void Notify(String charset) {
System.out.println("detectorString CHARSET = " + charset);
}
});
ByteArrayInputStream byteArrayInputStream=new ByteArrayInputStream(str.getBytes());
byte[] buf = new byte[1024];
int len;
boolean done = false;
boolean isAscii = true;
while ((len = byteArrayInputStream.read(buf, 0, buf.length)) != -1) {
if (isAscii)
isAscii = det.isAscii(buf, len);
if (!isAscii && !done)
done = det.DoIt(buf, len, false);
}
det.DataEnd();
if (isAscii) {
System.out.println("detectorString CHARSET = ASCII");
} else {
System.out.println("detectorString CHARSET != ASCII");
}
}
}