源码下载:
http://jaist.dl.sourceforge.net/project/cpdetector/cpdetector/sources/cpdetector_eclipse_project_1.0.10.zip
jar包下载:
https://sourceforge.net/projects/cpdetector/?source=typ_redirect
cpdetector是一个可以自动检测文本编码格式的项目。
detector按照“谁最先返回非空的探测结果,就以该结果为准”的原则返回探测到的字符集编码。
使用需要用到三个第三方JAR包:antlr.jar、chardet.jar和cpdetector.jar
cpdetector是基于统计学原理的,不保证完全正确。
以下是读取xxx.txt文件中的内容,并以html的方式返回给浏览器的简单servlet实例。在实现的过程中,遇到的最大问题是浏览器打开时中文乱码,原因是.txt文件保存时的编码不统一,所以在“out.println(new String(buffer, charset));”时charset不能写死,而应该通过某种途径获取.txt文件的编码格式。获取的方式网上主要有以下三种,亲测第三种解决了问题,第一、第二种方法都不完善。
package com.hwc.a.servlet;
import info.monitorenter.cpdetector.io.ASCIIDetector;
import info.monitorenter.cpdetector.io.CodepageDetectorProxy;
import info.monitorenter.cpdetector.io.JChardetFacade;
import info.monitorenter.cpdetector.io.ParsingDetector;
import info.monitorenter.cpdetector.io.UnicodeDetector;
import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
/**
 * Servlet that sends the contents of a text file to the browser as {@code text/html}.
 *
 * <p>The file is named by the {@code path} request parameter. Its character encoding is detected
 * with cpdetector ({@link #getFileEncode(String)}); when that yields nothing usable, a
 * byte-order-mark check is used instead. The resulting encoding is used both to decode the file
 * and as the charset of the response.
 */
public class TxtToHtmlServlet extends HttpServlet {
    private static final long serialVersionUID = 1L;

    /** Chunk size used while copying the file into memory. */
    private static final int READ_CHUNK_SIZE = 8192;

    /** Detector configured on first use; only accessed through {@link #configuredDetector()}. */
    private static CodepageDetectorProxy sharedDetector;

    /**
     * Reads the file named by the {@code path} parameter and writes it to the response.
     *
     * <p>Nothing is written when {@code path} is missing or empty. I/O failures (missing file,
     * unreadable file) are logged and leave the response without a body.
     *
     * @param request  the request carrying the {@code path} parameter
     * @param response the response that receives the file contents
     * @throws ServletException declared for the servlet contract
     * @throws IOException      declared for the servlet contract
     */
    public void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        String path = request.getParameter("path");
        if (path == null || "".equals(path)) {
            return;
        }
        // Re-decode the parameter as UTF-8.
        // NOTE(review): this assumes the container decoded the query string as ISO-8859-1;
        // confirm against the container's URI encoding, otherwise non-ASCII paths are corrupted.
        path = new String(path.getBytes("ISO-8859-1"), "utf-8");

        // SECURITY: `path` comes straight from the request and is opened as-is, so any file the
        // server process can read may be requested. Restrict it to a known base directory
        // before exposing this servlet to untrusted clients.
        File file = new File(path);

        PrintWriter out = null;
        try {
            byte[] buffer = readAllBytes(file);

            // Discard anything already set on the response.
            response.reset();

            String charset = getFileEncode(path);
            System.out.println("============getFileEncode charset:" + charset);
            // Fall back to the BOM check when detection failed or reported a name this JVM
            // cannot decode; new String(buffer, charset) would otherwise throw.
            if (!isSupportedCharset(charset)) {
                charset = getCharset(path);
                System.out.println("============getCharset charset:" + charset);
            }

            response.setContentType("text/html;charset=" + charset);
            out = response.getWriter();
            out.println(new String(buffer, charset));
            out.flush();
        } catch (IOException ex) {
            ex.printStackTrace();
        } finally {
            if (out != null) {
                out.close();
            }
        }
    }

    /**
     * Reads the whole file into memory.
     *
     * <p>Loops until end of stream rather than relying on {@code available()} and a single
     * {@code read}, neither of which is guaranteed to cover the entire file.
     *
     * @param file the file to read
     * @return the file's bytes
     * @throws IOException if the file cannot be opened or read
     */
    private static byte[] readAllBytes(File file) throws IOException {
        InputStream in = new FileInputStream(file);
        try {
            ByteArrayOutputStream sink = new ByteArrayOutputStream();
            byte[] chunk = new byte[READ_CHUNK_SIZE];
            int count;
            while ((count = in.read(chunk)) != -1) {
                sink.write(chunk, 0, count);
            }
            return sink.toByteArray();
        } finally {
            in.close();
        }
    }

    /**
     * Tells whether {@code name} is a charset name this JVM can decode.
     *
     * @param name a charset name, possibly {@code null}
     * @return {@code true} only for a non-null, legal and supported charset name
     */
    private static boolean isSupportedCharset(String name) {
        if (name == null) {
            return false;
        }
        try {
            return java.nio.charset.Charset.isSupported(name);
        } catch (IllegalArgumentException ex) {
            // Thrown for syntactically illegal charset names.
            return false;
        }
    }

    /**
     * Method 1: for reference only, not accurate. Guesses the encoding from the byte order mark
     * at the start of the file.
     *
     * @param fileName path of the file to inspect
     * @return {@code "UTF-8"}, {@code "Unicode"} or {@code "UTF-16BE"} when the matching BOM is
     *         present, otherwise {@code "GB2312"}
     * @throws IOException if the file cannot be opened or read
     */
    private String getCharset(String fileName) throws IOException {
        BufferedInputStream bin = new BufferedInputStream(new FileInputStream(fileName));
        try {
            // read() yields -1 past end of file, which matches none of the BOM bytes below.
            int first = bin.read();
            int second = bin.read();
            int third = bin.read();
            if (first == 0xEF && second == 0xBB && third == 0xBF) {
                return "UTF-8";
            }
            if (first == 0xFF && second == 0xFE) {
                // NOTE(review): "Unicode" is presumably resolved by the JVM as an alias of
                // UTF-16 - confirm; kept unchanged for compatibility.
                return "Unicode";
            }
            if (first == 0xFE && second == 0xFF) {
                return "UTF-16BE";
            }
            return "GB2312";
        } finally {
            bin.close();
        }
    }

    /**
     * Method 2: for reference only, not accurate. Guesses the encoding from the leading bytes of
     * a file that the caller has already read.
     *
     * @param head the first bytes of the file; may be {@code null} or shorter than a BOM
     * @return {@code "UTF-16"}, {@code "Unicode"} or {@code "UTF-8"} when the matching BOM is
     *         present, otherwise {@code "gb2312"}
     */
    private String codetype(byte[] head) {
        if (head != null && head.length >= 2) {
            if (head[0] == (byte) 0xFF && head[1] == (byte) 0xFE) {
                return "UTF-16";
            }
            if (head[0] == (byte) 0xFE && head[1] == (byte) 0xFF) {
                return "Unicode";
            }
            if (head.length >= 3
                    && head[0] == (byte) 0xEF
                    && head[1] == (byte) 0xBB
                    && head[2] == (byte) 0xBF) {
                return "UTF-8";
            }
        }
        return "gb2312";
    }

    /**
     * Method 3: fairly accurate, solved the actual problem. Detects the encoding of a file with
     * cpdetector.
     *
     * @param filePath path of the file to inspect
     * @return the detected charset name, {@code "UTF-8"} when the detector returns no charset,
     *         or {@code null} when detection throws
     */
    public static String getFileEncode(String filePath) {
        try {
            File file = new File(filePath);
            java.nio.charset.Charset charset =
                    configuredDetector().detectCodepage(file.toURI().toURL());
            return charset != null ? charset.name() : "UTF-8";
        } catch (Exception ex) {
            ex.printStackTrace();
            return null;
        }
    }

    /**
     * Returns the detector proxy, registering the individual detectors on first use only.
     *
     * <p>NOTE(review): {@code CodepageDetectorProxy.getInstance()} presumably returns a shared
     * singleton - confirm. Registering once avoids adding a fresh {@code ParsingDetector} to it
     * on every call.
     *
     * @return the configured detector proxy
     */
    private static synchronized CodepageDetectorProxy configuredDetector() {
        if (sharedDetector == null) {
            CodepageDetectorProxy proxy = CodepageDetectorProxy.getInstance();
            // Registration order is kept exactly as before.
            proxy.add(new ParsingDetector(false));
            proxy.add(JChardetFacade.getInstance());
            proxy.add(ASCIIDetector.getInstance());
            proxy.add(UnicodeDetector.getInstance());
            sharedDetector = proxy;
        }
        return sharedDetector;
    }

    /**
     * Handles POST exactly like GET.
     *
     * @param request  the request carrying the {@code path} parameter
     * @param response the response that receives the file contents
     * @throws ServletException declared for the servlet contract
     * @throws IOException      declared for the servlet contract
     */
    public void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        doGet(request, response);
    }
}