<!-- enCode: Maven dependency for jchardet (character-set detection) -->
<dependency>
<groupId>net.sourceforge.jchardet</groupId>
<artifactId>jchardet</artifactId>
<version>1.0</version>
</dependency>
import java.io.*;
import org.mozilla.intl.chardet.nsDetector;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;
/**
 * Guesses the character encoding of an uploaded file with jchardet's {@code nsDetector}.
 *
 * <p>No per-call state is kept in fields: every detection works on local variables and a
 * fresh detector, so the result of one call cannot leak into the next one.
 *
 * @author liubin
 * @create 2019-01-30 17:01
 **/
@Component
public class EncodeUtils {

    /** Size of the chunk read from the upload on each iteration. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Best-effort variant of {@link #guessFileEncoding(MultipartFile)} that never throws
     * {@link IOException}.
     *
     * @param file the uploaded file to inspect
     * @return the guessed encoding, or {@code null} when the file could not be read or the
     *         detector produced no candidate
     */
    public String findEnCoding(MultipartFile file) {
        try {
            return guessFileEncoding(file);
        } catch (IOException e) {
            // Deliberate best-effort: report the failure and signal "unknown" with null.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Detects the encoding of the given upload using a detector with default settings.
     *
     * @param file the uploaded file to inspect
     * @return the guessed encoding, or {@code null} if the detector returned no candidates
     * @throws IOException if the upload's content cannot be read
     */
    public String guessFileEncoding(MultipartFile file) throws IOException {
        return guessFileEncoding(file, new nsDetector());
    }

    /**
     * Detects the encoding of the given upload, giving the detector a language hint.
     *
     * <pre>
     * languageHint values (see jchardet's nsPSMDetector):
     *   1 : Japanese
     *   2 : Chinese
     *   3 : Simplified Chinese
     *   4 : Traditional Chinese
     *   5 : Korean
     *   6 : Don't know (default)
     * </pre>
     *
     * @param file         the uploaded file to inspect
     * @param languageHint language hint code passed to the detector (see table above)
     * @return the guessed encoding, e.g. {@code UTF-8}, {@code GBK}, {@code GB2312}; when the
     *         detector is unsure, the first of its probable charsets; {@code null} if it
     *         returned no candidates
     * @throws IOException if the upload's content cannot be read
     */
    public String guessFileEncoding(MultipartFile file, int languageHint) throws IOException {
        return guessFileEncoding(file, new nsDetector(languageHint));
    }

    /**
     * Streams the upload through the detector and returns the best guess.
     *
     * <p>The file is reported as {@code "ASCII"} only when at least one byte was read and
     * every chunk read was pure ASCII. Otherwise the first entry of the detector's probable
     * charsets is returned.
     *
     * @param file the uploaded file to inspect
     * @param det  a fresh detector instance dedicated to this call
     * @return the guessed encoding, or {@code null} if the detector returned no candidates
     * @throws IOException if the upload's content cannot be read
     */
    private String guessFileEncoding(MultipartFile file, nsDetector det) throws IOException {
        boolean sawData = false;
        // Stays true only while every chunk seen so far is 7-bit clean.
        boolean isAscii = true;

        // try-with-resources closes the stream even if reading or detection throws.
        try (InputStream in = file.getInputStream()) {
            byte[] buf = new byte[BUFFER_SIZE];
            int len;
            while ((len = in.read(buf, 0, buf.length)) != -1) {
                if (len > 0) {
                    sawData = true;
                }
                // Once a non-ASCII chunk has been seen the file can no longer be plain ASCII,
                // so stop re-checking and keep feeding the detector instead.
                if (isAscii) {
                    isAscii = det.isAscii(buf, len);
                }
                if (!isAscii && det.DoIt(buf, len, false)) {
                    // The detector has made up its mind; no need to read further.
                    break;
                }
            }
        }
        det.DataEnd();

        if (sawData && isAscii) {
            return "ASCII";
        }

        // Several candidates may be reported; only the first one is used.
        // NOTE(review): jchardet appears to report a literal "nomatch" entry when nothing
        // matches -- confirm whether callers would rather receive null in that case.
        String[] prob = det.getProbableCharsets();
        return prob.length > 0 ? prob[0] : null;
    }
}