public staticDocument transferByNeko(InputStream stream, String charset)
{if (stream == null)return null;if(StringUtils.isEmpty(charset)){
charset=DEFAULT_CHARSET;
}//NEKOHTML的DOMParser会将html标签转化成大写,是否设置下面的配置都没有意义,解决办法是需要使用xerces的DOMParser//DOMParser domParser = new DOMParser();//Document doc = null;//ByteArrayOutputStream byteOs = null;//Writer writer = null;//InputSource inputSource = null;//DocumentType documentType = null;//org.w3c.dom.Document document = null;//DOMReader domReader = null;//try {//domParser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");//domParser.setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");//domParser.setProperty("http://cyberneko.org/html/properties/default-encoding", "UTF-8");//
//domParser.setFeature("http://xml.org/sax/features/namespaces", false);//domParser.setFeature("http://cyberneko.org/html/features/balance-tags", true);//domParser.setFeature("http://cyberneko.org/html/features/scanner/script/strip-comment-delims", false);//
//byteOs = new ByteArrayOutputStream();//writer = new Writer(byteOs, charset);//XMLDocumentFilter domFilter[] = {//writer//};//domParser.setProperty("http://cyberneko.org/html/properties/filters", domFilter);//inputSource = new InputSource(new InputStreamReader(stream, Charset.forName(charset)));//domParser.parse(inputSource);//document = domParser.getDocument();//documentType = document.getDoctype();//if (documentType != null)//document.removeChild(documentType);//domReader = new DOMReader();//doc = domReader.read(document);//} catch (SAXNotRecognizedException e) {//e.printStackTrace();//} catch (SAXNotSupportedException e) {//e.printStackTrace();//} catch (UnsupportedEncodingException e) {//e.printStackTrace();//} catch (SAXException e) {//e.printStackTrace();//} catch (IOException e) {//e.printStackTrace();//}finally{//IOUtils.closeQuietly(byteOs);//IOUtils.closeQuietly(stream);//}//采用xerces的DOMParser
Document doc = null;
DocumentType documentType= null;
org.w3c.dom.Document document= null;
DOMReader domReader= null;
ByteArrayOutputStream byteOs= null;
Writer writer= null;
InputSource inputSource= null;try{
HTMLConfiguration htmlConfiguration= newHTMLConfiguration();
htmlConfiguration.setProperty("http://cyberneko.org/html/properties/names/elems","lower");
org.apache.xerces.parsers.DOMParser parser= neworg.apache.xerces.parsers.DOMParser(htmlConfiguration);
inputSource= new InputSource(newInputStreamReader(stream, Charset.forName(charset)));
parser.parse(inputSource);
document=parser.getDocument();
documentType=document.getDoctype();if (documentType != null)
document.removeChild(documentType);
domReader= newDOMReader();
doc=domReader.read(document);
}catch(SAXException e) {
e.printStackTrace();
}catch(IOException e) {
e.printStackTrace();
}returndoc;
}