当前位置: 首页 > 工具软件 > NekoHTML > 使用案例 >

用 DW 进行 HTML 标签转换:NekoHTML 转换 HTML 时标签变成大写的问题

晁文斌
2023-12-01

public staticDocument transferByNeko(InputStream stream, String charset)

{if (stream == null)return null;if(StringUtils.isEmpty(charset)){

charset=DEFAULT_CHARSET;

}//NEKOHTML的DOMParser会将html标签转化成大写,是否设置下面的配置都没有意义,解决办法是需要使用xerces的DOMParser//DOMParser domParser = new DOMParser();//Document doc = null;//ByteArrayOutputStream byteOs = null;//Writer writer = null;//InputSource inputSource = null;//DocumentType documentType = null;//org.w3c.dom.Document document = null;//DOMReader domReader = null;//try {//domParser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");//domParser.setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");//domParser.setProperty("http://cyberneko.org/html/properties/default-encoding", "UTF-8");//

//domParser.setFeature("http://xml.org/sax/features/namespaces", false);//domParser.setFeature("http://cyberneko.org/html/features/balance-tags", true);//domParser.setFeature("http://cyberneko.org/html/features/scanner/script/strip-comment-delims", false);//

//byteOs = new ByteArrayOutputStream();//writer = new Writer(byteOs, charset);//XMLDocumentFilter domFilter[] = {//writer//};//domParser.setProperty("http://cyberneko.org/html/properties/filters", domFilter);//inputSource = new InputSource(new InputStreamReader(stream, Charset.forName(charset)));//domParser.parse(inputSource);//document = domParser.getDocument();//documentType = document.getDoctype();//if (documentType != null)//document.removeChild(documentType);//domReader = new DOMReader();//doc = domReader.read(document);//} catch (SAXNotRecognizedException e) {//e.printStackTrace();//} catch (SAXNotSupportedException e) {//e.printStackTrace();//} catch (UnsupportedEncodingException e) {//e.printStackTrace();//} catch (SAXException e) {//e.printStackTrace();//} catch (IOException e) {//e.printStackTrace();//}finally{//IOUtils.closeQuietly(byteOs);//IOUtils.closeQuietly(stream);//}//采用xerces的DOMParser

Document doc = null;

DocumentType documentType= null;

org.w3c.dom.Document document= null;

DOMReader domReader= null;

ByteArrayOutputStream byteOs= null;

Writer writer= null;

InputSource inputSource= null;try{

HTMLConfiguration htmlConfiguration= newHTMLConfiguration();

htmlConfiguration.setProperty("http://cyberneko.org/html/properties/names/elems","lower");

org.apache.xerces.parsers.DOMParser parser= neworg.apache.xerces.parsers.DOMParser(htmlConfiguration);

inputSource= new InputSource(newInputStreamReader(stream, Charset.forName(charset)));

parser.parse(inputSource);

document=parser.getDocument();

documentType=document.getDoctype();if (documentType != null)

document.removeChild(documentType);

domReader= newDOMReader();

doc=domReader.read(document);

}catch(SAXException e) {

e.printStackTrace();

}catch(IOException e) {

e.printStackTrace();

}returndoc;

}

 类似资料: