给定一段HTML代码, 如何将其转化为DOM树以便处理?
如果使用jQuery, 可以使用其 $.parseHTML 方法将HTML代码转化为DOM树.
// Sample markup: a paragraph holding an image and an inline script, used to
// observe what the browser does while the string is being parsed.
var markup = '<p>' +
    '<img src="http://note.youdao.com/styles/images/icon-1.png?96" data-media-type="image">' +
    '<script type="text/javascript">' +
    // `onclick` is spelled with an ASCII "o"; a look-alike Greek omicron here
    // would make the script assign to an unrelated property instead.
    'document.onclick=function(){console.log("click");};' +
    'window.alert("hello");' +
    'document.write("hello");' +
    '</script>' +
    '</p>';

// Turn the markup into an array of DOM nodes with jQuery and inspect it.
var domArray = $.parseHTML(markup);
window.console.log(domArray);
翻看jQuery的源代码, 其parse HTML的原理与以下代码一致:
/**
 * Parse an HTML string by assigning it to the `innerHTML` of a freshly
 * created `div` and collecting that div's child nodes.
 *
 * @param {String} markup HTML source to parse
 * @param {Document} [context] document used to create the wrapper element;
 *     the global `document` is used when omitted
 * @return {Array} the wrapper's child nodes, in index order
 */
var parseHTMLWithDiv = function (markup, context) {
    var ownerDoc = context || document,
        holder = ownerDoc.createElement('div'),
        nodes = [],
        total,
        i = 0;

    holder.innerHTML = markup;

    // Copy the node list into a plain array.
    total = holder.childNodes.length;
    while (i < total) {
        nodes.push(holder.childNodes[i]);
        i += 1;
    }
    return nodes;
};
亦可以将HTML内容放置在隐藏的iframe中进行parse.
/**
 * Parse an HTML string inside a temporary hidden iframe.
 *
 * The iframe is appended to `context.body`, the markup is assigned to the
 * `innerHTML` of the iframe document's body, the resulting child nodes are
 * collected, and the iframe is removed again before returning.
 *
 * @param {String} markup HTML source to parse
 * @param {Document} [context] document that hosts the temporary iframe;
 *     the global `document` is used when omitted
 * @return {Array} child nodes of the iframe document's body
 */
var parseHTMLWithIframe = function (markup, context) {
    context = context || document;
    var iframe = context.createElement('iframe'),
        frameDoc,
        body,
        index,
        len,
        domArray = [];
    iframe.src = '';
    iframe.style.display = 'none';
    context.body.appendChild(iframe);
    try {
        // Fall back to contentWindow.document when contentDocument is not
        // available on the iframe element.
        frameDoc = iframe.contentDocument ||
            (iframe.contentWindow && iframe.contentWindow.document);
        body = frameDoc.body;
        body.innerHTML = markup;
        len = body.childNodes.length;
        for (index = 0; index < len; index++) {
            domArray.push(body.childNodes[index]);
        }
    } finally {
        // Always detach the helper iframe, even when parsing failed, so a
        // failure does not leave a hidden iframe behind in the page.
        context.body.removeChild(iframe);
    }
    return domArray;
};
在parse HTML过程中, 如果仔细观察, 可以发现以下几点:
如果parse之后的HTML代码并不需要注入到页面上, 那么parse HTML过程中浏览器自动发出的图片src请求就会白白占用网络请求等资源, 这是不完美的.
该如何做才不会让浏览器自动发出图片src请求呢?
jQuery中有一个与 $.parseHTML() 类似的方法, 叫做 $.parseXML(), 用于parse XML. 查看源码:
// Cross-browser xml parsing
/**
 * Parse a string as an XML document using DOMParser.
 *
 * @param {String} data XML source text
 * @return {Document|null} the parsed document, or null when `data` is
 *     falsy or not a string
 */
jQuery.parseXML = function( data ) {
var xml, tmp;
// Nothing to parse: empty or non-string input yields null.
if ( !data || typeof data !== "string" ) {
return null;
}
// Support: IE9
try {
tmp = new DOMParser();
xml = tmp.parseFromString( data, "text/xml" );
} catch ( e ) {
// A thrown parse error is treated the same as "no document".
xml = undefined;
}
// Report invalid XML when there is no document or when the document
// contains a <parsererror> element.
// NOTE(review): jQuery.error is defined elsewhere; presumably it throws — confirm.
if ( !xml || xml.getElementsByTagName( "parsererror" ).length ) {
jQuery.error( "Invalid XML: " + data );
}
return xml;
};
jQuery 使用了 DOMParser 对XML文档进行parse. DOMParser 不但能够parse XML文档, 还能parse HTML文档.
// Content types accepted as the `type` argument of parseFromString.
enum SupportedType {
"text/html",
"text/xml",
"application/xml",
"application/xhtml+xml",
"image/svg+xml"
};
// DOMParser is constructible and exposes a single operation that parses a
// string of the given type and returns a Document.
[Constructor]
interface DOMParser {
Document parseFromString(DOMString str, SupportedType type);
};
使用 DOMParser 分析HTML文档时, 浏览器不会自动发出图片src的请求. 在不支持DOMParser的浏览器中, 有一个替代方案: DOMImplementation.createHTMLDocument
/**
 * There are two ways to parse an html snippet:
 * 1. parse html in a virtual Document/DOMParser object.
 * 2. create a `div` element as wrapper and set html as its innerHTML.
 *
 * The 1st way can prevent loading images that are in the html and is safer.
 * This function implements the 1st way only.
 *
 * NOTE: This function does not support ie8 and ie8-
 *
 * @param {String} markup the html string that can be set as the innerHTML
 *     of <body/>
 * @param {Document} [context] document whose `implementation` /
 *     `defaultView` are used; the global `document` is used when omitted
 * @return {Document} a separate document whose body holds the parsed markup;
 *     if the returned value is null, you can follow the 2nd way.
 */
function parseHTML(markup, context) {
    var doc,
        parser,
        win;
    context = context || document;
    if (context.implementation &&
            context.implementation.createHTMLDocument) {
        // Pass the title explicitly: some older browsers throw when the
        // argument is omitted.
        doc = context.implementation.createHTMLDocument('');
        doc.body.innerHTML = markup;
        return doc;
    }
    win = context.defaultView || window;
    if (win.DOMParser) {
        parser = new win.DOMParser();
        try {
            doc = parser.parseFromString('', 'text/html');
        } catch (ex) {
            // 'text/html' is not accepted by this DOMParser; treat it as
            // unsupported and let the caller fall back to the 2nd way.
            doc = null;
        }
        // Only use the document when it actually exposes a body to fill.
        if (doc && doc.body) {
            doc.body.innerHTML = markup;
            return doc;
        }
    }
    // Neither mechanism is available: signal it with the documented null.
    return null;
}