jQuery源码分析 Sizzle选择器

终安和

2023-12-01

jQuery版本 version = "1.11.1"

前言

Sizzle是jQuery里面的选择器引擎。在jQuery版本1.11.1中，Sizzle这部分可以看做jQuery中相对独立的一部分，大概有2000多行的代码。Sizzle选择器非常注重效率，优先使用浏览器自带的选择器进行选择，比如：getElementById、getElementsByTagName、getElementsByClassName。如果selector比较复杂，首先判断浏览器中的高级选择器querySelectorAll是否可靠，如果可靠就直接使用querySelectorAll。现在的很多高级浏览器（比如Firefox、Chrome等）一般直接使用querySelectorAll就可以完成选择。除此之外的情况则使用Sizzle.select = function( selector, context, results, seed )方法。Sizzle.select这部分的内容比较复杂，基本上模拟了类似于querySelectorAll的功能。

由于浏览器的种类多种多样，不同浏览器的不同版本对一些JavaScript函数的支持情况不一致，有些则存在一些bug。比如有些环境使用getElementsByClassName不能获取到<div class='a i'></div>后面class为i的这个<div>。Sizzle.setDocument这个函数的作用大致是检测当前环境对各个选择器的支持情况，比如：support.attributes检测getAttribute是否可靠（返回的确实是attribute而不是property）。若支持则返回true，否则返回false。

jQuery中Sizzle的大体框架

Sizzle作为jQuery中相对独立的一部分，省略掉枝叶，大致有以下主要内容。（这里只是简要注释了一下主要函数）

var Sizzle =
/*!
 * Sizzle CSS Selector Engine v1.10.19
 * http://sizzlejs.com/
 */
(function( window ) {

	// NOTE: this is an outline of the engine, not the full source. Function bodies
	// are elided ("//...") and only the main entry points are listed.

	// Selector entry point. When getElementById, getElementsByTagName,
	// getElementsByClassName and querySelectorAll cannot handle the selector,
	// it falls back to Sizzle.select.
	function Sizzle( selector, context, results, seed ) {
		//...
	}

	// Probes the browser's support for the various methods/properties and stores
	// the results on the `support` object; also installs the matching-related
	// hooks on the engine's key matching variables.
	// Bound to a local name as well, so the bare setDocument() call at the end of
	// this closure resolves.
	var setDocument = Sizzle.setDocument = function( node ) {
		//...
	};

	// Matches the selector against the regular expressions and produces a
	// structured array of tokens.
	Sizzle.tokenize = function( selector, parseOnly ) {
		//...
	};

	//  ">": { dir: "parentNode", first: true },
	//	" ": { dir: "parentNode" },
	//	"+": { dir: "previousSibling", first: true },
	//	"~": { dir: "previousSibling" }
	// When the selector contains one of the relationships above, addCombinator
	// is used to locate the related node, which is then matched.
	function addCombinator( matcher, combinator, base ) {
		//...
	}

	// `matchers` are the match conditions. When there are several, all of them are
	// walked and true is returned only if every one of them holds.
	function elementMatcher( matchers ) {
		//...
	}

	// NOTE(review): left undocumented in the original outline; presumably builds
	// the matcher for set-based filtering — confirm against the Sizzle source.
	function setMatcher( preFilter, selector, matcher, postFilter, postFinder, postSelector ) {
		//...
	}

	// Walks the structured `tokens` array and returns the function produced by
	// elementMatcher for it.
	function matcherFromTokens( tokens ) {
		//...
	}

	function matcherFromGroupMatchers( elementMatchers, setMatchers ) {
		//...
	}

	// Compilation step.
	Sizzle.compile = function( selector, match /* Internal Use Only */ ) {
		//...
		//
		// Cache the compiled function
		cached = compilerCache( selector, matcherFromGroupMatchers( elementMatchers, setMatchers ) );

		// Save selector and tokenization
		cached.selector = selector;
	};

	// Relationship selectors (>, +, ' ', ~)

	// Calls tokenize( selector ) to turn the selector into a structured array,
	// then narrows the search range:
	// 1: if the root selector is an ID and is not of the form "#ID +" or "#ID ~",
	//    a new context is set to reduce the search range
	// 2: picks the seed: scanning from right to left, finds a non-relationship
	//    (entity) selector and uses the elements matching it within the context
	//    as the seed set
	// Finally calls Sizzle.compile to compile the selector.
	Sizzle.select = function( selector, context, results, seed ) {
		//...
	};

	// Run setDocument immediately to probe the environment
	setDocument();

	return Sizzle;

})( window );


jQuery.find = Sizzle;

思路：先准备备选种子【Sizzle最终执行结果是其子集】elems（指定或者document.getElementsByTagName("*")）;对每一个tokens的词语，我都有相应的匹配函数来判断某个备选种子是否满足条件【分为两种情况：实体选择器（除开关系选择器外的其他选择器）直接比较种子是否满足实体条件即可；关系选择器（">"/"+"/" "/"~"）需要和前一个实体选择器共同组成一个判断函数】，我们将所有的匹配函数连接起来返回一个整体的匹配函数，最后我们将所有的种子一一遍历，代入这个整体匹配函数中执行，返回真表示是我们需要的结果，返回假直接剔除即可。遍历匹配完所有的种子，我们就得到了想要的结果了。
并将结果保存在缓存数组中。

举一个简单的例子："p > span",假设"span"的匹配函数为match1，"p > "的匹配函数为match2，那么我必须满足匹配条件：allMatch = match1+match2才是我们想要的结果，需要注意的是我们添加最终匹配函数的时候是根据CSS选择器从左到右添加，但是执行最终匹配函数的时候却是从右到左执行的（match2 -> match1）。遍历种子集合seeds,将seeds[0]代入allMatch中执行，返回结果为true则保存起来，返回结果为false则略过；接着将seeds[1]代入allMatch中执行……

Sizzle的调用入口

说到Sizzle选择器，我们需要再次回到jQuery.fn.init这个函数, 看看jQuery初始化时对不同参数的处理情况。jQuery.fn.init中大致有这9种情况。

jQuery.find = Sizzle;

// Finds descendants of the elements in the current set.
//
// selector: a selector string, or any other value accepted by jQuery( selector ).
// Returns a new jQuery object created through pushStack.
jQuery.fn.find = function( selector ) {
		var i,
			ret = [],
			self = this,
			len = self.length;

		// Non-string argument: wrap it with jQuery() and keep only the elements
		// that are contained in at least one element of the current set.
		if ( typeof selector !== "string" ) {
			return this.pushStack( jQuery( selector ).filter(function() {
				for ( i = 0; i < len; i++ ) {
					if ( jQuery.contains( self[ i ], this ) ) {
						return true;
					}
				}
			}) );
		}

		for ( i = 0; i < len; i++ ) {
			// Entry point into the Sizzle selector engine: each element of the
			// current set is used as the context and results accumulate in `ret`.
			jQuery.find( selector, self[ i ], ret );
		}

		// jQuery.unique is applied only when more than one context was searched
		ret = this.pushStack( len > 1 ? jQuery.unique( ret ) : ret );
		ret.selector = this.selector ? this.selector + " " + selector : selector;
		return ret;
	};
	
// Initializer behind jQuery( selector, context ). It dispatches on the kind of
// `selector` (falsy value, HTML string, "#id", selector expression, DOM element,
// function, or anything else) and either fills `this` or delegates to .find().
jQuery.fn.init = function( selector, context ) {
		var match, elem;

		// Case 1: $(""), $(null), $(undefined), $(false)
		if ( !selector ) {
			return this;
		}

		if ( typeof selector === "string" ) {
			if ( selector.charAt(0) === "<" && selector.charAt( selector.length - 1 ) === ">" && selector.length >= 3 ) {
				// Assume that strings that start and end with <> are HTML and skip the regex check
				match = [ null, selector, null ];

			} else {
				//rquickExpr = /^(?:\s*(<[\w\W]+>)[^>]*|#([\w-]*))$/
				// Use the regex to check whether the selector has the form <...> or #id
				match = rquickExpr.exec( selector );
			}

			if ( match && (match[1] || !context) ) {

				// Case 2: $(html) -> $(array)
				if ( match[1] ) {
					context = context instanceof jQuery ? context[0] : context;

					jQuery.merge( this, jQuery.parseHTML(
						match[1],
						context && context.nodeType ? context.ownerDocument || context : document,
						true
					) );

					// Case 3: $(html, props)
					if ( rsingleTag.test( match[1] ) && jQuery.isPlainObject( context ) ) {
						// `match` is reused here as the loop variable holding each property name
						for ( match in context ) {
	
							// Properties that name a jQuery method are called as methods ...
							if ( jQuery.isFunction( this[ match ] ) ) {
								this[ match ]( context[ match ] );

							// ... everything else is set as an attribute
							} else {
								this.attr( match, context[ match ] );
							}
						}
					}

					return this;

				// Case 4: $(#id)
				} else {
					elem = document.getElementById( match[2] );

					// Check parentNode to catch when Blackberry 4.6 returns
					// nodes that are no longer in the document #6963
					if ( elem && elem.parentNode ) {
						// Handle the case where IE and Opera return items
						// by name instead of ID
						if ( elem.id !== match[2] ) {
							return rootjQuery.find( selector );   
						}

						this.length = 1;
						this[0] = elem;
					}

					this.context = document;
					this.selector = selector;
					return this;
				}

			// Case 5: $(expr, $(...))
			} else if ( !context || context.jquery ) {
				return ( context || rootjQuery ).find( selector );   

			// Case 6: $(expr, context)
			// (which is just equivalent to: $(context).find(expr)
			} else {
				return this.constructor( context ).find( selector );
			}

		// Case 7: $(DOMElement)
		} else if ( selector.nodeType ) {
			this.context = this[0] = selector;
			this.length = 1;
			return this;

		// Case 8: $(function)
		} else if ( jQuery.isFunction( selector ) ) {
			return typeof rootjQuery.ready !== "undefined" ?
				rootjQuery.ready( selector ) :
				selector( jQuery );
		}

		// Case 9: anything else. When the argument carries a `selector` property
		// (e.g. it is itself a jQuery object), copy its selector and context over.
		if ( selector.selector !== undefined ) {
			this.selector = selector.selector;
			this.context = selector.context;
		}

		return jQuery.makeArray( selector, this );
	};

情况1：$(""), $(null), $(undefined), $(false)

参数为假值时，直接返回自身实例

情况2：$(html) -> $(array)

参数为html的字符串时，使用parseHTML对其进行解析

情况3： $(html, props)

比如在下面的例子中，创建一个div元素，并设置类样式为“test”、设置文本内容为“Click me!”、绑定一个click事件，然后插入body节点的末尾，当点击该div元素时，还会切换类样式test。

// Creates a <div> from the HTML string, passing a props object as the second
// argument ("class", text, click), then appends the result to "body".
$("<div/>", {  
   "class": "test",  
   text: "Click me!",  
   click: function(){  
     // Toggle the "test" class on the element each time the handler runs
     $(this).toggleClass("test");  
   }  
}).appendTo("body");

我们可以在源码中看到，它遍历了prop。做了类似下面的处理

// Step-by-step equivalent of what jQuery.fn.init does with the props object:
// for each key, a key that names a jQuery method is called as that method,
// otherwise the value is set as an attribute.
var $cc = $("<div/>");
// "class" is not a jQuery method, so it is set through attr()
$cc.attr("class", "test");
// "text" is a jQuery method, so it is called as a method (not set via attr)
$cc.text("Click me!");
// "click" is a jQuery method as well, so the handler is bound through click()
$cc.click(function() {
	$(this).toggleClass("test");
});

情况4：$(#id)

match[2]不为空，说明selector是"#id"形式。直接使用getElementById获取到DOM对象，并通过检查parentNode确保该elem在当前文档结构中。

情况5： $(expr,$(...))

如果context为假值，则返回rootjQuery.find(selector)，即$(document).find(selector)；否则（context是jQuery对象）返回context.find(selector)。

情况6： $(expr, context)

如果提供的context不是jQuery对象，则返回$(context).find(expr)

情况7： $(DOMElement)

为DOMElement时，直接设置length、this[0]、context就可以返回

情况8: $(function)

如果参数是一个function，这个时候就为$(document).ready(function);等待网页中的所有DOM结构绘制完毕之后，运行function。因此$(document).ready(function);可以简写为$(function);

情况9:$($(***))

如果传入的selector本身就是一个jQuery对象，则直接拷贝一份并返回。返回的并不是原来的jQuery对象。

可以看到上述情况中，情况5和情况6会用到sizzle

Sizzle中的缓存

Sizzle中的具体代码分析这里就不详细分析了，Sizzle很注重效率。每次进行编译、生成token的过程中都会首先查询缓存，或者以key/value的形式存入缓存。Sizzle中缓存的运用有以下部分。

// Two independent caches, each created by its own createCache() call:
// one read and written by Sizzle.tokenize, one by Sizzle.compile.
var tokenCache = createCache();
var compilerCache = createCache();

/**
 * Builds a size-limited key/value cache.
 *
 * The returned function stores `value` under `key + " "` as a property of
 * itself and returns `value`. Once more than Expr.cacheLength keys have been
 * recorded, the oldest recorded key is removed.
 */
function createCache() {
	// Insertion-ordered list of the (padded) keys recorded so far
	var storedKeys = [];

	function cache( key, value ) {
		// Use (key + " ") to avoid collision with native prototype properties (see Issue #157)
		var paddedKey = key + " ";

		storedKeys.push( paddedKey );
		if ( storedKeys.length > Expr.cacheLength ) {
			// Keep only the newest entries: drop the oldest recorded key
			delete cache[ storedKeys.shift() ];
		}

		cache[ paddedKey ] = value;
		return value;
	}

	return cache;
}

// Abridged excerpt of Sizzle.tokenize showing only how tokenCache is used.
// selector:  the selector string, also used as the cache key
// parseOnly: when truthy, a cache hit yields 0 instead of the token groups
Sizzle.tokenize = function( selector, parseOnly ) {
	// (omitted)
	var cached = tokenCache[ selector + " " ];
	if ( cached ) {
		// Cache hit: hand back a copy so callers cannot modify the cached array
		return parseOnly ? 0 : cached.slice( 0 );
	}
	// (omitted: the actual tokenization, which produces `groups`)

	// Store the tokens in the cache and return a copy of them
	return tokenCache( selector, groups ).slice( 0 );
};

// Abridged excerpt of Sizzle.compile showing only how compilerCache is used.
// Returns the cached compiled function for `selector`, creating and caching it
// first when there is no cache entry yet.
compile = Sizzle.compile = function( selector, match /* Internal Use Only */ ) {
	// (omitted)
		// Look up a previously compiled function, keyed by selector + " "
		var cached = compilerCache[ selector + " " ];

	if ( !cached ) {
	// (omitted)
		// Cache the compiled function and remember which selector it belongs to
		cached = compilerCache( selector, matcherFromGroupMatchers( elementMatchers, setMatchers ) );
		cached.selector = selector;
	}
	return cached;
};

可以看到createCache很明显地使用了JavaScript闭包：它返回一个function（cache），该函数可以访问和修改局部变量keys。jQuery（Sizzle）中默认的cacheLength为50，如果keys数组的长度超过了cacheLength，则删除数组最前面（最早存入）的key，只保留最近存入的key。同时为了避免key值与原生的一些属性名冲突，对key做了处理：key + " "。

这里要提到一个小技巧：它只将key值存在一个数组中，而没有将value值也存在数组中；value值是作为cache函数自身（即tokenCache或者compilerCache）的属性保存的。

jQuery源码分析 Sizzle选择器

前言

jQuery中Sizzle的大体框架

Sizzle的调用入口

Sizzle中的缓存

相关阅读

相关文章

相关问答

相关文档