In CLucene, StandardAnalyzer is built with the Chain of Responsibility pattern. The relevant declarations are:
class CLUCENE_EXPORT StandardTokenizer: public Tokenizer {
public:
    Token* next(Token* token);
};
class CLUCENE_EXPORT StandardFilter: public TokenFilter {
public:
    // Constructs a filter over the given TokenStream in.
    StandardFilter(TokenStream* in, bool deleteTokenStream);
    virtual ~StandardFilter();
    Token* next(Token* token);
};
class CLUCENE_EXPORT LowerCaseFilter: public TokenFilter {
public:
    LowerCaseFilter(TokenStream* in, bool deleteTokenStream);
    virtual ~LowerCaseFilter();
    Token* next(Token* token);
};
class CLUCENE_EXPORT StopFilter: public TokenFilter {
public:
    StopFilter(TokenStream* in, bool deleteTokenStream, const TCHAR** _stopWords, const bool _ignoreCase = false);
    virtual ~StopFilter();
    Token* next(Token* token);
};
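Each filter holds a pointer to the TokenStream it wraps, which is what makes this a chain. A minimal sketch of the wiring (simplified types, illustrative only, not CLucene's actual implementation):

// Simplified sketch: a filter is itself a TokenStream and delegates
// to the stream it wraps.
struct TokenStream {
    virtual Token* next(Token* t) = 0;   // fills t; returns NULL at end of stream
    virtual ~TokenStream() {}
};

struct TokenFilter : public TokenStream {
    TokenStream* input;                  // the next link in the chain
    bool deleteTokenStream;              // own the wrapped stream?
    TokenFilter(TokenStream* in, bool del) : input(in), deleteTokenStream(del) {}
    virtual ~TokenFilter() { if (deleteTokenStream) delete input; }
};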
The code that assembles the chain:
TokenStream* StandardAnalyzer::tokenStream(const TCHAR* /*fieldName*/, Reader* reader)
{
    BufferedReader* bufferedReader = reader->__asBufferedReader();
    TokenStream* ret;
    if ( bufferedReader == NULL )
        ret = _CLNEW StandardTokenizer( _CLNEW FilteredBufferedReader(reader, false), true );
    else
        ret = _CLNEW StandardTokenizer(bufferedReader);
    //ret->setMaxTokenLength(maxTokenLength);
    ret = _CLNEW StandardFilter(ret, true);
    ret = _CLNEW LowerCaseFilter(ret, true);
    ret = _CLNEW StopFilter(ret, true, stopSet);
    return ret;
}
The object finally returned is the outermost StopFilter. Each wrapper is constructed with deleteTokenStream == true, so deleting the returned StopFilter tears down the whole chain.
When the client drives the stream:

Token t;
while ( pStream->next(&t) != NULL )
{
}

the StopFilter object is the first one invoked; each stage then pulls tokens from the stream it wraps.
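For context, a complete client might look like this (a sketch: StringReader and Token::termBuffer() are CLucene utilities, but the field name, text, and _tprintf output call are illustrative assumptions):

#include "CLucene.h"
using namespace lucene::analysis;
using namespace lucene::util;

void printTokens(const TCHAR* text) {
    standard::StandardAnalyzer analyzer;
    StringReader reader(text);
    // tokenStream() hands back the outermost StopFilter
    TokenStream* pStream = analyzer.tokenStream(_T("contents"), &reader);
    Token t;
    while ( pStream->next(&t) != NULL ) {
        _tprintf(_T("%s\n"), t.termBuffer());  // tokenized, normalized, lowercased, stop words removed
    }
    _CLDELETE(pStream);  // deleteTokenStream == true frees the whole chain
}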
The main work of the chain is divided as follows:

Token* StandardTokenizer::next(Token* t) performs the actual tokenization, splitting the input into tokens.
Token* StandardFilter::next(Token* t) normalizes tokens, e.g. stripping the dots from acronyms and the trailing 's from words.
Token* LowerCaseFilter::next(Token* t) lowercases the token text.
Token* StopFilter::next(Token* token) drops stop words.
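To make the pull order concrete, here is a sketch of what a stage like StopFilter::next does (isStopWord is a hypothetical helper, not CLucene's actual code):

// Sketch of one link: pull from upstream until a token survives the filter.
// The client calls the StopFilter, but the tokenizer produces tokens first.
Token* StopFilter::next(Token* t) {
    while (input->next(t) != NULL) {   // LowerCaseFilter -> StandardFilter -> StandardTokenizer
        if (!isStopWord(t))            // hypothetical helper checking the stop set
            return t;                  // keep this token
        // stop word: discard it and pull the next one
    }
    return NULL;                       // upstream is exhausted
}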
There is also a reusable variant:
TokenStream* StandardAnalyzer::reusableTokenStream(const TCHAR* fieldName, Reader* reader){
    SavedStreams* streams = reinterpret_cast<SavedStreams*>(getPreviousTokenStream());
    if (streams == NULL) {
        streams = _CLNEW SavedStreams();
        setPreviousTokenStream(streams);
        BufferedReader* bufferedReader = reader->__asBufferedReader();
        if ( bufferedReader == NULL )
            streams->tokenStream = _CLNEW StandardTokenizer( _CLNEW FilteredBufferedReader(reader, false), true);
        else
            streams->tokenStream = _CLNEW StandardTokenizer(bufferedReader);
        streams->filteredTokenStream = _CLNEW StandardFilter(streams->tokenStream, true);
        streams->filteredTokenStream = _CLNEW LowerCaseFilter(streams->filteredTokenStream, true);
        streams->filteredTokenStream = _CLNEW StopFilter(streams->filteredTokenStream, true, stopSet);
    } else {
        streams->tokenStream->reset(reader);
    }
    //streams->tokenStream->setMaxTokenLength(maxTokenLength);
    return streams->filteredTokenStream;
}
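reusableTokenStream builds the four-object chain once, caches it in SavedStreams, and on later calls only resets the tokenizer's reader, so analyzing many documents avoids reallocating the chain each time. A usage sketch (illustrative text; assuming, as in Lucene, that the analyzer owns the cached chain and frees it on destruction, so the caller must not delete it):

standard::StandardAnalyzer analyzer;
const TCHAR* docs[] = { _T("first document"), _T("second document") };
for (int i = 0; i < 2; ++i) {
    StringReader reader(docs[i]);
    TokenStream* ts = analyzer.reusableTokenStream(_T("contents"), &reader);
    Token t;
    while ( ts->next(&t) != NULL ) {
        // consume tokens; the chain was built only on the first iteration
    }
    // note: do not _CLDELETE(ts) -- the analyzer caches and owns it
}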