首先,创建一个索引库,代码如下:
import os
import posixpath

import xapian
# Resolve the index location against the current working directory at import
# time.  os.path (rather than posixpath) is used so that the path separators
# match the platform the script actually runs on.
base = os.path.abspath('.')
# Path handed to xapian.WritableDatabase / xapian.Database by index() and
# search().
xapian_database_path = os.path.join(base, u'index')
def index():
    """Add the sample strings 'abc' and 'def' to the Xapian index.

    Opens the writable database at ``xapian_database_path`` (using the
    ``DB_CREATE_OR_OPEN`` mode), stores each sample string as the data of
    its own document, indexes that document with the terms generated from
    the same string, and flushes the database once all documents are added.
    """
    db = xapian.WritableDatabase(xapian_database_path,
                                 xapian.DB_CREATE_OR_OPEN)
    term_generator = xapian.TermGenerator()
    term_generator.set_stemmer(xapian.Stem(u'english'))

    for text in ['abc', 'def']:
        document = xapian.Document()
        document.set_data(text)
        # Index the document under its own text, so that querying for
        # 'abc' or 'def' finds the document holding that string.
        term_generator.set_document(document)
        term_generator.index_text(text)
        db.add_document(document)

    db.flush()
搜索代码如下:
def search(query_string):
    """Run ``query_string`` against the Xapian index and print the top hits.

    Prints the estimated number of matches, the number of matches actually
    returned (at most 10), and then one line per match showing its rank,
    percentage score, document id and stored document data.

    Args:
        query_string: Text handed to ``xapian.QueryParser.parse_query``.

    Returns:
        None. If the database at ``xapian_database_path`` cannot be opened,
        the function returns silently without printing anything.
    """
    try:
        database = xapian.Database(xapian_database_path)
    except xapian.DatabaseOpeningError:
        # Deliberate best-effort: presumably the index has not been built
        # yet, so there is nothing to search or report.
        return

    enquire = xapian.Enquire(database)
    query_parser = xapian.QueryParser()
    # Same 'english' stemmer as index() configures on its TermGenerator.
    query_parser.set_stemmer(xapian.Stem('english'))
    query_parser.set_database(database)
    query_parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    query = query_parser.parse_query(query_string)
    enquire.set_query(query)
    matches = enquire.get_mset(0, 10)  # offset 0, at most 10 results

    # print(...) with one pre-formatted string argument produces the same
    # output on Python 2 and Python 3; the bare print statement is a
    # SyntaxError on Python 3.
    print('%i results found.' % matches.get_matches_estimated())
    print('Results 1 - %i:' % matches.size())
    for match in matches:
        data = match.document.get_data()
        # The Python 3 bindings return document data as bytes; decode it so
        # the output reads [abc] rather than [b'abc'].  On Python 2 bytes is
        # str, so this branch is skipped and the value is printed unchanged.
        if isinstance(data, bytes) and not isinstance(data, str):
            data = data.decode('utf-8', 'replace')
        print('%i: %i%% docid=%i [%s]' % (
            match.rank + 1,  # display ranks starting at 1
            match.percent,
            match.docid,
            data,
        ))
调用:
# Example calls; these expect index() to have populated the database first.
search('abc')   # data found
search('def')   # data found
search('XXXX')  # no data found