lucene.net是.Net下的全文检索的工具包,不是应用,只是个类库,完成了全文检索的功能;预先把数据拆分成原子(字/词),保存到磁盘中;查询时把关键字也拆分成原子(字/词),再根据(字/词)进行匹配,返回结果。
Nuget安装“Lucene.Net”和“Lucene.Net.Analysis.PanGu”(盘古分词,一个第三方的分词器)
一、lucene.net七大对象
1、Analysis:分词器,负责把字符串拆分成原子,包含了标准分词(直接按空格拆分)。项目中用的是盘古中文分词。
2、Document:数据结构,定义存储数据的格式
3、Index:索引的读写类
4、QueryParser:查询解析器,负责解析查询语句
5、Search:负责各种查询类,命令解析后得到的就是查询类
6、Store:索引存储类,负责文件夹等等
7、Util:常见工具类库
二、常用的查询方式
1、TermQuery:单元查询 ; new Term("title","张三") =》 title:张三
2、BooleanQuery:多条件查询;new Term("title","张三") and new Term("title","李四") =》 +title:张三 +title:李四
new Term("title","张三") or new Term("title","李四") =》 title:张三 title:李四
3、WildcardQuery:通配符查询 ;new Term("title","张?") =》 title:张?
new Term("title","张*") =》 title:张*
4、PrefixQuery:前缀查询; 以xx开头 title:张*
5、PhraseQuery:间隔距离;包含“我”和“人”,但“我”和“人”之间的间隔不超过5个字符: title: "我 人"~5
6、FuzzyQuery:近似查询,ibhone----iphone title:ibhone~
7、NumericRangeQuery:范围查询; [1 TO 100] 包含边界,{1 TO 100} 不包含边界
三、读写范例
1、写
// Write example: builds the product index, one Lucene Document per Commodity.
List<Commodity> commodityList = GetList();

// Indexing date as an int such as 20240131. Formatted and parsed with the
// invariant culture so the value does not depend on the machine's calendar.
int indexedDay = int.Parse(
    DateTime.Now.ToString("yyyyMMdd", System.Globalization.CultureInfo.InvariantCulture),
    System.Globalization.CultureInfo.InvariantCulture);

// Both the directory and the writer are disposable, so both are wrapped in using.
using (FSDirectory directory = FSDirectory.Open("d://Test/productIndex")) // index folder
// Third argument (create) is true: start a fresh index instead of appending to an existing one.
using (IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED))
{
    foreach (Commodity commodity in commodityList)
    {
        // One Document corresponds to one record.
        Document doc = new Document();

        // One Field corresponds to one column: name, value, whether the original
        // value is stored, and whether the value is analyzed (tokenized).
        // NOT_ANALYZED indexes the whole value as a single term.
        // "id" is stored (Store.YES) because search results read it back with
        // doc.Get("id"); a Store.NO field cannot be retrieved from a hit.
        doc.Add(new Field("id", commodity.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("title", commodity.Title, Field.Store.YES, Field.Index.ANALYZED));
        // url / imageurl are searchable as exact terms but are not stored.
        doc.Add(new Field("url", commodity.Url, Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("imageurl", commodity.ImageUrl, Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("content", "this is lucene working,powerful tool", Field.Store.YES, Field.Index.ANALYZED));

        // Numeric fields are indexed so they can be range-filtered and sorted.
        // price is indexed as a double: range queries/filters on it must use the double variants.
        doc.Add(new NumericField("price", Field.Store.YES, true).SetDoubleValue((double)commodity.Price));
        // Alternative: store a full timestamp with SetLongValue(DateTime.Now.ToFileTimeUtc()).
        doc.Add(new NumericField("time", Field.Store.YES, true).SetIntValue(indexedDay));

        writer.AddDocument(doc); // buffered; flushed to disk by the writer
    }

    writer.Optimize(); // optimize, i.e. merge index segments
}
2、读取
// Read example: three ways of querying the index.
// The directory and the searcher are disposable, so both are wrapped in using.
using (FSDirectory dir = FSDirectory.Open(StaticConstant.TestIndexPath))
using (IndexSearcher searcher = new IndexSearcher(dir)) // searcher over the index
{
    // 1. Look up a single term directly.
    {
        // TermQuery does not run the analyzer: the text must equal an indexed term exactly.
        TermQuery query = new TermQuery(new Term("title", "图书馆"));
        TopDocs docs = searcher.Search(query, null, 10000); // at most 10000 hits
        foreach (ScoreDoc hit in docs.ScoreDocs)
        {
            Document doc = searcher.Doc(hit.Doc);
            // doc.Get returns null for fields that were indexed with Field.Store.NO.
            Console.WriteLine("id={0}", doc.Get("id"));
            Console.WriteLine("title={0}", doc.Get("title"));
        }
    }

    // 2. Let the QueryParser analyze a keyword string into a query on "title".
    QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", new PanGuAnalyzer());
    {
        string keyword = "这是一只真实的二哈呀";
        {
            Query query = parser.Parse(keyword);
            TopDocs docs = searcher.Search(query, null, 10000); // at most 10000 hits
            foreach (ScoreDoc hit in docs.ScoreDocs)
            {
                Document doc = searcher.Doc(hit.Doc);
                Console.WriteLine("id={0}", doc.Get("id"));
                Console.WriteLine("title={0}", doc.Get("title"));
            }
        }

        // 3. Same query, restricted to 300 <= price <= 1200 and sorted by time, then price.
        {
            Query query = parser.Parse(keyword);

            // The SortField below treats price as a double, so the range filter must
            // use the double variant too; an int range would not match double-encoded terms.
            NumericRangeFilter<double> priceFilter = NumericRangeFilter.NewDoubleRange("price", 300d, 1200d, true, true);

            // Third SortField argument is "reverse": false = ascending, true = descending.
            SortField sortPrice = new SortField("price", SortField.DOUBLE, false); // ascending
            SortField sortTime = new SortField("time", SortField.INT, true);       // descending
            Sort sort = new Sort(sortTime, sortPrice); // time is the primary key, price the tie-breaker

            TopDocs docs = searcher.Search(query, priceFilter, 10000, sort);

            // Simple paging: only the requested page of hits is loaded and printed.
            const int pageIndex = 0;
            const int pageSize = 20;
            int first = pageIndex * pageSize;
            int last = Math.Min(first + pageSize, docs.ScoreDocs.Length);
            for (int i = first; i < last; i++)
            {
                Document doc = searcher.Doc(docs.ScoreDocs[i].Doc);
                Console.WriteLine("id={0}", doc.Get("id"));
                Console.WriteLine("title={0}", doc.Get("title"));
            }
        }
    }
}