原文地址:https://my.oschina.net/harmel/blog/491159
本来准备直接使用Lucene,但是新版本的API过于繁琐,最后还是决定使用Hibernate Search来实现全文检索。这篇博文以我以前做的博客系统为例,介绍如何实现全文检索。
1、修改Hibernate配置文件。因为我的系统是采用SSH2开发的,所以我修改的是Spring配置文件:
<!-- Hibernate SessionFactory defined as a Spring bean; it also carries the Hibernate Search settings. -->
<bean id="sessionFactory" class="org.springframework.orm.hibernate4.LocalSessionFactoryBean">
<property name="dataSource" ref="dataSource" />
<property name="hibernateProperties">
<props>
<prop key="hibernate.dialect">org.hibernate.dialect.MySQL5Dialect</prop>
<prop key="hibernate.hbm2ddl.auto">update</prop>
<prop key="hibernate.show_sql">true</prop>
<!-- Hibernate Search: use the file system directory provider for the index. -->
<prop key="hibernate.search.default.directory_provider">filesystem</prop>
<!-- Hibernate Search: directory in which the index files are stored. -->
<prop key="hibernate.search.default.indexBase">E:/index</prop>
</props>
</property>
<!-- XML mapping files of the entities managed by this session factory. -->
<property name="mappingResources">
<list>
<value>cn/harmel/blog/domain/User.hbm.xml</value>
<value>cn/harmel/blog/domain/Category.hbm.xml</value>
<value>cn/harmel/blog/domain/Article.hbm.xml</value>
<value>cn/harmel/blog/domain/Comment.hbm.xml</value>
<value>cn/harmel/blog/domain/Attachment.hbm.xml</value>
</list>
</property>
</bean>
其实就是配置如下两个属性:
hibernate.search.default.directory_provider = filesystem
hibernate.search.default.indexBase = 索引存储目录
2、给实体标注注解
package cn.harmel.blog.domain;
import java.util.Date;
import java.util.HashSet;
import java.util.Set;
import org.hibernate.search.annotations.Analyzer;
import org.hibernate.search.annotations.DocumentId;
import org.hibernate.search.annotations.Field;
import org.hibernate.search.annotations.Indexed;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * A blog article.
 *
 * <p>{@code @Indexed} makes the entity participate in full-text indexing and
 * {@code @Analyzer} selects the IK Chinese analyzer for tokenizing its indexed
 * fields. Only {@code title}, {@code content} and {@code description} carry
 * {@code @Field}; the remaining properties are not annotated for indexing.
 *
 * @author Harmel
 */
@Indexed
@Analyzer(impl = IKAnalyzer.class)
public class Article {
// Identifier of the article; also used as the id of the index document.
@DocumentId
private Long id;
// Indexed with the default @Field settings.
@Field
private String title;
// Indexed with the default @Field settings.
@Field
private String content;
// Indexed with the default @Field settings.
@Field
private String description;
private Date postTime;
private Date lastEditTime;
private int viewCount;
private Category category;
// Initialized to empty sets so the collections are never null on a new instance.
private Set<Comment> comments = new HashSet<Comment>();
private Set<Attachment> attachments = new HashSet<Attachment>();
// Getter and setter methods are omitted here
//........
}
注解说明:
@Indexed:让实体支持索引
@Analyzer :设置分词器,我这里使用的是开源的IK中文分词器
@DocumentId:索引文档ID
@Field :索引字段,该注解默认属性值为
store=Store.NO:是否将字段的原始值存储在索引中。经实验,无论store=Store.NO还是store=Store.YES,都不会影响最终的搜索结果。区别在于取值方式:store=Store.NO时,字段值是从数据库中获取的;store=Store.YES时,字段值可以直接从索引文档中获取。
index=Index.YES:是否索引
analyze=Analyze.YES:是否分词
标注了注解后的实体在保存和更新的时候,会自动生成或修改索引。
3、查询索引
/**
 * Runs a keyword search over the indexed {@code title}, {@code content} and
 * {@code description} fields of {@link Article} and returns one page of
 * results with the matched keywords highlighted.
 *
 * @param pageNum  1-based page number; page 1 starts at result offset 0
 * @param pageSize maximum number of articles on the page
 * @param keyword  text to search for
 * @return the page model holding the highlighted articles of the requested page
 */
public PageModel<Article> searchArticle(int pageNum, int pageSize, String keyword) {
    FullTextSession fts = Search.getFullTextSession(sessionFactory.getCurrentSession());
    QueryBuilder qb = fts.getSearchFactory().buildQueryBuilder().forEntity(Article.class).get();
    Query luceneQuery = qb.keyword().onFields("title", "content", "description").matching(keyword).createQuery();
    FullTextQuery query = fts.createFullTextQuery(luceneQuery, Article.class);
    query.setFirstResult((pageNum - 1) * pageSize);
    query.setMaxResults(pageSize);
    // The query is restricted to Article.class, so the raw list only holds Article instances.
    @SuppressWarnings("unchecked")
    List<Article> data = query.list();
    // Use the total number of hits, not data.size(): the latter is capped at
    // pageSize by setMaxResults and would make every search look like a single page.
    // NOTE(review): assumes the third PageModel constructor argument is the total record count.
    int totalHits = query.getResultSize();
    PageModel<Article> model = new PageModel<>(pageNum, pageSize, totalHits);
    // Replace the raw articles with copies whose searched fields carry highlight markup.
    model.setData(SearchUtils.hightLight(luceneQuery, data, "title", "content", "description"));
    return model;
}
将数据高亮的工具方法:
/**
 * Builds highlighted copies of the given articles.
 *
 * <p>For every input article a new {@link Article} is created. Only the
 * properties named in {@code fields} and the id are copied to it; all other
 * properties of the copy keep their default values. Each copied field is then
 * replaced by the best highlighted fragment when the highlighter finds a match,
 * otherwise the original value is kept. The input articles are not modified.
 *
 * @param query  {@link org.apache.lucene.search.Query} whose terms are highlighted
 * @param data   articles to highlight
 * @param fields names of the properties to copy and highlight
 * @return new article objects carrying the highlighted values, in input order
 * @throws RuntimeException if the highlighter fails on a field value
 */
public static List<Article> hightLight(Query query, List<Article> data, String... fields) {
    List<Article> result = new ArrayList<Article>();
    Formatter formatter = new SimpleHTMLFormatter("<b style=\"color:red\">", "</b>");
    QueryScorer queryScorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(formatter, queryScorer);
    // Tokenize with the IK Chinese analyzer.
    Analyzer analyzer = new IKAnalyzer();
    for (Article source : data) {
        // Return a fresh object instead of writing markup into the queried one.
        Article copy = new Article();
        for (String fieldName : fields) {
            Object fieldValue = readProperty(source, fieldName);
            writeProperty(copy, fieldName, fieldValue);
            if (fieldValue == null) {
                // Nothing to highlight; String.valueOf(null) would otherwise feed
                // the literal text "null" to the highlighter.
                continue;
            }
            String highlighted;
            try {
                highlighted = highlighter.getBestFragment(analyzer, fieldName, String.valueOf(fieldValue));
            } catch (Exception e) {
                throw new RuntimeException("高亮显示关键字失败", e);
            }
            // A null fragment means no match in this field; keep the plain value.
            if (highlighted != null) {
                writeProperty(copy, fieldName, highlighted);
            }
        }
        writeProperty(copy, "id", source.getId());
        result.add(copy);
    }
    return result;
}

/** Reads the named bean property of the article through its getter. */
private static Object readProperty(Article article, String propertyName) {
    return ReflectionUtils.invokeMethod(
            BeanUtils.getPropertyDescriptor(Article.class, propertyName).getReadMethod(), article);
}

/** Writes the named bean property of the article through its setter. */
private static void writeProperty(Article article, String propertyName, Object value) {
    ReflectionUtils.invokeMethod(
            BeanUtils.getPropertyDescriptor(Article.class, propertyName).getWriteMethod(), article, value);
}
4、页面迭代显示
<%-- Renders one article entry for each element of the page model's data list held in the request. --%>
<s:iterator value="#request.pageModel.data">
<div class="article">
<div class="article_title_area">
<%-- The title links to the article detail action, passing the article id. --%>
<%-- NOTE(review): title and description are written through plain EL, which does not HTML-escape, so the highlight markup renders as HTML; confirm the stored content is safe to emit this way. --%>
<span class="article_title"><a href="${pageContext.request.contextPath }/article/show.action?id=${id }">${title }</a></span>
<span class="article_date">发表时间:<s:date name="postTime" format="yyyy-MM-dd HH:mm:ss"/></span>
</div>
<div class="article_content">${description }</div>
<div class="article_count_info">
<span>阅读(${viewCount })</span>
<span>评论(${comments.size() })</span>
</div>
</div>
</s:iterator>