最近对python项目进行版本升级,之前用的是低版本的pylucene(3.x版本),现在用的是pylucene 8.1.1,升级过程中遇到了一些问题,记录如下。
1.导包问题:
低版本的导包:
from lucene import SimpleFSDirectory, System, File,\
Document, Field,CJKAnalyzer, IndexWriter, Version, \
IndexSearcher,QueryParser,BooleanQuery,BooleanClause,\
MultiFieldQueryParser,RAMDirectory
高版本如果还这样直接从lucene模块导入就会报错,各个类需要从各自对应的Java包路径中导入。
更改如下:
import lucene
from org.apache.lucene.search import IndexSearcher
from org.apache.lucene.index import IndexWriter,DirectoryReader
from org.apache.lucene.store import SimpleFSDirectory,RAMDirectory,FSDirectory
from org.apache.lucene.document import Document, Field
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.util import Version
from org.apache.lucene.search import BooleanQuery,BooleanClause
from org.apache.lucene.analysis.cjk import CJKAnalyzer
from org.apache.lucene.queryparser.classic import MultiFieldQueryParser
# from java.io import File
from java.nio.file import Paths
from java.lang import System
这里需要你重新去查一下API文档,找到对应的包。
需要指出的是,低版本中用java.io.File表示的索引路径,在高版本中需要用java.nio.file.Paths来替换。
如:
#SimpleFSDirectory(File(INDEX_DIR_COMMON))
SimpleFSDirectory(Paths.get(INDEX_DIR_COMMON))
2.CJKAnalyzer模块
高版本导包:
from org.apache.lucene.analysis.cjk import CJKAnalyzer
高版本的CJKAnalyzer构造函数不再接收Version参数,所以CJKAnalyzer(Version.LUCENE_CURRENT)无法使用,我换成Version.LUCENE_8_1_1也不行,最后需要直接使用无参构造:
#lucene_analyzer= CJKAnalyzer(Version.LUCENE_CURRENT)
lucene_analyzer= CJKAnalyzer()
3.QueryParser模块(高版本的构造函数不再接收Version参数,去掉即可):
#curQuery=QueryParser(Version.LUCENE_CURRENT,k,lucene_analyzer).parse(v)
curQuery=QueryParser(k,lucene_analyzer).parse(v)
4.Document模块
低版本:
for item in res:
doc1 = Document()
doc2 = Document()
doc1.add(Field("pid",item.PATIENT_NO,Field.Store.YES,Field.Index.NO))
doc2.add(Field("pid",item.PATIENT_NO,Field.Store.YES,Field.Index.NO))
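高版本(Field.Index已被移除,需要改用FieldType来描述字段属性;还需要额外导入:from org.apache.lucene.document import FieldType 和 from org.apache.lucene.index import IndexOptions。另外注意:低版本的Field.Index.NO表示该字段不建立索引,严格对应的写法是IndexOptions.NONE;下面使用的IndexOptions.DOCS_AND_FREQS会为该字段建立索引,请根据实际需要选择):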
t1 = FieldType()
t1.setStored(True)
t1.setTokenized(False)
t1.setIndexOptions(IndexOptions.DOCS_AND_FREQS)
t2 = FieldType()
t2.setStored(True)
t2.setTokenized(False)
t2.setIndexOptions(IndexOptions.DOCS_AND_FREQS)
for item in res:
doc1 = Document()
doc2 = Document()
doc1.add(Field("pid", item.PATIENT_NO, t1))
doc2.add(Field("pid", item.PATIENT_NO, t2))
5.MultiFieldQueryParser模块(高版本的构造函数不再接收Version参数,去掉即可):
#mutiQuery=MultiFieldQueryParser(Version.LUCENE_CURRENT,allFields,lucene_analyzer)
mutiQuery=MultiFieldQueryParser(allFields,lucene_analyzer)
6.RAMDirectory模块
低版本:
com_dir= SimpleFSDirectory(File(INDEX_DIR_COMMON))
global lucene_analyzer
lucene_analyzer= CJKAnalyzer(Version.LUCENE_CURRENT)
global com_searcher
if com_searcher is None:
ramDir = RAMDirectory(com_dir)
com_searcher = IndexSearcher(ramDir)
高版本:我尝试修改传入RAMDirectory的参数,但是仍然报错,于是不再使用RAMDirectory,改为用FSDirectory.open()打开索引目录,再通过DirectoryReader.open()创建IndexSearcher:
com_dir= FSDirectory.open(Paths.get(INDEX_DIR_COMMON))
neg_dir= FSDirectory.open(Paths.get(INDEX_DIR_NEGATE))
global lucene_analyzer
lucene_analyzer= CJKAnalyzer()
global com_searcher
if com_searcher is None:
# ramDir = RAMDirectory(com_dir)
com_searcher=IndexSearcher(DirectoryReader.open(com_dir))
7.BooleanQuery模块
低版本:
neg_bQuery = BooleanQuery()
for k in negateKwDict:
if (operator.ne(k,'all')):
for v in negateKwDict[k]:
curQuery=QueryParser(Version.LUCENE_CURRENT,k,lucene_analyzer).parse(v)
neg_bQuery.add(curQuery,BooleanClause.Occur.SHOULD)
#........省略部分代码
global neg_searcher
neg_hits=neg_searcher.search(neg_bQuery,maxcount)
高版本:
neg_bQuery = BooleanQuery.Builder()
for k in negateKwDict:
if (operator.ne(k,'all')):
for v in negateKwDict[k]:
curQuery= QueryParser(k,lucene_analyzer).parse(v)
neg_bQuery.add(curQuery,BooleanClause.Occur.SHOULD)
#.......省略部分代码
global neg_searcher
neg_hits=neg_searcher.search(neg_bQuery.build(),maxcount)
注意:高版本中BooleanQuery不能再直接实例化后调用add(),这里BooleanQuery.Builder()返回的是BooleanQuery.Builder对象,需要先通过它的add()添加子查询,最后调用build()方法得到BooleanQuery,才能传给search()函数使用。