当前位置: 首页 > 工具软件 > zhparser > 使用案例

使用 zhparser 处理中文全文检索(之一)

阎安邦
2023-12-01

os: centos 7.4
db: postgresql 10.11

版本

# cat /etc/centos-release
CentOS Linux release 7.4.1708 (Core) 
# 
# 
# su - postgres
Last login: Wed Jan 15 18:34:12 CST 2020 on pts/0
$
$
$ psql -c "select version();"
                                                 version                                                  
----------------------------------------------------------------------------------------------------------
 PostgreSQL 10.11 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-39), 64-bit
(1 row)


下载安装 SCWS

# cd /opt
# wget http://www.xunsearch.com/scws/down/scws-1.2.3.tar.bz2
# tar -jxvf ./scws-1.2.3.tar.bz2
# cd scws-1.2.3/
# ./configure 
# make 
# make install

# ldconfig

下载安装 zhparser

# cd /opt
# wget https://github.com/amutu/zhparser/archive/v0.2.0.tar.gz
# tar -zxvf ./v0.2.0.tar.gz
# cd zhparser-0.2.0/

# export PGHOME=/usr/pgsql-10;
export PGDATA=/var/lib/pgsql/10/data;

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${PGHOME}/lib;
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/lib:/usr/lib:/usr/local/lib;
export PATH=${PGHOME}/bin:$PATH;
export MANPATH=${PGHOME}/share/man:$MANPATH;


# SCWS_HOME=/usr/local make
# SCWS_HOME=/usr/local make install

make 的日志如下(其中关于 SplitIdentifierString 的 implicit declaration 只是编译警告,不影响构建,zhparser.so 仍然正常生成)

gcc -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector-strong --param=ssp-buffer-size=4 -grecord-gcc-switches -m64 -mtune=generic -fPIC -I/usr/local/include/scws  -I. -I./ -I/usr/pgsql-10/include/server -I/usr/pgsql-10/include/internal  -D_GNU_SOURCE -I/usr/include/libxml2  -I/usr/include  -c -o zhparser.o zhparser.c
zhparser.c: In function ‘init’:
zhparser.c:210:6: warning: implicit declaration of function ‘SplitIdentifierString’ [-Wimplicit-function-declaration]
      if(!SplitIdentifierString(extra_dicts,',',&elemlist)){
      ^
gcc -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector-strong --param=ssp-buffer-size=4 -grecord-gcc-switches -m64 -mtune=generic -fPIC -shared -o zhparser.so zhparser.o -L/usr/pgsql-10/lib  -Wl,--as-needed  -L/usr/lib64 -Wl,--as-needed -Wl,-rpath,'/usr/pgsql-10/lib',--enable-new-dtags  -lscws -L/usr/local/lib -Wl,-rpath -Wl,/usr/local/lib

make install 的日志如下

/bin/mkdir -p '/usr/pgsql-10/lib'
/bin/mkdir -p '/usr/pgsql-10/share/extension'
/bin/mkdir -p '/usr/pgsql-10/share/extension'
/bin/mkdir -p '/usr/pgsql-10/share/tsearch_data'
/bin/install -c -m 755  zhparser.so '/usr/pgsql-10/lib/zhparser.so'
/bin/install -c -m 644 .//zhparser.control '/usr/pgsql-10/share/extension/'
/bin/install -c -m 644 .//zhparser--1.0.sql .//zhparser--unpackaged--1.0.sql  '/usr/pgsql-10/share/extension/'
/bin/install -c -m 644 .//dict.utf8.xdb .//rules.utf8.ini '/usr/pgsql-10/share/tsearch_data/'

初步使用

# su - postgres
$ psql 
psql (10.11)
Type "help" for help.

postgres=# create extension zhparser;

postgres=# \dx zhparser
                      List of installed extensions
   Name   | Version | Schema |               Description                
----------+---------+--------+------------------------------------------
 zhparser | 1.0     | public | a parser for full-text search of Chinese
(1 row)

postgres=# \dx+ zhparser
           Objects in extension "zhparser"
                  Object description                  
------------------------------------------------------
 function zhprs_end(internal)
 function zhprs_getlexeme(internal,internal,internal)
 function zhprs_lextype(internal)
 function zhprs_start(internal,integer)
 text search parser zhparser
(5 rows)

postgres=# CREATE TEXT SEARCH CONFIGURATION testzhcfg (PARSER = zhparser);  

postgres=# ALTER TEXT SEARCH CONFIGURATION testzhcfg ADD MAPPING FOR n,v,a,i,e,l WITH simple; 

参考:
https://github.com/amutu/zhparser
http://blog.amutu.com/zhparser/
http://www.xunsearch.com/scws/

 类似资料: