laravel+mysql+sphinx

谢誉
2023-12-01

mkdir /usr/local/sphinx -p

cd /usr/local/sphinx/

wget http://sphinxsearch.com/files/sphinx-2.2.11-release.tar.gz

tar -zvxf sphinx-2.2.11-release.tar.gz 
cd sphinx-2.2.11-release/
./configure 
make && make install


searchd -h //有提示即为成功

配置文件目录: /usr/local/etc/
#修改sphinx.conf -- 只记录配置支持中文索引,其他配置网上有
# local index example (没有就新增,修改一下配置,才支持中文索引)
1、 min_word_len     = 1
2、 min_prefix_len       = 1
3、 ngram_len            = 1
4、
    charset_table = U+FF10..U+FF19->0..9, 0..9, U+FF41..U+FF5A->a..z, U+FF21..U+FF3A->a..z,A..Z->a..z, a..z, U+0149, U+017F, U+0138, U+00DF, U+00FF, U+00C0..U+00D6->U+00E0..U+00F6,U+00E0..U+00F6, U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, U+0100->U+0101, U+0101,U+0102->U+0103, U+0103, U+0104->U+0105, U+0105, U+0106->U+0107, U+0107, U+0108->U+0109,U+0109, U+010A->U+010B, U+010B, U+010C->U+010D, U+010D, U+010E->U+010F, U+010F,U+0110->U+0111, U+0111, U+0112->U+0113, U+0113, U+0114->U+0115, U+0115,U+0116->U+0117,U+0117, U+0118->U+0119, U+0119, U+011A->U+011B, U+011B, U+011C->U+011D,U+011D,U+011E->U+011F, U+011F, U+0130->U+0131, U+0131, U+0132->U+0133, U+0133,U+0134->U+0135,U+0135, U+0136->U+0137, U+0137, U+0139->U+013A, U+013A, U+013B->U+013C,U+013C,U+013D->U+013E, U+013E, U+013F->U+0140, U+0140, U+0141->U+0142, U+0142,U+0143->U+0144,U+0144, U+0145->U+0146, U+0146, U+0147->U+0148, U+0148, U+014A->U+014B,U+014B,U+014C->U+014D, U+014D, U+014E->U+014F, U+014F, U+0150->U+0151, U+0151,U+0152->U+0153,U+0153, U+0154->U+0155, U+0155, U+0156->U+0157, U+0157, U+0158->U+0159,U+0159,U+015A->U+015B, U+015B, U+015C->U+015D, U+015D, U+015E->U+015F, U+015F,U+0160->U+0161,U+0161, U+0162->U+0163, U+0163, U+0164->U+0165, U+0165, U+0166->U+0167,U+0167,U+0168->U+0169, U+0169, U+016A->U+016B, U+016B, U+016C->U+016D, U+016D,U+016E->U+016F,U+016F, U+0170->U+0171, U+0171, U+0172->U+0173, U+0173, U+0174->U+0175,U+0175,U+0176->U+0177, U+0177, U+0178->U+00FF, U+00FF, U+0179->U+017A, U+017A, U+017B->U+017C,U+017C, U+017D->U+017E, U+017E, U+0410..U+042F->U+0430..U+044F,U+0430..U+044F,U+05D0..U+05EA, U+0531..U+0556->U+0561..U+0586, U+0561..U+0587, U+0621..U+063A, U+01B9,U+01BF, U+0640..U+064A, U+0660..U+0669, U+066E, U+066F, U+0671..U+06D3, U+06F0..U+06FF,U+0904..U+0939, U+0958..U+095F, U+0960..U+0963, U+0966..U+096F, U+097B..U+097F,U+0985..U+09B9, U+09CE, U+09DC..U+09E3, U+09E6..U+09EF,U+0A05..U+0A39, U+0A59..U+0A5E,U+0A66..U+0A6F, U+0A85..U+0AB9, U+0AE0..U+0AE3,U+0AE6..U+0AEF, U+0B05..U+0B39,U+0B5C..U+0B61, U+0B66..U+0B6F, U+0B71, U+0B85..U+0BB9,U+0BE6..U+0BF2, U+0C05..U+0C39,U+0C66..U+0C6F, U+0C85..U+0CB9, U+0CDE..U+0CE3,U+0CE6..U+0CEF, U+0D05..U+0D39, U+0D60,U+0D61, U+0D66..U+0D6F, U+0D85..U+0DC6,U+1900..U+1938, U+1946..U+194F, U+A800..U+A805,U+A807..U+A822, U+0386->U+03B1,U+03AC->U+03B1, U+0388->U+03B5, U+03AD->U+03B5,U+0389->U+03B7, U+03AE->U+03B7,U+038A->U+03B9, U+0390->U+03B9, U+03AA->U+03B9,U+03AF->U+03B9, U+03CA->U+03B9,U+038C->U+03BF, U+03CC->U+03BF, U+038E->U+03C5,U+03AB->U+03C5, U+03B0->U+03C5,U+03CB->U+03C5, U+03CD->U+03C5, U+038F->U+03C9,U+03CE->U+03C9, U+03C2->U+03C3, U+0391..U+03A1->U+03B1..U+03C1,U+03A3..U+03A9->U+03C3..U+03C9, U+03B1..U+03C1,U+03C3..U+03C9, U+0E01..U+0E2E,U+0E30..U+0E3A, U+0E40..U+0E45, U+0E47, U+0E50..U+0E59, U+A000..U+A48F, U+4E00..U+9FBF,U+3400..U+4DBF, U+20000..U+2A6DF, U+F900..U+FAFF,U+2F800..U+2FA1F, U+2E80..U+2EFF,U+2F00..U+2FDF, U+3100..U+312F, U+31A0..U+31BF,U+3040..U+309F, U+30A0..U+30FF,U+31F0..U+31FF, U+AC00..U+D7AF, U+1100..U+11FF, U+3130..U+318F, U+A000..U+A48F,U+A490..U+A4CF 
5、  
    ngram_chars =U+4E00..U+9FBF, U+3400..U+4DBF, U+20000..U+2A6DF, U+F900..U+FAFF,U+2F800..U+2FA1F, U+2E80..U+2EFF, U+2F00..U+2FDF, U+3100..U+312F, U+31A0..U+31BF,U+3040..U+309F, U+30A0..U+30FF, U+31F0..U+31FF, U+AC00..U+D7AF, U+1100..U+11FF, U+3130..U+318F, U+A000..U+A48F, U+A490..U+A4CF

# 创建索引
/usr/local/bin/indexer -c /usr/local/etc/sphinx.conf --all

#启动后台守护进程
/usr/local/bin/searchd -c /usr/local/etc/sphinx.conf

#停止正在运行的searchd
/usr/local/bin/searchd --config sphinx.conf  --stop

查看端口情况
#ps aux|grep searchd


测试:

进入SphinxQL:

mysql -h0 -ppwd -P9306


select * from 索引名 where match('words');




laravel支持sphinx:

https://github.com/sngrl/sphinxsearch


配置文件详细内容

#
# Sphinx configuration file sample
#
# WARNING! While this sample file mentions all available options,
# it contains (very) short helper descriptions only. Please refer to
# doc/sphinx.html for details.
#

#############################################################################
## data source definition
#############################################################################

source all_hotel_info_src
{
    # data source type. mandatory, no default value
    # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
    type            = mysql

    #####################################################################
    ## SQL settings (for 'mysql' and 'pgsql' types)
    #####################################################################

    # some straightforward parameters for SQL source types
    sql_host        = localhost
    sql_user        = aaa
    sql_pass        = aaaaaa
    sql_db          = aaa
    sql_port        = 3306  # optional, default is 3306

    # UNIX socket name
    # optional, default is empty (reuse client library defaults)
    # usually '/var/lib/mysql/mysql.sock' on Linux
    # usually '/tmp/mysql.sock' on FreeBSD
    #
    # sql_sock      = /tmp/mysql.sock


    # MySQL specific client connection flags
    # optional, default is 0
    #
    # mysql_connect_flags   = 32 # enable compression

    # MySQL specific SSL certificate settings
    # optional, defaults are empty
    #
    # mysql_ssl_cert        = /etc/ssl/client-cert.pem
    # mysql_ssl_key     = /etc/ssl/client-key.pem
    # mysql_ssl_ca      = /etc/ssl/cacert.pem

    # MS SQL specific Windows authentication mode flag
    # MUST be in sync with charset_type index-level setting
    # optional, default is 0
    #
    # mssql_winauth     = 1 # use currently logged on user credentials


    # ODBC specific DSN (data source name)
    # mandatory for odbc source type, no default value
    #
    # odbc_dsn      = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)};
    # sql_query     = SELECT id, data FROM documents.csv


    # ODBC and MS SQL specific, per-column buffer sizes
    # optional, default is auto-detect
    #
    # sql_column_buffers    = content=12M, comments=1M


    # pre-query, executed before the main fetch query
    # multi-value, optional, default is empty list of queries
    #
     sql_query_pre      = SET NAMES utf8
    # sql_query_pre     = SET SESSION query_cache_type=OFF


    # main document fetch query
    # mandatory, integer document ID field MUST be the first selected column
    sql_query       = \
        SELECT id, inner_hotel_id, hotel_name \
        FROM aaa

    sql_attr_uint           =id
    sql_field_string        =hotel_name

    # joined/payload field fetch query
    # joined fields let you avoid (slow) JOIN and GROUP_CONCAT
    # payload fields let you attach custom per-keyword values (eg. for ranking)
    #
    # syntax is FIELD-NAME 'from'  ( 'query' | 'payload-query' ); QUERY
    # joined field QUERY should return 2 columns (docid, text)
    # payload field QUERY should return 3 columns (docid, keyword, weight)
    #
    # REQUIRES that query results are in ascending document ID order!
    # multi-value, optional, default is empty list of queries
    #
    # sql_joined_field  = tags from query; SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC
    # sql_joined_field  = wtags from payload-query; SELECT docid, tag, tagweight FROM tags ORDER BY docid ASC


    # file based field declaration
    #
    # content of this field is treated as a file name
    # and the file gets loaded and indexed in place of a field
    #
    # max file size is limited by max_file_field_buffer indexer setting
    # file IO errors are non-fatal and get reported as warnings
    #
    # sql_file_field        = content_file_path


    # range query setup, query that must return min and max ID values
    # optional, default is empty
    #
    # sql_query will need to reference $start and $end boundaries
    # if using ranged query:
    #
    # sql_query     = \
    #   SELECT doc.id, doc.id AS group, doc.title, doc.data \
    #   FROM documents doc \
    #   WHERE id>=$start AND id<=$end
    #
    # sql_query_range       = SELECT MIN(id),MAX(id) FROM documents


    # range query step
    # optional, default is 1024
    #
    # sql_range_step        = 1000


    # unsigned integer attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # optional bit size can be specified, default is 32
    #
    # sql_attr_uint     = author_id
    # sql_attr_uint     = forum_id:9 # 9 bits for forum_id
    sql_attr_uint       = group_id

    # boolean attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # equivalent to sql_attr_uint with 1-bit size
    #
    # sql_attr_bool     = is_deleted


    # bigint attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # declares a signed (unlike uint!) 64-bit attribute
    #
    # sql_attr_bigint       = my_bigint_id


    # UNIX timestamp attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # similar to integer, but can also be used in date functions
    #
    # sql_attr_timestamp    = posted_ts
    # sql_attr_timestamp    = last_edited_ts
    sql_attr_timestamp  = date_added


    # floating point attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # values are stored in single precision, 32-bit IEEE 754 format
    #
    # sql_attr_float        = lat_radians
    # sql_attr_float        = long_radians


    # multi-valued attribute (MVA) attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # MVA values are variable length lists of unsigned 32-bit integers
    #
    # syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]
    # ATTR-TYPE is 'uint' or 'timestamp'
    # SOURCE-TYPE is 'field', 'query', or 'ranged-query'
    # QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs
    # RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
    #
    # sql_attr_multi        = uint tag from query; SELECT docid, tagid FROM tags
    # sql_attr_multi        = uint tag from ranged-query; \
    #   SELECT docid, tagid FROM tags WHERE id>=$start AND id<=$end; \
    #   SELECT MIN(docid), MAX(docid) FROM tags


    # string attribute declaration
    # multi-value (an arbitrary number of these is allowed), optional
    # lets you store and retrieve strings
    #
    # sql_attr_string       = stitle


    # JSON attribute declaration
    # multi-value (an arbitrary number of these is allowed), optional
    # lets you store a JSON document as an (in-memory) attribute for later use
    #
    # sql_attr_json     = properties


    # combined field plus attribute declaration (from a single column)
    # stores column as an attribute, but also indexes it as a full-text field
    #
    # sql_field_string  = author


    # post-query, executed on sql_query completion
    # optional, default is empty
    #
    # sql_query_post        =


    # post-index-query, executed on successful indexing completion
    # optional, default is empty
    # $maxid expands to max document ID actually fetched from DB
    #
    # sql_query_post_index  = REPLACE INTO counters ( id, val ) \
    #   VALUES ( 'max_indexed_id', $maxid )


    # ranged query throttling, in milliseconds
    # optional, default is 0 which means no delay
    # enforces given delay before each query step
    sql_ranged_throttle = 0


    # kill-list query, fetches the document IDs for kill-list
    # k-list will suppress matches from preceding indexes in the same query
    # optional, default is empty
    #
    # sql_query_killlist    = SELECT id FROM documents WHERE edited>=@last_reindex


    # columns to unpack on indexer side when indexing
    # multi-value, optional, default is empty list
    #
    # unpack_zlib       = zlib_column
    # unpack_mysqlcompress  = compressed_column
    # unpack_mysqlcompress  = compressed_column_2


    # maximum unpacked length allowed in MySQL COMPRESS() unpacker
    # optional, default is 16M
    #
    # unpack_mysqlcompress_maxsize  = 16M


    # hook command to run when SQL connection succeeds
    # optional, default value is empty (do nothing)
    #
    # hook_connect          = bash sql_connect.sh


    # hook command to run after (any) SQL range query
    # it may print out "minid maxid" (w/o quotes) to override the range
    # optional, default value is empty (do nothing)
    #
    # hook_query_range      = bash sql_query_range.sh


    # hook command to run on successful indexing completion
    # $maxid expands to max document ID actually fetched from DB
    # optional, default value is empty (do nothing)
    #
    # hook_post_index       = bash sql_post_index.sh $maxid

    #####################################################################
    ## xmlpipe2 settings
    #####################################################################

    # type          = xmlpipe

    # shell command to invoke xmlpipe stream producer
    # mandatory
    #
    # xmlpipe_command       = cat /var/test.xml

    # xmlpipe2 field declaration
    # multi-value, optional, default is empty
    #
    # xmlpipe_field     = subject
    # xmlpipe_field     = content


    # xmlpipe2 attribute declaration
    # multi-value, optional, default is empty
    # all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX
    # examples:
    #
    # xmlpipe_attr_timestamp    = published
    # xmlpipe_attr_uint = author_id
    # xmlpipe_attr_bool = is_enabled
    # xmlpipe_attr_float    = latitude
    # xmlpipe_attr_bigint   = guid
    # xmlpipe_attr_multi    = tags
    # xmlpipe_attr_multi_64 = tags64
    # xmlpipe_attr_string   = title
    # xmlpipe_attr_json = extra_data
    # xmlpipe_field_string  = content


    # perform UTF-8 validation, and filter out incorrect codes
    # avoids XML parser choking on non-UTF-8 documents
    # optional, default is 0
    #
    # xmlpipe_fixup_utf8    = 1
}


# inherited source example
#
# all the parameters are copied from the parent source,
# and may then be overridden in this source definition
source all_hotel_info_srcthrottled : all_hotel_info_src
{
    sql_ranged_throttle = 100
}

#############################################################################
## index definition
#############################################################################

# local index example
#
# this is an index which is stored locally in the filesystem
#
# all indexing-time options (such as morphology and charsets)
# are configured per local index
index all_hotel_info_index
{
    # index type
    # optional, default is 'plain'
    # known values are 'plain', 'distributed', and 'rt' (see samples below)
    # type          = plain

    # document source(s) to index
    # multi-value, mandatory
    # document IDs must be globally unique across all sources
    source          = all_hotel_info_src

    # index files path and file name, without extension
    # mandatory, path must be writable, extensions will be auto-appended
    path            = /usr/local/sphinx/data/all_hotel_info

    # document attribute values (docinfo) storage mode
    # optional, default is 'extern'
    # known values are 'none', 'extern' and 'inline'
    docinfo         = extern

    # dictionary type, 'crc' or 'keywords'
    # crc is faster to index when no substring/wildcards searches are needed
    # crc with substrings might be faster to search but is much slower to index
    # (because all substrings are pre-extracted as individual keywords)
    # keywords is much faster to index with substrings, and index is much (3-10x) smaller
    # keywords supports wildcards, crc does not, and never will
    # optional, default is 'keywords'
    dict            = keywords

    # memory locking for cached data (.spa and .spi), to prevent swapping
    # optional, default is 0 (do not mlock)
    # requires searchd to be run from root
    mlock           = 0

    # a list of morphology preprocessors to apply
    # optional, default is empty
    #
    # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
    # 'soundex', and 'metaphone'; additional preprocessors available from
    # libstemmer are 'libstemmer_XXX', where XXX is algorithm code
    # (see libstemmer_c/libstemmer/modules.txt)
    #
    # morphology        = stem_en, stem_ru, soundex
    # morphology        = libstemmer_german
    # morphology        = libstemmer_sv
    morphology      = rlp_chinese

    # minimum word length at which to enable stemming
    # optional, default is 1 (stem everything)
    #
    # min_stemming_len  = 1


    # stopword files list (space separated)
    # optional, default is empty
    # contents are plain text, charset_table and stemming are both applied
    #
    # stopwords     = /var/data/stopwords.txt


    # wordforms file, in "mapfrom > mapto" plain text format
    # optional, default is empty
    #
    # wordforms     = /var/data/wordforms.txt


    # tokenizing exceptions file
    # optional, default is empty
    #
    # plain text, case sensitive, space insensitive in map-from part
    # one "Map Several Words => ToASingleOne" entry per line
    #
    # exceptions        = /var/data/exceptions.txt


    # embedded file size limit
    # optional, default is 16K
    #
    # exceptions, wordforms, and stopwords files smaller than this limit
    # are stored in the index; otherwise, their paths and sizes are stored
    #
    # embedded_limit        = 16K

    # minimum indexed word length
    # default is 1 (index everything)
    min_word_len        = 1


    # ignored characters list
    # optional, default value is empty
    #
    # ignore_chars      = U+00AD


    # minimum word prefix length to index
    # optional, default is 0 (do not index prefixes)
    #
     min_prefix_len     = 1


    # minimum word infix length to index
    # optional, default is 0 (do not index infixes)
    #
    # min_infix_len     = 1


    # maximum substring (prefix or infix) length to index
    # optional, default is 0 (do not limit substring length)
    #
    # max_substring_len = 8


    # list of fields to limit prefix/infix indexing to
    # optional, default value is empty (index all fields in prefix/infix mode)
    #
    # prefix_fields     = filename
    # infix_fields      = url, domain


    # expand keywords with exact forms and/or stars when searching fit indexes
    # search-time only, does not affect indexing, can be 0 or 1
    # optional, default is 0 (do not expand keywords)
    #
    # expand_keywords       = 1


    # n-gram length to index, for CJK indexing
    # only supports 0 and 1 for now, other lengths to be implemented
    # optional, default is 0 (disable n-grams)
    #
     ngram_len      = 1


    # n-gram characters list, for CJK indexing
    # optional, default is empty
    #
    # ngram_chars       = U+3000..U+2FA1F


    # phrase boundary characters list
    # optional, default is empty
    #
    # phrase_boundary       = ., ?, !, U+2026 # horizontal ellipsis


    # phrase boundary word position increment
    # optional, default is 0
    #
    # phrase_boundary_step  = 100


    # blended characters list
    # blended chars are indexed both as separators and valid characters
    # for instance, AT&T will results in 3 tokens ("at", "t", and "at&t")
    # optional, default is empty
    #
    # blend_chars       = +, &, U+23


    # blended token indexing mode
    # a comma separated list of blended token indexing variants
    # known variants are trim_none, trim_head, trim_tail, trim_both, skip_pure
    # optional, default is trim_none
    #
    # blend_mode        = trim_tail, skip_pure


    # whether to strip HTML tags from incoming documents
    # known values are 0 (do not strip) and 1 (do strip)
    # optional, default is 0
    html_strip      = 0

    # what HTML attributes to index if stripping HTML
    # optional, default is empty (do not index anything)
    #
    # html_index_attrs  = img=alt,title; a=title;


    # what HTML elements contents to strip
    # optional, default is empty (do not strip element contents)
    #
    # html_remove_elements  = style, script


    # whether to preopen index data files on startup
    # optional, default is 0 (do not preopen), searchd-only
    #
    # preopen           = 1


    # whether to enable in-place inversion (2x less disk, 90-95% speed)
    # optional, default is 0 (use separate temporary files), indexer-only
    #
    # inplace_enable        = 1


    # in-place fine-tuning options
    # optional, defaults are listed below
    #
    # inplace_hit_gap       = 0 # preallocated hitlist gap size
    # inplace_docinfo_gap   = 0 # preallocated docinfo gap size
    # inplace_reloc_factor  = 0.1 # relocation buffer size within arena
    # inplace_write_factor  = 0.1 # write buffer size within arena


    # whether to index original keywords along with stemmed versions
    # enables "=exactform" operator to work
    # optional, default is 0
    #
    # index_exact_words = 1


    # position increment on overshort (less that min_word_len) words
    # optional, allowed values are 0 and 1, default is 1
    #
    # overshort_step        = 1


    # position increment on stopword
    # optional, allowed values are 0 and 1, default is 1
    #
    # stopword_step     = 1


    # hitless words list
    # positions for these keywords will not be stored in the index
    # optional, allowed values are 'all', or a list file name
    #
    # hitless_words     = all
    # hitless_words     = hitless.txt


    # detect and index sentence and paragraph boundaries
    # required for the SENTENCE and PARAGRAPH operators to work
    # optional, allowed values are 0 and 1, default is 0
    #
    # index_sp          = 1


    # index zones, delimited by HTML/XML tags
    # a comma separated list of tags and wildcards
    # required for the ZONE operator to work
    # optional, default is empty string (do not index zones)
    #
    # index_zones       = title, h*, th


    # index per-document and average per-index field lengths, in tokens
    # required for the BM25A(), BM25F() in expression ranker
    # optional, default is 0 (do not index field lenghts)
    #
    # index_field_lengths   = 1


    # regular expressions (regexps) to filter the fields and queries with
    # gets applied to data source fields when indexing
    # gets applied to search queries when searching
    # multi-value, optional, default is empty list of regexps
    #
    # regexp_filter     = \b(\d+)\" => \1inch
    # regexp_filter     = (blue|red) => color


    # list of the words considered frequent with respect to bigram indexing
    # optional, default is empty
    #
    # bigram_freq_words = the, a, i, you, my


    # bigram indexing mode
    # known values are none, all, first_freq, both_freq
    # option, default is none (do not index bigrams)
    #
    # bigram_index      = both_freq


    # snippet document file name prefix
    # preprended to file names when generating snippets using load_files option
    # WARNING, this is a prefix (not a path), trailing slash matters!
    # optional, default is empty
    #
    # snippets_file_prefix  = /mnt/mydocs/server1


    # whether to apply stopwords before or after stemming
    # optional, default is 0 (apply stopwords after stemming)
    #
    # stopwords_unstemmed   = 0


    # path to a global (cluster-wide) keyword IDFs file
    # optional, default is empty (use local IDFs)
    #
    # global_idf        = /usr/local/sphinx/var/global.idf

    charset_table = U+FF10..U+FF19->0..9, 0..9, U+FF41..U+FF5A->a..z, U+FF21..U+FF3A->a..z,A..Z->a..z, a..z, U+0149, U+017F, U+0138, U+00DF, U+00FF, U+00C0..U+00D6->U+00E0..U+00F6,U+00E0..U+00F6, U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, U+0100->U+0101, U+0101,U+0102->U+0103, U+0103, U+0104->U+0105, U+0105, U+0106->U+0107, U+0107, U+0108->U+0109,U+0109, U+010A->U+010B, U+010B, U+010C->U+010D, U+010D, U+010E->U+010F, U+010F,U+0110->U+0111, U+0111, U+0112->U+0113, U+0113, U+0114->U+0115, U+0115,U+0116->U+0117,U+0117, U+0118->U+0119, U+0119, U+011A->U+011B, U+011B, U+011C->U+011D,U+011D,U+011E->U+011F, U+011F, U+0130->U+0131, U+0131, U+0132->U+0133, U+0133,U+0134->U+0135,U+0135, U+0136->U+0137, U+0137, U+0139->U+013A, U+013A, U+013B->U+013C,U+013C,U+013D->U+013E, U+013E, U+013F->U+0140, U+0140, U+0141->U+0142, U+0142,U+0143->U+0144,U+0144, U+0145->U+0146, U+0146, U+0147->U+0148, U+0148, U+014A->U+014B,U+014B,U+014C->U+014D, U+014D, U+014E->U+014F, U+014F, U+0150->U+0151, U+0151,U+0152->U+0153,U+0153, U+0154->U+0155, U+0155, U+0156->U+0157, U+0157, U+0158->U+0159,U+0159,U+015A->U+015B, U+015B, U+015C->U+015D, U+015D, U+015E->U+015F, U+015F,U+0160->U+0161,U+0161, U+0162->U+0163, U+0163, U+0164->U+0165, U+0165, U+0166->U+0167,U+0167,U+0168->U+0169, U+0169, U+016A->U+016B, U+016B, U+016C->U+016D, U+016D,U+016E->U+016F,U+016F, U+0170->U+0171, U+0171, U+0172->U+0173, U+0173, U+0174->U+0175,U+0175,U+0176->U+0177, U+0177, U+0178->U+00FF, U+00FF, U+0179->U+017A, U+017A, U+017B->U+017C,U+017C, U+017D->U+017E, U+017E, U+0410..U+042F->U+0430..U+044F,U+0430..U+044F,U+05D0..U+05EA, U+0531..U+0556->U+0561..U+0586, U+0561..U+0587, U+0621..U+063A, U+01B9,U+01BF, U+0640..U+064A, U+0660..U+0669, U+066E, U+066F, U+0671..U+06D3, U+06F0..U+06FF,U+0904..U+0939, U+0958..U+095F, U+0960..U+0963, U+0966..U+096F, U+097B..U+097F,U+0985..U+09B9, U+09CE, U+09DC..U+09E3, U+09E6..U+09EF,U+0A05..U+0A39, U+0A59..U+0A5E,U+0A66..U+0A6F, U+0A85..U+0AB9, U+0AE0..U+0AE3,U+0AE6..U+0AEF, U+0B05..U+0B39,U+0B5C..U+0B61, U+0B66..U+0B6F, U+0B71, U+0B85..U+0BB9,U+0BE6..U+0BF2, U+0C05..U+0C39,U+0C66..U+0C6F, U+0C85..U+0CB9, U+0CDE..U+0CE3,U+0CE6..U+0CEF, U+0D05..U+0D39, U+0D60,U+0D61, U+0D66..U+0D6F, U+0D85..U+0DC6,U+1900..U+1938, U+1946..U+194F, U+A800..U+A805,U+A807..U+A822, U+0386->U+03B1,U+03AC->U+03B1, U+0388->U+03B5, U+03AD->U+03B5,U+0389->U+03B7, U+03AE->U+03B7,U+038A->U+03B9, U+0390->U+03B9, U+03AA->U+03B9,U+03AF->U+03B9, U+03CA->U+03B9,U+038C->U+03BF, U+03CC->U+03BF, U+038E->U+03C5,U+03AB->U+03C5, U+03B0->U+03C5,U+03CB->U+03C5, U+03CD->U+03C5, U+038F->U+03C9,U+03CE->U+03C9, U+03C2->U+03C3, U+0391..U+03A1->U+03B1..U+03C1,U+03A3..U+03A9->U+03C3..U+03C9, U+03B1..U+03C1,U+03C3..U+03C9, U+0E01..U+0E2E,U+0E30..U+0E3A, U+0E40..U+0E45, U+0E47, U+0E50..U+0E59, U+A000..U+A48F, U+4E00..U+9FBF,U+3400..U+4DBF, U+20000..U+2A6DF, U+F900..U+FAFF,U+2F800..U+2FA1F, U+2E80..U+2EFF,U+2F00..U+2FDF, U+3100..U+312F, U+31A0..U+31BF,U+3040..U+309F, U+30A0..U+30FF,U+31F0..U+31FF, U+AC00..U+D7AF, U+1100..U+11FF, U+3130..U+318F, U+A000..U+A48F,U+A490..U+A4CF 

    ngram_chars =U+4E00..U+9FBF, U+3400..U+4DBF, U+20000..U+2A6DF, U+F900..U+FAFF,U+2F800..U+2FA1F, U+2E80..U+2EFF, U+2F00..U+2FDF, U+3100..U+312F, U+31A0..U+31BF,U+3040..U+309F, U+30A0..U+30FF, U+31F0..U+31FF, U+AC00..U+D7AF, U+1100..U+11FF, U+3130..U+318F, U+A000..U+A48F, U+A490..U+A4CF 

}


# inherited index example
#
# all the parameters are copied from the parent index,
# and may then be overridden in this index definition
index all_hotel_infostemmed : all_hotel_info_index
{
    source=all_hotel_info_srcthrottled 
    path =/usr/local/sphinx/index/all_hotel_info
}


# distributed index example
#
# this is a virtual index which can NOT be directly indexed,
# and only contains references to other local and/or remote indexes
index dist1
{
    # 'distributed' index type MUST be specified
    type            = distributed

    # local index to be searched
    # there can be many local indexes configured
    local           = all_hotel_info
    local           = all_hotel_infostemmed

    # remote agent
    # multiple remote agents may be specified
    # syntax for TCP connections is 'hostname:port:index1,[index2[,...]]'
    # syntax for local UNIX connections is '/path/to/socket:index1,[index2[,...]]'
    agent           = localhost:9313:remote1
    agent           = localhost:9314:remote2,remote3
    # agent         = /var/run/searchd.sock:remote4

    # remote agent mirrors groups, aka mirrors, aka HA agents
    # defines 2 or more interchangeable mirrors for a given index part
    #
    # agent         = server3:9312 | server4:9312 :indexchunk2
    # agent         = server3:9312:chunk2server3 | server4:9312:chunk2server4
    # agent         = server3:chunk2server3 | server4:chunk2server4
    # agent         = server21|server22|server23:chunk2


    # blackhole remote agent, for debugging/testing
    # network errors and search results will be ignored
    #
    # agent_blackhole       = testbox:9312:testindex1,testindex2


    # persistenly connected remote agent
    # reduces connect() pressure, requires that workers IS threads
    #
    # agent_persistent      = testbox:9312:testindex1,testindex2


    # remote agent connection timeout, milliseconds
    # optional, default is 1000 ms, ie. 1 sec
    agent_connect_timeout   = 1000

    # remote agent query timeout, milliseconds
    # optional, default is 3000 ms, ie. 3 sec
    agent_query_timeout     = 3000

    # HA mirror agent strategy
    # optional, defaults to ??? (random mirror)
    # know values are nodeads, noerrors, roundrobin, nodeadstm, noerrorstm
    #
    # ha_strategy               = nodeads

    # path to RLP context file
    # optional, defaut is empty
    #
    # rlp_context = /usr/local/share/sphinx/rlp/rlp-context.xml
}


# realtime index example
#
# you can run INSERT, REPLACE, and DELETE on this index on the fly
# using MySQL protocol (see 'listen' directive below)
index rt
{
    # 'rt' index type must be specified to use RT index
    type            = rt

    # index files path and file name, without extension
    # mandatory, path must be writable, extensions will be auto-appended
    path            = /var/data/rt

    # RAM chunk size limit
    # RT index will keep at most this much data in RAM, then flush to disk
    # optional, default is 128M
    #
    # rt_mem_limit      = 512M

    # full-text field declaration
    # multi-value, mandatory
    rt_field        = title
    rt_field        = content

    # unsigned integer attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # declares an unsigned 32-bit attribute
    rt_attr_uint        = gid

    # RT indexes currently support the following attribute types:
    # uint, bigint, float, timestamp, string, mva, mva64, json
    #
    # rt_attr_bigint        = guid
    # rt_attr_float     = gpa
    # rt_attr_timestamp = ts_added
    # rt_attr_string        = author
    # rt_attr_multi     = tags
    # rt_attr_multi_64  = tags64
    # rt_attr_json      = extra_data
}

#############################################################################
## indexer settings
#############################################################################

indexer
{
    # memory limit, in bytes, kiloytes (16384K) or megabytes (256M)
    # optional, default is 128M, max is 2047M, recommended is 256M to 1024M
    mem_limit       = 128M

    # maximum IO calls per second (for I/O throttling)
    # optional, default is 0 (unlimited)
    #
    # max_iops      = 40


    # maximum IO call size, bytes (for I/O throttling)
    # optional, default is 0 (unlimited)
    #
    # max_iosize        = 1048576


    # maximum xmlpipe2 field length, bytes
    # optional, default is 2M
    #
    # max_xmlpipe2_field    = 4M


    # write buffer size, bytes
    # several (currently up to 4) buffers will be allocated
    # write buffers are allocated in addition to mem_limit
    # optional, default is 1M
    #
    # write_buffer      = 1M


    # maximum file field adaptive buffer size
    # optional, default is 8M, minimum is 1M
    #
    # max_file_field_buffer = 32M


    # how to handle IO errors in file fields
    # known values are 'ignore_field', 'skip_document', and 'fail_index'
    # optional, default is 'ignore_field'
    #
    # on_file_field_error = skip_document


    # lemmatizer cache size
    # improves the indexing time when the lemmatization is enabled
    # optional, default is 256K
    #
    # lemmatizer_cache = 512M
}

#############################################################################
## searchd settings
#############################################################################

searchd
{
    # [hostname:]port[:protocol], or /unix/socket/path to listen on
    # known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL)
    #
    # multi-value, multiple listen points are allowed
    # optional, defaults are 9312:sphinx and 9306:mysql41, as below
    #
    # listen            = 127.0.0.1
    # listen            = 192.168.0.1:9312
    # listen            = 9312
    # listen            = /var/run/searchd.sock
    listen          = 9312
    listen          = 9306:mysql41

    # log file, searchd run info is logged here
    # optional, default is 'searchd.log'
    log         = /var/log/searchd.log

    # query log file, all search queries are logged here
    # optional, default is empty (do not log queries)
    query_log       = /var/log/query.log

    # client read timeout, seconds
    # optional, default is 5
    read_timeout        = 5

    # request timeout, seconds
    # optional, default is 5 minutes
    client_timeout      = 300

    # maximum amount of children to fork (concurrent searches to run)
    # optional, default is 0 (unlimited)
    max_children        = 30

    # maximum amount of persistent connections from this master to each agent host
    # optional, but necessary if you use agent_persistent. It is reasonable to set the value
    # as max_children, or less on the agent's hosts.
    persistent_connections_limit    = 30

    # PID file, searchd process ID file name
    # mandatory
    pid_file        = /var/log/searchd.pid

    # seamless rotate, prevents rotate stalls if precaching huge datasets
    # optional, default is 1
    seamless_rotate     = 1

    # whether to forcibly preopen all indexes on startup
    # optional, default is 1 (preopen everything)
    preopen_indexes     = 1

    # whether to unlink .old index copies on succesful rotation.
    # optional, default is 1 (do unlink)
    unlink_old      = 1

    # attribute updates periodic flush timeout, seconds
    # updates will be automatically dumped to disk this frequently
    # optional, default is 0 (disable periodic flush)
    #
    # attr_flush_period = 900


    # MVA updates pool size
    # shared between all instances of searchd, disables attr flushes!
    # optional, default size is 1M
    mva_updates_pool    = 1M

    # max allowed network packet size
    # limits both query packets from clients, and responses from agents
    # optional, default size is 8M
    max_packet_size     = 8M

    # max allowed per-query filter count
    # optional, default is 256
    max_filters     = 256

    # max allowed per-filter values count
    # optional, default is 4096
    max_filter_values   = 4096


    # socket listen queue length
    # optional, default is 5
    #
    # listen_backlog        = 5


    # per-keyword read buffer size
    # optional, default is 256K
    #
    # read_buffer       = 256K


    # unhinted read size (currently used when reading hits)
    # optional, default is 32K
    #
    # read_unhinted     = 32K


    # max allowed per-batch query count (aka multi-query count)
    # optional, default is 32
    max_batch_queries   = 32


    # max common subtree document cache size, per-query
    # optional, default is 0 (disable subtree optimization)
    #
    # subtree_docs_cache    = 4M


    # max common subtree hit cache size, per-query
    # optional, default is 0 (disable subtree optimization)
    #
    # subtree_hits_cache    = 8M


    max_matches = 50
    # multi-processing mode (MPM)
    # known values are none, fork, prefork, and threads
    # threads is required for RT backend to work
    # optional, default is threads
    workers         = threads # for RT to work


    # max threads to create for searching local parts of a distributed index
    # optional, default is 0, which means disable multi-threaded searching
    # should work with all MPMs (ie. does NOT require workers=threads)
    #
    # dist_threads      = 4


    # binlog files path; use empty string to disable binlog
    # optional, default is build-time configured data directory
    #
    # binlog_path       = # disable logging
    # binlog_path       = /var/data # binlog.001 etc will be created there


    # binlog flush/sync mode
    # 0 means flush and sync every second
    # 1 means flush and sync every transaction
    # 2 means flush every transaction, sync every second
    # optional, default is 2
    #
    # binlog_flush      = 2


    # binlog per-file size limit
    # optional, default is 128M, 0 means no limit
    #
    # binlog_max_log_size   = 256M


    # per-thread stack size, only affects workers=threads mode
    # optional, default is 64K
    #
    # thread_stack          = 128K


    # per-keyword expansion limit (for dict=keywords prefix searches)
    # optional, default is 0 (no limit)
    #
    # expansion_limit       = 1000


    # RT RAM chunks flush period
    # optional, default is 0 (no periodic flush)
    #
    # rt_flush_period       = 900


    # query log file format
    # optional, known values are plain and sphinxql, default is plain
    #
    # query_log_format      = sphinxql


    # version string returned to MySQL network protocol clients
    # optional, default is empty (use Sphinx version)
    #
    # mysql_version_string  = 5.0.37


    # default server-wide collation
    # optional, default is libc_ci
    #
    # collation_server      = utf8_general_ci


    # server-wide locale for libc based collations
    # optional, default is C
    #
    # collation_libc_locale = ru_RU.UTF-8


    # threaded server watchdog (only used in workers=threads mode)
    # optional, values are 0 and 1, default is 1 (watchdog on)
    #
    # watchdog              = 1


    # costs for max_predicted_time model, in (imaginary) nanoseconds
    # optional, default is "doc=64, hit=48, skip=2048, match=64"
    #
    # predicted_time_costs  = doc=64, hit=48, skip=2048, match=64


    # current SphinxQL state (uservars etc) serialization path
    # optional, default is none (do not serialize SphinxQL state)
    #
    # sphinxql_state            = sphinxvars.sql


    # maximum RT merge thread IO calls per second, and per-call IO size
    # useful for throttling (the background) OPTIMIZE INDEX impact
    # optional, default is 0 (unlimited)
    #
    # rt_merge_iops         = 40
    # rt_merge_maxiosize        = 1M


    # interval between agent mirror pings, in milliseconds
    # 0 means disable pings
    # optional, default is 1000
    #
    # ha_ping_interval      = 0


    # agent mirror statistics window size, in seconds
    # stats older than the window size (karma) are retired
    # that is, they will not affect master choice of agents in any way
    # optional, default is 60 seconds
    #
    # ha_period_karma           = 60


    # delay between preforked children restarts on rotation, in milliseconds
    # optional, default is 0 (no delay)
    #
    # prefork_rotation_throttle = 100


    # a prefix to prepend to the local file names when creating snippets
    # with load_files and/or load_files_scatter options
    # optional, default is empty
    #
    # snippets_file_prefix      = /mnt/common/server1/
}

#############################################################################
## common settings
#############################################################################

common
{

    # lemmatizer dictionaries base path
    # optional, defaut is /usr/local/share (see ./configure --datadir)
    #
    # lemmatizer_base = /usr/local/share/sphinx/dicts


    # how to handle syntax errors in JSON attributes
    # known values are 'ignore_attr' and 'fail_index'
    # optional, default is 'ignore_attr'
    #
    # on_json_attr_error = fail_index


    # whether to auto-convert numeric values from strings in JSON attributes
    # with auto-conversion, string value with actually numeric data
    # (as in {"key":"12345"}) gets stored as a number, rather than string
    # optional, allowed values are 0 and 1, default is 0 (do not convert)
    #
    # json_autoconv_numbers = 1


    # whether and how to auto-convert key names in JSON attributes
    # known value is 'lowercase'
    # optional, default is unspecified (do nothing)
    #
    # json_autoconv_keynames = lowercase


    # path to RLP root directory
    # optional, defaut is /usr/local/share (see ./configure --datadir)
    #
    # rlp_root = /usr/local/share/sphinx/rlp


    # path to RLP environment file
    # optional, defaut is /usr/local/share/rlp-environment.xml (see ./configure --datadir)
    #
    # rlp_environment = /usr/local/share/sphinx/rlp/rlp/etc/rlp-environment.xml


    # maximum total size of documents batched before processing them by the RLP
    # optional, default is 51200
    #
    # rlp_max_batch_size = 100k


    # maximum number of documents batched before processing them by the RLP
    # optional, default is 50
    #
    # rlp_max_batch_docs = 100


    # trusted plugin directory
    # optional, default is empty (disable UDFs)
    #
    # plugin_dir            = /usr/local/sphinx/lib

}

# --eof--
 类似资料: