Contenido Project

Line #	Revision	Author
1	603	ahitrov	#
2			# Sphinx configuration file sample
3			#
4			# WARNING! While this sample file mentions all available options,
5			# it contains (very) short helper descriptions only. Please refer to
6			# doc/sphinx.html for details.
7			#
8
9			#############################################################################
10			## data source definition
11			#############################################################################
12
13			source zvukiru
14			{
15			# data source type. mandatory, no default value
16			# known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
17			type = pgsql
18
19			#####################################################################
20			## SQL settings (for 'mysql' and 'pgsql' types)
21			#####################################################################
22
23			# some straightforward parameters for SQL source types
24			sql_host = localhost
25			sql_user = zvuki
26			sql_pass = sarUchOov
27			sql_db = zvukirutf
28			sql_port = 5432 # optional, default is 3306 for mysql and 5432 for pgsql
29
30			# UNIX socket name
31			# optional, default is empty (reuse client library defaults)
32			# usually '/var/lib/mysql/mysql.sock' on Linux
33			# usually '/tmp/mysql.sock' on FreeBSD
34			#
35			# sql_sock = /tmp/mysql.sock
36
37
38			# MySQL specific client connection flags
39			# optional, default is 0
40			#
41			# mysql_connect_flags = 32 # enable compression
42
43			# MySQL specific SSL certificate settings
44			# optional, defaults are empty
45			#
46			# mysql_ssl_cert = /etc/ssl/client-cert.pem
47			# mysql_ssl_key = /etc/ssl/client-key.pem
48			# mysql_ssl_ca = /etc/ssl/cacert.pem
49
50			# MS SQL specific Windows authentication mode flag
51			# MUST be in sync with charset_type index-level setting
52			# optional, default is 0
53			#
54			# mssql_winauth = 1 # use currently logged on user credentials
55
56
57			# ODBC specific DSN (data source name)
58			# mandatory for odbc source type, no default value
59			#
60			# odbc_dsn = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (.txt; .csv)};
61			# sql_query = SELECT id, data FROM documents.csv
62
63
64			# ODBC and MS SQL specific, per-column buffer sizes
65			# optional, default is auto-detect
66			#
67			# sql_column_buffers = content=12M, comments=1M
68
69
70			# pre-query, executed before the main fetch query
71			# multi-value, optional, default is empty list of queries
72			#
73			# sql_query_pre = SET NAMES utf8
74			# sql_query_pre = SET SESSION query_cache_type=OFF
75
76
77			# main document fetch query
78			# mandatory, integer document ID field MUST be the first selected column
79			sql_query = \
80			SELECT id, object_id, object_class, extract(epoch from date_trunc('seconds', mtime)) AS last_edited, is_deleted, name as title, search as content \
81			FROM search
82
83
84			# joined/payload field fetch query
85			# joined fields let you avoid (slow) JOIN and GROUP_CONCAT
86			# payload fields let you attach custom per-keyword values (eg. for ranking)
87			#
88			# syntax is FIELD-NAME 'from' ( 'query' | 'payload-query' ); QUERY
89			# joined field QUERY should return 2 columns (docid, text)
90			# payload field QUERY should return 3 columns (docid, keyword, weight)
91			#
92			# REQUIRES that query results are in ascending document ID order!
93			# multi-value, optional, default is empty list of queries
94			#
95			# sql_joined_field = tags from query; SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC
96			# sql_joined_field = wtags from payload-query; SELECT docid, tag, tagweight FROM tags ORDER BY docid ASC
97
98
99			# file based field declaration
100			#
101			# content of this field is treated as a file name
102			# and the file gets loaded and indexed in place of a field
103			#
104			# max file size is limited by max_file_field_buffer indexer setting
105			# file IO errors are non-fatal and get reported as warnings
106			#
107			# sql_file_field = content_file_path
108
109
110			# range query setup, query that must return min and max ID values
111			# optional, default is empty
112			#
113			# sql_query will need to reference $start and $end boundaries
114			# if using ranged query:
115			#
116			# sql_query = \
117			# SELECT doc.id, doc.id AS group, doc.title, doc.data \
118			# FROM documents doc \
119			# WHERE id>=$start AND id<=$end
120			#
121			# sql_query_range = SELECT MIN(id),MAX(id) FROM documents
122
123
124			# range query step
125			# optional, default is 1024
126			#
127			sql_range_step = 1000
128
129
130			# unsigned integer attribute declaration
131			# multi-value (an arbitrary number of attributes is allowed), optional
132			# optional bit size can be specified, default is 32
133			#
134			# sql_attr_uint = author_id
135			# sql_attr_uint = forum_id:9 # 9 bits for forum_id
136			sql_attr_uint = object_id
137
138			# boolean attribute declaration
139			# multi-value (an arbitrary number of attributes is allowed), optional
140			# equivalent to sql_attr_uint with 1-bit size
141			#
142			sql_attr_bool = is_deleted
143
144
145			# bigint attribute declaration
146			# multi-value (an arbitrary number of attributes is allowed), optional
147			# declares a signed (unlike uint!) 64-bit attribute
148			#
149			# sql_attr_bigint = my_bigint_id
150
151
152			# UNIX timestamp attribute declaration
153			# multi-value (an arbitrary number of attributes is allowed), optional
154			# similar to integer, but can also be used in date functions
155			#
156			# sql_attr_timestamp = posted_ts
157			sql_attr_timestamp = last_edited
158			# sql_attr_timestamp = date_added
159
160
161			# floating point attribute declaration
162			# multi-value (an arbitrary number of attributes is allowed), optional
163			# values are stored in single precision, 32-bit IEEE 754 format
164			#
165			# sql_attr_float = lat_radians
166			# sql_attr_float = long_radians
167
168
169			# multi-valued attribute (MVA) attribute declaration
170			# multi-value (an arbitrary number of attributes is allowed), optional
171			# MVA values are variable length lists of unsigned 32-bit integers
172			#
173			# syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]
174			# ATTR-TYPE is 'uint' or 'timestamp'
175			# SOURCE-TYPE is 'field', 'query', or 'ranged-query'
176			# QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs
177			# RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
178			#
179			# sql_attr_multi = uint tag from query; SELECT docid, tagid FROM tags
180			# sql_attr_multi = uint tag from ranged-query; \
181			# SELECT docid, tagid FROM tags WHERE id>=$start AND id<=$end; \
182			# SELECT MIN(docid), MAX(docid) FROM tags
183
184
185			# string attribute declaration
186			# multi-value (an arbitrary number of these is allowed), optional
187			# lets you store and retrieve strings
188			#
189			sql_attr_string = object_class
190
191
192			# JSON attribute declaration
193			# multi-value (an arbitrary number of these is allowed), optional
194			# lets you store a JSON document as an (in-memory) attribute for later use
195			#
196			# sql_attr_json = properties
197
198
199			# combined field plus attribute declaration (from a single column)
200			# stores column as an attribute, but also indexes it as a full-text field
201			#
202			# sql_field_string = author
203
204
205			# post-query, executed on sql_query completion
206			# optional, default is empty
207			#
208			# sql_query_post =
209
210
211			# post-index-query, executed on successful indexing completion
212			# optional, default is empty
213			# $maxid expands to max document ID actually fetched from DB
214			#
215			# sql_query_post_index = REPLACE INTO counters ( id, val ) \
216			# VALUES ( 'max_indexed_id', $maxid )
217
218
219			# ranged query throttling, in milliseconds
220			# optional, default is 0 which means no delay
221			# enforces given delay before each query step
222			sql_ranged_throttle = 0
223
224
225			# kill-list query, fetches the document IDs for kill-list
226			# k-list will suppress matches from preceding indexes in the same query
227			# optional, default is empty
228			#
229			# sql_query_killlist = SELECT id FROM documents WHERE edited>=@last_reindex
230
231
232			# columns to unpack on indexer side when indexing
233			# multi-value, optional, default is empty list
234			#
235			# unpack_zlib = zlib_column
236			# unpack_mysqlcompress = compressed_column
237			# unpack_mysqlcompress = compressed_column_2
238
239
240			# maximum unpacked length allowed in MySQL COMPRESS() unpacker
241			# optional, default is 16M
242			#
243			# unpack_mysqlcompress_maxsize = 16M
244
245
246			# hook command to run when SQL connection succeeds
247			# optional, default value is empty (do nothing)
248			#
249			# hook_connect = bash sql_connect.sh
250
251
252			# hook command to run after (any) SQL range query
253			# it may print out "minid maxid" (w/o quotes) to override the range
254			# optional, default value is empty (do nothing)
255			#
256			# hook_query_range = bash sql_query_range.sh
257
258
259			# hook command to run on successful indexing completion
260			# $maxid expands to max document ID actually fetched from DB
261			# optional, default value is empty (do nothing)
262			#
263			# hook_post_index = bash sql_post_index.sh $maxid
264
265			#####################################################################
266			## xmlpipe2 settings
267			#####################################################################
268
269			# type = xmlpipe
270
271			# shell command to invoke xmlpipe stream producer
272			# mandatory
273			#
274			# xmlpipe_command = cat /var/db/sphinxsearch/test.xml
275
276			# xmlpipe2 field declaration
277			# multi-value, optional, default is empty
278			#
279			# xmlpipe_field = subject
280			# xmlpipe_field = content
281
282
283			# xmlpipe2 attribute declaration
284			# multi-value, optional, default is empty
285			# all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX
286			# examples:
287			#
288			# xmlpipe_attr_timestamp = published
289			# xmlpipe_attr_uint = author_id
290			# xmlpipe_attr_bool = is_enabled
291			# xmlpipe_attr_float = latitude
292			# xmlpipe_attr_bigint = guid
293			# xmlpipe_attr_multi = tags
294			# xmlpipe_attr_multi_64 = tags64
295			# xmlpipe_attr_string = title
296			# xmlpipe_attr_json = extra_data
297			# xmlpipe_field_string = content
298
299
300			# perform UTF-8 validation, and filter out incorrect codes
301			# avoids XML parser choking on non-UTF-8 documents
302			# optional, default is 0
303			#
304			# xmlpipe_fixup_utf8 = 1
305			}
306
307
308			# inherited source example
309			#
310			# all the parameters are copied from the parent source,
311			# and may then be overridden in this source definition
312			source zvukiruthrottled : zvukiru
313			{
314			sql_ranged_throttle = 100
315			}
316
317			#############################################################################
318			## index definition
319			#############################################################################
320
321			# local index example
322			#
323			# this is an index which is stored locally in the filesystem
324			#
325			# all indexing-time options (such as morphology and charsets)
326			# are configured per local index
327			index zvukiru
328			{
329			# index type
330			# optional, default is 'plain'
331			# known values are 'plain', 'distributed', and 'rt' (see samples below)
332			# type = plain
333
334			# document source(s) to index
335			# multi-value, mandatory
336			# document IDs must be globally unique across all sources
337			source = zvukiru
338
339			# index files path and file name, without extension
340			# mandatory, path must be writable, extensions will be auto-appended
341			path = /var/db/sphinxsearch/data/zvukiru
342
343			# document attribute values (docinfo) storage mode
344			# optional, default is 'extern'
345			# known values are 'none', 'extern' and 'inline'
346			docinfo = extern
347
348			# dictionary type, 'crc' or 'keywords'
349			# crc is faster to index when no substring/wildcards searches are needed
350			# crc with substrings might be faster to search but is much slower to index
351			# (because all substrings are pre-extracted as individual keywords)
352			# keywords is much faster to index with substrings, and index is much (3-10x) smaller
353			# keywords supports wildcards, crc does not, and never will
354			# optional, default is 'keywords'
355			dict = keywords
356
357			# memory locking for cached data (.spa and .spi), to prevent swapping
358			# optional, default is 0 (do not mlock)
359			# requires searchd to be run from root
360			mlock = 0
361
362			# a list of morphology preprocessors to apply
363			# optional, default is empty
364			#
365			# builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
366			# 'soundex', and 'metaphone'; additional preprocessors available from
367			# libstemmer are 'libstemmer_XXX', where XXX is algorithm code
368			# (see libstemmer_c/libstemmer/modules.txt)
369			#
370			morphology = stem_en, stem_ru, soundex
371			# morphology = libstemmer_german
372			# morphology = libstemmer_sv
373			# morphology = none
374
375			# minimum word length at which to enable stemming
376			# optional, default is 1 (stem everything)
377			#
378			min_stemming_len = 2
379
380
381			# stopword files list (space separated)
382			# optional, default is empty
383			# contents are plain text, charset_table and stemming are both applied
384			#
385			# stopwords = /var/db/sphinxsearch/data/stopwords.txt
386
387
388			# wordforms file, in "mapfrom > mapto" plain text format
389			# optional, default is empty
390			#
391			# wordforms = /var/db/sphinxsearch/data/wordforms.txt
392
393
394			# tokenizing exceptions file
395			# optional, default is empty
396			#
397			# plain text, case sensitive, space insensitive in map-from part
398			# one "Map Several Words => ToASingleOne" entry per line
399			#
400			# exceptions = /var/db/sphinxsearch/data/exceptions.txt
401
402
403			# embedded file size limit
404			# optional, default is 16K
405			#
406			# exceptions, wordforms, and stopwords files smaller than this limit
407			# are stored in the index; otherwise, their paths and sizes are stored
408			#
409			# embedded_limit = 16K
410
411			# minimum indexed word length
412			# default is 1 (index everything)
413			min_word_len = 2
414
415
416			# ignored characters list
417			# optional, default value is empty
418			#
419			# ignore_chars = U+00AD
420
421
422			# minimum word prefix length to index
423			# optional, default is 0 (do not index prefixes)
424			#
425			# min_prefix_len = 0
426
427
428			# minimum word infix length to index
429			# optional, default is 0 (do not index infixes)
430			#
431			# min_infix_len = 0
432
433
434			# maximum substring (prefix or infix) length to index
435			# optional, default is 0 (do not limit substring length)
436			#
437			# max_substring_len = 8
438
439
440			# list of fields to limit prefix/infix indexing to
441			# optional, default value is empty (index all fields in prefix/infix mode)
442			#
443			# prefix_fields = filename
444			# infix_fields = url, domain
445
446
447			# expand keywords with exact forms and/or stars when searching fit indexes
448			# search-time only, does not affect indexing, can be 0 or 1
449			# optional, default is 0 (do not expand keywords)
450			#
451			# expand_keywords = 1
452
453
454			# n-gram length to index, for CJK indexing
455			# only supports 0 and 1 for now, other lengths to be implemented
456			# optional, default is 0 (disable n-grams)
457			#
458			# ngram_len = 1
459
460
461			# n-gram characters list, for CJK indexing
462			# optional, default is empty
463			#
464			# ngram_chars = U+3000..U+2FA1F
465
466
467			# phrase boundary characters list
468			# optional, default is empty
469			#
470			# phrase_boundary = ., ?, !, U+2026 # horizontal ellipsis
471
472
473			# phrase boundary word position increment
474			# optional, default is 0
475			#
476			# phrase_boundary_step = 100
477
478
479			# blended characters list
480			# blended chars are indexed both as separators and valid characters
481			# for instance, AT&T will result in 3 tokens ("at", "t", and "at&t")
482			# optional, default is empty
483			#
484			# blend_chars = +, &, U+23
485
486
487			# blended token indexing mode
488			# a comma separated list of blended token indexing variants
489			# known variants are trim_none, trim_head, trim_tail, trim_both, skip_pure
490			# optional, default is trim_none
491			#
492			# blend_mode = trim_tail, skip_pure
493
494
495			# whether to strip HTML tags from incoming documents
496			# known values are 0 (do not strip) and 1 (do strip)
497			# optional, default is 0
498			html_strip = 0
499
500			# what HTML attributes to index if stripping HTML
501			# optional, default is empty (do not index anything)
502			#
503			# html_index_attrs = img=alt,title; a=title;
504
505
506			# what HTML elements contents to strip
507			# optional, default is empty (do not strip element contents)
508			#
509			# html_remove_elements = style, script
510
511
512			# whether to preopen index data files on startup
513			# optional, default is 0 (do not preopen), searchd-only
514			#
515			# preopen = 1
516
517
518			# whether to enable in-place inversion (2x less disk, 90-95% speed)
519			# optional, default is 0 (use separate temporary files), indexer-only
520			#
521			# inplace_enable = 1
522
523
524			# in-place fine-tuning options
525			# optional, defaults are listed below
526			#
527			# inplace_hit_gap = 0 # preallocated hitlist gap size
528			# inplace_docinfo_gap = 0 # preallocated docinfo gap size
529			# inplace_reloc_factor = 0.1 # relocation buffer size within arena
530			# inplace_write_factor = 0.1 # write buffer size within arena
531
532
533			# whether to index original keywords along with stemmed versions
534			# enables "=exactform" operator to work
535			# optional, default is 0
536			#
537			# index_exact_words = 1
538
539
540			# position increment on overshort (less than min_word_len) words
541			# optional, allowed values are 0 and 1, default is 1
542			#
543			# overshort_step = 1
544
545
546			# position increment on stopword
547			# optional, allowed values are 0 and 1, default is 1
548			#
549			# stopword_step = 1
550
551
552			# hitless words list
553			# positions for these keywords will not be stored in the index
554			# optional, allowed values are 'all', or a list file name
555			#
556			# hitless_words = all
557			# hitless_words = hitless.txt
558
559
560			# detect and index sentence and paragraph boundaries
561			# required for the SENTENCE and PARAGRAPH operators to work
562			# optional, allowed values are 0 and 1, default is 0
563			#
564			# index_sp = 1
565
566
567			# index zones, delimited by HTML/XML tags
568			# a comma separated list of tags and wildcards
569			# required for the ZONE operator to work
570			# optional, default is empty string (do not index zones)
571			#
572			# index_zones = title, h*, th
573
574
575			# index per-document and average per-index field lengths, in tokens
576			# required for the BM25A(), BM25F() in expression ranker
577			# optional, default is 0 (do not index field lengths)
578			#
579			# index_field_lengths = 1
580
581
582			# regular expressions (regexps) to filter the fields and queries with
583			# gets applied to data source fields when indexing
584			# gets applied to search queries when searching
585			# multi-value, optional, default is empty list of regexps
586			#
587			# regexp_filter = \b(\d+)\" => \1inch
588			# regexp_filter = (blue|red) => color
589
590
591			# list of the words considered frequent with respect to bigram indexing
592			# optional, default is empty
593			#
594			# bigram_freq_words = the, a, i, you, my
595
596
597			# bigram indexing mode
598			# known values are none, all, first_freq, both_freq
599			# optional, default is none (do not index bigrams)
600			#
601			# bigram_index = both_freq
602
603
604			# snippet document file name prefix
605			# prepended to file names when generating snippets using load_files option
606			# WARNING, this is a prefix (not a path), trailing slash matters!
607			# optional, default is empty
608			#
609			# snippets_file_prefix = /mnt/mydocs/server1
610
611
612			# whether to apply stopwords before or after stemming
613			# optional, default is 0 (apply stopwords after stemming)
614			#
615			# stopwords_unstemmed = 0
616
617
618			# path to a global (cluster-wide) keyword IDFs file
619			# optional, default is empty (use local IDFs)
620			#
621			# global_idf = /usr/local/sphinx/var/global.idf
622			}
623
624
625			# inherited index example
626			#
627			# all the parameters are copied from the parent index,
628			# and may then be overridden in this index definition
629			index zvukirustemmed : zvukiru
630			{
631			path = /var/db/sphinxsearch/data/zvukirustemmed
632			morphology = stem_en
633			}
634
635
636			# distributed index example
637			#
638			# this is a virtual index which can NOT be directly indexed,
639			# and only contains references to other local and/or remote indexes
640			index dist1
641			{
642			# 'distributed' index type MUST be specified
643			type = distributed
644
645			# local index to be searched
646			# there can be many local indexes configured
647			local = test1
648			local = test1stemmed
649
650			# remote agent
651			# multiple remote agents may be specified
652			# syntax for TCP connections is 'hostname:port:index1,[index2[,...]]'
653			# syntax for local UNIX connections is '/path/to/socket:index1,[index2[,...]]'
654			agent = localhost:9313:remote1
655			agent = localhost:9314:remote2,remote3
656			# agent = /var/run/searchd.sock:remote4
657
658			# remote agent mirrors groups, aka mirrors, aka HA agents
659			# defines 2 or more interchangeable mirrors for a given index part
660			#
661			# agent = server3:9312 | server4:9312 :indexchunk2
662			# agent = server3:9312:chunk2server3 | server4:9312:chunk2server4
663			# agent = server3:chunk2server3 | server4:chunk2server4
664			# agent = server21|server22|server23:chunk2
665
666
667			# blackhole remote agent, for debugging/testing
668			# network errors and search results will be ignored
669			#
670			# agent_blackhole = testbox:9312:testindex1,testindex2
671
672
673			# persistently connected remote agent
674			# reduces connect() pressure, requires that searchd runs with workers = threads
675			#
676			# agent_persistent = testbox:9312:testindex1,testindex2
677
678
679			# remote agent connection timeout, milliseconds
680			# optional, default is 1000 ms, ie. 1 sec
681			agent_connect_timeout = 1000
682
683			# remote agent query timeout, milliseconds
684			# optional, default is 3000 ms, ie. 3 sec
685			agent_query_timeout = 3000
686
687			# HA mirror agent strategy
688			# optional, default is random (pick a random mirror)
689			# known values are random, nodeads, noerrors, roundrobin, nodeadstm, noerrorstm
690			#
691			# ha_strategy = nodeads
692
693			# path to RLP context file
694			# optional, default is empty
695			#
696			# rlp_context = /usr/local/share/sphinx/rlp/rlp-context.xml
697			}
698
699
700			# realtime index example
701			#
702			# you can run INSERT, REPLACE, and DELETE on this index on the fly
703			# using MySQL protocol (see 'listen' directive below)
704			index rt
705			{
706			# 'rt' index type must be specified to use RT index
707			type = rt
708
709			# index files path and file name, without extension
710			# mandatory, path must be writable, extensions will be auto-appended
711			path = /var/db/sphinxsearch/data/rt
712
713			# RAM chunk size limit
714			# RT index will keep at most this much data in RAM, then flush to disk
715			# optional, default is 128M
716			#
717			# rt_mem_limit = 512M
718
719			# full-text field declaration
720			# multi-value, mandatory
721			rt_field = title
722			rt_field = content
723
724			# unsigned integer attribute declaration
725			# multi-value (an arbitrary number of attributes is allowed), optional
726			# declares an unsigned 32-bit attribute
727			rt_attr_uint = gid
728
729			# RT indexes currently support the following attribute types:
730			# uint, bigint, float, timestamp, string, mva, mva64, json
731			#
732			# rt_attr_bigint = guid
733			# rt_attr_float = gpa
734			# rt_attr_timestamp = ts_added
735			# rt_attr_string = author
736			# rt_attr_multi = tags
737			# rt_attr_multi_64 = tags64
738			# rt_attr_json = extra_data
739			}
740
741			#############################################################################
742			## indexer settings
743			#############################################################################
744
745			indexer
746			{
747			# memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
748			# optional, default is 128M, max is 2047M, recommended is 256M to 1024M
749			mem_limit = 128M
750
751			# maximum IO calls per second (for I/O throttling)
752			# optional, default is 0 (unlimited)
753			#
754			# max_iops = 40
755
756
757			# maximum IO call size, bytes (for I/O throttling)
758			# optional, default is 0 (unlimited)
759			#
760			# max_iosize = 1048576
761
762
763			# maximum xmlpipe2 field length, bytes
764			# optional, default is 2M
765			#
766			# max_xmlpipe2_field = 4M
767
768
769			# write buffer size, bytes
770			# several (currently up to 4) buffers will be allocated
771			# write buffers are allocated in addition to mem_limit
772			# optional, default is 1M
773			#
774			# write_buffer = 1M
775
776
777			# maximum file field adaptive buffer size
778			# optional, default is 8M, minimum is 1M
779			#
780			# max_file_field_buffer = 32M
781
782
783			# how to handle IO errors in file fields
784			# known values are 'ignore_field', 'skip_document', and 'fail_index'
785			# optional, default is 'ignore_field'
786			#
787			# on_file_field_error = skip_document
788
789
790			# lemmatizer cache size
791			# improves the indexing time when the lemmatization is enabled
792			# optional, default is 256K
793			#
794			# lemmatizer_cache = 512M
795			}
796
797			#############################################################################
798			## searchd settings
799			#############################################################################
800
801			searchd
802			{
803			# [hostname:]port[:protocol], or /unix/socket/path to listen on
804			# known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL)
805			#
806			# multi-value, multiple listen points are allowed
807			# optional, defaults are 9312:sphinx and 9306:mysql41, as below
808			#
809			# listen = 127.0.0.1
810			# listen = 192.168.0.1:9312
811			# listen = 9312
812			# listen = /var/run/searchd.sock
813			listen = 9312
814			listen = 9306:mysql41
815
816			# log file, searchd run info is logged here
817			# optional, default is 'searchd.log'
818			log = /var/log/sphinxsearch/searchd.log
819
820			# query log file, all search queries are logged here
821			# optional, default is empty (do not log queries)
822			query_log = /var/log/sphinxsearch/sphinx-query.log
823
824			# client read timeout, seconds
825			# optional, default is 5
826			read_timeout = 5
827
828			# request timeout, seconds
829			# optional, default is 5 minutes
830			client_timeout = 300
831
832			# maximum amount of children to fork (concurrent searches to run)
833			# optional, default is 0 (unlimited)
834			max_children = 30
835
836			# maximum amount of persistent connections from this master to each agent host
837			# optional, but necessary if you use agent_persistent. It is reasonable to set the value
838			# as max_children, or less on the agent's hosts.
839			persistent_connections_limit = 30
840
841			# PID file, searchd process ID file name
842			# mandatory
843			pid_file = /var/run/sphinxsearch/searchd.pid
844
845			# seamless rotate, prevents rotate stalls if precaching huge datasets
846			# optional, default is 1
847			seamless_rotate = 1
848
849			# whether to forcibly preopen all indexes on startup
850			# optional, default is 1 (preopen everything)
851			preopen_indexes = 1
852
853			# whether to unlink .old index copies on successful rotation.
854			# optional, default is 1 (do unlink)
855			unlink_old = 1
856
857			# attribute updates periodic flush timeout, seconds
858			# updates will be automatically dumped to disk this frequently
859			# optional, default is 0 (disable periodic flush)
860			#
861			# attr_flush_period = 900
862
863
864			# MVA updates pool size
865			# shared between all instances of searchd, disables attr flushes!
866			# optional, default size is 1M
867			mva_updates_pool = 1M
868
869			# max allowed network packet size
870			# limits both query packets from clients, and responses from agents
871			# optional, default size is 8M
872			max_packet_size = 8M
873
874			# max allowed per-query filter count
875			# optional, default is 256
876			max_filters = 256
877
878			# max allowed per-filter values count
879			# optional, default is 4096
880			max_filter_values = 4096
881
882
883			# socket listen queue length
884			# optional, default is 5
885			#
886			# listen_backlog = 5
887
888
889			# per-keyword read buffer size
890			# optional, default is 256K
891			#
892			# read_buffer = 256K
893
894
895			# unhinted read size (currently used when reading hits)
896			# optional, default is 32K
897			#
898			# read_unhinted = 32K
899
900
901			# max allowed per-batch query count (aka multi-query count)
902			# optional, default is 32
903			max_batch_queries = 32
904
905
906			# max common subtree document cache size, per-query
907			# optional, default is 0 (disable subtree optimization)
908			#
909			# subtree_docs_cache = 4M
910
911
912			# max common subtree hit cache size, per-query
913			# optional, default is 0 (disable subtree optimization)
914			#
915			# subtree_hits_cache = 8M
916
917
918			# multi-processing mode (MPM)
919			# known values are none, fork, prefork, and threads
920			# threads is required for RT backend to work
921			# optional, default is threads
922			workers = threads # for RT to work
923
924
925			# max threads to create for searching local parts of a distributed index
926			# optional, default is 0, which means disable multi-threaded searching
927			# should work with all MPMs (ie. does NOT require workers=threads)
928			#
929			# dist_threads = 4
930
931
932			# binlog files path; use empty string to disable binlog
933			# optional, default is build-time configured data directory
934			#
935			# binlog_path = # disable logging
936			# binlog_path = /var/db/sphinxsearch/data # binlog.001 etc will be created there
937
938
939			# binlog flush/sync mode
940			# 0 means flush and sync every second
941			# 1 means flush and sync every transaction
942			# 2 means flush every transaction, sync every second
943			# optional, default is 2
944			#
945			# binlog_flush = 2
946
947
948			# binlog per-file size limit
949			# optional, default is 128M, 0 means no limit
950			#
951			# binlog_max_log_size = 256M
952
953
954			# per-thread stack size, only affects workers=threads mode
955			# optional, default is 64K
956			#
957			# thread_stack = 128K
958
959
960			# per-keyword expansion limit (for dict=keywords prefix searches)
961			# optional, default is 0 (no limit)
962			#
963			# expansion_limit = 1000
964
965
966			# RT RAM chunks flush period
967			# optional, default is 0 (no periodic flush)
968			#
969			# rt_flush_period = 900
970
971
972			# query log file format
973			# optional, known values are plain and sphinxql, default is plain
974			#
975			# query_log_format = sphinxql
976
977
978			# version string returned to MySQL network protocol clients
979			# optional, default is empty (use Sphinx version)
980			#
981			# mysql_version_string = 5.0.37
982
983
984			# default server-wide collation
985			# optional, default is libc_ci
986			#
987			# collation_server = utf8_general_ci
988
989
990			# server-wide locale for libc based collations
991			# optional, default is C
992			#
993			# collation_libc_locale = ru_RU.UTF-8
994
995
996			# threaded server watchdog (only used in workers=threads mode)
997			# optional, values are 0 and 1, default is 1 (watchdog on)
998			#
999			# watchdog = 1
1000
1001
1002			# costs for max_predicted_time model, in (imaginary) nanoseconds
1003			# optional, default is "doc=64, hit=48, skip=2048, match=64"
1004			#
1005			# predicted_time_costs = doc=64, hit=48, skip=2048, match=64
1006
1007
1008			# current SphinxQL state (uservars etc) serialization path
1009			# optional, default is none (do not serialize SphinxQL state)
1010			#
1011			# sphinxql_state = sphinxvars.sql
1012
1013
1014			# maximum RT merge thread IO calls per second, and per-call IO size
1015			# useful for throttling (the background) OPTIMIZE INDEX impact
1016			# optional, default is 0 (unlimited)
1017			#
1018			# rt_merge_iops = 40
1019			# rt_merge_maxiosize = 1M
1020
1021
1022			# interval between agent mirror pings, in milliseconds
1023			# 0 means disable pings
1024			# optional, default is 1000
1025			#
1026			# ha_ping_interval = 0
1027
1028
1029			# agent mirror statistics window size, in seconds
1030			# stats older than the window size (karma) are retired
1031			# that is, they will not affect master choice of agents in any way
1032			# optional, default is 60 seconds
1033			#
1034			# ha_period_karma = 60
1035
1036
1037			# delay between preforked children restarts on rotation, in milliseconds
1038			# optional, default is 0 (no delay)
1039			#
1040			# prefork_rotation_throttle = 100
1041
1042
1043			# a prefix to prepend to the local file names when creating snippets
1044			# with load_files and/or load_files_scatter options
1045			# optional, default is empty
1046			#
1047			# snippets_file_prefix = /mnt/common/server1/
1048			}
1049
1050			#############################################################################
1051			## common settings
1052			#############################################################################
1053
1054			common
1055			{
1056
1057			# lemmatizer dictionaries base path
1058			# optional, default is /usr/local/share (see ./configure --datadir)
1059			#
1060			# lemmatizer_base = /usr/local/share/sphinx/dicts
1061
1062
1063			# how to handle syntax errors in JSON attributes
1064			# known values are 'ignore_attr' and 'fail_index'
1065			# optional, default is 'ignore_attr'
1066			#
1067			# on_json_attr_error = fail_index
1068
1069
1070			# whether to auto-convert numeric values from strings in JSON attributes
1071			# with auto-conversion, string value with actually numeric data
1072			# (as in {"key":"12345"}) gets stored as a number, rather than string
1073			# optional, allowed values are 0 and 1, default is 0 (do not convert)
1074			#
1075			# json_autoconv_numbers = 1
1076
1077
1078			# whether and how to auto-convert key names in JSON attributes
1079			# known value is 'lowercase'
1080			# optional, default is unspecified (do nothing)
1081			#
1082			# json_autoconv_keynames = lowercase
1083
1084
1085			# path to RLP root directory
1086			# optional, default is /usr/local/share (see ./configure --datadir)
1087			#
1088			# rlp_root = /usr/local/share/sphinx/rlp
1089
1090
1091			# path to RLP environment file
1092			# optional, default is /usr/local/share/rlp-environment.xml (see ./configure --datadir)
1093			#
1094			# rlp_environment = /usr/local/share/sphinx/rlp/rlp/etc/rlp-environment.xml
1095
1096
1097			# maximum total size of documents batched before processing them by the RLP
1098			# optional, default is 51200
1099			#
1100			# rlp_max_batch_size = 100k
1101
1102
1103			# maximum number of documents batched before processing them by the RLP
1104			# optional, default is 50
1105			#
1106			# rlp_max_batch_docs = 100
1107
1108
1109			# trusted plugin directory
1110			# optional, default is empty (disable UDFs)
1111			#
1112			# plugin_dir = /usr/local/sphinx/lib
1113
1114			}
1115
1116			# --eof--

Repository List / Contenido / utf8 / plugins / sphinx / etc / sphinx.conf @ r603

Небольшая справка по веткам