Line # Revision Author
1 603 ahitrov #
2 # Sphinx configuration file sample
3 #
4 # WARNING! While this sample file mentions all available options,
5 # it contains (very) short helper descriptions only. Please refer to
6 # doc/sphinx.html for details.
7 #
8
9 #############################################################################
10 ## data source definition
11 #############################################################################
12
13 source zvukiru
14 {
15 # data source type. mandatory, no default value
16 # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
17 type = pgsql
18
19 #####################################################################
20 ## SQL settings (for 'mysql' and 'pgsql' types)
21 #####################################################################
22
23 # connection parameters for SQL source types (NOTE: sql_pass is plain text; restrict read access to this file)
24 sql_host = localhost
25 sql_user = zvuki
26 sql_pass = sarUchOov
27 sql_db = zvukirutf
28 sql_port = 5432 # optional, default is 3306 for mysql and 5432 for pgsql
29
30 # UNIX socket name
31 # optional, default is empty (reuse client library defaults)
32 # usually '/var/lib/mysql/mysql.sock' on Linux
33 # usually '/tmp/mysql.sock' on FreeBSD
34 #
35 # sql_sock = /tmp/mysql.sock
36
37
38 # MySQL specific client connection flags
39 # optional, default is 0
40 #
41 # mysql_connect_flags = 32 # enable compression
42
43 # MySQL specific SSL certificate settings
44 # optional, defaults are empty
45 #
46 # mysql_ssl_cert = /etc/ssl/client-cert.pem
47 # mysql_ssl_key = /etc/ssl/client-key.pem
48 # mysql_ssl_ca = /etc/ssl/cacert.pem
49
50 # MS SQL specific Windows authentication mode flag
51 # MUST be in sync with charset_type index-level setting
52 # optional, default is 0
53 #
54 # mssql_winauth = 1 # use currently logged on user credentials
55
56
57 # ODBC specific DSN (data source name)
58 # mandatory for odbc source type, no default value
59 #
60 # odbc_dsn = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)};
61 # sql_query = SELECT id, data FROM documents.csv
62
63
64 # ODBC and MS SQL specific, per-column buffer sizes
65 # optional, default is auto-detect
66 #
67 # sql_column_buffers = content=12M, comments=1M
68
69
70 # pre-query, executed before the main fetch query
71 # multi-value, optional, default is empty list of queries
72 #
73 # sql_query_pre = SET NAMES utf8
74 # sql_query_pre = SET SESSION query_cache_type=OFF
75
76
77 # main document fetch query; selected columns not declared as attributes (title, content) are indexed as full-text fields
78 # mandatory, integer document ID field MUST be the first selected column
79 sql_query = \
80 SELECT id, object_id, object_class, extract(epoch from date_trunc('seconds', mtime)) AS last_edited, is_deleted, name as title, search as content \
81 FROM search
82
83
84 # joined/payload field fetch query
85 # joined fields let you avoid (slow) JOIN and GROUP_CONCAT
86 # payload fields let you attach custom per-keyword values (eg. for ranking)
87 #
88 # syntax is FIELD-NAME 'from' ( 'query' | 'payload-query' ); QUERY
89 # joined field QUERY should return 2 columns (docid, text)
90 # payload field QUERY should return 3 columns (docid, keyword, weight)
91 #
92 # REQUIRES that query results are in ascending document ID order!
93 # multi-value, optional, default is empty list of queries
94 #
95 # sql_joined_field = tags from query; SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC
96 # sql_joined_field = wtags from payload-query; SELECT docid, tag, tagweight FROM tags ORDER BY docid ASC
97
98
99 # file based field declaration
100 #
101 # content of this field is treated as a file name
102 # and the file gets loaded and indexed in place of a field
103 #
104 # max file size is limited by max_file_field_buffer indexer setting
105 # file IO errors are non-fatal and get reported as warnings
106 #
107 # sql_file_field = content_file_path
108
109
110 # range query setup, query that must return min and max ID values
111 # optional, default is empty
112 #
113 # sql_query will need to reference $start and $end boundaries
114 # if using ranged query:
115 #
116 # sql_query = \
117 # SELECT doc.id, doc.id AS group, doc.title, doc.data \
118 # FROM documents doc \
119 # WHERE id>=$start AND id<=$end
120 #
121 # sql_query_range = SELECT MIN(id),MAX(id) FROM documents
122
123
124 # range query step
125 # optional, default is 1024
126 # only used by ranged queries, i.e. together with sql_query_range (left commented out in this source)
127 sql_range_step = 1000
128
129
130 # unsigned integer attribute declaration
131 # multi-value (an arbitrary number of attributes is allowed), optional
132 # optional bit size can be specified, default is 32
133 #
134 # sql_attr_uint = author_id
135 # sql_attr_uint = forum_id:9 # 9 bits for forum_id
136 sql_attr_uint = object_id
137
138 # boolean attribute declaration
139 # multi-value (an arbitrary number of attributes is allowed), optional
140 # equivalent to sql_attr_uint with 1-bit size
141 #
142 sql_attr_bool = is_deleted
143
144
145 # bigint attribute declaration
146 # multi-value (an arbitrary number of attributes is allowed), optional
147 # declares a signed (unlike uint!) 64-bit attribute
148 #
149 # sql_attr_bigint = my_bigint_id
150
151
152 # UNIX timestamp attribute declaration
153 # multi-value (an arbitrary number of attributes is allowed), optional
154 # similar to integer, but can also be used in date functions
155 #
156 # sql_attr_timestamp = posted_ts
157 sql_attr_timestamp = last_edited
158 # sql_attr_timestamp = date_added
159
160
161 # floating point attribute declaration
162 # multi-value (an arbitrary number of attributes is allowed), optional
163 # values are stored in single precision, 32-bit IEEE 754 format
164 #
165 # sql_attr_float = lat_radians
166 # sql_attr_float = long_radians
167
168
169 # multi-valued attribute (MVA) attribute declaration
170 # multi-value (an arbitrary number of attributes is allowed), optional
171 # MVA values are variable length lists of unsigned 32-bit integers
172 #
173 # syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]
174 # ATTR-TYPE is 'uint' or 'timestamp'
175 # SOURCE-TYPE is 'field', 'query', or 'ranged-query'
176 # QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs
177 # RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
178 #
179 # sql_attr_multi = uint tag from query; SELECT docid, tagid FROM tags
180 # sql_attr_multi = uint tag from ranged-query; \
181 # SELECT docid, tagid FROM tags WHERE id>=$start AND id<=$end; \
182 # SELECT MIN(docid), MAX(docid) FROM tags
183
184
185 # string attribute declaration
186 # multi-value (an arbitrary number of these is allowed), optional
187 # lets you store and retrieve strings
188 #
189 sql_attr_string = object_class
190
191
192 # JSON attribute declaration
193 # multi-value (an arbitrary number of these is allowed), optional
194 # lets you store a JSON document as an (in-memory) attribute for later use
195 #
196 # sql_attr_json = properties
197
198
199 # combined field plus attribute declaration (from a single column)
200 # stores column as an attribute, but also indexes it as a full-text field
201 #
202 # sql_field_string = author
203
204
205 # post-query, executed on sql_query completion
206 # optional, default is empty
207 #
208 # sql_query_post =
209
210
211 # post-index-query, executed on successful indexing completion
212 # optional, default is empty
213 # $maxid expands to max document ID actually fetched from DB
214 #
215 # sql_query_post_index = REPLACE INTO counters ( id, val ) \
216 # VALUES ( 'max_indexed_id', $maxid )
217
218
219 # ranged query throttling, in milliseconds
220 # optional, default is 0 which means no delay
221 # enforces given delay before each query step
222 sql_ranged_throttle = 0
223
224
225 # kill-list query, fetches the document IDs for kill-list
226 # k-list will suppress matches from preceding indexes in the same query
227 # optional, default is empty
228 #
229 # sql_query_killlist = SELECT id FROM documents WHERE edited>=@last_reindex
230
231
232 # columns to unpack on indexer side when indexing
233 # multi-value, optional, default is empty list
234 #
235 # unpack_zlib = zlib_column
236 # unpack_mysqlcompress = compressed_column
237 # unpack_mysqlcompress = compressed_column_2
238
239
240 # maximum unpacked length allowed in MySQL COMPRESS() unpacker
241 # optional, default is 16M
242 #
243 # unpack_mysqlcompress_maxsize = 16M
244
245
246 # hook command to run when SQL connection succeeds
247 # optional, default value is empty (do nothing)
248 #
249 # hook_connect = bash sql_connect.sh
250
251
252 # hook command to run after (any) SQL range query
253 # it may print out "minid maxid" (w/o quotes) to override the range
254 # optional, default value is empty (do nothing)
255 #
256 # hook_query_range = bash sql_query_range.sh
257
258
259 # hook command to run on successful indexing completion
260 # $maxid expands to max document ID actually fetched from DB
261 # optional, default value is empty (do nothing)
262 #
263 # hook_post_index = bash sql_post_index.sh $maxid
264
265 #####################################################################
266 ## xmlpipe2 settings
267 #####################################################################
268
269 # type = xmlpipe
270
271 # shell command to invoke xmlpipe stream producer
272 # mandatory
273 #
274 # xmlpipe_command = cat /var/db/sphinxsearch/test.xml
275
276 # xmlpipe2 field declaration
277 # multi-value, optional, default is empty
278 #
279 # xmlpipe_field = subject
280 # xmlpipe_field = content
281
282
283 # xmlpipe2 attribute declaration
284 # multi-value, optional, default is empty
285 # all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX
286 # examples:
287 #
288 # xmlpipe_attr_timestamp = published
289 # xmlpipe_attr_uint = author_id
290 # xmlpipe_attr_bool = is_enabled
291 # xmlpipe_attr_float = latitude
292 # xmlpipe_attr_bigint = guid
293 # xmlpipe_attr_multi = tags
294 # xmlpipe_attr_multi_64 = tags64
295 # xmlpipe_attr_string = title
296 # xmlpipe_attr_json = extra_data
297 # xmlpipe_field_string = content
298
299
300 # perform UTF-8 validation, and filter out incorrect codes
301 # avoids XML parser choking on non-UTF-8 documents
302 # optional, default is 0
303 #
304 # xmlpipe_fixup_utf8 = 1
305 }
306
307
308 # inherited source: same as zvukiru, but with a 100 ms delay before each ranged query step
309 # NOTE(review): the parent sets no sql_query_range, so the delay presumably never triggers - confirm
310 # all the parameters are copied from the parent source,
311 # and may then be overridden in this source definition
312 source zvukiruthrottled : zvukiru
313 {
314 sql_ranged_throttle = 100
315 }
316
317 #############################################################################
318 ## index definition
319 #############################################################################
320
321 # local index example
322 #
323 # this is an index which is stored locally in the filesystem
324 #
325 # all indexing-time options (such as morphology and charsets)
326 # are configured per local index
327 index zvukiru
328 {
329 # index type
330 # optional, default is 'plain'
331 # known values are 'plain', 'distributed', and 'rt' (see samples below)
332 # type = plain
333
334 # document source(s) to index
335 # multi-value, mandatory
336 # document IDs must be globally unique across all sources
337 source = zvukiru
338
339 # index files path and file name, without extension
340 # mandatory, path must be writable, extensions will be auto-appended
341 path = /var/db/sphinxsearch/data/zvukiru
342
343 # document attribute values (docinfo) storage mode
344 # optional, default is 'extern'
345 # known values are 'none', 'extern' and 'inline'
346 docinfo = extern
347
348 # dictionary type, 'crc' or 'keywords'
349 # crc is faster to index when no substring/wildcards searches are needed
350 # crc with substrings might be faster to search but is much slower to index
351 # (because all substrings are pre-extracted as individual keywords)
352 # keywords is much faster to index with substrings, and index is much (3-10x) smaller
353 # keywords supports wildcards, crc does not, and never will
354 # optional, default is 'keywords'
355 dict = keywords
356
357 # memory locking for cached data (.spa and .spi), to prevent swapping
358 # optional, default is 0 (do not mlock)
359 # requires searchd to be run from root
360 mlock = 0
361
362 # a list of morphology preprocessors to apply
363 # optional, default is empty
364 # processors are tried in the listed order; processing stops at the first one that modifies the word
365 # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
366 # 'soundex', and 'metaphone'; additional preprocessors available from
367 # libstemmer are 'libstemmer_XXX', where XXX is algorithm code
368 # (see libstemmer_c/libstemmer/modules.txt)
369 #
370 morphology = stem_en, stem_ru, soundex
371 # morphology = libstemmer_german
372 # morphology = libstemmer_sv
373 # morphology = none
374
375 # minimum word length at which to enable stemming
376 # optional, default is 1 (stem everything)
377 #
378 min_stemming_len = 2
379
380
381 # stopword files list (space separated)
382 # optional, default is empty
383 # contents are plain text, charset_table and stemming are both applied
384 #
385 # stopwords = /var/db/sphinxsearch/data/stopwords.txt
386
387
388 # wordforms file, in "mapfrom > mapto" plain text format
389 # optional, default is empty
390 #
391 # wordforms = /var/db/sphinxsearch/data/wordforms.txt
392
393
394 # tokenizing exceptions file
395 # optional, default is empty
396 #
397 # plain text, case sensitive, space insensitive in map-from part
398 # one "Map Several Words => ToASingleOne" entry per line
399 #
400 # exceptions = /var/db/sphinxsearch/data/exceptions.txt
401
402
403 # embedded file size limit
404 # optional, default is 16K
405 #
406 # exceptions, wordforms, and stopwords files smaller than this limit
407 # are stored in the index; otherwise, their paths and sizes are stored
408 #
409 # embedded_limit = 16K
410
411 # minimum indexed word length
412 # default is 1 (index everything)
413 min_word_len = 2
414
415
416 # ignored characters list
417 # optional, default value is empty
418 #
419 # ignore_chars = U+00AD
420
421
422 # minimum word prefix length to index
423 # optional, default is 0 (do not index prefixes)
424 #
425 # min_prefix_len = 0
426
427
428 # minimum word infix length to index
429 # optional, default is 0 (do not index infixes)
430 #
431 # min_infix_len = 0
432
433
434 # maximum substring (prefix or infix) length to index
435 # optional, default is 0 (do not limit substring length)
436 #
437 # max_substring_len = 8
438
439
440 # list of fields to limit prefix/infix indexing to
441 # optional, default value is empty (index all fields in prefix/infix mode)
442 #
443 # prefix_fields = filename
444 # infix_fields = url, domain
445
446
447 # expand keywords with exact forms and/or stars when searching fit indexes
448 # search-time only, does not affect indexing, can be 0 or 1
449 # optional, default is 0 (do not expand keywords)
450 #
451 # expand_keywords = 1
452
453
454 # n-gram length to index, for CJK indexing
455 # only supports 0 and 1 for now, other lengths to be implemented
456 # optional, default is 0 (disable n-grams)
457 #
458 # ngram_len = 1
459
460
461 # n-gram characters list, for CJK indexing
462 # optional, default is empty
463 #
464 # ngram_chars = U+3000..U+2FA1F
465
466
467 # phrase boundary characters list
468 # optional, default is empty
469 #
470 # phrase_boundary = ., ?, !, U+2026 # horizontal ellipsis
471
472
473 # phrase boundary word position increment
474 # optional, default is 0
475 #
476 # phrase_boundary_step = 100
477
478
479 # blended characters list
480 # blended chars are indexed both as separators and valid characters
481 # for instance, AT&T will result in 3 tokens ("at", "t", and "at&t")
482 # optional, default is empty
483 #
484 # blend_chars = +, &, U+23
485
486
487 # blended token indexing mode
488 # a comma separated list of blended token indexing variants
489 # known variants are trim_none, trim_head, trim_tail, trim_both, skip_pure
490 # optional, default is trim_none
491 #
492 # blend_mode = trim_tail, skip_pure
493
494
495 # whether to strip HTML tags from incoming documents
496 # known values are 0 (do not strip) and 1 (do strip)
497 # optional, default is 0
498 html_strip = 0
499
500 # what HTML attributes to index if stripping HTML
501 # optional, default is empty (do not index anything)
502 #
503 # html_index_attrs = img=alt,title; a=title;
504
505
506 # what HTML elements contents to strip
507 # optional, default is empty (do not strip element contents)
508 #
509 # html_remove_elements = style, script
510
511
512 # whether to preopen index data files on startup
513 # optional, default is 0 (do not preopen), searchd-only
514 #
515 # preopen = 1
516
517
518 # whether to enable in-place inversion (2x less disk, 90-95% speed)
519 # optional, default is 0 (use separate temporary files), indexer-only
520 #
521 # inplace_enable = 1
522
523
524 # in-place fine-tuning options
525 # optional, defaults are listed below
526 #
527 # inplace_hit_gap = 0 # preallocated hitlist gap size
528 # inplace_docinfo_gap = 0 # preallocated docinfo gap size
529 # inplace_reloc_factor = 0.1 # relocation buffer size within arena
530 # inplace_write_factor = 0.1 # write buffer size within arena
531
532
533 # whether to index original keywords along with stemmed versions
534 # enables "=exactform" operator to work
535 # optional, default is 0
536 #
537 # index_exact_words = 1
538
539
540 # position increment on overshort (less than min_word_len) words
541 # optional, allowed values are 0 and 1, default is 1
542 #
543 # overshort_step = 1
544
545
546 # position increment on stopword
547 # optional, allowed values are 0 and 1, default is 1
548 #
549 # stopword_step = 1
550
551
552 # hitless words list
553 # positions for these keywords will not be stored in the index
554 # optional, allowed values are 'all', or a list file name
555 #
556 # hitless_words = all
557 # hitless_words = hitless.txt
558
559
560 # detect and index sentence and paragraph boundaries
561 # required for the SENTENCE and PARAGRAPH operators to work
562 # optional, allowed values are 0 and 1, default is 0
563 #
564 # index_sp = 1
565
566
567 # index zones, delimited by HTML/XML tags
568 # a comma separated list of tags and wildcards
569 # required for the ZONE operator to work
570 # optional, default is empty string (do not index zones)
571 #
572 # index_zones = title, h*, th
573
574
575 # index per-document and average per-index field lengths, in tokens
576 # required for the BM25A(), BM25F() in expression ranker
577 # optional, default is 0 (do not index field lengths)
578 #
579 # index_field_lengths = 1
580
581
582 # regular expressions (regexps) to filter the fields and queries with
583 # gets applied to data source fields when indexing
584 # gets applied to search queries when searching
585 # multi-value, optional, default is empty list of regexps
586 #
587 # regexp_filter = \b(\d+)\" => \1inch
588 # regexp_filter = (blue|red) => color
589
590
591 # list of the words considered frequent with respect to bigram indexing
592 # optional, default is empty
593 #
594 # bigram_freq_words = the, a, i, you, my
595
596
597 # bigram indexing mode
598 # known values are none, all, first_freq, both_freq
599 # optional, default is none (do not index bigrams)
600 #
601 # bigram_index = both_freq
602
603
604 # snippet document file name prefix
605 # prepended to file names when generating snippets using load_files option
606 # WARNING, this is a prefix (not a path), trailing slash matters!
607 # optional, default is empty
608 #
609 # snippets_file_prefix = /mnt/mydocs/server1
610
611
612 # whether to apply stopwords before or after stemming
613 # optional, default is 0 (apply stopwords after stemming)
614 #
615 # stopwords_unstemmed = 0
616
617
618 # path to a global (cluster-wide) keyword IDFs file
619 # optional, default is empty (use local IDFs)
620 #
621 # global_idf = /usr/local/sphinx/var/global.idf
622 }
623
624
625 # inherited index example
626 # (here: English-only stemming over the same source, stored in its own index files)
627 # all the parameters are copied from the parent index,
628 # and may then be overridden in this index definition
629 index zvukirustemmed : zvukiru
630 {
631 path = /var/db/sphinxsearch/data/zvukirustemmed
632 morphology = stem_en
633 }
634
635
636 # distributed index example
637 #
638 # this is a virtual index which can NOT be directly indexed,
639 # and only contains references to other local and/or remote indexes
640 index dist1
641 {
642 # 'distributed' index type MUST be specified
643 type = distributed
644
645 # local index to be searched
646 # there can be many local indexes configured; each must name an index defined in this file
647 local = zvukiru
648 local = zvukirustemmed
649
650 # remote agent
651 # multiple remote agents may be specified (NOTE(review): the agents below are sample values - confirm they exist)
652 # syntax for TCP connections is 'hostname:port:index1,[index2[,...]]'
653 # syntax for local UNIX connections is '/path/to/socket:index1,[index2[,...]]'
654 agent = localhost:9313:remote1
655 agent = localhost:9314:remote2,remote3
656 # agent = /var/run/searchd.sock:remote4
657
658 # remote agent mirrors groups, aka mirrors, aka HA agents
659 # defines 2 or more interchangeable mirrors for a given index part
660 #
661 # agent = server3:9312 | server4:9312 :indexchunk2
662 # agent = server3:9312:chunk2server3 | server4:9312:chunk2server4
663 # agent = server3:chunk2server3 | server4:chunk2server4
664 # agent = server21|server22|server23:chunk2
665
666
667 # blackhole remote agent, for debugging/testing
668 # network errors and search results will be ignored
669 #
670 # agent_blackhole = testbox:9312:testindex1,testindex2
671
672
673 # persistently connected remote agent
674 # reduces connect() pressure, requires workers = threads
675 #
676 # agent_persistent = testbox:9312:testindex1,testindex2
677
678
679 # remote agent connection timeout, milliseconds
680 # optional, default is 1000 ms, ie. 1 sec
681 agent_connect_timeout = 1000
682
683 # remote agent query timeout, milliseconds
684 # optional, default is 3000 ms, ie. 3 sec
685 agent_query_timeout = 3000
686
687 # HA mirror agent strategy
688 # optional, default is random (pick a random mirror)
689 # known values are nodeads, noerrors, roundrobin, nodeadstm, noerrorstm
690 #
691 # ha_strategy = nodeads
692
693 # path to RLP context file
694 # optional, default is empty
695 #
696 # rlp_context = /usr/local/share/sphinx/rlp/rlp-context.xml
697 }
698
699
700 # realtime index example
701 #
702 # you can run INSERT, REPLACE, and DELETE on this index on the fly
703 # using MySQL protocol (see 'listen' directive below)
704 index rt
705 {
706 # 'rt' index type must be specified to use RT index
707 type = rt
708
709 # index files path and file name, without extension
710 # mandatory, path must be writable, extensions will be auto-appended
711 path = /var/db/sphinxsearch/data/rt
712
713 # RAM chunk size limit
714 # RT index will keep at most this much data in RAM, then flush to disk
715 # optional, default is 128M
716 #
717 # rt_mem_limit = 512M
718
719 # full-text field declaration
720 # multi-value, mandatory
721 rt_field = title
722 rt_field = content
723
724 # unsigned integer attribute declaration
725 # multi-value (an arbitrary number of attributes is allowed), optional
726 # declares an unsigned 32-bit attribute
727 rt_attr_uint = gid
728
729 # RT indexes currently support the following attribute types:
730 # uint, bigint, float, timestamp, string, mva, mva64, json
731 #
732 # rt_attr_bigint = guid
733 # rt_attr_float = gpa
734 # rt_attr_timestamp = ts_added
735 # rt_attr_string = author
736 # rt_attr_multi = tags
737 # rt_attr_multi_64 = tags64
738 # rt_attr_json = extra_data
739 }
740
741 #############################################################################
742 ## indexer settings
743 #############################################################################
744
745 indexer
746 {
747 # memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
748 # optional, default is 128M, max is 2047M, recommended is 256M to 1024M
749 mem_limit = 128M
750
751 # maximum IO calls per second (for I/O throttling)
752 # optional, default is 0 (unlimited)
753 #
754 # max_iops = 40
755
756
757 # maximum IO call size, bytes (for I/O throttling)
758 # optional, default is 0 (unlimited)
759 #
760 # max_iosize = 1048576
761
762
763 # maximum xmlpipe2 field length, bytes
764 # optional, default is 2M
765 #
766 # max_xmlpipe2_field = 4M
767
768
769 # write buffer size, bytes
770 # several (currently up to 4) buffers will be allocated
771 # write buffers are allocated in addition to mem_limit
772 # optional, default is 1M
773 #
774 # write_buffer = 1M
775
776
777 # maximum file field adaptive buffer size
778 # optional, default is 8M, minimum is 1M
779 #
780 # max_file_field_buffer = 32M
781
782
783 # how to handle IO errors in file fields
784 # known values are 'ignore_field', 'skip_document', and 'fail_index'
785 # optional, default is 'ignore_field'
786 #
787 # on_file_field_error = skip_document
788
789
790 # lemmatizer cache size
791 # improves the indexing time when the lemmatization is enabled
792 # optional, default is 256K
793 #
794 # lemmatizer_cache = 512M
795 }
796
797 #############################################################################
798 ## searchd settings
799 #############################################################################
800
801 searchd
802 {
803 # [hostname:]port[:protocol], or /unix/socket/path to listen on
804 # known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL)
805 #
806 # multi-value, multiple listen points are allowed
807 # optional, defaults are 9312:sphinx and 9306:mysql41, as below
808 #
809 # listen = 127.0.0.1
810 # listen = 192.168.0.1:9312
811 # listen = 9312
812 # listen = /var/run/searchd.sock
813 listen = 9312
814 listen = 9306:mysql41
815
816 # log file, searchd run info is logged here
817 # optional, default is 'searchd.log'
818 log = /var/log/sphinxsearch/searchd.log
819
820 # query log file, all search queries are logged here
821 # optional, default is empty (do not log queries)
822 query_log = /var/log/sphinxsearch/sphinx-query.log
823
824 # client read timeout, seconds
825 # optional, default is 5
826 read_timeout = 5
827
828 # request timeout, seconds
829 # optional, default is 5 minutes
830 client_timeout = 300
831
832 # maximum amount of children to fork (concurrent searches to run)
833 # optional, default is 0 (unlimited)
834 max_children = 30
835
836 # maximum amount of persistent connections from this master to each agent host
837 # optional, but necessary if you use agent_persistent. It is reasonable to set the value
838 # as max_children, or less on the agent's hosts.
839 persistent_connections_limit = 30
840
841 # PID file, searchd process ID file name
842 # mandatory
843 pid_file = /var/run/sphinxsearch/searchd.pid
844
845 # seamless rotate, prevents rotate stalls if precaching huge datasets
846 # optional, default is 1
847 seamless_rotate = 1
848
849 # whether to forcibly preopen all indexes on startup
850 # optional, default is 1 (preopen everything)
851 preopen_indexes = 1
852
853 # whether to unlink .old index copies on successful rotation.
854 # optional, default is 1 (do unlink)
855 unlink_old = 1
856
857 # attribute updates periodic flush timeout, seconds
858 # updates will be automatically dumped to disk this frequently
859 # optional, default is 0 (disable periodic flush)
860 #
861 # attr_flush_period = 900
862
863
864 # MVA updates pool size
865 # shared between all instances of searchd, disables attr flushes!
866 # optional, default size is 1M
867 mva_updates_pool = 1M
868
869 # max allowed network packet size
870 # limits both query packets from clients, and responses from agents
871 # optional, default size is 8M
872 max_packet_size = 8M
873
874 # max allowed per-query filter count
875 # optional, default is 256
876 max_filters = 256
877
878 # max allowed per-filter values count
879 # optional, default is 4096
880 max_filter_values = 4096
881
882
883 # socket listen queue length
884 # optional, default is 5
885 #
886 # listen_backlog = 5
887
888
889 # per-keyword read buffer size
890 # optional, default is 256K
891 #
892 # read_buffer = 256K
893
894
895 # unhinted read size (currently used when reading hits)
896 # optional, default is 32K
897 #
898 # read_unhinted = 32K
899
900
901 # max allowed per-batch query count (aka multi-query count)
902 # optional, default is 32
903 max_batch_queries = 32
904
905
906 # max common subtree document cache size, per-query
907 # optional, default is 0 (disable subtree optimization)
908 #
909 # subtree_docs_cache = 4M
910
911
912 # max common subtree hit cache size, per-query
913 # optional, default is 0 (disable subtree optimization)
914 #
915 # subtree_hits_cache = 8M
916
917
918 # multi-processing mode (MPM)
919 # known values are none, fork, prefork, and threads
920 # threads is required for RT backend to work
921 # optional, default is threads
922 workers = threads # for RT to work
923
924
925 # max threads to create for searching local parts of a distributed index
926 # optional, default is 0, which means disable multi-threaded searching
927 # should work with all MPMs (ie. does NOT require workers=threads)
928 #
929 # dist_threads = 4
930
931
932 # binlog files path; use empty string to disable binlog
933 # optional, default is build-time configured data directory
934 #
935 # binlog_path = # disable logging
936 # binlog_path = /var/db/sphinxsearch/data # binlog.001 etc will be created there
937
938
939 # binlog flush/sync mode
940 # 0 means flush and sync every second
941 # 1 means flush and sync every transaction
942 # 2 means flush every transaction, sync every second
943 # optional, default is 2
944 #
945 # binlog_flush = 2
946
947
948 # binlog per-file size limit
949 # optional, default is 128M, 0 means no limit
950 #
951 # binlog_max_log_size = 256M
952
953
954 # per-thread stack size, only affects workers=threads mode
955 # optional, default is 64K
956 #
957 # thread_stack = 128K
958
959
960 # per-keyword expansion limit (for dict=keywords prefix searches)
961 # optional, default is 0 (no limit)
962 #
963 # expansion_limit = 1000
964
965
966 # RT RAM chunks flush period
967 # optional, default is 0 (no periodic flush)
968 #
969 # rt_flush_period = 900
970
971
972 # query log file format
973 # optional, known values are plain and sphinxql, default is plain
974 #
975 # query_log_format = sphinxql
976
977
978 # version string returned to MySQL network protocol clients
979 # optional, default is empty (use Sphinx version)
980 #
981 # mysql_version_string = 5.0.37
982
983
984 # default server-wide collation
985 # optional, default is libc_ci
986 #
987 # collation_server = utf8_general_ci
988
989
990 # server-wide locale for libc based collations
991 # optional, default is C
992 #
993 # collation_libc_locale = ru_RU.UTF-8
994
995
996 # threaded server watchdog (only used in workers=threads mode)
997 # optional, values are 0 and 1, default is 1 (watchdog on)
998 #
999 # watchdog = 1
1000
1001
1002 # costs for max_predicted_time model, in (imaginary) nanoseconds
1003 # optional, default is "doc=64, hit=48, skip=2048, match=64"
1004 #
1005 # predicted_time_costs = doc=64, hit=48, skip=2048, match=64
1006
1007
1008 # current SphinxQL state (uservars etc) serialization path
1009 # optional, default is none (do not serialize SphinxQL state)
1010 #
1011 # sphinxql_state = sphinxvars.sql
1012
1013
1014 # maximum RT merge thread IO calls per second, and per-call IO size
1015 # useful for throttling (the background) OPTIMIZE INDEX impact
1016 # optional, default is 0 (unlimited)
1017 #
1018 # rt_merge_iops = 40
1019 # rt_merge_maxiosize = 1M
1020
1021
1022 # interval between agent mirror pings, in milliseconds
1023 # 0 means disable pings
1024 # optional, default is 1000
1025 #
1026 # ha_ping_interval = 0
1027
1028
1029 # agent mirror statistics window size, in seconds
1030 # stats older than the window size (karma) are retired
1031 # that is, they will not affect master choice of agents in any way
1032 # optional, default is 60 seconds
1033 #
1034 # ha_period_karma = 60
1035
1036
1037 # delay between preforked children restarts on rotation, in milliseconds
1038 # optional, default is 0 (no delay)
1039 #
1040 # prefork_rotation_throttle = 100
1041
1042
1043 # a prefix to prepend to the local file names when creating snippets
1044 # with load_files and/or load_files_scatter options
1045 # optional, default is empty
1046 #
1047 # snippets_file_prefix = /mnt/common/server1/
1048 }
1049
1050 #############################################################################
1051 ## common settings
1052 #############################################################################
1053
1054 common
1055 {
1056
1057 # lemmatizer dictionaries base path
1058 # optional, default is /usr/local/share (see ./configure --datadir)
1059 #
1060 # lemmatizer_base = /usr/local/share/sphinx/dicts
1061
1062
1063 # how to handle syntax errors in JSON attributes
1064 # known values are 'ignore_attr' and 'fail_index'
1065 # optional, default is 'ignore_attr'
1066 #
1067 # on_json_attr_error = fail_index
1068
1069
1070 # whether to auto-convert numeric values from strings in JSON attributes
1071 # with auto-conversion, string value with actually numeric data
1072 # (as in {"key":"12345"}) gets stored as a number, rather than string
1073 # optional, allowed values are 0 and 1, default is 0 (do not convert)
1074 #
1075 # json_autoconv_numbers = 1
1076
1077
1078 # whether and how to auto-convert key names in JSON attributes
1079 # known value is 'lowercase'
1080 # optional, default is unspecified (do nothing)
1081 #
1082 # json_autoconv_keynames = lowercase
1083
1084
1085 # path to RLP root directory
1086 # optional, default is /usr/local/share (see ./configure --datadir)
1087 #
1088 # rlp_root = /usr/local/share/sphinx/rlp
1089
1090
1091 # path to RLP environment file
1092 # optional, default is /usr/local/share/rlp-environment.xml (see ./configure --datadir)
1093 #
1094 # rlp_environment = /usr/local/share/sphinx/rlp/rlp/etc/rlp-environment.xml
1095
1096
1097 # maximum total size of documents batched before processing them by the RLP
1098 # optional, default is 51200
1099 #
1100 # rlp_max_batch_size = 100k
1101
1102
1103 # maximum number of documents batched before processing them by the RLP
1104 # optional, default is 50
1105 #
1106 # rlp_max_batch_docs = 100
1107
1108
1109 # trusted plugin directory
1110 # optional, default is empty (disable UDFs)
1111 #
1112 # plugin_dir = /usr/local/sphinx/lib
1113
1114 }
1115
1116 # --eof--

Небольшая справка по веткам

cnddist – контейнер, в котором хранятся все дистрибутивы всех библиотек и программных пакетов, которые использовались при построении различных версий Contenido. Если какой-то библиотеки в данном хранилище нет, инсталлятор сделает попытку "подтянуть" ее с веба (например, с CPAN). Если библиотека слишком старая, есть очень большая вероятность, что ее там уже нет. Поэтому мы храним весь хлам от всех сборок. Если какой-то дистрибутив вдруг отсутствует в cnddist - напишите нам, мы положим его туда.

koi8 – отмирающая ветка, чей код, выдача и все внутренние библиотеки заточены на кодировку KOI8-R. Вносятся только те дополнения, которые касаются внешнего вида и функционала админки, баги ядра, обязательные обновления портов и мелочи, которые легко скопипастить. В дальнейшем планируется полная остановка поддержки по данной ветке.

utf8 – актуальная ветка, заточенная под UTF-8.

Внутри каждой ветки: core – исходники ядра; install – скрипт установки инсталляции; plugins – плагины; samples – "готовые к употреблению" проекты, которые можно поставить, запустить и посмотреть, как они работают.