1 |
603 |
ahitrov |
# |
2 |
|
|
# Sphinx configuration file sample |
3 |
|
|
# |
4 |
|
|
# WARNING! While this sample file mentions all available options, |
5 |
|
|
# it contains (very) short helper descriptions only. Please refer to |
6 |
|
|
# doc/sphinx.html for details. |
7 |
|
|
# |
8 |
|
|
|
9 |
|
|
############################################################################# |
10 |
|
|
## data source definition |
11 |
|
|
############################################################################# |
12 |
|
|
|
13 |
|
|
source zvukiru |
14 |
|
|
{ |
15 |
|
|
# data source type. mandatory, no default value |
16 |
|
|
# known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc |
17 |
|
|
type = pgsql |
18 |
|
|
|
19 |
|
|
##################################################################### |
20 |
|
|
## SQL settings (for 'mysql' and 'pgsql' types) |
21 |
|
|
##################################################################### |
22 |
|
|
|
23 |
|
|
# some straightforward parameters for SQL source types |
24 |
|
|
sql_host = localhost |
25 |
|
|
sql_user = zvuki |
26 |
|
|
sql_pass = sarUchOov |
27 |
|
|
sql_db = zvukirutf |
28 |
|
|
sql_port = 5432 # optional, default is 3306 for mysql and 5432 for pgsql |
29 |
|
|
|
30 |
|
|
# UNIX socket name |
31 |
|
|
# optional, default is empty (reuse client library defaults) |
32 |
|
|
# usually '/var/lib/mysql/mysql.sock' on Linux |
33 |
|
|
# usually '/tmp/mysql.sock' on FreeBSD |
34 |
|
|
# |
35 |
|
|
# sql_sock = /tmp/mysql.sock |
36 |
|
|
|
37 |
|
|
|
38 |
|
|
# MySQL specific client connection flags |
39 |
|
|
# optional, default is 0 |
40 |
|
|
# |
41 |
|
|
# mysql_connect_flags = 32 # enable compression |
42 |
|
|
|
43 |
|
|
# MySQL specific SSL certificate settings |
44 |
|
|
# optional, defaults are empty |
45 |
|
|
# |
46 |
|
|
# mysql_ssl_cert = /etc/ssl/client-cert.pem |
47 |
|
|
# mysql_ssl_key = /etc/ssl/client-key.pem |
48 |
|
|
# mysql_ssl_ca = /etc/ssl/cacert.pem |
49 |
|
|
|
50 |
|
|
# MS SQL specific Windows authentication mode flag |
51 |
|
|
# MUST be in sync with charset_type index-level setting |
52 |
|
|
# optional, default is 0 |
53 |
|
|
# |
54 |
|
|
# mssql_winauth = 1 # use currently logged on user credentials |
55 |
|
|
|
56 |
|
|
|
57 |
|
|
# ODBC specific DSN (data source name) |
58 |
|
|
# mandatory for odbc source type, no default value |
59 |
|
|
# |
60 |
|
|
# odbc_dsn = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)}; |
61 |
|
|
# sql_query = SELECT id, data FROM documents.csv |
62 |
|
|
|
63 |
|
|
|
64 |
|
|
# ODBC and MS SQL specific, per-column buffer sizes |
65 |
|
|
# optional, default is auto-detect |
66 |
|
|
# |
67 |
|
|
# sql_column_buffers = content=12M, comments=1M |
68 |
|
|
|
69 |
|
|
|
70 |
|
|
# pre-query, executed before the main fetch query |
71 |
|
|
# multi-value, optional, default is empty list of queries |
72 |
|
|
# |
73 |
|
|
# sql_query_pre = SET NAMES utf8 |
74 |
|
|
# sql_query_pre = SET SESSION query_cache_type=OFF |
75 |
|
|
|
76 |
|
|
|
77 |
|
|
# main document fetch query |
78 |
|
|
# mandatory, integer document ID field MUST be the first selected column |
79 |
|
|
sql_query = \ |
80 |
|
|
SELECT id, object_id, object_class, extract(epoch from date_trunc('seconds', mtime)) AS last_edited, is_deleted, name as title, search as content \ |
81 |
|
|
FROM search |
82 |
|
|
|
83 |
|
|
|
84 |
|
|
# joined/payload field fetch query |
85 |
|
|
# joined fields let you avoid (slow) JOIN and GROUP_CONCAT |
86 |
|
|
# payload fields let you attach custom per-keyword values (eg. for ranking) |
87 |
|
|
# |
88 |
|
|
# syntax is FIELD-NAME 'from' ( 'query' | 'payload-query' ); QUERY |
89 |
|
|
# joined field QUERY should return 2 columns (docid, text) |
90 |
|
|
# payload field QUERY should return 3 columns (docid, keyword, weight) |
91 |
|
|
# |
92 |
|
|
# REQUIRES that query results are in ascending document ID order! |
93 |
|
|
# multi-value, optional, default is empty list of queries |
94 |
|
|
# |
95 |
|
|
# sql_joined_field = tags from query; SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC |
96 |
|
|
# sql_joined_field = wtags from payload-query; SELECT docid, tag, tagweight FROM tags ORDER BY docid ASC |
97 |
|
|
|
98 |
|
|
|
99 |
|
|
# file based field declaration |
100 |
|
|
# |
101 |
|
|
# content of this field is treated as a file name |
102 |
|
|
# and the file gets loaded and indexed in place of a field |
103 |
|
|
# |
104 |
|
|
# max file size is limited by max_file_field_buffer indexer setting |
105 |
|
|
# file IO errors are non-fatal and get reported as warnings |
106 |
|
|
# |
107 |
|
|
# sql_file_field = content_file_path |
108 |
|
|
|
109 |
|
|
|
110 |
|
|
# range query setup, query that must return min and max ID values |
111 |
|
|
# optional, default is empty |
112 |
|
|
# |
113 |
|
|
# sql_query will need to reference $start and $end boundaries |
114 |
|
|
# if using ranged query: |
115 |
|
|
# |
116 |
|
|
# sql_query = \ |
117 |
|
|
# SELECT doc.id, doc.id AS group, doc.title, doc.data \ |
118 |
|
|
# FROM documents doc \ |
119 |
|
|
# WHERE id>=$start AND id<=$end |
120 |
|
|
# |
121 |
|
|
# sql_query_range = SELECT MIN(id),MAX(id) FROM documents |
122 |
|
|
|
123 |
|
|
|
124 |
|
|
# range query step |
125 |
|
|
# optional, default is 1024 |
126 |
|
|
# |
127 |
|
|
sql_range_step = 1000 |
128 |
|
|
|
129 |
|
|
|
130 |
|
|
# unsigned integer attribute declaration |
131 |
|
|
# multi-value (an arbitrary number of attributes is allowed), optional |
132 |
|
|
# optional bit size can be specified, default is 32 |
133 |
|
|
# |
134 |
|
|
# sql_attr_uint = author_id |
135 |
|
|
# sql_attr_uint = forum_id:9 # 9 bits for forum_id |
136 |
|
|
sql_attr_uint = object_id |
137 |
|
|
|
138 |
|
|
# boolean attribute declaration |
139 |
|
|
# multi-value (an arbitrary number of attributes is allowed), optional |
140 |
|
|
# equivalent to sql_attr_uint with 1-bit size |
141 |
|
|
# |
142 |
|
|
sql_attr_bool = is_deleted |
143 |
|
|
|
144 |
|
|
|
145 |
|
|
# bigint attribute declaration |
146 |
|
|
# multi-value (an arbitrary number of attributes is allowed), optional |
147 |
|
|
# declares a signed (unlike uint!) 64-bit attribute |
148 |
|
|
# |
149 |
|
|
# sql_attr_bigint = my_bigint_id |
150 |
|
|
|
151 |
|
|
|
152 |
|
|
# UNIX timestamp attribute declaration |
153 |
|
|
# multi-value (an arbitrary number of attributes is allowed), optional |
154 |
|
|
# similar to integer, but can also be used in date functions |
155 |
|
|
# |
156 |
|
|
# sql_attr_timestamp = posted_ts |
157 |
|
|
sql_attr_timestamp = last_edited |
158 |
|
|
# sql_attr_timestamp = date_added |
159 |
|
|
|
160 |
|
|
|
161 |
|
|
# floating point attribute declaration |
162 |
|
|
# multi-value (an arbitrary number of attributes is allowed), optional |
163 |
|
|
# values are stored in single precision, 32-bit IEEE 754 format |
164 |
|
|
# |
165 |
|
|
# sql_attr_float = lat_radians |
166 |
|
|
# sql_attr_float = long_radians |
167 |
|
|
|
168 |
|
|
|
169 |
|
|
# multi-valued attribute (MVA) attribute declaration |
170 |
|
|
# multi-value (an arbitrary number of attributes is allowed), optional |
171 |
|
|
# MVA values are variable length lists of unsigned 32-bit integers |
172 |
|
|
# |
173 |
|
|
# syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY] |
174 |
|
|
# ATTR-TYPE is 'uint' or 'timestamp' |
175 |
|
|
# SOURCE-TYPE is 'field', 'query', or 'ranged-query' |
176 |
|
|
# QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs |
177 |
|
|
# RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range' |
178 |
|
|
# |
179 |
|
|
# sql_attr_multi = uint tag from query; SELECT docid, tagid FROM tags |
180 |
|
|
# sql_attr_multi = uint tag from ranged-query; \ |
181 |
|
|
# SELECT docid, tagid FROM tags WHERE id>=$start AND id<=$end; \ |
182 |
|
|
# SELECT MIN(docid), MAX(docid) FROM tags |
183 |
|
|
|
184 |
|
|
|
185 |
|
|
# string attribute declaration |
186 |
|
|
# multi-value (an arbitrary number of these is allowed), optional |
187 |
|
|
# lets you store and retrieve strings |
188 |
|
|
# |
189 |
|
|
sql_attr_string = object_class |
190 |
|
|
|
191 |
|
|
|
192 |
|
|
# JSON attribute declaration |
193 |
|
|
# multi-value (an arbitrary number of these is allowed), optional |
194 |
|
|
# lets you store a JSON document as an (in-memory) attribute for later use |
195 |
|
|
# |
196 |
|
|
# sql_attr_json = properties |
197 |
|
|
|
198 |
|
|
|
199 |
|
|
# combined field plus attribute declaration (from a single column) |
200 |
|
|
# stores column as an attribute, but also indexes it as a full-text field |
201 |
|
|
# |
202 |
|
|
# sql_field_string = author |
203 |
|
|
|
204 |
|
|
|
205 |
|
|
# post-query, executed on sql_query completion |
206 |
|
|
# optional, default is empty |
207 |
|
|
# |
208 |
|
|
# sql_query_post = |
209 |
|
|
|
210 |
|
|
|
211 |
|
|
# post-index-query, executed on successful indexing completion |
212 |
|
|
# optional, default is empty |
213 |
|
|
# $maxid expands to max document ID actually fetched from DB |
214 |
|
|
# |
215 |
|
|
# sql_query_post_index = REPLACE INTO counters ( id, val ) \ |
216 |
|
|
# VALUES ( 'max_indexed_id', $maxid ) |
217 |
|
|
|
218 |
|
|
|
219 |
|
|
# ranged query throttling, in milliseconds |
220 |
|
|
# optional, default is 0 which means no delay |
221 |
|
|
# enforces given delay before each query step |
222 |
|
|
sql_ranged_throttle = 0 |
223 |
|
|
|
224 |
|
|
|
225 |
|
|
# kill-list query, fetches the document IDs for kill-list |
226 |
|
|
# k-list will suppress matches from preceding indexes in the same query |
227 |
|
|
# optional, default is empty |
228 |
|
|
# |
229 |
|
|
# sql_query_killlist = SELECT id FROM documents WHERE edited>=@last_reindex |
230 |
|
|
|
231 |
|
|
|
232 |
|
|
# columns to unpack on indexer side when indexing |
233 |
|
|
# multi-value, optional, default is empty list |
234 |
|
|
# |
235 |
|
|
# unpack_zlib = zlib_column |
236 |
|
|
# unpack_mysqlcompress = compressed_column |
237 |
|
|
# unpack_mysqlcompress = compressed_column_2 |
238 |
|
|
|
239 |
|
|
|
240 |
|
|
# maximum unpacked length allowed in MySQL COMPRESS() unpacker |
241 |
|
|
# optional, default is 16M |
242 |
|
|
# |
243 |
|
|
# unpack_mysqlcompress_maxsize = 16M |
244 |
|
|
|
245 |
|
|
|
246 |
|
|
# hook command to run when SQL connection succeeds |
247 |
|
|
# optional, default value is empty (do nothing) |
248 |
|
|
# |
249 |
|
|
# hook_connect = bash sql_connect.sh |
250 |
|
|
|
251 |
|
|
|
252 |
|
|
# hook command to run after (any) SQL range query |
253 |
|
|
# it may print out "minid maxid" (w/o quotes) to override the range |
254 |
|
|
# optional, default value is empty (do nothing) |
255 |
|
|
# |
256 |
|
|
# hook_query_range = bash sql_query_range.sh |
257 |
|
|
|
258 |
|
|
|
259 |
|
|
# hook command to run on successful indexing completion |
260 |
|
|
# $maxid expands to max document ID actually fetched from DB |
261 |
|
|
# optional, default value is empty (do nothing) |
262 |
|
|
# |
263 |
|
|
# hook_post_index = bash sql_post_index.sh $maxid |
264 |
|
|
|
265 |
|
|
##################################################################### |
266 |
|
|
## xmlpipe2 settings |
267 |
|
|
##################################################################### |
268 |
|
|
|
269 |
|
|
# type = xmlpipe |
270 |
|
|
|
271 |
|
|
# shell command to invoke xmlpipe stream producer |
272 |
|
|
# mandatory |
273 |
|
|
# |
274 |
|
|
# xmlpipe_command = cat /var/db/sphinxsearch/test.xml |
275 |
|
|
|
276 |
|
|
# xmlpipe2 field declaration |
277 |
|
|
# multi-value, optional, default is empty |
278 |
|
|
# |
279 |
|
|
# xmlpipe_field = subject |
280 |
|
|
# xmlpipe_field = content |
281 |
|
|
|
282 |
|
|
|
283 |
|
|
# xmlpipe2 attribute declaration |
284 |
|
|
# multi-value, optional, default is empty |
285 |
|
|
# all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX |
286 |
|
|
# examples: |
287 |
|
|
# |
288 |
|
|
# xmlpipe_attr_timestamp = published |
289 |
|
|
# xmlpipe_attr_uint = author_id |
290 |
|
|
# xmlpipe_attr_bool = is_enabled |
291 |
|
|
# xmlpipe_attr_float = latitude |
292 |
|
|
# xmlpipe_attr_bigint = guid |
293 |
|
|
# xmlpipe_attr_multi = tags |
294 |
|
|
# xmlpipe_attr_multi_64 = tags64 |
295 |
|
|
# xmlpipe_attr_string = title |
296 |
|
|
# xmlpipe_attr_json = extra_data |
297 |
|
|
# xmlpipe_field_string = content |
298 |
|
|
|
299 |
|
|
|
300 |
|
|
# perform UTF-8 validation, and filter out incorrect codes |
301 |
|
|
# avoids XML parser choking on non-UTF-8 documents |
302 |
|
|
# optional, default is 0 |
303 |
|
|
# |
304 |
|
|
# xmlpipe_fixup_utf8 = 1 |
305 |
|
|
} |
306 |
|
|
|
307 |
|
|
|
308 |
|
|
# inherited source example |
309 |
|
|
# |
310 |
|
|
# all the parameters are copied from the parent source, |
311 |
|
|
# and may then be overridden in this source definition |
312 |
|
|
source zvukiruthrottled : zvukiru |
313 |
|
|
{ |
314 |
|
|
# overrides the parent's sql_ranged_throttle = 0 with a 100 ms delay per ranged query step |
# NOTE(review): the parent source leaves sql_query_range commented out, so no ranged |
# query steps appear to be configured for this delay to apply to - confirm intent |
sql_ranged_throttle = 100 |
315 |
|
|
} |
316 |
|
|
|
317 |
|
|
############################################################################# |
318 |
|
|
## index definition |
319 |
|
|
############################################################################# |
320 |
|
|
|
321 |
|
|
# local index example |
322 |
|
|
# |
323 |
|
|
# this is an index which is stored locally in the filesystem |
324 |
|
|
# |
325 |
|
|
# all indexing-time options (such as morphology and charsets) |
326 |
|
|
# are configured per local index |
327 |
|
|
index zvukiru |
328 |
|
|
{ |
329 |
|
|
# index type |
330 |
|
|
# optional, default is 'plain' |
331 |
|
|
# known values are 'plain', 'distributed', and 'rt' (see samples below) |
332 |
|
|
# type = plain |
333 |
|
|
|
334 |
|
|
# document source(s) to index |
335 |
|
|
# multi-value, mandatory |
336 |
|
|
# document IDs must be globally unique across all sources |
337 |
|
|
source = zvukiru |
338 |
|
|
|
339 |
|
|
# index files path and file name, without extension |
340 |
|
|
# mandatory, path must be writable, extensions will be auto-appended |
341 |
|
|
path = /var/db/sphinxsearch/data/zvukiru |
342 |
|
|
|
343 |
|
|
# document attribute values (docinfo) storage mode |
344 |
|
|
# optional, default is 'extern' |
345 |
|
|
# known values are 'none', 'extern' and 'inline' |
346 |
|
|
docinfo = extern |
347 |
|
|
|
348 |
|
|
# dictionary type, 'crc' or 'keywords' |
349 |
|
|
# crc is faster to index when no substring/wildcards searches are needed |
350 |
|
|
# crc with substrings might be faster to search but is much slower to index |
351 |
|
|
# (because all substrings are pre-extracted as individual keywords) |
352 |
|
|
# keywords is much faster to index with substrings, and index is much (3-10x) smaller |
353 |
|
|
# keywords supports wildcards, crc does not, and never will |
354 |
|
|
# optional, default is 'keywords' |
355 |
|
|
dict = keywords |
356 |
|
|
|
357 |
|
|
# memory locking for cached data (.spa and .spi), to prevent swapping |
358 |
|
|
# optional, default is 0 (do not mlock) |
359 |
|
|
# requires searchd to be run from root |
360 |
|
|
mlock = 0 |
361 |
|
|
|
362 |
|
|
# a list of morphology preprocessors to apply |
363 |
|
|
# optional, default is empty |
364 |
|
|
# |
365 |
|
|
# builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru', |
366 |
|
|
# 'soundex', and 'metaphone'; additional preprocessors available from |
367 |
|
|
# libstemmer are 'libstemmer_XXX', where XXX is algorithm code |
368 |
|
|
# (see libstemmer_c/libstemmer/modules.txt) |
369 |
|
|
# |
370 |
|
|
morphology = stem_en, stem_ru, soundex |
371 |
|
|
# morphology = libstemmer_german |
372 |
|
|
# morphology = libstemmer_sv |
373 |
|
|
# morphology = none |
374 |
|
|
|
375 |
|
|
# minimum word length at which to enable stemming |
376 |
|
|
# optional, default is 1 (stem everything) |
377 |
|
|
# |
378 |
|
|
min_stemming_len = 2 |
379 |
|
|
|
380 |
|
|
|
381 |
|
|
# stopword files list (space separated) |
382 |
|
|
# optional, default is empty |
383 |
|
|
# contents are plain text, charset_table and stemming are both applied |
384 |
|
|
# |
385 |
|
|
# stopwords = /var/db/sphinxsearch/data/stopwords.txt |
386 |
|
|
|
387 |
|
|
|
388 |
|
|
# wordforms file, in "mapfrom > mapto" plain text format |
389 |
|
|
# optional, default is empty |
390 |
|
|
# |
391 |
|
|
# wordforms = /var/db/sphinxsearch/data/wordforms.txt |
392 |
|
|
|
393 |
|
|
|
394 |
|
|
# tokenizing exceptions file |
395 |
|
|
# optional, default is empty |
396 |
|
|
# |
397 |
|
|
# plain text, case sensitive, space insensitive in map-from part |
398 |
|
|
# one "Map Several Words => ToASingleOne" entry per line |
399 |
|
|
# |
400 |
|
|
# exceptions = /var/db/sphinxsearch/data/exceptions.txt |
401 |
|
|
|
402 |
|
|
|
403 |
|
|
# embedded file size limit |
404 |
|
|
# optional, default is 16K |
405 |
|
|
# |
406 |
|
|
# exceptions, wordforms, and stopwords files smaller than this limit |
407 |
|
|
# are stored in the index; otherwise, their paths and sizes are stored |
408 |
|
|
# |
409 |
|
|
# embedded_limit = 16K |
410 |
|
|
|
411 |
|
|
# minimum indexed word length |
412 |
|
|
# default is 1 (index everything) |
413 |
|
|
min_word_len = 2 |
414 |
|
|
|
415 |
|
|
|
416 |
|
|
# ignored characters list |
417 |
|
|
# optional, default value is empty |
418 |
|
|
# |
419 |
|
|
# ignore_chars = U+00AD |
420 |
|
|
|
421 |
|
|
|
422 |
|
|
# minimum word prefix length to index |
423 |
|
|
# optional, default is 0 (do not index prefixes) |
424 |
|
|
# |
425 |
|
|
# min_prefix_len = 0 |
426 |
|
|
|
427 |
|
|
|
428 |
|
|
# minimum word infix length to index |
429 |
|
|
# optional, default is 0 (do not index infixes) |
430 |
|
|
# |
431 |
|
|
# min_infix_len = 0 |
432 |
|
|
|
433 |
|
|
|
434 |
|
|
# maximum substring (prefix or infix) length to index |
435 |
|
|
# optional, default is 0 (do not limit substring length) |
436 |
|
|
# |
437 |
|
|
# max_substring_len = 8 |
438 |
|
|
|
439 |
|
|
|
440 |
|
|
# list of fields to limit prefix/infix indexing to |
441 |
|
|
# optional, default value is empty (index all fields in prefix/infix mode) |
442 |
|
|
# |
443 |
|
|
# prefix_fields = filename |
444 |
|
|
# infix_fields = url, domain |
445 |
|
|
|
446 |
|
|
|
447 |
|
|
# expand keywords with exact forms and/or stars when searching fit indexes |
448 |
|
|
# search-time only, does not affect indexing, can be 0 or 1 |
449 |
|
|
# optional, default is 0 (do not expand keywords) |
450 |
|
|
# |
451 |
|
|
# expand_keywords = 1 |
452 |
|
|
|
453 |
|
|
|
454 |
|
|
# n-gram length to index, for CJK indexing |
455 |
|
|
# only supports 0 and 1 for now, other lengths to be implemented |
456 |
|
|
# optional, default is 0 (disable n-grams) |
457 |
|
|
# |
458 |
|
|
# ngram_len = 1 |
459 |
|
|
|
460 |
|
|
|
461 |
|
|
# n-gram characters list, for CJK indexing |
462 |
|
|
# optional, default is empty |
463 |
|
|
# |
464 |
|
|
# ngram_chars = U+3000..U+2FA1F |
465 |
|
|
|
466 |
|
|
|
467 |
|
|
# phrase boundary characters list |
468 |
|
|
# optional, default is empty |
469 |
|
|
# |
470 |
|
|
# phrase_boundary = ., ?, !, U+2026 # horizontal ellipsis |
471 |
|
|
|
472 |
|
|
|
473 |
|
|
# phrase boundary word position increment |
474 |
|
|
# optional, default is 0 |
475 |
|
|
# |
476 |
|
|
# phrase_boundary_step = 100 |
477 |
|
|
|
478 |
|
|
|
479 |
|
|
# blended characters list |
480 |
|
|
# blended chars are indexed both as separators and valid characters |
481 |
|
|
# for instance, AT&T will result in 3 tokens ("at", "t", and "at&t") |
482 |
|
|
# optional, default is empty |
483 |
|
|
# |
484 |
|
|
# blend_chars = +, &, U+23 |
485 |
|
|
|
486 |
|
|
|
487 |
|
|
# blended token indexing mode |
488 |
|
|
# a comma separated list of blended token indexing variants |
489 |
|
|
# known variants are trim_none, trim_head, trim_tail, trim_both, skip_pure |
490 |
|
|
# optional, default is trim_none |
491 |
|
|
# |
492 |
|
|
# blend_mode = trim_tail, skip_pure |
493 |
|
|
|
494 |
|
|
|
495 |
|
|
# whether to strip HTML tags from incoming documents |
496 |
|
|
# known values are 0 (do not strip) and 1 (do strip) |
497 |
|
|
# optional, default is 0 |
498 |
|
|
html_strip = 0 |
499 |
|
|
|
500 |
|
|
# what HTML attributes to index if stripping HTML |
501 |
|
|
# optional, default is empty (do not index anything) |
502 |
|
|
# |
503 |
|
|
# html_index_attrs = img=alt,title; a=title; |
504 |
|
|
|
505 |
|
|
|
506 |
|
|
# what HTML elements contents to strip |
507 |
|
|
# optional, default is empty (do not strip element contents) |
508 |
|
|
# |
509 |
|
|
# html_remove_elements = style, script |
510 |
|
|
|
511 |
|
|
|
512 |
|
|
# whether to preopen index data files on startup |
513 |
|
|
# optional, default is 0 (do not preopen), searchd-only |
514 |
|
|
# |
515 |
|
|
# preopen = 1 |
516 |
|
|
|
517 |
|
|
|
518 |
|
|
# whether to enable in-place inversion (2x less disk, 90-95% speed) |
519 |
|
|
# optional, default is 0 (use separate temporary files), indexer-only |
520 |
|
|
# |
521 |
|
|
# inplace_enable = 1 |
522 |
|
|
|
523 |
|
|
|
524 |
|
|
# in-place fine-tuning options |
525 |
|
|
# optional, defaults are listed below |
526 |
|
|
# |
527 |
|
|
# inplace_hit_gap = 0 # preallocated hitlist gap size |
528 |
|
|
# inplace_docinfo_gap = 0 # preallocated docinfo gap size |
529 |
|
|
# inplace_reloc_factor = 0.1 # relocation buffer size within arena |
530 |
|
|
# inplace_write_factor = 0.1 # write buffer size within arena |
531 |
|
|
|
532 |
|
|
|
533 |
|
|
# whether to index original keywords along with stemmed versions |
534 |
|
|
# enables "=exactform" operator to work |
535 |
|
|
# optional, default is 0 |
536 |
|
|
# |
537 |
|
|
# index_exact_words = 1 |
538 |
|
|
|
539 |
|
|
|
540 |
|
|
# position increment on overshort (less than min_word_len) words |
541 |
|
|
# optional, allowed values are 0 and 1, default is 1 |
542 |
|
|
# |
543 |
|
|
# overshort_step = 1 |
544 |
|
|
|
545 |
|
|
|
546 |
|
|
# position increment on stopword |
547 |
|
|
# optional, allowed values are 0 and 1, default is 1 |
548 |
|
|
# |
549 |
|
|
# stopword_step = 1 |
550 |
|
|
|
551 |
|
|
|
552 |
|
|
# hitless words list |
553 |
|
|
# positions for these keywords will not be stored in the index |
554 |
|
|
# optional, allowed values are 'all', or a list file name |
555 |
|
|
# |
556 |
|
|
# hitless_words = all |
557 |
|
|
# hitless_words = hitless.txt |
558 |
|
|
|
559 |
|
|
|
560 |
|
|
# detect and index sentence and paragraph boundaries |
561 |
|
|
# required for the SENTENCE and PARAGRAPH operators to work |
562 |
|
|
# optional, allowed values are 0 and 1, default is 0 |
563 |
|
|
# |
564 |
|
|
# index_sp = 1 |
565 |
|
|
|
566 |
|
|
|
567 |
|
|
# index zones, delimited by HTML/XML tags |
568 |
|
|
# a comma separated list of tags and wildcards |
569 |
|
|
# required for the ZONE operator to work |
570 |
|
|
# optional, default is empty string (do not index zones) |
571 |
|
|
# |
572 |
|
|
# index_zones = title, h*, th |
573 |
|
|
|
574 |
|
|
|
575 |
|
|
# index per-document and average per-index field lengths, in tokens |
576 |
|
|
# required for the BM25A(), BM25F() in expression ranker |
577 |
|
|
# optional, default is 0 (do not index field lengths) |
578 |
|
|
# |
579 |
|
|
# index_field_lengths = 1 |
580 |
|
|
|
581 |
|
|
|
582 |
|
|
# regular expressions (regexps) to filter the fields and queries with |
583 |
|
|
# gets applied to data source fields when indexing |
584 |
|
|
# gets applied to search queries when searching |
585 |
|
|
# multi-value, optional, default is empty list of regexps |
586 |
|
|
# |
587 |
|
|
# regexp_filter = \b(\d+)\" => \1inch |
588 |
|
|
# regexp_filter = (blue|red) => color |
589 |
|
|
|
590 |
|
|
|
591 |
|
|
# list of the words considered frequent with respect to bigram indexing |
592 |
|
|
# optional, default is empty |
593 |
|
|
# |
594 |
|
|
# bigram_freq_words = the, a, i, you, my |
595 |
|
|
|
596 |
|
|
|
597 |
|
|
# bigram indexing mode |
598 |
|
|
# known values are none, all, first_freq, both_freq |
599 |
|
|
# optional, default is none (do not index bigrams) |
600 |
|
|
# |
601 |
|
|
# bigram_index = both_freq |
602 |
|
|
|
603 |
|
|
|
604 |
|
|
# snippet document file name prefix |
605 |
|
|
# prepended to file names when generating snippets using load_files option |
606 |
|
|
# WARNING, this is a prefix (not a path), trailing slash matters! |
607 |
|
|
# optional, default is empty |
608 |
|
|
# |
609 |
|
|
# snippets_file_prefix = /mnt/mydocs/server1 |
610 |
|
|
|
611 |
|
|
|
612 |
|
|
# whether to apply stopwords before or after stemming |
613 |
|
|
# optional, default is 0 (apply stopwords after stemming) |
614 |
|
|
# |
615 |
|
|
# stopwords_unstemmed = 0 |
616 |
|
|
|
617 |
|
|
|
618 |
|
|
# path to a global (cluster-wide) keyword IDFs file |
619 |
|
|
# optional, default is empty (use local IDFs) |
620 |
|
|
# |
621 |
|
|
# global_idf = /usr/local/sphinx/var/global.idf |
622 |
|
|
} |
623 |
|
|
|
624 |
|
|
|
625 |
|
|
# inherited index example |
626 |
|
|
# |
627 |
|
|
# all the parameters are copied from the parent index, |
628 |
|
|
# and may then be overridden in this index definition |
629 |
|
|
index zvukirustemmed : zvukiru |
630 |
|
|
{ |
631 |
|
|
# separate on-disk files, so this index does not overwrite the parent's data |
path = /var/db/sphinxsearch/data/zvukirustemmed |
632 |
|
|
# replaces the parent's 'stem_en, stem_ru, soundex' list with English stemming only |
# NOTE(review): this drops Russian stemming and soundex relative to the parent - confirm intent |
morphology = stem_en |
633 |
|
|
} |
634 |
|
|
|
635 |
|
|
|
636 |
|
|
# distributed index example |
637 |
|
|
# |
638 |
|
|
# this is a virtual index which can NOT be directly indexed, |
639 |
|
|
# and only contains references to other local and/or remote indexes |
640 |
|
|
index dist1
{
	# 'distributed' index type MUST be specified
	type				= distributed

	# local indexes to be searched
	# there can be many local indexes configured
	# every name listed here must match an 'index' section defined in this file
	local				= zvukiru
	local				= zvukirustemmed

	# remote agent
	# multiple remote agents may be specified
	# syntax for TCP connections is 'hostname:port:index1,[index2[,...]]'
	# syntax for local UNIX connections is '/path/to/socket:index1,[index2[,...]]'
	#
	# NOTE(review): the two agents below still carry the sample host/port/index
	# names; confirm that searchd instances actually serve them, or remove them
	agent				= localhost:9313:remote1
	agent				= localhost:9314:remote2,remote3
	# agent				= /var/run/searchd.sock:remote4

	# remote agent mirror groups, aka mirrors, aka HA agents
	# defines 2 or more interchangeable mirrors for a given index part
	#
	# agent				= server3:9312 | server4:9312 :indexchunk2
	# agent				= server3:9312:chunk2server3 | server4:9312:chunk2server4
	# agent				= server3:chunk2server3 | server4:chunk2server4
	# agent				= server21|server22|server23:chunk2

	# blackhole remote agent, for debugging/testing
	# network errors and search results will be ignored
	#
	# agent_blackhole		= testbox:9312:testindex1,testindex2

	# persistently connected remote agent
	# reduces connect() pressure, requires 'workers = threads' in the searchd section
	#
	# agent_persistent		= testbox:9312:testindex1,testindex2

	# remote agent connection timeout, milliseconds
	# optional, default is 1000 ms, ie. 1 sec
	agent_connect_timeout	= 1000

	# remote agent query timeout, milliseconds
	# optional, default is 3000 ms, ie. 3 sec
	agent_query_timeout		= 3000

	# HA mirror agent strategy
	# optional, default is random (pick a random mirror)
	# known values are random, nodeads, noerrors, roundrobin, nodeadstm, noerrorstm
	#
	# ha_strategy			= nodeads

	# path to RLP context file
	# optional, default is empty
	#
	# rlp_context			= /usr/local/share/sphinx/rlp/rlp-context.xml
}
698 |
|
|
|
699 |
|
|
|
700 |
|
|
# realtime index example |
701 |
|
|
# |
702 |
|
|
# you can run INSERT, REPLACE, and DELETE on this index on the fly |
703 |
|
|
# using MySQL protocol (see 'listen' directive below) |
704 |
|
|
index rt |
705 |
|
|
{ |
706 |
|
|
# 'rt' index type must be specified to use RT index |
707 |
|
|
type = rt |
708 |
|
|
|
709 |
|
|
# index files path and file name, without extension |
710 |
|
|
# mandatory, path must be writable, extensions will be auto-appended |
711 |
|
|
path = /var/db/sphinxsearch/data/rt |
712 |
|
|
|
713 |
|
|
# RAM chunk size limit |
714 |
|
|
# RT index will keep at most this much data in RAM, then flush to disk |
715 |
|
|
# optional, default is 128M |
716 |
|
|
# |
717 |
|
|
# rt_mem_limit = 512M |
718 |
|
|
|
719 |
|
|
# full-text field declaration |
720 |
|
|
# multi-value, mandatory |
721 |
|
|
rt_field = title |
722 |
|
|
rt_field = content |
723 |
|
|
|
724 |
|
|
# unsigned integer attribute declaration |
725 |
|
|
# multi-value (an arbitrary number of attributes is allowed), optional |
726 |
|
|
# declares an unsigned 32-bit attribute |
727 |
|
|
rt_attr_uint = gid |
728 |
|
|
|
729 |
|
|
# RT indexes currently support the following attribute types: |
730 |
|
|
# uint, bigint, float, timestamp, string, mva, mva64, json |
731 |
|
|
# |
732 |
|
|
# rt_attr_bigint = guid |
733 |
|
|
# rt_attr_float = gpa |
734 |
|
|
# rt_attr_timestamp = ts_added |
735 |
|
|
# rt_attr_string = author |
736 |
|
|
# rt_attr_multi = tags |
737 |
|
|
# rt_attr_multi_64 = tags64 |
738 |
|
|
# rt_attr_json = extra_data |
739 |
|
|
} |
740 |
|
|
|
741 |
|
|
############################################################################# |
742 |
|
|
## indexer settings |
743 |
|
|
############################################################################# |
744 |
|
|
|
745 |
|
|
indexer |
746 |
|
|
{ |
747 |
|
|
# memory limit, in bytes, kilobytes (16384K) or megabytes (256M) |
748 |
|
|
# optional, default is 128M, max is 2047M, recommended is 256M to 1024M |
749 |
|
|
mem_limit = 128M |
750 |
|
|
|
751 |
|
|
# maximum IO calls per second (for I/O throttling) |
752 |
|
|
# optional, default is 0 (unlimited) |
753 |
|
|
# |
754 |
|
|
# max_iops = 40 |
755 |
|
|
|
756 |
|
|
|
757 |
|
|
# maximum IO call size, bytes (for I/O throttling) |
758 |
|
|
# optional, default is 0 (unlimited) |
759 |
|
|
# |
760 |
|
|
# max_iosize = 1048576 |
761 |
|
|
|
762 |
|
|
|
763 |
|
|
# maximum xmlpipe2 field length, bytes |
764 |
|
|
# optional, default is 2M |
765 |
|
|
# |
766 |
|
|
# max_xmlpipe2_field = 4M |
767 |
|
|
|
768 |
|
|
|
769 |
|
|
# write buffer size, bytes |
770 |
|
|
# several (currently up to 4) buffers will be allocated |
771 |
|
|
# write buffers are allocated in addition to mem_limit |
772 |
|
|
# optional, default is 1M |
773 |
|
|
# |
774 |
|
|
# write_buffer = 1M |
775 |
|
|
|
776 |
|
|
|
777 |
|
|
# maximum file field adaptive buffer size |
778 |
|
|
# optional, default is 8M, minimum is 1M |
779 |
|
|
# |
780 |
|
|
# max_file_field_buffer = 32M |
781 |
|
|
|
782 |
|
|
|
783 |
|
|
# how to handle IO errors in file fields |
784 |
|
|
# known values are 'ignore_field', 'skip_document', and 'fail_index' |
785 |
|
|
# optional, default is 'ignore_field' |
786 |
|
|
# |
787 |
|
|
# on_file_field_error = skip_document |
788 |
|
|
|
789 |
|
|
|
790 |
|
|
# lemmatizer cache size |
791 |
|
|
# improves the indexing time when the lemmatization is enabled |
792 |
|
|
# optional, default is 256K |
793 |
|
|
# |
794 |
|
|
# lemmatizer_cache = 512M |
795 |
|
|
} |
796 |
|
|
|
797 |
|
|
############################################################################# |
798 |
|
|
## searchd settings |
799 |
|
|
############################################################################# |
800 |
|
|
|
801 |
|
|
searchd |
802 |
|
|
{ |
803 |
|
|
# [hostname:]port[:protocol], or /unix/socket/path to listen on |
804 |
|
|
# known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL) |
805 |
|
|
# |
806 |
|
|
# multi-value, multiple listen points are allowed |
807 |
|
|
# optional, defaults are 9312:sphinx and 9306:mysql41, as below |
808 |
|
|
# |
809 |
|
|
# listen = 127.0.0.1 |
810 |
|
|
# listen = 192.168.0.1:9312 |
811 |
|
|
# listen = 9312 |
812 |
|
|
# listen = /var/run/searchd.sock |
813 |
|
|
listen = 9312 |
814 |
|
|
listen = 9306:mysql41 |
815 |
|
|
|
816 |
|
|
# log file, searchd run info is logged here |
817 |
|
|
# optional, default is 'searchd.log' |
818 |
|
|
log = /var/log/sphinxsearch/searchd.log |
819 |
|
|
|
820 |
|
|
# query log file, all search queries are logged here |
821 |
|
|
# optional, default is empty (do not log queries) |
822 |
|
|
query_log = /var/log/sphinxsearch/sphinx-query.log |
823 |
|
|
|
824 |
|
|
# client read timeout, seconds |
825 |
|
|
# optional, default is 5 |
826 |
|
|
read_timeout = 5 |
827 |
|
|
|
828 |
|
|
# request timeout, seconds |
829 |
|
|
# optional, default is 5 minutes |
830 |
|
|
client_timeout = 300 |
831 |
|
|
|
832 |
|
|
# maximum amount of children to fork (concurrent searches to run) |
833 |
|
|
# optional, default is 0 (unlimited) |
834 |
|
|
max_children = 30 |
835 |
|
|
|
836 |
|
|
# maximum amount of persistent connections from this master to each agent host |
837 |
|
|
# optional, but necessary if you use agent_persistent. It is reasonable to set this value |
838 |
|
|
# equal to, or less than, the max_children setting on the agent hosts. |
839 |
|
|
persistent_connections_limit = 30 |
840 |
|
|
|
841 |
|
|
# PID file, searchd process ID file name |
842 |
|
|
# mandatory |
843 |
|
|
pid_file = /var/run/sphinxsearch/searchd.pid |
844 |
|
|
|
845 |
|
|
# seamless rotate, prevents rotate stalls if precaching huge datasets |
846 |
|
|
# optional, default is 1 |
847 |
|
|
seamless_rotate = 1 |
848 |
|
|
|
849 |
|
|
# whether to forcibly preopen all indexes on startup |
850 |
|
|
# optional, default is 1 (preopen everything) |
851 |
|
|
preopen_indexes = 1 |
852 |
|
|
|
853 |
|
|
# whether to unlink .old index copies on successful rotation. |
854 |
|
|
# optional, default is 1 (do unlink) |
855 |
|
|
unlink_old = 1 |
856 |
|
|
|
857 |
|
|
# attribute updates periodic flush timeout, seconds |
858 |
|
|
# updates will be automatically dumped to disk this frequently |
859 |
|
|
# optional, default is 0 (disable periodic flush) |
860 |
|
|
# |
861 |
|
|
# attr_flush_period = 900 |
862 |
|
|
|
863 |
|
|
|
864 |
|
|
# MVA updates pool size |
865 |
|
|
# shared between all instances of searchd, disables attr flushes! |
866 |
|
|
# optional, default size is 1M |
867 |
|
|
mva_updates_pool = 1M |
868 |
|
|
|
869 |
|
|
# max allowed network packet size |
870 |
|
|
# limits both query packets from clients, and responses from agents |
871 |
|
|
# optional, default size is 8M |
872 |
|
|
max_packet_size = 8M |
873 |
|
|
|
874 |
|
|
# max allowed per-query filter count |
875 |
|
|
# optional, default is 256 |
876 |
|
|
max_filters = 256 |
877 |
|
|
|
878 |
|
|
# max allowed per-filter values count |
879 |
|
|
# optional, default is 4096 |
880 |
|
|
max_filter_values = 4096 |
881 |
|
|
|
882 |
|
|
|
883 |
|
|
# socket listen queue length |
884 |
|
|
# optional, default is 5 |
885 |
|
|
# |
886 |
|
|
# listen_backlog = 5 |
887 |
|
|
|
888 |
|
|
|
889 |
|
|
# per-keyword read buffer size |
890 |
|
|
# optional, default is 256K |
891 |
|
|
# |
892 |
|
|
# read_buffer = 256K |
893 |
|
|
|
894 |
|
|
|
895 |
|
|
# unhinted read size (currently used when reading hits) |
896 |
|
|
# optional, default is 32K |
897 |
|
|
# |
898 |
|
|
# read_unhinted = 32K |
899 |
|
|
|
900 |
|
|
|
901 |
|
|
# max allowed per-batch query count (aka multi-query count) |
902 |
|
|
# optional, default is 32 |
903 |
|
|
max_batch_queries = 32 |
904 |
|
|
|
905 |
|
|
|
906 |
|
|
# max common subtree document cache size, per-query |
907 |
|
|
# optional, default is 0 (disable subtree optimization) |
908 |
|
|
# |
909 |
|
|
# subtree_docs_cache = 4M |
910 |
|
|
|
911 |
|
|
|
912 |
|
|
# max common subtree hit cache size, per-query |
913 |
|
|
# optional, default is 0 (disable subtree optimization) |
914 |
|
|
# |
915 |
|
|
# subtree_hits_cache = 8M |
916 |
|
|
|
917 |
|
|
|
918 |
|
|
# multi-processing mode (MPM) |
919 |
|
|
# known values are none, fork, prefork, and threads |
920 |
|
|
# threads is required for RT backend to work |
921 |
|
|
# optional, default is threads |
922 |
|
|
workers = threads # for RT to work |
923 |
|
|
|
924 |
|
|
|
925 |
|
|
# max threads to create for searching local parts of a distributed index |
926 |
|
|
# optional, default is 0, which means disable multi-threaded searching |
927 |
|
|
# should work with all MPMs (ie. does NOT require workers=threads) |
928 |
|
|
# |
929 |
|
|
# dist_threads = 4 |
930 |
|
|
|
931 |
|
|
|
932 |
|
|
# binlog files path; use empty string to disable binlog |
933 |
|
|
# optional, default is build-time configured data directory |
934 |
|
|
# |
935 |
|
|
# binlog_path = # disable logging |
936 |
|
|
# binlog_path = /var/db/sphinxsearch/data # binlog.001 etc will be created there |
937 |
|
|
|
938 |
|
|
|
939 |
|
|
# binlog flush/sync mode |
940 |
|
|
# 0 means flush and sync every second |
941 |
|
|
# 1 means flush and sync every transaction |
942 |
|
|
# 2 means flush every transaction, sync every second |
943 |
|
|
# optional, default is 2 |
944 |
|
|
# |
945 |
|
|
# binlog_flush = 2 |
946 |
|
|
|
947 |
|
|
|
948 |
|
|
# binlog per-file size limit |
949 |
|
|
# optional, default is 128M, 0 means no limit |
950 |
|
|
# |
951 |
|
|
# binlog_max_log_size = 256M |
952 |
|
|
|
953 |
|
|
|
954 |
|
|
# per-thread stack size, only affects workers=threads mode |
955 |
|
|
# optional, default is 64K |
956 |
|
|
# |
957 |
|
|
# thread_stack = 128K |
958 |
|
|
|
959 |
|
|
|
960 |
|
|
# per-keyword expansion limit (for dict=keywords prefix searches) |
961 |
|
|
# optional, default is 0 (no limit) |
962 |
|
|
# |
963 |
|
|
# expansion_limit = 1000 |
964 |
|
|
|
965 |
|
|
|
966 |
|
|
# RT RAM chunks flush period |
967 |
|
|
# optional, default is 0 (no periodic flush) |
968 |
|
|
# |
969 |
|
|
# rt_flush_period = 900 |
970 |
|
|
|
971 |
|
|
|
972 |
|
|
# query log file format |
973 |
|
|
# optional, known values are plain and sphinxql, default is plain |
974 |
|
|
# |
975 |
|
|
# query_log_format = sphinxql |
976 |
|
|
|
977 |
|
|
|
978 |
|
|
# version string returned to MySQL network protocol clients |
979 |
|
|
# optional, default is empty (use Sphinx version) |
980 |
|
|
# |
981 |
|
|
# mysql_version_string = 5.0.37 |
982 |
|
|
|
983 |
|
|
|
984 |
|
|
# default server-wide collation |
985 |
|
|
# optional, default is libc_ci |
986 |
|
|
# |
987 |
|
|
# collation_server = utf8_general_ci |
988 |
|
|
|
989 |
|
|
|
990 |
|
|
# server-wide locale for libc based collations |
991 |
|
|
# optional, default is C |
992 |
|
|
# |
993 |
|
|
# collation_libc_locale = ru_RU.UTF-8 |
994 |
|
|
|
995 |
|
|
|
996 |
|
|
# threaded server watchdog (only used in workers=threads mode) |
997 |
|
|
# optional, values are 0 and 1, default is 1 (watchdog on) |
998 |
|
|
# |
999 |
|
|
# watchdog = 1 |
1000 |
|
|
|
1001 |
|
|
|
1002 |
|
|
# costs for max_predicted_time model, in (imaginary) nanoseconds |
1003 |
|
|
# optional, default is "doc=64, hit=48, skip=2048, match=64" |
1004 |
|
|
# |
1005 |
|
|
# predicted_time_costs = doc=64, hit=48, skip=2048, match=64 |
1006 |
|
|
|
1007 |
|
|
|
1008 |
|
|
# current SphinxQL state (uservars etc) serialization path |
1009 |
|
|
# optional, default is none (do not serialize SphinxQL state) |
1010 |
|
|
# |
1011 |
|
|
# sphinxql_state = sphinxvars.sql |
1012 |
|
|
|
1013 |
|
|
|
1014 |
|
|
# maximum RT merge thread IO calls per second, and per-call IO size |
1015 |
|
|
# useful for throttling (the background) OPTIMIZE INDEX impact |
1016 |
|
|
# optional, default is 0 (unlimited) |
1017 |
|
|
# |
1018 |
|
|
# rt_merge_iops = 40 |
1019 |
|
|
# rt_merge_maxiosize = 1M |
1020 |
|
|
|
1021 |
|
|
|
1022 |
|
|
# interval between agent mirror pings, in milliseconds |
1023 |
|
|
# 0 means disable pings |
1024 |
|
|
# optional, default is 1000 |
1025 |
|
|
# |
1026 |
|
|
# ha_ping_interval = 0 |
1027 |
|
|
|
1028 |
|
|
|
1029 |
|
|
# agent mirror statistics window size, in seconds |
1030 |
|
|
# stats older than the window size (karma) are retired |
1031 |
|
|
# that is, they will not affect master choice of agents in any way |
1032 |
|
|
# optional, default is 60 seconds |
1033 |
|
|
# |
1034 |
|
|
# ha_period_karma = 60 |
1035 |
|
|
|
1036 |
|
|
|
1037 |
|
|
# delay between preforked children restarts on rotation, in milliseconds |
1038 |
|
|
# optional, default is 0 (no delay) |
1039 |
|
|
# |
1040 |
|
|
# prefork_rotation_throttle = 100 |
1041 |
|
|
|
1042 |
|
|
|
1043 |
|
|
# a prefix to prepend to the local file names when creating snippets |
1044 |
|
|
# with load_files and/or load_files_scatter options |
1045 |
|
|
# optional, default is empty |
1046 |
|
|
# |
1047 |
|
|
# snippets_file_prefix = /mnt/common/server1/ |
1048 |
|
|
} |
1049 |
|
|
|
1050 |
|
|
############################################################################# |
1051 |
|
|
## common settings |
1052 |
|
|
############################################################################# |
1053 |
|
|
|
1054 |
|
|
common |
1055 |
|
|
{ |
1056 |
|
|
|
1057 |
|
|
# lemmatizer dictionaries base path |
1058 |
|
|
# optional, default is /usr/local/share (see ./configure --datadir) |
1059 |
|
|
# |
1060 |
|
|
# lemmatizer_base = /usr/local/share/sphinx/dicts |
1061 |
|
|
|
1062 |
|
|
|
1063 |
|
|
# how to handle syntax errors in JSON attributes |
1064 |
|
|
# known values are 'ignore_attr' and 'fail_index' |
1065 |
|
|
# optional, default is 'ignore_attr' |
1066 |
|
|
# |
1067 |
|
|
# on_json_attr_error = fail_index |
1068 |
|
|
|
1069 |
|
|
|
1070 |
|
|
# whether to auto-convert numeric values from strings in JSON attributes |
1071 |
|
|
# with auto-conversion, string value with actually numeric data |
1072 |
|
|
# (as in {"key":"12345"}) gets stored as a number, rather than string |
1073 |
|
|
# optional, allowed values are 0 and 1, default is 0 (do not convert) |
1074 |
|
|
# |
1075 |
|
|
# json_autoconv_numbers = 1 |
1076 |
|
|
|
1077 |
|
|
|
1078 |
|
|
# whether and how to auto-convert key names in JSON attributes |
1079 |
|
|
# known value is 'lowercase' |
1080 |
|
|
# optional, default is unspecified (do nothing) |
1081 |
|
|
# |
1082 |
|
|
# json_autoconv_keynames = lowercase |
1083 |
|
|
|
1084 |
|
|
|
1085 |
|
|
# path to RLP root directory |
1086 |
|
|
# optional, default is /usr/local/share (see ./configure --datadir) |
1087 |
|
|
# |
1088 |
|
|
# rlp_root = /usr/local/share/sphinx/rlp |
1089 |
|
|
|
1090 |
|
|
|
1091 |
|
|
# path to RLP environment file |
1092 |
|
|
# optional, default is /usr/local/share/rlp-environment.xml (see ./configure --datadir) |
1093 |
|
|
# |
1094 |
|
|
# rlp_environment = /usr/local/share/sphinx/rlp/rlp/etc/rlp-environment.xml |
1095 |
|
|
|
1096 |
|
|
|
1097 |
|
|
# maximum total size of documents batched before processing them by the RLP |
1098 |
|
|
# optional, default is 51200 |
1099 |
|
|
# |
1100 |
|
|
# rlp_max_batch_size = 100k |
1101 |
|
|
|
1102 |
|
|
|
1103 |
|
|
# maximum number of documents batched before processing them by the RLP |
1104 |
|
|
# optional, default is 50 |
1105 |
|
|
# |
1106 |
|
|
# rlp_max_batch_docs = 100 |
1107 |
|
|
|
1108 |
|
|
|
1109 |
|
|
# trusted plugin directory |
1110 |
|
|
# optional, default is empty (disable UDFs) |
1111 |
|
|
# |
1112 |
|
|
# plugin_dir = /usr/local/sphinx/lib |
1113 |
|
|
|
1114 |
|
|
} |
1115 |
|
|
|
1116 |
|
|
# --eof-- |