From 317cb833f9d15aa24f265386762f3b80615860a8 Mon Sep 17 00:00:00 2001 From: Daria Mayorova Date: Tue, 17 Jan 2023 12:48:13 +0100 Subject: [PATCH] Update thinking_sphinx.yml configuration file: - add batch_size configuration (see below) - remove the environments development, test, preview that are not needed in the container image - group the different settings according to what they configure (thinking sphinx, searchd, etc.) batch_size setting is required for reindexing in SaaS, as with the default value 1000 reindexing with `rake ts:rt:index` fails with the following error: ``` ThinkingSphinx::QueryLengthError: The supplied SphinxQL statement is 10266420 characters long. The maximum allowed length is 8388603. ``` Probably that's because of indexing CMS contents that can be quite long. --- config/docker/thinking_sphinx.yml | 59 +++++++++++++++---------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/config/docker/thinking_sphinx.yml b/config/docker/thinking_sphinx.yml index a3337971bd..1b317157c2 100644 --- a/config/docker/thinking_sphinx.yml +++ b/config/docker/thinking_sphinx.yml @@ -1,44 +1,43 @@ -common: &sphinx - configuration_file: <%= ENV.fetch('THINKING_SPHINX_CONFIGURATION_FILE') { Rails.root.join("config/#{Rails.env}.sphinx.conf")} %> +production: + # ---------------------------------- + # Thinking Sphinx configuration + # see https://gist.github.com/pat/a7d73376dd657b4457092efc9e9c418a + # ---------------------------------- + mysql41: 9306 + address: <%= ENV['THINKING_SPHINX_ADDRESS'] || '0.0.0.0' %> + big_document_ids: true + + configuration_file: <%= ENV.fetch('THINKING_SPHINX_CONFIGURATION_FILE') { Rails.root.join("config/#{Rails.env}.sphinx.conf")} %> + + # Batch size for real-time index processing (via the `ts:index` and `ts:rebuild` tasks), the default is 1000 + batch_size: <%= ENV.fetch('THINKING_SPHINX_BATCH_SIZE', 1000) %> + + # ---------------------------------- + # Sphinx index configuration, see http://sphinxsearch.com/docs/current.html#confgroup-index + # ---------------------------------- + charset_table: 0..9, A..Z->a..z, a..z # strip _ from words + html_strip: 1 html_remove_elements: "style, script" html_index_attrs: "img=alt,title; a=title" min_infix_len: 3 - pid_file: <%= ENV.fetch('THINKING_SPHINX_PID_FILE') { Rails.root.join('log', "searchd.#{Rails.env}.pid") } %> + + # ---------------------------------- + # Data source configuration, see http://sphinxsearch.com/docs/current.html#confgroup-source + # ---------------------------------- # This is from TS FAQ - makes reindex MUCH faster. sql_range_step: 2000000000 - charset_table: 0..9, A..Z->a..z, a..z # strip _ from words + # ---------------------------------- + # Sphinx daemon (searchd) configuration, see http://sphinxsearch.com/docs/current.html#confgroup-searchd + # ---------------------------------- + + # Use standard output for logs in container environments query_log: /dev/stdout log: /dev/stdout -development: - <<: *sphinx - -test: &test - <<: *sphinx -<% case ENV['DATABASE_URL'].to_s - when /^oracle/ %> -sql_port: 1521 - <% when /^postgresql/ %> -sql_port: 5432 - <% else %> -mysql41: <%= 9313 + ENV['TEST_ENV_NUMBER'].to_i %> - <% end %> -configuration_file: <%= Rails.root.join('config', "test#{ENV['TEST_ENV_NUMBER']}.sphinx.conf") %> -indices_location: <%= Rails.root.join("db/sphinx/test#{ENV['TEST_ENV_NUMBER']}") %> -pid_file: <%= Rails.root.join("log/searchd.test#{ENV['TEST_ENV_NUMBER']}.pid") %> -hard_retry_count: 5 -binlog_path: '' - -preview: - <<: *sphinx - binlog_path: '' - address: <%= ENV['THINKING_SPHINX_ADDRESS'] || '0.0.0.0' %> + pid_file: <%= ENV.fetch('THINKING_SPHINX_PID_FILE') { Rails.root.join('log', "searchd.#{Rails.env}.pid") } %> -production: - <<: *sphinx binlog_path: '' - address: <%= ENV['THINKING_SPHINX_ADDRESS'] || '0.0.0.0' %>