From 053b4a0a026ae8fd689d95a8d4f3b1a7b6d6779f Mon Sep 17 00:00:00 2001 From: Alexander Baygeldin Date: Mon, 14 Mar 2022 23:59:58 +0300 Subject: [PATCH] feat: Support multithreaded and multiprocess environments - acquire an index writer on every writing operation - if `exclusive_writer` is set to `true`, acquire it only once - add `transaction` method to group writing operations BREAKING CHANGE: `commit` method is no longer public --- README.md | 35 +++++++-- bin/console | 17 +++-- ext/Rakefile | 2 +- lib/tantiny.rb | 2 + lib/tantiny/errors.rb | 13 +++- lib/tantiny/helpers.rb | 10 +++ lib/tantiny/index.rb | 118 +++++++++++++++++++++++------ sig/tantiny/helpers.rbs | 2 + sig/tantiny/index.rbs | 29 +++++++- spec/spec_helper.rb | 1 + spec/tantiny/helpers_spec.rb | 29 ++++++++ spec/tantiny/index_spec.rb | 140 +++++++++++++++++++++++++++++------ spec/tantiny/query_spec.rb | 12 ++- src/helpers.rs | 10 ++- src/index.rs | 53 +++++++++---- src/lib.rs | 1 - tantiny.gemspec | 1 + 17 files changed, 387 insertions(+), 88 deletions(-) diff --git a/README.md b/README.md index 1fb49f7..7233cf2 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,9 @@ Need a fast full-text search for your Ruby script, but Solr and Elasticsearch are an overkill? 😏 -You're in the right place. **Tantiny** is a minimalistic full-text search library for Ruby based on [Tantivy](https://github.com/quickwit-oss/tantivy) (an awesome alternative to Apache Lucene written in Rust). It's great for cases when your task at hand requires a full-text search, but configuring a full-blown distributed search engine would take more time than the task itself. And even if you already use such an engine in your project (which is highly likely, actually), it still might be easier to just use Tantiny instead because unlike Solr and Elasticsearch it doesn't need *anything* to work (no separate server or process or whatever), it's purely embeddable. 
So, when you find yourself in a situation when using your search engine of choice would be tricky/inconvinient or would require additional setup you can always revert back to a quick and dirty solution that is nontheless flexible and fast. +You're in the right place. **Tantiny** is a minimalistic full-text search library for Ruby based on [Tanti**v**y](https://github.com/quickwit-oss/tantivy) (an awesome alternative to Apache Lucene written in Rust). It's great for cases when your task at hand requires a full-text search, but configuring a full-blown distributed search engine would take more time than the task itself. And even if you already use such an engine in your project (which is highly likely, actually), it still might be easier to just use Tantiny instead because unlike Solr and Elasticsearch it doesn't need *anything* to work (no separate server or process or whatever), it's purely embeddable. So, when you find yourself in a situation when using your search engine of choice would be tricky/inconvenient or would require additional setup you can always revert back to a quick and dirty solution that is nonetheless flexible and fast. -Tantiny is not exactly bindings to Tantivy, but it tries to be close. The main philosophy is to provide low-level access to Tantivy's inverted index, but with a nice Ruby-esque API, sensible defaults, and additional functionality sprinkled on top. +Tantiny is not exactly Ruby bindings to Tantivy, but it tries to be close. The main philosophy is to provide low-level access to Tantivy's inverted index, but with a nice Ruby-esque API, sensible defaults, and additional functionality sprinkled on top. Take a look at the most basic example: @@ -20,7 +20,6 @@ index << { id: 1, description: "Hello World!" } index << { id: 2, description: "What's up?" } index << { id: 3, description: "Goodbye World!" 
} -index.commit index.reload index.search("world") # 1, 3 @@ -91,6 +90,8 @@ rio_bravo = OpenStruct.new( release_date: Date.parse("March 18, 1959") ) +index << rio_bravo + hanabi = { imdb_id: "tt0119250", type: "/crime/Japan", @@ -101,6 +102,8 @@ hanabi = { release_date: Date.parse("December 1, 1998") } +index << hanabi + brother = { imdb_id: "tt0118767", type: "/crime/Russia", @@ -111,8 +114,6 @@ brother = { release_date: Date.parse("December 12, 1997") } -index << rio_bravo -index << hanabi index << brother ``` @@ -129,12 +130,32 @@ You can also delete it if you want: index.delete(rio_bravo.imdb_id) ``` -After that you need to commit the index for the changes to take place: +### Transactions + +If you need to perform multiple writing operations (i.e. more than one) you should always use `transaction`: + +```ruby +index.transaction do + index << rio_bravo + index << hanabi + index << brother +end +``` + +Transactions group changes and [commit](https://docs.rs/tantivy/latest/tantivy/struct.IndexWriter.html#method.commit) them to the index in one go. This is *dramatically* more efficient than performing these changes one by one. In fact, all writing operations (i.e. `<<` and `delete`) are wrapped in a transaction implicitly when you call them outside of a transaction, so calling `<<` 10 times outside of a transaction is the same thing as performing 10 separate transactions. + +### Concurrency and thread-safety + +Tantiny is thread-safe meaning that you can safely share a single instance of the index between threads. You can also spawn separate processes that could write to and read from the same index. However, while reading from the index should be parallel, writing to it is **not**. Whenever you call `transaction` or any other operation that modifies the index (i.e. `<<` and `delete`) it will lock the index for the duration of the operation or wait for another process or thread to release the lock. 
The only exception to this is when there is another process with an index with an exclusive writer running somewhere in which case the methods that modify the index will fail immediately. + +Thus, it's best to have a single writer process and many reader processes if you want to avoid blocking calls. The proper way to do this is to set `exclusive_writer` to `true` when initializing the index: ```ruby -index.commit +index = Tantiny::Index.new("/path/to/index", exclusive_writer: true) {} ``` +This way the [index writer](https://docs.rs/tantivy/latest/tantivy/struct.IndexWriter.html) will only be acquired once which means the memory for it and indexing threads will only be allocated once as well. Otherwise a new index writer is acquired every time you perform a writing operation. + ## Searching Make sure that your index is up-to-date by reloading it first: diff --git a/bin/console b/bin/console index 33ffd07..66d465f 100755 --- a/bin/console +++ b/bin/console @@ -7,9 +7,13 @@ require "pry" require "tantiny" path = File.join(__dir__, "../tmp") -en_stem = Tantiny::Tokenizer.new(:stemmer, language: :en) -index = Tantiny::Index.new path, tokenizer: en_stem do +options = { + tokenizer: Tantiny::Tokenizer.new(:stemmer, language: :en), + exclusive_writer: true, +} + +index = Tantiny::Index.new(path, **options) do id :imdb_id facet :category string :title @@ -49,11 +53,12 @@ brother = { release_date: Date.parse("December 12, 1997") } -index << rio_bravo -index << hanabi -index << brother +index.transaction do + index << rio_bravo + index << hanabi + index << brother +end -index.commit index.reload binding.pry \ No newline at end of file diff --git a/ext/Rakefile b/ext/Rakefile index 7d17656..1312785 100644 --- a/ext/Rakefile +++ b/ext/Rakefile @@ -1,5 +1,5 @@ require "thermite/tasks" -project_dir = File.dirname(File.dirname(__FILE__)) +project_dir = File.dirname(__FILE__, 2) Thermite::Tasks.new(cargo_project_path: project_dir, ruby_project_path: project_dir) task default: 
%w[thermite:build] diff --git a/lib/tantiny.rb b/lib/tantiny.rb index dd8745d..d3589f2 100644 --- a/lib/tantiny.rb +++ b/lib/tantiny.rb @@ -4,6 +4,8 @@ RubyNext::Language.setup_gem_load_path require "rutie" +require "concurrent" +require "fileutils" require "tantiny/version" require "tantiny/errors" diff --git a/lib/tantiny/errors.rb b/lib/tantiny/errors.rb index 8f9f384..3077712 100644 --- a/lib/tantiny/errors.rb +++ b/lib/tantiny/errors.rb @@ -3,9 +3,18 @@ module Tantiny class TantivyError < StandardError; end - class UnknownField < StandardError + class IndexWriterBusyError < StandardError def initialize - super("Can't find the specified field in the schema.") + msg = "Failed to acquire an index writer. "\ + "Is there an active index with an exclusive writer already?" + + super(msg) + end + end + + class UnexpectedNone < StandardError + def initialize(type) + super("Didn't expect Option<#{type}> to be empty.") end end diff --git a/lib/tantiny/helpers.rb b/lib/tantiny/helpers.rb index e8766ee..1e24268 100644 --- a/lib/tantiny/helpers.rb +++ b/lib/tantiny/helpers.rb @@ -5,5 +5,15 @@ module Helpers def self.timestamp(date) date.to_datetime.iso8601 end + + def self.with_lock(lockfile) + File.open(lockfile, File::CREAT) do |file| + file.flock(File::LOCK_EX) + + yield + + file.flock(File::LOCK_UN) + end + end end end diff --git a/lib/tantiny/index.rb b/lib/tantiny/index.rb index 4a70d06..e673252 100644 --- a/lib/tantiny/index.rb +++ b/lib/tantiny/index.rb @@ -1,23 +1,19 @@ # frozen_string_literal: true -require "fileutils" - module Tantiny class Index - DEFAULT_INDEX_SIZE = 50_000_000 + LOCKFILE = ".tantiny.lock" + DEFAULT_WRITER_MEMORY = 5_000_000 # 5MB DEFAULT_LIMIT = 10 def self.new(path, **options, &block) - index_size = options[:size] || DEFAULT_INDEX_SIZE - default_tokenizer = options[:tokenizer] || Tokenizer.default - FileUtils.mkdir_p(path) + default_tokenizer = options[:tokenizer] || Tokenizer.default schema = Schema.new(default_tokenizer, &block) object = 
__new( path.to_s, - index_size, schema.default_tokenizer, schema.field_tokenizers.transform_keys(&:to_s), schema.text_fields.map(&:to_s), @@ -28,15 +24,40 @@ def self.new(path, **options, &block) schema.facet_fields.map(&:to_s) ) - object.send(:schema=, schema) + object.send(:initialize, path, schema, **options) object end + def initialize(path, schema, **options) + @path = path + @schema = schema + + @indexer_memory = options[:writer_memory] || DEFAULT_WRITER_MEMORY + @exclusive_writer = options[:exclusive_writer] || false + + @active_transaction = Concurrent::ThreadLocalVar.new(false) + @transaction_semaphore = Mutex.new + + acquire_index_writer if exclusive_writer? + end + attr_reader :schema - def commit - __commit + def transaction + if inside_transaction? + yield + else + synchronize do + open_transaction! + + yield + + close_transaction! + end + end + + nil end def reload @@ -44,19 +65,23 @@ def reload end def <<(document) - __add_document( - resolve(document, schema.id_field).to_s, - slice_document(document, schema.text_fields) { |v| v.to_s }, - slice_document(document, schema.string_fields) { |v| v.to_s }, - slice_document(document, schema.integer_fields) { |v| v.to_i }, - slice_document(document, schema.double_fields) { |v| v.to_f }, - slice_document(document, schema.date_fields) { |v| Helpers.timestamp(v) }, - slice_document(document, schema.facet_fields) { |v| v.to_s } - ) + transaction do + __add_document( + resolve(document, schema.id_field).to_s, + slice_document(document, schema.text_fields) { |v| v.to_s }, + slice_document(document, schema.string_fields) { |v| v.to_s }, + slice_document(document, schema.integer_fields) { |v| v.to_i }, + slice_document(document, schema.double_fields) { |v| v.to_f }, + slice_document(document, schema.date_fields) { |v| Helpers.timestamp(v) }, + slice_document(document, schema.facet_fields) { |v| v.to_s } + ) + end end def delete(id) - __delete_document(id.to_s) + transaction do + __delete_document(id.to_s) + end end 
def search(query, limit: DEFAULT_LIMIT, **smart_query_options) @@ -83,8 +108,6 @@ def search(query, limit: DEFAULT_LIMIT, **smart_query_options) private - attr_writer :schema - def slice_document(document, fields, &block) fields.inject({}) do |hash, field| hash.tap { |h| h[field.to_s] = resolve(document, field) } @@ -94,5 +117,56 @@ def slice_document(document, fields, &block) def resolve(document, field) document.is_a?(Hash) ? document[field] : document.send(field) end + + def acquire_index_writer + __acquire_index_writer(@indexer_memory) + rescue TantivyError => e + case e.message + when /Failed to acquire Lockfile/ + raise IndexWriterBusyError.new + else + raise + end + end + + def release_index_writer + __release_index_writer + end + + def commit + __commit + end + + def open_transaction! + acquire_index_writer unless exclusive_writer? + + @active_transaction.value = true + end + + def close_transaction! + commit + + release_index_writer unless exclusive_writer? + + @active_transaction.value = false + end + + def inside_transaction? + @active_transaction.value + end + + def exclusive_writer? 
+ @exclusive_writer + end + + def synchronize(&block) + @transaction_semaphore.synchronize do + Helpers.with_lock(lockfile_path, &block) + end + end + + def lockfile_path + @lockfile_path ||= File.join(@path, LOCKFILE) + end end end diff --git a/sig/tantiny/helpers.rbs b/sig/tantiny/helpers.rbs index 04fc825..4d85c60 100644 --- a/sig/tantiny/helpers.rbs +++ b/sig/tantiny/helpers.rbs @@ -2,5 +2,7 @@ module Tantiny module Helpers def self.timestamp: ((Date | DateTime) date) -> String + + def self.with_lock: (String lockfile) { (*untyped) -> void } -> void end end \ No newline at end of file diff --git a/sig/tantiny/index.rbs b/sig/tantiny/index.rbs index 036222b..d1db90f 100644 --- a/sig/tantiny/index.rbs +++ b/sig/tantiny/index.rbs @@ -1,6 +1,7 @@ module Tantiny class Index - DEFAULT_INDEX_SIZE: Integer + LOCKFILE: String + DEFAULT_WRITER_MEMORY: Integer DEFAULT_LIMIT: Integer def self.new: ( @@ -10,7 +11,6 @@ module Tantiny def self.__new: ( String path, - Integer index_size, Tokenizer default_tokenizer, Hash[String, Tokenizer] field_tokenizers, Array[String] text_fields, @@ -21,9 +21,16 @@ module Tantiny Array[String] facet_fields ) -> Index + def initialize: ( + String path, + Schema schema, + **untyped options + ) -> void + attr_reader schema: Schema - def commit: () -> void + def transaction: () { (*untyped) -> void } -> void + def reload: () -> void def <<: (untyped document) -> void def delete: (String id) -> void @@ -62,9 +69,12 @@ module Tantiny def __search: (Query query, Integer limit) -> Array[String] + def __acquire_index_writer: (Integer overall_memory) -> void + def __release_index_writer: () -> void + private - attr_writer schema: Schema + def commit: () -> void def slice_document: ( untyped document, @@ -78,5 +88,16 @@ module Tantiny ) -> Array[String] def resolve: (untyped document, Symbol field) -> untyped + + def synchronize: () { (*untyped) -> void } -> void + def lockfile_path: () -> String + + def exclusive_writer?: () -> bool + def 
acquire_index_writer: () -> void + def release_index_writer: () -> void + + def open_transaction!: () -> void + def close_transaction!: () -> void + def inside_transaction?: () -> bool end end \ No newline at end of file diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 0780863..d48c6c7 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -4,6 +4,7 @@ SimpleCov.start if ENV["COVERAGE"] require "fileutils" +require "pathname" require "tmpdir" require "pry" diff --git a/spec/tantiny/helpers_spec.rb b/spec/tantiny/helpers_spec.rb index d465a32..8637065 100644 --- a/spec/tantiny/helpers_spec.rb +++ b/spec/tantiny/helpers_spec.rb @@ -6,4 +6,33 @@ expect(subject.timestamp(Date.new(2022))).to eq("2022-01-01T00:00:00+00:00") end end + + describe "::with_lock" do + let!(:lockfile) { Tempfile.new } + + after do + lockfile.delete + end + + it "creates the lockfile if it doesn't exist" do + lockfile_path = lockfile.path + lockfile.delete + + subject.with_lock(lockfile_path) {} + + expect(Pathname.new(lockfile_path)).to exist + end + + it "exclusively locks the lockfile for the duration of block execution" do + collaborator = double("Collaborator") + file = double("File") + + allow(File).to receive(:open).and_yield(file) + expect(file).to receive(:flock).with(File::LOCK_EX).ordered + expect(collaborator).to receive(:hello).ordered + expect(file).to receive(:flock).with(File::LOCK_UN).ordered + + subject.with_lock(lockfile.path) { collaborator.hello } + end + end end diff --git a/spec/tantiny/index_spec.rb b/spec/tantiny/index_spec.rb index c81e499..3d93eeb 100644 --- a/spec/tantiny/index_spec.rb +++ b/spec/tantiny/index_spec.rb @@ -2,10 +2,11 @@ RSpec.describe Tantiny::Index do subject(:index) do - Tantiny::Index.new(tmpdir, tokenizer: tokenizer, &schema_block) + Tantiny::Index.new(tmpdir, **options, &schema_block) end let(:tmpdir) { Dir.mktmpdir } + let(:options) { {tokenizer: tokenizer} } let(:schema_block) { proc {} } let(:tokenizer) { 
Tantiny::Tokenizer.default } @@ -17,9 +18,12 @@ def documents index.search(index.all_query) end - def commit_and_reload - index.commit - index.reload + describe "panics" do + it "doesn't panic when Option is None" do + expect { + index.__add_document("tmp", {"unkown_field" => "whatever"}, {}, {}, {}, {}, {}) + }.to raise_error(Tantiny::UnexpectedNone) + end end describe "::new" do @@ -46,6 +50,87 @@ def commit_and_reload expect(index.schema).to eq(schema) end + + context "when exclusive_writer is true" do + let(:options) { {exclusive_writer: true} } + + it "doesn't need to acquire an index writer on every change" do + expect(index).not_to receive(:acquire_index_writer) + + index << {id: 1} + index << {id: 2} + end + end + end + + describe ".transaction" do + let(:mutex) { index.instance_variable_get(:@transaction_semaphore) } + + it "synchronizes block execution between threads" do + collaborator_1 = double("Collaborator 1") + collaborator_2 = double("Collaborator 2") + + allow(mutex).to receive(:synchronize) do |&block| + collaborator_1.enter_mutex + block.call + collaborator_1.leave_mutex + end + + expect(collaborator_1).to receive(:enter_mutex).ordered + expect(collaborator_2).to receive(:hello).ordered + expect(collaborator_1).to receive(:leave_mutex).ordered + + index.transaction { collaborator_2.hello } + end + + it "synchronizes block execution between processes" do + collaborator_1 = double("Collaborator 1") + collaborator_2 = double("Collaborator 2") + + allow(Tantiny::Helpers).to receive(:with_lock) do |&block| + collaborator_1.lock + block.call + collaborator_1.unlock + end + + expect(collaborator_1).to receive(:lock).ordered + expect(collaborator_2).to receive(:hello).ordered + expect(collaborator_1).to receive(:unlock).ordered + + index.transaction { collaborator_2.hello } + end + + context "when inside a transaction" do + it "simply executes the block without synchronization" do + collaborator = double("Collaborator") + + expect(mutex).to 
receive(:synchronize).and_call_original.once + expect(collaborator).to receive(:hello) + + index.transaction do + index.transaction { collaborator.hello } + end + end + end + + context "when another index holds exclusive writer" do + it "raises an error" do + collaborator = double("Collaborator") + + Tantiny::Index.new(tmpdir, exclusive_writer: true, &schema_block) + + expect { + index.transaction { collaborator.hello } + }.to raise_error(Tantiny::IndexWriterBusyError) + end + end + + it "commits the changes" do + index.transaction { index << {id: "hello"} } + index.reload + + expect(index.search(index.all_query)).to contain_exactly("hello") + end end describe ".<<" do @@ -75,7 +160,7 @@ def commit_and_reload it "maps fields according to schema" do index << movie - commit_and_reload + index.reload imdb_id = movie[:imdb_id] @@ -96,7 +181,7 @@ def commit_and_reload it "allows empty fields" do index << movie.slice(:imdb_id, :title) - commit_and_reload + index.reload query = index.term_query(:title, "Hana-bi") @@ -105,33 +190,26 @@ def commit_and_reload it "works with any object" do index << OpenStruct.new(movie) - commit_and_reload + index.reload query = index.term_query(:title, "Hana-bi") expect(index.search(query).first).to eq(movie[:imdb_id]) end - it "raises error for unkown fields" do - expect { - # Currently this is the only way to cause the error. 
- index.__add_document("tmp", {"unkown_field" => "whatever"}, {}, {}, {}, {}, {}) - }.to raise_error(Tantiny::UnknownField) - end - end + it "wraps itself in a transaction" do + expect(index).to receive(:transaction).and_call_original - describe ".commit" do - it "commits the index" do - index << {id: 1} + index << movie + index.reload - expect { index.commit }.not_to raise_error + expect(index.search(index.all_query)).not_to be_empty end end describe ".reload" do it "reloads the index" do index << {id: 1} - index.commit expect { index.reload }.to change { documents }.from([]).to(%w[1]) end @@ -140,29 +218,43 @@ def commit_and_reload describe ".delete" do it "deletes an already commited document" do index << {id: "kek"} - commit_and_reload + index.reload expect { index.delete("kek") - commit_and_reload + index.reload }.to change { documents }.from(%w[kek]).to([]) end it "deletes uncommited document" do index << {id: "kek"} index.delete("kek") - commit_and_reload + index.reload expect(documents).to be_empty end + + it "wraps itself in a transaction" do + index << {id: "kek"} + + expect(index).to receive(:transaction).and_call_original + + index.delete("kek") + index.reload + + expect(index.search(index.all_query)).to be_empty + end end describe ".search" do let(:schema_block) { proc { text :description } } before do - (1..10).each { |id| index << {id: id, description: "hello"} } - commit_and_reload + index.transaction do + (1..10).each { |id| index << {id: id, description: "hello"} } + end + + index.reload end context "when query is a query object" do diff --git a/spec/tantiny/query_spec.rb b/spec/tantiny/query_spec.rb index c8cf1eb..8ccd7a5 100644 --- a/spec/tantiny/query_spec.rb +++ b/spec/tantiny/query_spec.rb @@ -5,7 +5,7 @@ en_stemmer = Tantiny::Tokenizer.new(:stemmer) @tmpdir = Dir.mktmpdir - @index = Tantiny::Index.new(@tmpdir) do + @index = Tantiny::Index.new(@tmpdir, exclusive_writer: true) do facet :facet string :string text :text @@ -21,13 +21,17 @@ end 
def add_documents(*docs) - docs.each { |d| @index << d } - @index.commit + @index.transaction do + docs.each { |d| @index << d } + end + @index.reload end def delete_documents(*docs) - docs.each { |d| @index.delete(d) } + @index.transaction do + docs.each { |d| @index.delete(d) } + end end def search(query) diff --git a/src/helpers.rs b/src/helpers.rs index cd30322..3396cb9 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -1,6 +1,5 @@ use std::collections::HashMap; use rutie::{AnyException, Array, Exception, RString, Hash, Integer, Float, Boolean, Module}; -use tantivy::schema::{Field}; use tantivy::tokenizer::Language; // Macro dependencies: @@ -114,12 +113,15 @@ where } } -impl TryUnwrap for Option { - fn try_unwrap(self) -> Field { +impl TryUnwrap for Option { + fn try_unwrap(self) -> T { if let Some(value) = self { value } else { - VM::raise_ex(AnyException::new("Tantiny::UnknownField", None)); + VM::raise_ex(AnyException::new( + "Tantiny::UnexpectedNone", + Some(&*format!("{}", std::any::type_name::()))) + ); self.unwrap() } diff --git a/src/index.rs b/src/index.rs index 53b4830..d69d58b 100644 --- a/src/index.rs +++ b/src/index.rs @@ -11,9 +11,10 @@ use crate::query::{unwrap_query, RTantinyQuery}; use crate::tokenizer::{unwrap_tokenizer, RTantinyTokenizer}; pub struct TantinyIndex { - pub(crate) index_writer: IndexWriter, - pub(crate) index_reader: IndexReader, pub(crate) schema: Schema, + pub(crate) index: Index, + pub(crate) index_writer: Option, + pub(crate) index_reader: IndexReader, } scaffold!(RTantinyIndex, TantinyIndex, "Index"); @@ -22,6 +23,10 @@ pub(crate) fn unwrap_index(index: &RTantinyIndex) -> &TantinyIndex { index.get_data(&*TANTINY_INDEX_WRAPPER) } +pub(crate) fn unwrap_index_mut(index: &mut RTantinyIndex) -> &mut TantinyIndex { + index.get_data_mut(&*TANTINY_INDEX_WRAPPER) +} + #[rustfmt::skip::macros(methods)] methods!( RTantinyIndex, @@ -29,7 +34,6 @@ methods!( fn new_index( path: RString, - index_size: Integer, default_tokenizer: 
AnyObject, field_tokenizers: Hash, text_fields: Array, @@ -41,7 +45,6 @@ methods!( ) -> RTantinyIndex { try_unwrap_params!( path: String, - index_size: i64, default_tokenizer: RTantinyTokenizer, field_tokenizers: HashMap, text_fields: Vec, @@ -103,9 +106,7 @@ methods!( tokenizers.register(&field, unwrap_tokenizer(&tokenizer).clone()) } - let mut index_writer = index - .writer(index_size as usize) - .try_unwrap(); + let index_writer = None; let index_reader = index .reader_builder() @@ -114,7 +115,7 @@ methods!( .try_unwrap(); klass().wrap_data( - TantinyIndex { index_writer, index_reader, schema }, + TantinyIndex { index, index_writer, index_reader, schema }, &*TANTINY_INDEX_WRAPPER ) } @@ -138,9 +139,8 @@ methods!( facet_fields: HashMap ); - let internal = unwrap_index(&_itself); - let index_writer = &internal.index_writer; + let index_writer = internal.index_writer.as_ref().try_unwrap(); let schema = &internal.schema; let mut doc = Document::default(); @@ -191,7 +191,7 @@ methods!( try_unwrap_params!(id: String); let internal = unwrap_index(&_itself); - let index_writer = &internal.index_writer; + let index_writer = internal.index_writer.as_ref().unwrap(); let id_field = internal.schema.get_field("id").try_unwrap(); let doc_id = Term::from_field_text(id_field, &id); @@ -201,9 +201,34 @@ methods!( NilClass::new() } + fn acquire_index_writer( + overall_memory: Integer + ) -> NilClass { + try_unwrap_params!(overall_memory: i64); + + let internal = unwrap_index_mut(&mut _itself); + + let mut index_writer = internal.index + .writer(overall_memory as usize) + .try_unwrap(); + + internal.index_writer = Some(index_writer); + + NilClass::new() + } + + fn release_index_writer() -> NilClass { + let internal = unwrap_index_mut(&mut _itself); + + drop(internal.index_writer.as_ref().try_unwrap()); + internal.index_writer = None; + + NilClass::new() + } + fn commit() -> NilClass { - let internal = _itself.get_data_mut(&*TANTINY_INDEX_WRAPPER); - let index_writer = &mut 
internal.index_writer; + let internal = unwrap_index_mut(&mut _itself); + let index_writer = internal.index_writer.as_mut().try_unwrap(); index_writer.commit().try_unwrap(); @@ -254,6 +279,8 @@ pub(super) fn init() { klass.def_self("__new", new_index); klass.def("__add_document", add_document); klass.def("__delete_document", delete_document); + klass.def("__acquire_index_writer", acquire_index_writer); + klass.def("__release_index_writer", release_index_writer); klass.def("__commit", commit); klass.def("__reload", reload); klass.def("__search", search); diff --git a/src/lib.rs b/src/lib.rs index ebf8b2e..0f50666 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,7 +3,6 @@ mod helpers; mod index; #[allow(improper_ctypes_definitions)] mod query; - #[allow(improper_ctypes_definitions)] mod tokenizer; diff --git a/tantiny.gemspec b/tantiny.gemspec index f99419a..a7f1b6e 100644 --- a/tantiny.gemspec +++ b/tantiny.gemspec @@ -48,4 +48,5 @@ Gem::Specification.new do |spec| spec.add_runtime_dependency "rutie", "~> 0.0.4" spec.add_runtime_dependency "thermite", "~> 0" spec.add_runtime_dependency "rake", "~> 13.0" + spec.add_runtime_dependency "concurrent-ruby", "~> 1.0" end