From 5244572369e140305c5bcf5f63bed6d878bd5bb7 Mon Sep 17 00:00:00 2001 From: "Maarten A. Breddels" Date: Mon, 7 Oct 2024 22:06:05 +0200 Subject: [PATCH] fix: do not release the gil before returning an object --- packages/vaex-core/src/hash_primitives.hpp | 84 ++++++++++--------- packages/vaex-core/src/hash_string.hpp | 96 ++++++++++++---------- 2 files changed, 96 insertions(+), 84 deletions(-) diff --git a/packages/vaex-core/src/hash_primitives.hpp b/packages/vaex-core/src/hash_primitives.hpp index 85a614a62..b9037ac84 100644 --- a/packages/vaex-core/src/hash_primitives.hpp +++ b/packages/vaex-core/src/hash_primitives.hpp @@ -385,25 +385,27 @@ class counter : public hash_base, U, Hashmap2>, public coun py::object counts() { py::array_t output_array(this->length()); auto output = output_array.template mutable_unchecked<1>(); - py::gil_scoped_release gil; - auto offsets = this->offsets(); size_t map_index = 0; int64_t natural_order = 0; // TODO: can be parallel due to non-overlapping maps - for (auto &map : this->maps) { - for (auto &el : map) { - // key_type key = el.first; - value_type value = el.second; - int64_t index = key_offset(natural_order++, map_index, el, offsets[map_index]); - output(index) = value; + { + py::gil_scoped_release gil; + auto offsets = this->offsets(); + for (auto &map : this->maps) { + for (auto &el : map) { + // key_type key = el.first; + value_type value = el.second; + int64_t index = key_offset(natural_order++, map_index, el, offsets[map_index]); + output(index) = value; + } + map_index += 1; + } + if (this->nan_count) { + output(this->nan_index()) = this->nan_count; + } + if (this->null_count) { + output(this->null_index()) = this->null_count; } - map_index += 1; - } - if (this->nan_count) { - output(this->nan_index()) = this->nan_count; - } - if (this->null_count) { - output(this->null_index()) = this->null_count; } return output_array; } @@ -540,20 +542,22 @@ class ordered_set : public hash_base, T2, Hashmap2> { auto input = values.template unchecked<1>(); auto output = result.template mutable_unchecked<1>(); size_t nmaps = this->maps.size(); - py::gil_scoped_release gil; - for (int64_t i = 0; i < size; i++) { - const key_type &value = input(i); - if (custom_isnan(value)) { - output(i) = this->nan_count > 0; - } else { - std::size_t hash = hasher_map_choice()(value); - size_t map_index = (hash % nmaps); - auto search = this->maps[map_index].find(value); - auto end = this->maps[map_index].end(); - if (search == end) { - output(i) = false; + { + py::gil_scoped_release gil; + for (int64_t i = 0; i < size; i++) { + const key_type &value = input(i); + if (custom_isnan(value)) { + output(i) = this->nan_count > 0; } else { - output(i) = true; + std::size_t hash = hasher_map_choice()(value); + size_t map_index = (hash % nmaps); + auto search = this->maps[map_index].find(value); + auto end = this->maps[map_index].end(); + if (search == end) { + output(i) = false; + } else { + output(i) = true; + } } } } @@ -646,7 +650,7 @@ class ordered_set : public hash_base, T2, Hashmap2> { const key_type &value = input[i]; // the caller is responsible for finding masked values if (custom_isnan(value)) { - if(this->null_count > 0) { + if(this->nan_count > 0) { output[i] = this->nan_value; } else { output[i] = -1; @@ -843,7 +847,7 @@ class index_hash : public hash_base, T2, Hashmap2> { for (int64_t i = 0; i < size; i++) { const key_type &key = input(i); if (custom_isnan(key)) { - if(this->nan_count) { + if(this->nan_count > 0) { output(i) = nan_value; if(nan_value == -1) { encountered_unknown = true; @@ -853,7 +857,7 @@ class index_hash : public hash_base, T2, Hashmap2> { encountered_unknown = true; } } else if (input_mask(i) == 1) { - if(this->null_count) { + if(this->null_count > 0) { output(i) = null_value; if(null_value == -1) { encountered_unknown = true; @@ -957,16 +961,18 @@ class index_hash : public hash_base, T2, Hashmap2> { py::array_t indices_array(size_output); auto output = result.template mutable_unchecked<1>(); auto output_indices = indices_array.template mutable_unchecked<1>(); - py::gil_scoped_release gil; - // int64_t offset = 0; - size_t index = 0; + { + py::gil_scoped_release gil; + // int64_t offset = 0; + size_t index = 0; - std::copy(indices.begin(), indices.end(), &output_indices(0)); + std::copy(indices.begin(), indices.end(), &output_indices(0)); - for (auto &el : found) { - std::vector &indices = el.second; - for (int64_t i : indices) { - output(index++) = i; + for (auto &el : found) { + std::vector &indices = el.second; + for (int64_t i : indices) { + output(index++) = i; + } } } return std::make_tuple(indices_array, result); diff --git a/packages/vaex-core/src/hash_string.hpp b/packages/vaex-core/src/hash_string.hpp index 432bfe641..d3b601997 100644 --- a/packages/vaex-core/src/hash_string.hpp +++ b/packages/vaex-core/src/hash_string.hpp @@ -280,25 +280,27 @@ class counter : public hash_base, T, A, V>, public counter_mixin output_array(this->length()); auto output = output_array.template mutable_unchecked<1>(); - py::gil_scoped_release gil; - auto offsets = this->offsets(); - size_t map_index = 0; - int64_t natural_order = 0; - // TODO: can be parallel due to non-overlapping maps - for (auto &map : this->maps) { - for (auto &el : map) { - key_type key = el.first; - value_type value = el.second; - output(key.index) = value; + { + py::gil_scoped_release gil; + auto offsets = this->offsets(); + size_t map_index = 0; + int64_t natural_order = 0; + // TODO: can be parallel due to non-overlapping maps + for (auto &map : this->maps) { + for (auto &el : map) { + key_type key = el.first; + value_type value = el.second; + output(key.index) = value; + } + // map_index += 1; + } + // no nans possible + // if (this->nan_count) { + // output(this->nan_index()) = this->nan_count; + // } + if (this->null_count) { + output(this->null_index()) = this->null_count; } - // map_index += 1; - } - // no nans possible - // if (this->nan_count) { - // output(this->nan_index()) = this->nan_count; - // } - if (this->null_count) { - output(this->null_index()) = this->null_count; } return output_array; } @@ -445,13 +447,28 @@ class ordered_set : public hash_base, T, T, V> { int64_t size = strings->length; py::array_t result(size); auto output = result.template mutable_unchecked<1>(); - py::gil_scoped_release gil; - size_t nmaps = this->maps.size(); - if (strings->has_null()) { - for (int64_t i = 0; i < size; i++) { - if (strings->is_null(i)) { - output(i) = this->null_count > 0; - } else { + { + py::gil_scoped_release gil; + size_t nmaps = this->maps.size(); + if (strings->has_null()) { + for (int64_t i = 0; i < size; i++) { + if (strings->is_null(i)) { + output(i) = this->null_count > 0; + } else { + const string_view &value = strings->view(i); + std::size_t hash = hasher_map_choice()(value); + size_t map_index = (hash % nmaps); + auto search = this->maps[map_index].find(value); + auto end = this->maps[map_index].end(); + if (search == end) { + output(i) = false; + } else { + output(i) = true; + } + } + } + } else { + for (int64_t i = 0; i < size; i++) { const string_view &value = strings->view(i); std::size_t hash = hasher_map_choice()(value); size_t map_index = (hash % nmaps); @@ -464,19 +481,6 @@ class ordered_set : public hash_base, T, T, V> { } } } - } else { - for (int64_t i = 0; i < size; i++) { - const string_view &value = strings->view(i); - std::size_t hash = hasher_map_choice()(value); - size_t map_index = (hash % nmaps); - auto search = this->maps[map_index].find(value); - auto end = this->maps[map_index].end(); - if (search == end) { - output(i) = false; - } else { - output(i) = true; - } - } } return result; } @@ -847,15 +851,17 @@ class index_hash : public hash_base, T, T, V> { py::array_t indices_array(size); auto output = result.template mutable_unchecked<1>(); auto output_indices = indices_array.template mutable_unchecked<1>(); - py::gil_scoped_release gil; - size_t index = 0; + { + py::gil_scoped_release gil; + size_t index = 0; - std::copy(indices.begin(), indices.end(), &output_indices(0)); + std::copy(indices.begin(), indices.end(), &output_indices(0)); - for (auto el : found) { - std::vector &indices = el.second; - for (int64_t i : indices) { - output(index++) = i; + for (auto el : found) { + std::vector &indices = el.second; + for (int64_t i : indices) { + output(index++) = i; + } } } return std::make_tuple(indices_array, result);