Skip to content

Commit

Permalink
use miniselect
Browse files Browse the repository at this point in the history
Для запроса 
%%
pragma UseBlocks;

SELECT
        RemoteIP as r
        FROM
            `yq-clickbench-local`.`hits_*.parquet`
        WITH
        (
                    format=parquet,
                    SCHEMA
                    (
                            RemoteIP INTEGER NOT NULL,
                    )
                )
      order by r limit 5
                ;
%%

время выполнения уменьшилось с 0,47 с до 0,34 с
  • Loading branch information
vitstn committed Feb 9, 2023
1 parent abc4875 commit 698b400
Show file tree
Hide file tree
Showing 23 changed files with 559 additions and 23 deletions.
1 change: 1 addition & 0 deletions contrib/libs/CMakeLists.darwin.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ add_subdirectory(lua)
add_subdirectory(lz4)
add_subdirectory(lzma)
add_subdirectory(lzmasdk)
add_subdirectory(miniselect)
add_subdirectory(nayuki_md5)
add_subdirectory(nghttp2)
add_subdirectory(pcre)
Expand Down
1 change: 1 addition & 0 deletions contrib/libs/CMakeLists.linux-aarch64.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ add_subdirectory(lua)
add_subdirectory(lz4)
add_subdirectory(lzma)
add_subdirectory(lzmasdk)
add_subdirectory(miniselect)
add_subdirectory(nayuki_md5)
add_subdirectory(nghttp2)
add_subdirectory(pcre)
Expand Down
1 change: 1 addition & 0 deletions contrib/libs/CMakeLists.linux.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ add_subdirectory(lua)
add_subdirectory(lz4)
add_subdirectory(lzma)
add_subdirectory(lzmasdk)
add_subdirectory(miniselect)
add_subdirectory(nayuki_md5)
add_subdirectory(nghttp2)
add_subdirectory(pcre)
Expand Down
2 changes: 2 additions & 0 deletions contrib/libs/miniselect/AUTHORS
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# List of authors for copyright purposes, in no particular order
Danila Kutenin
14 changes: 14 additions & 0 deletions contrib/libs/miniselect/CMakeLists.darwin.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

# This file was generated by the build system used internally in the Yandex monorepo.
# Only simple modifications are allowed (adding source-files to targets, adding simple properties
# like target_include_directories). These modifications will be ported to original
# ya.make files by maintainers. Any complex modifications which can't be ported back to the
# original buildsystem will not be accepted.



# INTERFACE library: no source files are listed for this target, so it only
# carries the link dependencies below to the targets that link against it.
add_library(contrib-libs-miniselect INTERFACE)
target_link_libraries(contrib-libs-miniselect INTERFACE
contrib-libs-cxxsupp
yutil
)
15 changes: 15 additions & 0 deletions contrib/libs/miniselect/CMakeLists.linux-aarch64.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@

# This file was generated by the build system used internally in the Yandex monorepo.
# Only simple modifications are allowed (adding source-files to targets, adding simple properties
# like target_include_directories). These modifications will be ported to original
# ya.make files by maintainers. Any complex modifications which can't be ported back to the
# original buildsystem will not be accepted.



# INTERFACE library: no source files are listed for this target, so it only
# carries the link dependencies below to the targets that link against it.
add_library(contrib-libs-miniselect INTERFACE)
target_link_libraries(contrib-libs-miniselect INTERFACE
contrib-libs-linux-headers
contrib-libs-cxxsupp
yutil
)
15 changes: 15 additions & 0 deletions contrib/libs/miniselect/CMakeLists.linux.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@

# This file was generated by the build system used internally in the Yandex monorepo.
# Only simple modifications are allowed (adding source-files to targets, adding simple properties
# like target_include_directories). These modifications will be ported to original
# ya.make files by maintainers. Any complex modifications which can't be ported back to the
# original buildsystem will not be accepted.



# INTERFACE library: no source files are listed for this target, so it only
# carries the link dependencies below to the targets that link against it.
add_library(contrib-libs-miniselect INTERFACE)
target_link_libraries(contrib-libs-miniselect INTERFACE
contrib-libs-linux-headers
contrib-libs-cxxsupp
yutil
)
15 changes: 15 additions & 0 deletions contrib/libs/miniselect/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@

# This file was generated by the build system used internally in the Yandex monorepo.
# Only simple modifications are allowed (adding source-files to targets, adding simple properties
# like target_include_directories). These modifications will be ported to original
# ya.make files by maintainers. Any complex modifications which can't be ported back to the
# original buildsystem will not be accepted.


# Include the per-platform file for the current processor/OS combination.
# There is no else() branch: a combination that matches none of the three
# conditions below includes no file from here.
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND UNIX AND NOT APPLE AND NOT ANDROID)
include(CMakeLists.linux-aarch64.txt)
elseif (APPLE AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
include(CMakeLists.darwin.txt)
elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND UNIX AND NOT APPLE AND NOT ANDROID)
include(CMakeLists.linux.txt)
endif()
23 changes: 23 additions & 0 deletions contrib/libs/miniselect/LICENSE_1_0.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
Boost Software License - Version 1.0 - August 17th, 2003

Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:

The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
274 changes: 274 additions & 0 deletions contrib/libs/miniselect/README.md

Large diffs are not rendered by default.

129 changes: 129 additions & 0 deletions contrib/libs/miniselect/include/miniselect/floyd_rivest_select.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/* Copyright Danila Kutenin, 2020-.
* Distributed under the Boost Software License, Version 1.0.
* (See accompanying file LICENSE_1_0.txt or copy at
* https://boost.org/LICENSE_1_0.txt)
*/
#pragma once

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <functional>
#include <iterator>
#include <type_traits>
#include <utility>

namespace miniselect {
namespace floyd_rivest_detail {

// Tuning constants for the Floyd-Rivest selection loop.
enum floyd_rivest_constants {
  // Windows with `right - left` above this value are first narrowed by a
  // recursive selection on a smaller sub-window around k.
  kQCap = 600,
};

// Maps a comparator type to the type used when forwarding it to the internal
// helpers and to std::sort: an lvalue reference to the caller's comparator.
template <class Compare>
struct CompareRefType {
  using type = typename std::add_lvalue_reference<Compare>::type;
};

// Selection loop over the index window [left, right] (both inclusive) of the
// random-access sequence starting at `begin`.
//
// On return begin[k] holds the element that would occupy index k if the
// window were sorted with `comp`; elements before k do not compare greater
// than it and elements after k do not compare less.
//
// Parameters:
//   begin - iterator to the first element; must support operator[].
//   left  - first index of the window.
//   right - last index of the window (inclusive).
//   k     - index to select; callers pass a value inside [left, right].
//   comp  - strict-weak-ordering "less" predicate.
template <class Iter, class Compare,
          class DiffType = typename std::iterator_traits<Iter>::difference_type>
inline void floyd_rivest_select_loop(Iter begin, DiffType left, DiffType right,
                                     DiffType k, Compare comp) {
  while (right > left) {
    if (right - left > floyd_rivest_constants::kQCap) {
      // Large window: pre-select inside a smaller sub-window that contains k
      // so that begin[k] is already a good pivot for the partition below.
      const DiffType n = right - left + 1;
      const DiffType rank = k - left + 1;  // 1-based position of k in window

      // The <cmath> functions are std-qualified: only the std:: names are
      // guaranteed to be declared by that header.
      const double z = std::log(static_cast<double>(n));
      const double s = 0.5 * std::exp(2 * z / 3);
      double sd = 0.5 * std::sqrt(z * s * (n - s) / n);
      if (rank < n / 2) {
        // k lies in the lower half: the offset is negated, which moves both
        // sub-window bounds towards lower indices.
        sd *= -1.0;
      }
      const DiffType new_left =
          std::max(left, static_cast<DiffType>(k - rank * s / n + sd));
      const DiffType new_right =
          std::min(right, static_cast<DiffType>(k + (n - rank) * s / n + sd));
      floyd_rivest_select_loop<Iter, Compare, DiffType>(begin, new_left,
                                                        new_right, k, comp);
    }

    DiffType i = left;
    DiffType j = right;

    // Move the pivot candidate to the left edge of the window.
    std::swap(begin[left], begin[k]);
    const bool to_swap = comp(begin[left], begin[right]);
    if (to_swap) {
      std::swap(begin[left], begin[right]);
    }
    // The pivot is bound by reference so that non-copyable element types
    // compile. The chosen slot holds the pivot only after the first swap of
    // the loop below (which exchanges begin[left] and begin[right]); `t` is
    // not read before that swap, and the two edge slots are not swapped
    // again inside the loop.
    const auto& t = to_swap ? begin[left] : begin[right];
    while (i < j) {
      std::swap(begin[i], begin[j]);
      i++;
      j--;
      while (comp(begin[i], t)) {
        i++;
      }
      while (comp(t, begin[j])) {
        j--;
      }
    }

    // Put the pivot at index j, its final position within the window.
    if (to_swap) {
      std::swap(begin[left], begin[j]);
    } else {
      j++;
      std::swap(begin[right], begin[j]);
    }

    // Keep only the side that still contains k. When j == k both bounds
    // move and the outer loop terminates.
    if (j <= k) {
      left = j + 1;
    }
    if (k <= j) {
      right = j - 1;
    }
  }
}

}  // namespace floyd_rivest_detail

// Rearranges [begin, end) so that [begin, mid) holds the first (mid - begin)
// elements of the range in `comp` order, sorted. The order of the elements
// in [mid, end) is unspecified. Does nothing when the range is empty or when
// mid == begin. Requires random-access iterators.
template <class Iter, class Compare>
inline void floyd_rivest_partial_sort(Iter begin, Iter mid, Iter end,
                                      Compare comp) {
  if (begin == end) return;
  if (begin == mid) return;
  using CompType = typename floyd_rivest_detail::CompareRefType<Compare>::type;
  using DiffType = typename std::iterator_traits<Iter>::difference_type;
  // Select index (mid - begin - 1): afterwards the wanted elements occupy
  // [begin, mid), in arbitrary order.
  floyd_rivest_detail::floyd_rivest_select_loop<Iter, CompType>(
      begin, DiffType{0}, static_cast<DiffType>(end - begin - 1),
      static_cast<DiffType>(mid - begin - 1), comp);
  // std::sort proved to be better than other sorts because of pivoting.
  std::sort<Iter, CompType>(begin, mid, comp);
}

// Overload of floyd_rivest_partial_sort that orders elements with std::less.
template <class Iter>
inline void floyd_rivest_partial_sort(Iter begin, Iter mid, Iter end) {
  using T = typename std::iterator_traits<Iter>::value_type;
  floyd_rivest_partial_sort(begin, mid, end, std::less<T>());
}

// Rearranges [begin, end) so that *mid is the element that would be at that
// position if the range were sorted with `comp`; elements before mid do not
// compare greater than it and elements after mid do not compare less.
// Does nothing when mid == end. Requires random-access iterators.
template <class Iter, class Compare>
inline void floyd_rivest_select(Iter begin, Iter mid, Iter end, Compare comp) {
  if (mid == end) return;
  using CompType = typename floyd_rivest_detail::CompareRefType<Compare>::type;
  using DiffType = typename std::iterator_traits<Iter>::difference_type;
  floyd_rivest_detail::floyd_rivest_select_loop<Iter, CompType>(
      begin, DiffType{0}, static_cast<DiffType>(end - begin - 1),
      static_cast<DiffType>(mid - begin), comp);
}

// Overload of floyd_rivest_select that orders elements with std::less.
template <class Iter>
inline void floyd_rivest_select(Iter begin, Iter mid, Iter end) {
  using T = typename std::iterator_traits<Iter>::value_type;
  floyd_rivest_select(begin, mid, end, std::less<T>());
}

}  // namespace miniselect
8 changes: 5 additions & 3 deletions ydb/library/yql/minikql/comp_nodes/mkql_block_top.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#include <ydb/library/yql/minikql/mkql_node_builder.h>
#include <ydb/library/yql/minikql/mkql_node_cast.h>

#include <ydb/library/yql/utils/sort.h>

namespace NKikimr {
namespace NMiniKQL {

Expand Down Expand Up @@ -87,7 +89,7 @@ class TTopBlocksWrapper : public TStatefulWideFlowBlockComputationNode<TTopBlock
}

TBlockLess cmp(KeyIndicies_, s, s.Values_);
std::nth_element(blockIndicies->begin(), blockIndicies->begin() + s.Count_, blockIndicies->end(), cmp);
NYql::FastNthElement(blockIndicies->begin(), blockIndicies->begin() + s.Count_, blockIndicies->end(), cmp);
}

// copy all to builders
Expand Down Expand Up @@ -222,9 +224,9 @@ class TTopBlocksWrapper : public TStatefulWideFlowBlockComputationNode<TTopBlock
}
} else {
if (sort) {
std::partial_sort(blockIndicies.begin(), blockIndicies.begin() + blockLen, blockIndicies.end(), cmp);
NYql::FastPartialSort(blockIndicies.begin(), blockIndicies.begin() + blockLen, blockIndicies.end(), cmp);
} else {
std::nth_element(blockIndicies.begin(), blockIndicies.begin() + blockLen, blockIndicies.end(), cmp);
NYql::FastNthElement(blockIndicies.begin(), blockIndicies.begin() + blockLen, blockIndicies.end(), cmp);
}
}

Expand Down
6 changes: 4 additions & 2 deletions ydb/library/yql/minikql/comp_nodes/mkql_heap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include <ydb/library/yql/minikql/mkql_node_cast.h>
#include <ydb/library/yql/minikql/mkql_node_builder.h>

#include <ydb/library/yql/utils/sort.h>

namespace NKikimr {
namespace NMiniKQL {

Expand Down Expand Up @@ -395,11 +397,11 @@ IComputationNode* WrapStableSort(TCallable& callable, const TComputationNodeFact
}

IComputationNode* WrapNthElement(TCallable& callable, const TComputationNodeFactoryContext& ctx) {
return WrapNth(&std::nth_element<NUdf::TUnboxedValuePod*, TComparator>, callable, ctx);
return WrapNth(&NYql::FastNthElement<NUdf::TUnboxedValuePod*, TComparator>, callable, ctx);
}

IComputationNode* WrapPartialSort(TCallable& callable, const TComputationNodeFactoryContext& ctx) {
return WrapNth(&std::partial_sort<NUdf::TUnboxedValuePod*, TComparator>, callable, ctx);
return WrapNth(&NYql::FastPartialSort<NUdf::TUnboxedValuePod*, TComparator>, callable, ctx);
}
}
}
8 changes: 5 additions & 3 deletions ydb/library/yql/minikql/comp_nodes/mkql_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include <ydb/library/yql/minikql/mkql_string_util.h>
#include <ydb/library/yql/minikql/mkql_type_builder.h>

#include <ydb/library/yql/utils/sort.h>

#include <algorithm>
#include <iterator>

Expand Down Expand Up @@ -601,7 +603,7 @@ class TKeepTopWrapper : public TMutableComputationNode<TKeepTopWrapper> {
});

Description.Prepare(ctx, items);
std::nth_element(items.begin(), items.begin() + count - 1U, items.end(), Description.MakeComparator<TKeyPayloadPairVector>(ascending));
NYql::FastNthElement(items.begin(), items.begin() + count - 1U, items.end(), Description.MakeComparator<TKeyPayloadPairVector>(ascending));
items.resize(count);

NUdf::TUnboxedValue *inplace = nullptr;
Expand Down Expand Up @@ -750,11 +752,11 @@ IComputationNode* WrapSort(TCallable& callable, const TComputationNodeFactoryCon
}

IComputationNode* WrapTop(TCallable& callable, const TComputationNodeFactoryContext& ctx) {
return WrapNthAlgo(&std::nth_element<TKeyPayloadPairVector::iterator, TComparator>, callable, ctx);
return WrapNthAlgo(&NYql::FastNthElement<TKeyPayloadPairVector::iterator, TComparator>, callable, ctx);
}

IComputationNode* WrapTopSort(TCallable& callable, const TComputationNodeFactoryContext& ctx) {
return WrapNthAlgo(&std::partial_sort<TKeyPayloadPairVector::iterator, TComparator>, callable, ctx);
return WrapNthAlgo(&NYql::FastPartialSort<TKeyPayloadPairVector::iterator, TComparator>, callable, ctx);
}

IComputationNode* WrapKeepTop(TCallable& callable, const TComputationNodeFactoryContext& ctx) {
Expand Down
6 changes: 4 additions & 2 deletions ydb/library/yql/minikql/comp_nodes/mkql_wide_top_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include <ydb/library/yql/minikql/defs.h>
#include <ydb/library/yql/utils/cast.h>

#include <ydb/library/yql/utils/sort.h>

namespace NKikimr {
namespace NMiniKQL {

Expand Down Expand Up @@ -71,7 +73,7 @@ using TLLVMBase = TLLVMFieldsStructure<TComputationValue<TState>>;
if (Full.size() + 1U == GetStorageSize()) {
Free.pop_back();

std::nth_element(Full.begin(), Full.begin() + Count, Full.end(), LessFunc);
NYql::FastNthElement(Full.begin(), Full.begin() + Count, Full.end(), LessFunc);
std::copy(Full.cbegin() + Count, Full.cend(), std::back_inserter(Free));
Full.resize(Count);

Expand Down Expand Up @@ -102,7 +104,7 @@ using TLLVMBase = TLLVMFieldsStructure<TComputationValue<TState>>;
Free.shrink_to_fit();

if (Full.size() > Count) {
std::nth_element(Full.begin(), Full.begin() + Count, Full.end(), LessFunc);
NYql::FastNthElement(Full.begin(), Full.begin() + Count, Full.end(), LessFunc);
Full.resize(Count);
}

Expand Down
Loading

0 comments on commit 698b400

Please sign in to comment.