-
Notifications
You must be signed in to change notification settings - Fork 0
Trieっぽい何か #2
Comments
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <unordered_map>
#include <vector>
/// Emoji matcher backed by a trie keyed on Unicode code points.
///
/// The sequences come from "EmojiCodePoints.txt", which is textually included
/// as the initializer list of an array of std::array<std::uint32_t, 7>.
/// Trailing zero elements of an entry are treated as padding (see length()).
class EmojiData
{
private:
    /// One trie node.
    struct trie{
        /// Children keyed by the next code point; null when the node has no children.
        std::unique_ptr<std::unordered_map<std::uint32_t, trie>> nexts;
        /// Number of code points of the sequence that ends at this node, or 0
        /// when no sequence ends here (nodes created through operator[] are
        /// value-initialized, so this starts out as 0).
        std::size_t length;
    };

    /// Element count of a built-in array.
    template<typename T, std::size_t N>
    static constexpr std::size_t size(T (&)[N])noexcept{return N;}

    /// Number of elements in `t` once trailing zeros are stripped.
    /// Never returns less than 1, even when every element is zero.
    template<typename T, std::size_t N>
    static constexpr std::size_t length(const std::array<T, N>& t)noexcept{
        auto n = N-1;
        while(n && t[n] == 0)
            --n;
        return n+1;
    }

    /// Root of the trie, built once per object by the immediately-invoked lambda.
    trie emojiCodePoints =
        []{
            trie tri = {std::make_unique<std::unordered_map<std::uint32_t, trie>>(), 0};
            // Very ad-hoc optimization: this bucket count has to be revisited
            // whenever the data set changes.
            tri.nexts->reserve(1123+1);
            constexpr std::array<std::uint32_t, 7> arr[] = {
#include "EmojiCodePoints.txt"
            };
            for(std::size_t j = 0; j != size(arr); ++j){
                trie* ptr = &tri;
                const auto len = length(arr[j]);
                for(std::size_t i = 0; i != len; ++i){
                    // Child maps are allocated lazily, only for nodes that get children.
                    if(!ptr->nexts)
                        ptr->nexts = std::make_unique<std::unordered_map<std::uint32_t, trie>>();
                    ptr = &(*ptr->nexts)[arr[j][i]];
                }
                ptr->length = len;
            }
            // Plain return: wrapping the local in std::move would inhibit NRVO.
            return tri;
        }();
public:
    EmojiData() = default;

    /// Returns the length, in code points, of the longest known sequence that
    /// is a prefix of [beg, end), or 0 when no sequence matches.
    ///
    /// The walk remembers the deepest node at which a sequence ended, so a
    /// shorter match is still reported when the input diverges (or ends) part
    /// way through a longer sequence.
    std::size_t check(std::vector<std::uint32_t>::const_iterator beg, std::vector<std::uint32_t>::const_iterator end)const
    {
        const trie* ptr = &emojiCodePoints;
        std::size_t longest = 0;
        while(beg != end && ptr->nexts){
            const auto it = ptr->nexts->find(*beg);
            if(it == ptr->nexts->end())
                break;
            ptr = &it->second;
            ++beg;
            if(ptr->length != 0)
                longest = ptr->length;
        }
        return longest;
    }
};
|
メモリについては,以下の実装が間を取ってメモリ使用量を728KB(+320KB)に抑えたものになります.とりあえず速度優先とメモリ優先で2案ということで,どうでしょう.
#include <unordered_map>
#include <memory>
#include <array>
/// Memory-oriented emoji matcher.
///
/// Single-code-point entries are kept in a sorted flat array and only
/// multi-code-point sequences go into the trie. The sequences come from
/// "EmojiCodePoints.txt", which is textually included as the initializer list
/// of an array of std::array<std::uint32_t, 7>; trailing zero elements of an
/// entry are treated as padding (see length()).
class EmojiData
{
private:
    /// One trie node.
    struct trie{
        /// Children keyed by the next code point; null when the node has no children.
        std::unique_ptr<std::unordered_map<std::uint32_t, trie>> nexts;
        /// Number of code points of the sequence that ends at this node, or 0
        /// when no sequence ends here.
        std::size_t length;
    };

    /// Element count of a built-in array.
    template<typename T, std::size_t N>
    static constexpr std::size_t size(T (&)[N])noexcept{return N;}

    /// Number of elements in `t` once trailing zeros are stripped.
    /// Never returns less than 1, even when every element is zero.
    template<typename T, std::size_t N>
    static constexpr std::size_t length(const std::array<T, N>& t)noexcept{
        auto n = N-1;
        while(n && t[n] == 0)
            --n;
        return n+1;
    }

    /// Root of the trie holding the multi-code-point sequences.
    trie trieHead;
    /// Single-code-point entries; only the first `length1Count` slots are in
    /// use and they are sorted ascending once construction finishes.
    /// The capacity is tuned by hand to the current data set.
    std::array<std::uint32_t, 1053> emojiCodePointLength1{};
    /// Number of slots of `emojiCodePointLength1` that hold real data.
    std::size_t length1Count = 0;
public:
    EmojiData():trieHead{std::make_unique<std::unordered_map<std::uint32_t, trie>>(), 0}{
        // Hand-tuned bucket count; revisit when the data set changes.
        trieHead.nexts->reserve(123+1);
        constexpr std::array<std::uint32_t, 7> arr[] = {
#include "EmojiCodePoints.txt"
        };
        for(std::size_t j = 0; j != size(arr); ++j){
            const auto len = length(arr[j]);
            // Single code points go to the flat array while there is room.
            // If the data set outgrows the array, the entry falls through and
            // is stored in the trie instead of being written out of bounds.
            if(len == 1 && length1Count != emojiCodePointLength1.size()){
                emojiCodePointLength1[length1Count++] = arr[j][0];
                continue;
            }
            trie* ptr = &trieHead;
            for(std::size_t i = 0; i != len; ++i){
                if(!ptr->nexts)
                    ptr->nexts = std::make_unique<std::unordered_map<std::uint32_t, trie>>();
                ptr = &(*ptr->nexts)[arr[j][i]];
            }
            ptr->length = len;
        }
        const auto first = emojiCodePointLength1.begin();
        const auto last = first + static_cast<std::ptrdiff_t>(length1Count);
        // Only the filled part is sorted so unused slots never take part in lookups.
        std::sort(first, last);
        // A single-code-point emoji can also be the first code point of a
        // longer sequence; mark that trie node so check() can report it.
        for(auto cp = first; cp != last; ++cp){
            const auto it = trieHead.nexts->find(*cp);
            if(it != trieHead.nexts->end())
                it->second.length = 1;
        }
    }

    /// Returns the length, in code points, of the longest known sequence that
    /// is a prefix of [beg, end), or 0 when no sequence matches.
    ///
    /// The walk remembers the deepest node at which a sequence ended, so a
    /// shorter match is still reported when the input diverges (or ends) part
    /// way through a longer sequence.
    std::size_t check(std::vector<std::uint32_t>::const_iterator beg, std::vector<std::uint32_t>::const_iterator end)const
    {
        if(beg == end)
            return 0;
        const trie* ptr = &trieHead;
        auto it = ptr->nexts->find(*beg);
        if(it == ptr->nexts->end()){
            // Not the start of any multi-code-point sequence: it can only be
            // a stand-alone single-code-point entry.
            const auto first = emojiCodePointLength1.begin();
            const auto last = first + static_cast<std::ptrdiff_t>(length1Count);
            return std::binary_search(first, last, *beg) ? 1 : 0;
        }
        std::size_t longest = 0;
        for(;;){
            ptr = &it->second;
            if(ptr->length != 0)
                longest = ptr->length;
            if(!ptr->nexts || ++beg == end)
                break;
            it = ptr->nexts->find(*beg);
            if(it == ptr->nexts->end())
                break;
        }
        return longest;
    }
};
複数の手法の検討と実測値の調査をありがとうございます。 やはり #1 のコメントにもあるように、性能を限界まで追求すると |
マジックナンバーは基本的に
という感じです. |
というわけでコンパイル時にほぼすべてのマジックナンバーになってた部分を導出してみました どちらのコードも残るマジックナンバーは「 #include <unordered_map>
#include <memory>
#include <array>
/// Compile-time helpers that derive the sizes EmojiData needs from the data
/// file, so they no longer have to be hard-coded. Everything is private;
/// EmojiData gets access through the friend declaration.
class EmojiDataHelper{
    friend class EmojiData;

    /// Number of array slots per entry (the one remaining hand-maintained constant).
    static constexpr std::size_t LongestCodePointLength = 7;

    /// Element count of a built-in array.
    template<typename T, std::size_t N>
    static constexpr std::size_t size(T (&)[N])noexcept{return N;}

    /// Number of elements in `t` once trailing zeros are stripped.
    /// Never returns less than 1, even when every element is zero.
    template<typename T, std::size_t N>
    static constexpr std::size_t length(const std::array<T, N>& t)noexcept{
        auto n = N-1;
        while(n && t[n] == 0)
            --n;
        return n+1;
    }

    /// Number of entries produced by the included data file.
    static constexpr std::size_t codePointsNum()noexcept{
        constexpr std::array<std::uint32_t, LongestCodePointLength> arr[] = {
//#include "EmojiCodePoints.txt"
#include "EmojiCodePoints_sortedByValue.txt"
        };
        return size(arr);
    }

    /// The data file as a std::array of N entries.
    template<std::size_t N = codePointsNum()>
    static constexpr std::array<std::array<std::uint32_t, LongestCodePointLength>, N> emojiCodePoints(){
        return{{
//#include "EmojiCodePoints.txt"
#include "EmojiCodePoints_sortedByValue.txt"
        }};
    }

    /// Counts runs of equal first code points, which is the number of
    /// children the trie root will get when equal values are adjacent
    /// (presumably guaranteed by the "_sortedByValue" data file - TODO confirm).
    ///
    /// The final array element is deliberately left out; the original note
    /// says "for last comma".
    /// NOTE(review): confirm the data really ends with a dummy element,
    /// otherwise the last real entry is ignored here.
    static constexpr std::size_t trieHeadSize(){
        constexpr auto arr = emojiCodePoints();
        std::uint32_t prev = 0;   // 0 doubles as "nothing seen yet"
        std::size_t count = 1;
        // `i + 1 < size` is the same bound as `i != size - 1` but cannot wrap
        // around for an empty table.
        for(std::size_t i = 0; i + 1 < arr.size(); ++i){
            const std::uint32_t head = arr[i][0];
            if(prev != 0 && prev != head)
                ++count;
            prev = head;
        }
        return count;
    }
};
/// Speed-oriented emoji matcher: every sequence lives in the trie, and the
/// root's bucket count is derived at compile time by EmojiDataHelper.
class EmojiData
{
private:
    /// One trie node.
    struct trie{
        /// Children keyed by the next code point; null when the node has no children.
        std::unique_ptr<std::unordered_map<std::uint32_t, trie>> nexts;
        /// Number of code points of the sequence that ends at this node, or 0
        /// when no sequence ends here (nodes created through operator[] are
        /// value-initialized, so this starts out as 0).
        std::size_t length;
    };

    /// Number of elements in `t` once trailing zeros are stripped.
    /// Never returns less than 1, even when every element is zero.
    template<typename T, std::size_t N>
    static constexpr std::size_t length(const std::array<T, N>& t)noexcept{
        auto n = N-1;
        while(n && t[n] == 0)
            --n;
        return n+1;
    }

    /// Root of the trie, built once per object by the immediately-invoked lambda.
    trie emojiCodePoints =
        []{
            trie tri = {std::make_unique<std::unordered_map<std::uint32_t, trie>>(), 0};
            tri.nexts->reserve(EmojiDataHelper::trieHeadSize()+1/*workaround*/);
            constexpr auto arr = EmojiDataHelper::emojiCodePoints();
            // The final array element is skipped ("for last comma" in the original).
            // NOTE(review): confirm the data really ends with a dummy element.
            for(std::size_t j = 0; j != arr.size()-1; ++j){
                trie* ptr = &tri;
                const auto len = length(arr[j]);
                for(std::size_t i = 0; i != len; ++i){
                    // Child maps are allocated lazily, only for nodes that get children.
                    if(!ptr->nexts)
                        ptr->nexts = std::make_unique<std::unordered_map<std::uint32_t, trie>>();
                    ptr = &(*ptr->nexts)[arr[j][i]];
                }
                ptr->length = len;
            }
            // Plain return: wrapping the local in std::move would inhibit NRVO.
            return tri;
        }();
public:
    EmojiData() = default;

    /// Returns the length, in code points, of the longest known sequence that
    /// is a prefix of [beg, end), or 0 when no sequence matches.
    ///
    /// The walk remembers the deepest node at which a sequence ended, so a
    /// shorter match is still reported when the input diverges (or ends) part
    /// way through a longer sequence.
    std::size_t check(std::vector<std::uint32_t>::const_iterator beg, std::vector<std::uint32_t>::const_iterator end)const
    {
        const trie* ptr = &emojiCodePoints;
        std::size_t longest = 0;
        while(beg != end && ptr->nexts){
            const auto it = ptr->nexts->find(*beg);
            if(it == ptr->nexts->end())
                break;
            ptr = &it->second;
            ++beg;
            if(ptr->length != 0)
                longest = ptr->length;
        }
        return longest;
    }
};

/// Compile-time helpers for the memory-oriented EmojiData: derives the trie
/// root size and the number of single-code-point entries from the data file.
/// Everything is private; EmojiData gets access through the friend declaration.
class EmojiDataHelper{
    friend class EmojiData;

    /// Number of array slots per entry (the one remaining hand-maintained constant).
    static constexpr std::size_t LongestCodePointLength = 7;

    /// Element count of a built-in array.
    template<typename T, std::size_t N>
    static constexpr std::size_t size(T (&)[N])noexcept{return N;}

    /// Number of elements in `t` once trailing zeros are stripped.
    /// Never returns less than 1, even when every element is zero.
    template<typename T, std::size_t N>
    static constexpr std::size_t length(const std::array<T, N>& t)noexcept{
        auto n = N-1;
        while(n && t[n] == 0)
            --n;
        return n+1;
    }

    /// Number of entries produced by the included data file.
    static constexpr std::size_t codePointsNum()noexcept{
        constexpr std::array<std::uint32_t, LongestCodePointLength> arr[] = {
//#include "EmojiCodePoints.txt"
#include "EmojiCodePoints_sortedByValue.txt"
        };
        return size(arr);
    }

    /// The data file as a std::array of N entries.
    template<std::size_t N = codePointsNum()>
    static constexpr std::array<std::array<std::uint32_t, LongestCodePointLength>, N> emojiCodePoints(){
        return{{
//#include "EmojiCodePoints.txt"
#include "EmojiCodePoints_sortedByValue.txt"
        }};
    }

    /// Counts runs of equal first code points among the multi-code-point
    /// entries, i.e. the number of children the trie root will get when equal
    /// values are adjacent (presumably guaranteed by the "_sortedByValue"
    /// data file - TODO confirm). The final array element is skipped.
    static constexpr std::size_t trieHeadSize(){
        constexpr auto arr = emojiCodePoints();
        std::uint32_t prev = 0;   // 0 doubles as "nothing seen yet"
        std::size_t count = 1;
        // `i + 1 < size` is the same bound as `i != size - 1` but cannot wrap
        // around for an empty table.
        for(std::size_t i = 0; i + 1 < arr.size(); ++i){
            // Single-code-point entries are not stored in the trie.
            if(length(arr[i]) == 1)
                continue;
            const std::uint32_t head = arr[i][0];
            if(prev != 0 && prev != head)
                ++count;
            prev = head;
        }
        return count;
    }

    /// Number of single-code-point entries (final array element skipped).
    static constexpr std::size_t length1Size(){
        constexpr auto arr = emojiCodePoints();
        std::size_t count = 0;
        for(std::size_t i = 0; i + 1 < arr.size(); ++i)
            if(length(arr[i]) == 1)
                ++count;
        return count;
    }
};
/// Memory-oriented emoji matcher with compile-time derived sizes.
///
/// Single-code-point entries are kept in a flat array and only
/// multi-code-point sequences go into the trie. All sizes come from
/// EmojiDataHelper.
class EmojiData
{
private:
    /// One trie node.
    struct trie{
        /// Children keyed by the next code point; null when the node has no children.
        std::unique_ptr<std::unordered_map<std::uint32_t, trie>> nexts;
        /// Number of code points of the sequence that ends at this node, or 0
        /// when no sequence ends here.
        std::size_t length;
    };

    /// Root of the trie holding the multi-code-point sequences.
    trie trieHead;
    /// Single-code-point entries, stored in data-file order.
    std::array<std::uint32_t, EmojiDataHelper::length1Size()> emojiCodePointLength1;
public:
    EmojiData():trieHead{std::make_unique<std::unordered_map<std::uint32_t, trie>>(), 0}{
        static constexpr auto trieHeadSize = EmojiDataHelper::trieHeadSize();
        trieHead.nexts->reserve(trieHeadSize+1/*workaround*/);
        constexpr auto arr = EmojiDataHelper::emojiCodePoints();
        auto length1 = emojiCodePointLength1.begin();
        // The final array element is skipped ("for last comma" in the original),
        // matching how EmojiDataHelper::length1Size() counts the entries.
        for(std::size_t j = 0; j != arr.size()-1; ++j){
            const auto len = EmojiDataHelper::length(arr[j]);
            if(len == 1){
                *length1++ = arr[j][0];
                continue;
            }
            trie* ptr = &trieHead;
            for(std::size_t i = 0; i != len; ++i){
                if(!ptr->nexts)
                    ptr->nexts = std::make_unique<std::unordered_map<std::uint32_t, trie>>();
                ptr = &(*ptr->nexts)[arr[j][i]];
            }
            ptr->length = len;
        }
        // No std::sort here: check() relies on this array being ascending for
        // std::binary_search, which presumably holds because the
        // "_sortedByValue" data file is already ordered - TODO confirm.
        //std::sort(emojiCodePointLength1.begin(), emojiCodePointLength1.end());

        // A single-code-point emoji can also be the first code point of a
        // longer sequence; mark that trie node so check() can report it.
        for(const auto cp : emojiCodePointLength1){
            const auto it = trieHead.nexts->find(cp);
            if(it != trieHead.nexts->end())
                it->second.length = 1;
        }
    }

    /// Returns the length, in code points, of the longest known sequence that
    /// is a prefix of [beg, end), or 0 when no sequence matches.
    ///
    /// The walk remembers the deepest node at which a sequence ended, so a
    /// shorter match is still reported when the input diverges (or ends) part
    /// way through a longer sequence.
    std::size_t check(std::vector<std::uint32_t>::const_iterator beg, std::vector<std::uint32_t>::const_iterator end)const
    {
        if(beg == end)
            return 0;
        const trie* ptr = &trieHead;
        auto it = ptr->nexts->find(*beg);
        if(it == ptr->nexts->end()){
            // Not the start of any multi-code-point sequence: it can only be
            // a stand-alone single-code-point entry.
            return std::binary_search(emojiCodePointLength1.begin(), emojiCodePointLength1.end(), *beg) ? 1 : 0;
        }
        std::size_t longest = 0;
        for(;;){
            ptr = &it->second;
            if(ptr->length != 0)
                longest = ptr->length;
            if(!ptr->nexts || ++beg == end)
                break;
            it = ptr->nexts->find(*beg);
            if(it == ptr->nexts->end())
                break;
        }
        return longest;
    }
};
現状とりあえず仮組みしただけって感じなので変数名とかがメチャクチャ適当ですが…
`unique_ptr` と `unordered_map` を組み合わせてるので断片化が激しそう…
`std::vector<std::vector<std::uint32_t>>`
を要素数で降順ソートかけたやつを上から総なめする)に比べると20倍ぐらい速い.初期化は3倍ぐらい遅いけど,使い方によっては誤差かなと思います.
The text was updated successfully, but these errors were encountered: