From b9c334a4c1e7056b3b5c2c85d981728653110e35 Mon Sep 17 00:00:00 2001 From: "ataymano@microsoft.com" Date: Tue, 4 Jun 2019 13:15:09 -0400 Subject: [PATCH] vw.h example construction helpers first version --- test/unit_test/feature_space_test.cc | 57 +++++++++++++++++++++++ test/unit_test/unit_test.vcxproj | 1 + test/unit_test/unit_test.vcxproj.filters | 3 ++ test/unit_test/vwdll_test.cc | 24 +++++----- vowpalwabbit/feature_space.cc | 58 ++++++++++++++++++++++++ vowpalwabbit/parser.cc | 28 +++++------- vowpalwabbit/vw.h | 52 ++++++++++++++++++--- vowpalwabbit/vw_core.vcxproj | 6 +-- vowpalwabbit/vwdll.cpp | 47 ++++++++++--------- vowpalwabbit/vwdll.h | 16 ++++--- 10 files changed, 224 insertions(+), 68 deletions(-) create mode 100644 test/unit_test/feature_space_test.cc create mode 100644 vowpalwabbit/feature_space.cc diff --git a/test/unit_test/feature_space_test.cc b/test/unit_test/feature_space_test.cc new file mode 100644 index 00000000000..d4889bb198c --- /dev/null +++ b/test/unit_test/feature_space_test.cc @@ -0,0 +1,57 @@ +#ifndef STATIC_LINK_VW +#define BOOST_TEST_DYN_LINK +#endif + +#include + +#include "vw.h" + +template +void check_weights_equal(T& first, T& second) +{ + auto secondIt = second.begin(); + for (auto firstIt : first) + { + BOOST_CHECK_EQUAL(firstIt, *secondIt); + ++secondIt; + } + BOOST_CHECK_EQUAL(secondIt == second.end(), true); +} + +BOOST_AUTO_TEST_CASE(parsed_and_constructed_example_parity) +{ + vw* vw1 = VW::initialize("-q st --noconstant --quiet"); + vw* vw2 = VW::initialize("-q st --noconstant --quiet"); + + auto example_parsed = VW::read_example(*vw1, "1 |s p^the_man w^the w^man |t p^un_homme w^un w^homme"); + auto fs = VW::feature_space(*vw2, 2); + fs[0].reset(3); + fs[0].set_name("s"); + fs[0].set(0, "p^the_man", 1.0f); + fs[0].set(1, "w^the", 1.0f); + fs[0].set(2, "w^man", 1.0f); + + fs[1].reset(3); + fs[1].set_name("t"); + fs[1].set(0, "p^un_homme", 1.0f); + fs[1].set(1, "w^un", 1.0f); + fs[1].set(2, "w^homme", 1.0f); + auto example_constructed = VW::import_example(*vw2, "1", fs); + + vw1->learn(*example_parsed); + vw2->learn(*example_constructed); + + BOOST_CHECK_EQUAL(vw1->weights.sparse, vw2->weights.sparse); + + if (vw1->weights.sparse) + { + check_weights_equal(vw1->weights.sparse_weights, vw2->weights.sparse_weights); + } + else + { + check_weights_equal(vw1->weights.dense_weights, vw2->weights.dense_weights); + } + + VW::finish(*vw1); + VW::finish(*vw2); +} diff --git a/test/unit_test/unit_test.vcxproj b/test/unit_test/unit_test.vcxproj index 9f85e4cb7ca..6a498747098 100644 --- a/test/unit_test/unit_test.vcxproj +++ b/test/unit_test/unit_test.vcxproj @@ -21,6 +21,7 @@ + diff --git a/test/unit_test/unit_test.vcxproj.filters b/test/unit_test/unit_test.vcxproj.filters index d4db85dafa4..3364d45bf24 100644 --- a/test/unit_test/unit_test.vcxproj.filters +++ b/test/unit_test/unit_test.vcxproj.filters @@ -39,6 +39,9 @@ Source Files + + Source Files + diff --git a/test/unit_test/vwdll_test.cc b/test/unit_test/vwdll_test.cc index 3d67aa86c13..21ee5b8eed1 100644 --- a/test/unit_test/vwdll_test.cc +++ b/test/unit_test/vwdll_test.cc @@ -30,23 +30,23 @@ BOOST_AUTO_TEST_CASE(vw_dll_parsed_and_constructed_example_parity) //construct example VW_HANDLE handle2 = VW_InitializeA("-q st --noconstant --quiet"); VW_EXAMPLE example_constructed; - auto fs = VW_InitializeFeatureSpaces(2); + auto fs = VW_InitializeFeatureSpace(handle2, 2); - auto first = VW_GetFeatureSpace(fs, 0); + auto first = VW_GetPrimitiveFeatureSpace(fs, 0); VW_InitFeatures(first, 3); - auto shash = VW_SetFeatureSpace(handle2, first, "s"); - VW_SetFeature(VW_GetFeature(first, 0), VW_HashFeatureA(handle2, "p^the_man", shash), 1.0f); - VW_SetFeature(VW_GetFeature(first, 1), VW_HashFeatureA(handle2, "w^the", shash), 1.0f); - VW_SetFeature(VW_GetFeature(first, 2), VW_HashFeatureA(handle2, "w^man", shash), 1.0f); + VW_SetFeatureSpaceA(first, "s"); + VW_SetFeatureA(first, 0, "p^the_man", 1.0f); + VW_SetFeatureA(first, 1, "w^the", 1.0f); + VW_SetFeatureA(first, 2, "w^man", 1.0f); - auto second = VW_GetFeatureSpace(fs, 1); + auto second = VW_GetPrimitiveFeatureSpace(fs, 1); VW_InitFeatures(second, 3); - auto thash = VW_SetFeatureSpace(handle2, second, "t"); - VW_SetFeature(VW_GetFeature(second, 0), VW_HashFeatureA(handle2, "p^un_homme", thash), 1.0f); - VW_SetFeature(VW_GetFeature(second, 1), VW_HashFeatureA(handle2, "w^un", thash), 1.0f); - VW_SetFeature(VW_GetFeature(second, 2), VW_HashFeatureA(handle2, "w^homme", thash), 1.0f); + VW_SetFeatureSpaceA(second, "t"); + VW_SetFeatureA(second, 0, "p^un_homme", 1.0f); + VW_SetFeatureA(second, 1, "w^un", 1.0f); + VW_SetFeatureA(second, 2, "w^homme", 1.0f); - example_constructed = VW_ImportExample(handle2, "1", fs, 2); + example_constructed = VW_ImportExample(handle2, "1", fs); // learn both diff --git a/vowpalwabbit/feature_space.cc b/vowpalwabbit/feature_space.cc new file mode 100644 index 00000000000..b5505eb3196 --- /dev/null +++ b/vowpalwabbit/feature_space.cc @@ -0,0 +1,58 @@ +#include "global_data.h" +#include "vw.h" + +#include + +namespace VW { + primitive_feature_space::primitive_feature_space(::vw& _all) : all(_all) {} + + primitive_feature_space::primitive_feature_space(::vw& _all, const std::string& _name, size_t _len) + : all(_all), name(_name[0]), hash(hash_space(all, _name)), fs(_len) + { + } + + primitive_feature_space::primitive_feature_space( + ::vw& _all, const std::string& _name, std::initializer_list features) + : primitive_feature_space(_all, _name, features.size()) + { + size_t i = 0; + for (const auto& f : features) { + fs[i] = f; + } + } + + primitive_feature_space::primitive_feature_space( + ::vw& _all, const std::string& _name, std::initializer_list> features) + : primitive_feature_space(_all, _name, features.size()) + { + size_t i = 0; + for (const auto& f : features) + { + set(i, std::get<0>(f), std::get<1>(f)); + } + } + + void primitive_feature_space::reset(size_t _len) { fs.resize(_len); } + + const feature& primitive_feature_space::operator[](size_t index) const { return fs[index]; } + feature& primitive_feature_space::operator[](size_t index) { return fs[index]; } + + void primitive_feature_space::set(size_t index, const std::string& feature_name, float value) + { + fs[index].weight_index = hash_feature(all, feature_name, hash); + fs[index].x = value; + } + + void primitive_feature_space::set_name(const std::string& _name) + { + name = _name[0]; + hash = hash_space(all, _name); + } + unsigned char primitive_feature_space::get_name() const { return name; } + size_t primitive_feature_space::size() const { return fs.size(); } + + feature_space::feature_space(vw& _all, size_t _size) : all(_all), fspaces(_size, all) {} + + const primitive_feature_space& feature_space::operator[](size_t index) const { return fspaces[index]; } + primitive_feature_space& feature_space::operator[](size_t index) { return fspaces[index]; } +} diff --git a/vowpalwabbit/parser.cc b/vowpalwabbit/parser.cc index f169e981c6a..22a5c2a26b3 100644 --- a/vowpalwabbit/parser.cc +++ b/vowpalwabbit/parser.cc @@ -795,7 +795,7 @@ void add_label(example* ec, float label, float weight, float base) ec->weight = weight; } -example* import_example(vw& all, string label, primitive_feature_space* features, size_t len) +example* import_example(vw& all, string label, feature_space& features) { example* ret = &get_unused_example(&all); all.p->lp.default_label(&ret->l); @@ -803,12 +803,12 @@ example* import_example(vw& all, string label, primitive_feature_space* features if (label.length() > 0) parse_example_label(all, *ret, label); - for (size_t i = 0; i < len; i++) + for (size_t i = 0; i < features.size(); i++) { - unsigned char index = features[i].name; + unsigned char index = features[i].get_name(); ret->indices.push_back(index); - for (size_t j = 0; j < features[i].len; j++) - ret->feature_space[index].push_back(features[i].fs[j].x, features[i].fs[j].weight_index); + for (size_t j = 0; j < features[i].size(); j++) + ret->feature_space[index].push_back(features[i][j].x, features[i][j].weight_index); } setup_example(all, ret); @@ -816,19 +816,19 @@ example* import_example(vw& all, string label, primitive_feature_space* features return ret; } -primitive_feature_space* export_example(vw& all, example* ec, size_t& len) +feature_space* export_example(vw& all, example* ec, size_t& len) { len = ec->indices.size(); - primitive_feature_space* fs_ptr = new primitive_feature_space[len]; + feature_space* fs_ptr = new feature_space(all, len); + feature_space& fs = *fs_ptr; int fs_count = 0; for (size_t idx = 0; idx < len; ++idx) { namespace_index i = ec->indices[idx]; - fs_ptr[fs_count].name = i; - fs_ptr[fs_count].len = ec->feature_space[i].size(); - fs_ptr[fs_count].fs = new feature[fs_ptr[fs_count].len]; + fs[fs_count].set_name(std::string(1, i)); + fs[fs_count].reset(ec->feature_space[i].size()); uint32_t stride_shift = all.weights.stride_shift(); int f_count = 0; @@ -836,7 +836,7 @@ primitive_feature_space* export_example(vw& all, example* ec, size_t& len) { feature t = {f.value(), f.index()}; t.weight_index >>= stride_shift; - fs_ptr[fs_count].fs[f_count] = t; + fs[fs_count][f_count] = t; f_count++; } fs_count++; @@ -844,12 +844,6 @@ primitive_feature_space* export_example(vw& all, example* ec, size_t& len) return fs_ptr; } -void releaseFeatureSpace(primitive_feature_space* features, size_t len) -{ - for (size_t i = 0; i < len; i++) delete[] features[i].fs; - delete (features); -} - void parse_example_label(vw& all, example& ec, string label) { v_array words = v_init(); diff --git a/vowpalwabbit/vw.h b/vowpalwabbit/vw.h index 3a618d7375c..d26e8d81311 100644 --- a/vowpalwabbit/vw.h +++ b/vowpalwabbit/vw.h @@ -53,9 +53,49 @@ bool is_ring_example(vw& all, example* ae); struct primitive_feature_space // just a helper definition. { - unsigned char name; - feature* fs; - size_t len; + private: + vw& all; + unsigned char name{0}; + uint64_t hash{0}; + std::vector fs{0}; + + public: + primitive_feature_space(vw& _all); + primitive_feature_space(vw& _all, const std::string& _name, size_t _len); + primitive_feature_space(::vw& _all, const std::string& _name, std::initializer_list features); + primitive_feature_space::primitive_feature_space( + ::vw& _all, const std::string& _name, std::initializer_list> features); + primitive_feature_space(const primitive_feature_space& other) = default; + primitive_feature_space& operator=(const primitive_feature_space& other) = default; + + void reset(size_t _len); + + const feature& operator[](size_t index) const; + feature& operator[](size_t index); + + void set(size_t index, const std::string& feature_name, float value); + + void set_name(const std::string& _name); + unsigned char get_name() const; + size_t size() const; +}; + +class feature_space +{ + private: + vw& all; + std::vector fspaces; + + public: + feature_space(vw& _all, size_t _size); + + feature_space(const feature_space& other) = default; + feature_space& operator=(const feature_space& other) = default; + + const primitive_feature_space& operator[](size_t index) const; + primitive_feature_space& operator[](size_t index); + + size_t size() const { return fspaces.size(); } }; // The next commands deal with creating examples. Caution: VW does not all allow creation of many examples at once by @@ -69,7 +109,7 @@ example* read_example(vw& all, std::string example_line); // The more complex way to create an example. // after you create and fill feature_spaces, get an example with everything filled in. -example* import_example(vw& all, std::string label, primitive_feature_space* features, size_t len); +example* import_example(vw& all, std::string label, feature_space& features); // callers must free memory using release_example // this interface must be used with care as finish_example is a no-op for these examples. @@ -115,9 +155,7 @@ void copy_example_data(bool audit, example*, example*); // metadata + features, void clear_example_data(example&); // don't clear the label void move_feature_namespace(example* dst, example* src, namespace_index c); -// after export_example, must call releaseFeatureSpace to free native memory -primitive_feature_space* export_example(vw& all, example* e, size_t& len); -void releaseFeatureSpace(primitive_feature_space* features, size_t len); +feature_space* export_example(vw& all, example* e, size_t& len); void save_predictor(vw& all, std::string reg_name); void save_predictor(vw& all, io_buf& buf); diff --git a/vowpalwabbit/vw_core.vcxproj b/vowpalwabbit/vw_core.vcxproj index 5212ffa8ef8..11d019a0658 100644 --- a/vowpalwabbit/vw_core.vcxproj +++ b/vowpalwabbit/vw_core.vcxproj @@ -18,7 +18,6 @@ x64 - {1E205806-7F80-47DD-A38D-FC08083F3593} Win32Proj @@ -41,7 +40,6 @@ - true NativeRecommendedRules.ruleset @@ -103,7 +101,6 @@ win32\make_config_h.exe - $(SolutionDir)out\target\$(Configuration)\$(PlatformShortName)\ $(SolutionDir)out\int\$(Configuration)\$(PlatformShortName)\$(ProjectName)\ @@ -215,6 +212,7 @@ + @@ -327,4 +325,4 @@ - + \ No newline at end of file diff --git a/vowpalwabbit/vwdll.cpp b/vowpalwabbit/vwdll.cpp index 24a38926171..6a4e9f07117 100644 --- a/vowpalwabbit/vwdll.cpp +++ b/vowpalwabbit/vwdll.cpp @@ -77,21 +77,21 @@ VW_DLL_MEMBER void VW_CALLING_CONV VW_Finish(VW_HANDLE handle) VW::finish(*pointer); } -VW_DLL_MEMBER VW_EXAMPLE VW_CALLING_CONV VW_ImportExample(VW_HANDLE handle, const char * label, VW_FEATURE_SPACE features, size_t len) +VW_DLL_MEMBER VW_EXAMPLE VW_CALLING_CONV VW_ImportExample(VW_HANDLE handle, const char * label, VW_FEATURE_SPACE features) { vw * pointer = static_cast(handle); - VW::primitive_feature_space * f = reinterpret_cast( features ); - return static_cast(VW::import_example(*pointer, label, f, len)); + VW::feature_space * f = reinterpret_cast( features ); + return static_cast(VW::import_example(*pointer, label, *f)); } -VW_DLL_MEMBER VW_FEATURE_SPACE VW_CALLING_CONV VW_InitializeFeatureSpaces(size_t len) -{ - return static_cast(new VW::primitive_feature_space[len]); +VW_DLL_MEMBER VW_FEATURE_SPACE VW_CALLING_CONV VW_InitializeFeatureSpace(VW_HANDLE handle, size_t len) +{ vw* pointer = static_cast(handle); + return static_cast(new VW::feature_space(*pointer, len)); } -VW_DLL_MEMBER VW_FEATURE_SPACE VW_CALLING_CONV VW_GetFeatureSpace(VW_FEATURE_SPACE first, size_t index) +VW_DLL_MEMBER VW_PRIMITIVE_FEATURE_SPACE VW_CALLING_CONV VW_GetPrimitiveFeatureSpace(VW_FEATURE_SPACE fs, size_t index) { - VW::primitive_feature_space* f = reinterpret_cast(first); - return static_cast(&f[index]); + VW::feature_space* f = reinterpret_cast(fs); + return static_cast(&((*f)[index])); } VW_DLL_MEMBER VW_FEATURE_SPACE VW_CALLING_CONV VW_ExportExample(VW_HANDLE handle, VW_EXAMPLE e, size_t * plen) @@ -101,8 +101,8 @@ VW_DLL_MEMBER VW_FEATURE_SPACE VW_CALLING_CONV VW_ExportExample(VW_HANDLE handle } VW_DLL_MEMBER void VW_CALLING_CONV VW_ReleaseFeatureSpace(VW_FEATURE_SPACE features, size_t len) -{ VW::primitive_feature_space * f = reinterpret_cast( features ); - VW::releaseFeatureSpace(f, len); +{ VW::feature_space * f = reinterpret_cast( features ); + delete f; } #ifdef USE_CODECVT VW_DLL_MEMBER VW_EXAMPLE VW_CALLING_CONV VW_ReadExample(VW_HANDLE handle, const char16_t * line) @@ -175,30 +175,33 @@ VW_DLL_MEMBER float VW_CALLING_CONV VW_GetConfidence(VW_EXAMPLE e) { return VW::get_confidence(static_cast(e)); } -VW_DLL_MEMBER size_t VW_CALLING_CONV VW_SetFeatureSpace(VW_HANDLE handle, VW_FEATURE_SPACE feature_space, const char* name) +VW_DLL_MEMBER void VW_CALLING_CONV VW_SetFeatureSpaceA(VW_PRIMITIVE_FEATURE_SPACE feature_space, const char* name) { VW::primitive_feature_space* f = reinterpret_cast(feature_space); - f->name = *name; - return VW_HashSpaceA(handle, name); + string space_name(name); + f->set_name(space_name); +} + +VW_DLL_MEMBER void VW_CALLING_CONV VW_SetFeatureSpace(VW_PRIMITIVE_FEATURE_SPACE feature_space, const char16_t* name) +{ + VW_SetFeatureSpaceA(feature_space, utf16_to_utf8(name).c_str()); } VW_DLL_MEMBER void VW_CALLING_CONV VW_InitFeatures(VW_FEATURE_SPACE feature_space, size_t features_count) { VW::primitive_feature_space* fs = reinterpret_cast(feature_space); - fs->fs = new feature[features_count]; - fs->len = features_count; + fs->reset(features_count); } -VW_DLL_MEMBER VW_FEATURE VW_CALLING_CONV VW_GetFeature(VW_FEATURE_SPACE feature_space, size_t index) +VW_DLL_MEMBER void VW_CALLING_CONV VW_SetFeatureA(VW_PRIMITIVE_FEATURE_SPACE feature_space, size_t index, const char* name, float value) { VW::primitive_feature_space* fs = reinterpret_cast(feature_space); - return &(fs->fs[index]); + fs->set(index, std::string(name), value); } -VW_DLL_MEMBER void VW_CALLING_CONV VW_SetFeature(VW_FEATURE f, size_t feature_hash, float value) +VW_DLL_MEMBER void VW_CALLING_CONV VW_SetFeature( + VW_PRIMITIVE_FEATURE_SPACE feature_space, size_t index, const char16_t* name, float value) { - feature* _feature = reinterpret_cast(f); - _feature->weight_index = feature_hash; - _feature->x = value; + VW_SetFeatureA(feature_space, index, utf16_to_utf8(name).c_str(), value); } VW_DLL_MEMBER VW_FEATURE VW_CALLING_CONV VW_GetFeatures(VW_HANDLE handle, VW_EXAMPLE e, size_t* plen) diff --git a/vowpalwabbit/vwdll.h b/vowpalwabbit/vwdll.h index 135a9ce1328..98edf3e6121 100644 --- a/vowpalwabbit/vwdll.h +++ b/vowpalwabbit/vwdll.h @@ -55,6 +55,7 @@ extern "C" typedef void* VW_EXAMPLE; typedef void* VW_LABEL; typedef void* VW_FEATURE_SPACE; + typedef void* VW_PRIMITIVE_FEATURE_SPACE; typedef void* VW_FEATURE; typedef void* VW_IOBUF; @@ -73,10 +74,10 @@ extern "C" VW_DLL_MEMBER void VW_CALLING_CONV VW_Finish(VW_HANDLE handle); VW_DLL_MEMBER VW_EXAMPLE VW_CALLING_CONV VW_ImportExample( - VW_HANDLE handle, const char* label, VW_FEATURE_SPACE features, size_t len); + VW_HANDLE handle, const char* label, VW_FEATURE_SPACE features); - VW_DLL_MEMBER VW_FEATURE_SPACE VW_CALLING_CONV VW_InitializeFeatureSpaces(size_t len); - VW_DLL_MEMBER VW_FEATURE_SPACE VW_CALLING_CONV VW_GetFeatureSpace(VW_FEATURE_SPACE first, size_t index); + VW_DLL_MEMBER VW_FEATURE_SPACE VW_CALLING_CONV VW_InitializeFeatureSpace(VW_HANDLE handle, size_t len); + VW_DLL_MEMBER VW_PRIMITIVE_FEATURE_SPACE VW_CALLING_CONV VW_GetPrimitiveFeatureSpace(VW_FEATURE_SPACE fs, size_t index); VW_DLL_MEMBER VW_FEATURE_SPACE VW_CALLING_CONV VW_ExportExample(VW_HANDLE handle, VW_EXAMPLE e, size_t* plen); VW_DLL_MEMBER void VW_CALLING_CONV VW_ReleaseFeatureSpace(VW_FEATURE_SPACE features, size_t len); #ifdef USE_CODECVT @@ -102,10 +103,13 @@ extern "C" VW_DLL_MEMBER const char* VW_CALLING_CONV VW_GetTag(VW_EXAMPLE e); VW_DLL_MEMBER size_t VW_CALLING_CONV VW_GetFeatureNumber(VW_EXAMPLE e); VW_DLL_MEMBER float VW_CALLING_CONV VW_GetConfidence(VW_EXAMPLE e); - VW_DLL_MEMBER size_t VW_CALLING_CONV VW_SetFeatureSpace(VW_HANDLE handle, VW_FEATURE_SPACE feature_space, const char* name); + VW_DLL_MEMBER void VW_CALLING_CONV VW_SetFeatureSpaceA(VW_PRIMITIVE_FEATURE_SPACE feature_space, const char* name); + VW_DLL_MEMBER void VW_CALLING_CONV VW_SetFeatureSpace(VW_PRIMITIVE_FEATURE_SPACE feature_space, const char16_t* name); VW_DLL_MEMBER void VW_CALLING_CONV VW_InitFeatures(VW_FEATURE_SPACE feature_space, size_t features_count); - VW_DLL_MEMBER VW_FEATURE VW_CALLING_CONV VW_GetFeature(VW_FEATURE_SPACE feature_space, size_t index); - VW_DLL_MEMBER void VW_CALLING_CONV VW_SetFeature(VW_FEATURE feature, size_t feature_hash, float value); + VW_DLL_MEMBER void VW_CALLING_CONV VW_SetFeatureA( + VW_PRIMITIVE_FEATURE_SPACE feature_space, size_t index, const char* name, float value); + VW_DLL_MEMBER void VW_CALLING_CONV VW_SetFeature( + VW_PRIMITIVE_FEATURE_SPACE feature_space, size_t index, const char16_t* name, float value); VW_DLL_MEMBER VW_FEATURE VW_CALLING_CONV VW_GetFeatures(VW_HANDLE handle, VW_EXAMPLE e, size_t* plen); VW_DLL_MEMBER void VW_CALLING_CONV VW_ReturnFeatures(VW_FEATURE f); #ifdef USE_CODECVT