Skip to content

Commit

Permalink
vw.h example construction helpers first version
Browse files Browse the repository at this point in the history
  • Loading branch information
ataymano@microsoft.com authored and ataymano@microsoft.com committed Jun 4, 2019
1 parent dfe5345 commit b9c334a
Show file tree
Hide file tree
Showing 10 changed files with 224 additions and 68 deletions.
57 changes: 57 additions & 0 deletions test/unit_test/feature_space_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#ifndef STATIC_LINK_VW
#define BOOST_TEST_DYN_LINK
#endif

#include <boost/test/unit_test.hpp>

#include "vw.h"

// Checks that two weight containers hold element-wise equal values and have
// the same number of elements.
//
// T must provide begin()/end() and be usable in a range-based for loop. A
// length mismatch in either direction is reported as a test failure.
template <class T>
void check_weights_equal(T& first, T& second)
{
  auto secondIt = second.begin();
  for (const auto& firstWeight : first)
  {
    if (secondIt == second.end())
    {
      // `second` ran out before `first`: record the failure and stop instead
      // of dereferencing (and incrementing) the end iterator.
      BOOST_CHECK_EQUAL(secondIt == second.end(), false);
      return;
    }
    BOOST_CHECK_EQUAL(firstWeight, *secondIt);
    ++secondIt;
  }
  // `first` is exhausted here; `second` must be exhausted as well.
  BOOST_CHECK_EQUAL(secondIt == second.end(), true);
}

// Trains one model on an example parsed from text and a second, identically
// configured model on the same example assembled through VW::feature_space,
// then checks that both models end up with the same weights.
BOOST_AUTO_TEST_CASE(parsed_and_constructed_example_parity)
{
  vw* parsed_model = VW::initialize("-q st --noconstant --quiet");
  vw* constructed_model = VW::initialize("-q st --noconstant --quiet");

  // Reference example, produced by the text parser.
  auto example_parsed =
      VW::read_example(*parsed_model, "1 |s p^the_man w^the w^man |t p^un_homme w^un w^homme");

  // The same example, built namespace by namespace.
  VW::feature_space spaces(*constructed_model, 2);

  auto& s_space = spaces[0];
  s_space.reset(3);
  s_space.set_name("s");
  s_space.set(0, "p^the_man", 1.0f);
  s_space.set(1, "w^the", 1.0f);
  s_space.set(2, "w^man", 1.0f);

  auto& t_space = spaces[1];
  t_space.reset(3);
  t_space.set_name("t");
  t_space.set(0, "p^un_homme", 1.0f);
  t_space.set(1, "w^un", 1.0f);
  t_space.set(2, "w^homme", 1.0f);

  auto example_constructed = VW::import_example(*constructed_model, "1", spaces);

  parsed_model->learn(*example_parsed);
  constructed_model->learn(*example_constructed);

  // Both models must use the same weight representation before comparing.
  BOOST_CHECK_EQUAL(parsed_model->weights.sparse, constructed_model->weights.sparse);

  if (!parsed_model->weights.sparse)
  {
    check_weights_equal(parsed_model->weights.dense_weights, constructed_model->weights.dense_weights);
  }
  else
  {
    check_weights_equal(parsed_model->weights.sparse_weights, constructed_model->weights.sparse_weights);
  }

  VW::finish(*parsed_model);
  VW::finish(*constructed_model);
}
1 change: 1 addition & 0 deletions test/unit_test/unit_test.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
<ItemGroup>
<ClCompile Include="cb_explore_adf_test.cc" />
<ClCompile Include="explore_test.cc" />
<ClCompile Include="feature_space_test.cc" />
<ClCompile Include="main.cc" />
<ClCompile Include="object_pool_test.cc" />
<ClCompile Include="options_boost_po_test.cc" />
Expand Down
3 changes: 3 additions & 0 deletions test/unit_test/unit_test.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
<ClCompile Include="vwdll_test.cc">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="feature_space_test.cc">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />
Expand Down
24 changes: 12 additions & 12 deletions test/unit_test/vwdll_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,23 +30,23 @@ BOOST_AUTO_TEST_CASE(vw_dll_parsed_and_constructed_example_parity)
//construct example
VW_HANDLE handle2 = VW_InitializeA("-q st --noconstant --quiet");
VW_EXAMPLE example_constructed;
auto fs = VW_InitializeFeatureSpaces(2);
auto fs = VW_InitializeFeatureSpace(handle2, 2);

auto first = VW_GetFeatureSpace(fs, 0);
auto first = VW_GetPrimitiveFeatureSpace(fs, 0);
VW_InitFeatures(first, 3);
auto shash = VW_SetFeatureSpace(handle2, first, "s");
VW_SetFeature(VW_GetFeature(first, 0), VW_HashFeatureA(handle2, "p^the_man", shash), 1.0f);
VW_SetFeature(VW_GetFeature(first, 1), VW_HashFeatureA(handle2, "w^the", shash), 1.0f);
VW_SetFeature(VW_GetFeature(first, 2), VW_HashFeatureA(handle2, "w^man", shash), 1.0f);
VW_SetFeatureSpaceA(first, "s");
VW_SetFeatureA(first, 0, "p^the_man", 1.0f);
VW_SetFeatureA(first, 1, "w^the", 1.0f);
VW_SetFeatureA(first, 2, "w^man", 1.0f);

auto second = VW_GetFeatureSpace(fs, 1);
auto second = VW_GetPrimitiveFeatureSpace(fs, 1);
VW_InitFeatures(second, 3);
auto thash = VW_SetFeatureSpace(handle2, second, "t");
VW_SetFeature(VW_GetFeature(second, 0), VW_HashFeatureA(handle2, "p^un_homme", thash), 1.0f);
VW_SetFeature(VW_GetFeature(second, 1), VW_HashFeatureA(handle2, "w^un", thash), 1.0f);
VW_SetFeature(VW_GetFeature(second, 2), VW_HashFeatureA(handle2, "w^homme", thash), 1.0f);
VW_SetFeatureSpaceA(second, "t");
VW_SetFeatureA(second, 0, "p^un_homme", 1.0f);
VW_SetFeatureA(second, 1, "w^un", 1.0f);
VW_SetFeatureA(second, 2, "w^homme", 1.0f);

example_constructed = VW_ImportExample(handle2, "1", fs, 2);
example_constructed = VW_ImportExample(handle2, "1", fs);


// learn both
Expand Down
58 changes: 58 additions & 0 deletions vowpalwabbit/feature_space.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#include "global_data.h"
#include "vw.h"

#include <string>

namespace VW {
// Binds the feature space to a vw instance; all remaining members keep their
// default member initializers.
primitive_feature_space::primitive_feature_space(::vw& _all)
    : all(_all)
{
}

// Creates a named feature space with room for `_len` features.
//   _all  - vw instance used for hashing.
//   _name - namespace name; only its first character is stored as `name`,
//           while the full string is passed to hash_space.
//   _len  - number of feature slots to allocate.
primitive_feature_space::primitive_feature_space(::vw& _all, const std::string& _name, size_t _len)
    : all(_all)
    , name(_name[0])
    , hash(hash_space(_all, _name))
    , fs(_len)
{
}

// Creates a named feature space holding a copy of the given features, in the
// order they were listed.
//   _all     - vw instance used for hashing the namespace name.
//   _name    - namespace name (see the (_all, _name, _len) constructor).
//   features - already-hashed features to store.
primitive_feature_space::primitive_feature_space(
    ::vw& _all, const std::string& _name, std::initializer_list<feature> features)
    : primitive_feature_space(_all, _name, features.size())
{
  // Copy every element to its own slot. The previous loop never advanced its
  // index, so each feature overwrote slot 0.
  fs.assign(features.begin(), features.end());
}

// Creates a named feature space from (feature name, value) pairs. Each feature
// name is hashed through set() and stored at the position it was listed in.
//   _all     - vw instance used for hashing.
//   _name    - namespace name (see the (_all, _name, _len) constructor).
//   features - (feature name, value) pairs to store.
primitive_feature_space::primitive_feature_space(
    ::vw& _all, const std::string& _name, std::initializer_list<std::tuple<const char*, float>> features)
    : primitive_feature_space(_all, _name, features.size())
{
  size_t i = 0;
  for (const auto& f : features)
  {
    set(i, std::get<0>(f), std::get<1>(f));
    // Advance to the next slot; without this every pair was written to slot 0.
    ++i;
  }
}

// Resizes the feature storage to `_len` slots. This uses std::vector::resize,
// so slots below the new length keep their current contents.
void primitive_feature_space::reset(size_t _len)
{
  fs.resize(_len);
}

// Read-only access to the feature at `index`; no bounds checking is performed.
const feature& primitive_feature_space::operator[](size_t index) const
{
  return fs[index];
}

// Mutable access to the feature at `index`; no bounds checking is performed.
feature& primitive_feature_space::operator[](size_t index)
{
  return fs[index];
}

void primitive_feature_space::set(size_t index, const std::string& feature_name, float value)
{
fs[index].weight_index = hash_feature(all, feature_name, hash);
fs[index].x = value;
}

// Renames the namespace: `name` becomes the first character of `_name`, and
// `hash` is recomputed from the full string via hash_space.
void primitive_feature_space::set_name(const std::string& _name)
{
  const unsigned char first_char = _name[0];
  name = first_char;
  hash = hash_space(all, _name);
}
// Returns the single-character namespace identifier.
unsigned char primitive_feature_space::get_name() const
{
  return name;
}
// Returns the number of feature slots currently held.
size_t primitive_feature_space::size() const
{
  return fs.size();
}

// Creates `_size` primitive feature spaces, each a copy of one that is bound
// to `_all` and otherwise default-initialized.
feature_space::feature_space(vw& _all, size_t _size)
    : all(_all)
    , fspaces(_size, primitive_feature_space(_all))
{
}

// Read-only access to the namespace at `index`; no bounds checking is performed.
const primitive_feature_space& feature_space::operator[](size_t index) const
{
  return fspaces[index];
}

// Mutable access to the namespace at `index`; no bounds checking is performed.
primitive_feature_space& feature_space::operator[](size_t index)
{
  return fspaces[index];
}
}
28 changes: 11 additions & 17 deletions vowpalwabbit/parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -795,61 +795,55 @@ void add_label(example* ec, float label, float weight, float base)
ec->weight = weight;
}

// Builds an example from a label string and programmatically constructed
// feature spaces, and returns a pointer to it.
// NOTE(review): this span is a rendered diff hunk, so the pre-change and
// post-change versions of several lines (the signature, the loop headers and
// the feature accessors) appear back to back. Confirm which line of each pair
// is live before compiling.
example* import_example(vw& all, string label, primitive_feature_space* features, size_t len)
example* import_example(vw& all, string label, feature_space& features)
{
// Obtain an example via get_unused_example and reset its label through the
// parser's default_label hook.
example* ret = &get_unused_example(&all);
all.p->lp.default_label(&ret->l);

// An empty label string skips label parsing, leaving the default label.
if (label.length() > 0)
parse_example_label(all, *ret, label);

// For every supplied feature space: record its name as a namespace index and
// append each (value, weight_index) pair to that namespace of the example.
for (size_t i = 0; i < len; i++)
for (size_t i = 0; i < features.size(); i++)
{
unsigned char index = features[i].name;
unsigned char index = features[i].get_name();
ret->indices.push_back(index);
for (size_t j = 0; j < features[i].len; j++)
ret->feature_space[index].push_back(features[i].fs[j].x, features[i].fs[j].weight_index);
for (size_t j = 0; j < features[i].size(); j++)
ret->feature_space[index].push_back(features[i][j].x, features[i][j].weight_index);
}

// Finalize the example and bump the parser's end_parsed_examples counter.
setup_example(all, ret);
all.p->end_parsed_examples++;
return ret;
}

// Copies the features of `ec` into a heap-allocated feature space container
// and returns a pointer to it. `len` is an output parameter that receives the
// number of namespaces in the example (ec->indices.size()).
// NOTE(review): this span is a rendered diff hunk, so the pre-change and
// post-change versions of several lines (the signature, the allocation and
// the per-namespace assignments) appear back to back. Confirm which line of
// each pair is live before compiling.
// NOTE(review): the result is created with `new`; presumably the caller is
// responsible for releasing it - confirm the intended ownership.
primitive_feature_space* export_example(vw& all, example* ec, size_t& len)
feature_space* export_example(vw& all, example* ec, size_t& len)
{
len = ec->indices.size();
primitive_feature_space* fs_ptr = new primitive_feature_space[len];
feature_space* fs_ptr = new feature_space(all, len);
feature_space& fs = *fs_ptr;

// Output slot for the next namespace; advanced once per loop iteration.
int fs_count = 0;

for (size_t idx = 0; idx < len; ++idx)
{
// Name the output slot after the namespace index and size it to hold every
// feature of that namespace.
namespace_index i = ec->indices[idx];
fs_ptr[fs_count].name = i;
fs_ptr[fs_count].len = ec->feature_space[i].size();
fs_ptr[fs_count].fs = new feature[fs_ptr[fs_count].len];
fs[fs_count].set_name(std::string(1, i));
fs[fs_count].reset(ec->feature_space[i].size());

// Each exported weight index is shifted right by the weights' stride shift.
uint32_t stride_shift = all.weights.stride_shift();
int f_count = 0;
for (features::iterator& f : ec->feature_space[i])
{
feature t = {f.value(), f.index()};
t.weight_index >>= stride_shift;
fs_ptr[fs_count].fs[f_count] = t;
fs[fs_count][f_count] = t;
f_count++;
}
fs_count++;
}
return fs_ptr;
}

// Frees an array of primitive feature spaces together with the per-namespace
// feature arrays they own.
//   features - array allocated with `new primitive_feature_space[len]`.
//   len      - number of elements in `features`.
void releaseFeatureSpace(primitive_feature_space* features, size_t len)
{
  for (size_t i = 0; i < len; i++) delete[] features[i].fs;
  // The outer storage is an array allocation, so it must be released with
  // delete[]; a scalar delete here is undefined behavior.
  delete[] features;
}

void parse_example_label(vw& all, example& ec, string label)
{
v_array<substring> words = v_init<substring>();
Expand Down
52 changes: 45 additions & 7 deletions vowpalwabbit/vw.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,49 @@ bool is_ring_example(vw& all, example* ae);

// A single namespace worth of features, bound to the vw instance that is used
// to hash namespace and feature names.
struct primitive_feature_space  // just a helper definition.
{
 private:
  vw& all;                     // vw instance used for hashing
  unsigned char name{0};       // first character of the namespace name
  uint64_t hash{0};            // hash of the full namespace name
  std::vector<feature> fs{0};  // stored features

 public:
  // Binds to `_all`; other members keep the defaults above.
  primitive_feature_space(vw& _all);
  // Named namespace with `_len` feature slots.
  primitive_feature_space(vw& _all, const std::string& _name, size_t _len);
  // Named namespace initialized from already-hashed features.
  primitive_feature_space(::vw& _all, const std::string& _name, std::initializer_list<feature> features);
  // Named namespace initialized from (feature name, value) pairs.
  // In-class declarations must not repeat the class-name qualifier; GCC and
  // Clang reject the qualified form.
  primitive_feature_space(
      ::vw& _all, const std::string& _name, std::initializer_list<std::tuple<const char*, float>> features);
  primitive_feature_space(const primitive_feature_space& other) = default;
  // Because of the reference member `all`, this defaulted copy assignment is
  // defined as deleted by the language.
  primitive_feature_space& operator=(const primitive_feature_space& other) = default;

  // Resizes the feature storage to `_len` slots.
  void reset(size_t _len);

  // Unchecked element access.
  const feature& operator[](size_t index) const;
  feature& operator[](size_t index);

  // Hashes `feature_name` within this namespace and stores it with `value` at `index`.
  void set(size_t index, const std::string& feature_name, float value);

  // Sets the namespace name and recomputes its hash.
  void set_name(const std::string& _name);
  unsigned char get_name() const;
  size_t size() const;
};

// An indexable, fixed-at-construction collection of primitive_feature_space
// objects that all share one vw instance.
class feature_space
{
  vw& all;                                       // vw instance shared by every namespace
  std::vector<primitive_feature_space> fspaces;  // one entry per namespace

 public:
  // Creates `_size` namespaces bound to `_all`.
  feature_space(vw& _all, size_t _size);

  feature_space(const feature_space& other) = default;
  // Because of the reference member `all`, this defaulted copy assignment is
  // defined as deleted by the language.
  feature_space& operator=(const feature_space& other) = default;

  // Number of namespaces held.
  size_t size() const { return fspaces.size(); }

  // Unchecked access to the namespace at `index`.
  primitive_feature_space& operator[](size_t index);
  const primitive_feature_space& operator[](size_t index) const;
};

// The next commands deal with creating examples. Caution: VW does not allow creation of many examples at once by
Expand All @@ -69,7 +109,7 @@ example* read_example(vw& all, std::string example_line);
// The more complex way to create an example.

// after you create and fill feature_spaces, get an example with everything filled in.
example* import_example(vw& all, std::string label, primitive_feature_space* features, size_t len);
example* import_example(vw& all, std::string label, feature_space& features);

// callers must free memory using release_example
// this interface must be used with care as finish_example is a no-op for these examples.
Expand Down Expand Up @@ -115,9 +155,7 @@ void copy_example_data(bool audit, example*, example*); // metadata + features,
void clear_example_data(example&); // don't clear the label
void move_feature_namespace(example* dst, example* src, namespace_index c);

// after export_example, must call releaseFeatureSpace to free native memory
primitive_feature_space* export_example(vw& all, example* e, size_t& len);
void releaseFeatureSpace(primitive_feature_space* features, size_t len);
feature_space* export_example(vw& all, example* e, size_t& len);

void save_predictor(vw& all, std::string reg_name);
void save_predictor(vw& all, io_buf& buf);
Expand Down
6 changes: 2 additions & 4 deletions vowpalwabbit/vw_core.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>

<PropertyGroup Label="Globals">
<ProjectGuid>{1E205806-7F80-47DD-A38D-FC08083F3593}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
Expand All @@ -41,7 +40,6 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<Import Project="$(ProjectDir)Build.props" />

<PropertyGroup Condition="'$(Configuration)'=='Debug'">
<LinkIncremental>true</LinkIncremental>
<CodeAnalysisRuleSet>NativeRecommendedRules.ruleset</CodeAnalysisRuleSet>
Expand Down Expand Up @@ -103,7 +101,6 @@
<Command>win32\make_config_h.exe</Command>
</PreBuildEvent>
</ItemDefinitionGroup>

<PropertyGroup>
<OutDir>$(SolutionDir)out\target\$(Configuration)\$(PlatformShortName)\</OutDir>
<IntDir>$(SolutionDir)out\int\$(Configuration)\$(PlatformShortName)\$(ProjectName)\</IntDir>
Expand Down Expand Up @@ -215,6 +212,7 @@
<ItemGroup>
<ClCompile Include="active_cover.cc" />
<ClCompile Include="action_score.cc" />
<ClCompile Include="feature_space.cc" />
<ClCompile Include="options_boost_po.cc" />
<ClCompile Include="options_serializer_boost_po.cc" />
<ClCompile Include="autolink.cc" />
Expand Down Expand Up @@ -327,4 +325,4 @@
<Error Condition="!Exists('$(SolutionDir)packages\zlib.v140.windesktop.msvcstl.static.rt-dyn.1.2.8.8\build\native\zlib.v140.windesktop.msvcstl.static.rt-dyn.targets')" Text="$([System.String]::Format('$(ErrorText)', '$(SolutionDir)packages\zlib.v140.windesktop.msvcstl.static.rt-dyn.1.2.8.8\build\native\zlib.v140.windesktop.msvcstl.static.rt-dyn.targets'))" />
</Target>
<Import Project="..\sdl\SDL-7.0-NativeAnalysis.targets" />
</Project>
</Project>
Loading

0 comments on commit b9c334a

Please sign in to comment.