Skip to content

Commit

Permalink
Model.Analogy returned back
Browse files Browse the repository at this point in the history
Signed-off-by: Dusan Malusev <dusan@dusanmalusev.dev>
  • Loading branch information
CodeLieutenant committed Jul 8, 2023
1 parent 9fe7412 commit 50fcf9f
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 71 deletions.
1 change: 0 additions & 1 deletion analog.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (

// Analog is one entry of an analogy query result. The struct tags give the
// JSON field names used when a value is marshalled.
type Analog struct {
// Name is the word returned for the analogy.
Name string `json:"name"`
// NOTE(review): the hunk header reports one deleted line in this struct and
// Model.Analogy fills only Name and Probability, so Index is presumably the
// field this commit removes — confirm against the repository.
Index int `json:"index"`
// Probability is the score reported by the C layer alongside Name.
Probability float32 `json:"probability"`
}

Expand Down
66 changes: 38 additions & 28 deletions cbits.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@

#include <stdlib.h>

#include <loss.cc>
#include <args.cc>
#include <autotune.cc>
#include <densematrix.cc>
#include <dictionary.cc>
#include <loss.cc>
#include <matrix.cc>
#include <meter.cc>
#include <model.cc>
Expand All @@ -24,6 +24,20 @@

#include "cbits.h"

// Releases the malloc-allocated buffer of a string-like struct (anything with
// `data` and `size` members) and resets it to the empty state.
//
// The argument is parenthesized on every use so that expressions such as `*ptr`
// expand correctly. It is evaluated more than once, so it must be free of side
// effects. free() accepts a null pointer, so no explicit null check is needed.
#define FREE_STRING(str)      \
    do                        \
    {                         \
        free((str).data);     \
        (str).data = nullptr; \
        (str).size = 0;       \
    } while (0)

// Prefix that FastText_PredictItemAt skips at the start of every stored label.
static constexpr char LABEL_PREFIX[] = "__label__";
// Length of LABEL_PREFIX in bytes, not counting the terminating NUL.
static constexpr auto LABEL_PREFIX_SIZE = sizeof(LABEL_PREFIX) - 1;

// Result container shared by FastText_Predict and FastText_Analogy: each element
// pairs a fasttext::real score with a string (a label or a word). Instances are
// heap-allocated and handed to callers as the opaque `data` pointer of
// FastText_Predict_t.
using Predictions = std::vector<std::pair<fasttext::real, std::string>>;

struct membuf : std::streambuf
{
membuf(FastText_String_t query)
Expand Down Expand Up @@ -76,44 +90,42 @@ FastText_Predict_t FastText_Predict(const FastText_Handle_t handle, FastText_Str
membuf sbuf(query);
std::istream in(&sbuf);

auto predictions = new std::vector<std::pair<fasttext::real, std::string>>();
auto predictions = new Predictions((size_t)k);
model->predictLine(in, *predictions, k, threshold);

free(query.data);
query.data = nullptr;
query.size = 0;
FREE_STRING(query);

return FastText_Predict_t{
predictions->size(),
(void *)predictions,
};
}

// char *FastText_Analogy(const FastText_Handle_t handle, const char *query, size_t length)
// {
// return "";

// // auto model = reinterpret_cast<fasttext::FastText *>(handle);
/**
 * Runs an analogy query (fasttext::FastText::getAnalogies) and returns the
 * result as an opaque prediction list.
 *
 * @param handle  Model handle; reinterpreted as fasttext::FastText *.
 * @param word1   First query word. Only `data` is read and it must be
 *                NUL-terminated; `size` is ignored.
 * @param word2   Second query word, same requirements as word1.
 * @param word3   Third query word, same requirements as word1.
 * @param k       Forwarded unchanged to getAnalogies.
 * @return        A FastText_Predict_t whose `data` owns a heap-allocated
 *                Predictions vector; release it with FastText_FreePredict.
 *
 * Ownership: the three word buffers are released with free() before returning,
 * so the caller must not touch or free them afterwards.
 */
FastText_Predict_t FastText_Analogy(const FastText_Handle_t handle, FastText_String_t word1, FastText_String_t word2,
                                    FastText_String_t word3, int32_t k)
{
    const auto model = reinterpret_cast<fasttext::FastText *>(handle);

    // Build the heap-owned result directly from the returned temporary (moved, not copied).
    auto analogies = new Predictions(model->getAnalogies(k, word1.data, word2.data, word3.data));

    FREE_STRING(word1);
    FREE_STRING(word2);
    FREE_STRING(word3);

    // FastText_PredictItemAt unconditionally skips LABEL_PREFIX_SIZE leading bytes
    // of every entry. Analogy results are plain vocabulary words without that
    // prefix, so reading them through FastText_PredictItemAt would cut off the
    // first characters and underflow the reported size for short words. Storing
    // the words with the prefix makes the accessor return the full word.
    for (auto &analogy : *analogies)
    {
        analogy.second.insert(0, LABEL_PREFIX);
    }

    return FastText_Predict_t{
        analogies->size(),
        static_cast<void *>(analogies),
    };
}

FastText_FloatVector_t FastText_Wordvec(const FastText_Handle_t handle, FastText_String_t word)
{
const auto model = reinterpret_cast<fasttext::FastText *>(handle);
int64_t dimensions = model->getDimension();

auto vec = new fasttext::Vector(dimensions);
model->getWordVector(*vec, std::string(word.data, word.size));

free(word.data);
word.data = nullptr;
word.size = 0;
model->getWordVector(*vec, word.data);
FREE_STRING(word);

return FastText_FloatVector_t{
vec->data(),
Expand All @@ -131,9 +143,7 @@ FastText_FloatVector_t FastText_Sentencevec(const FastText_Handle_t handle, Fast

auto vec = new fasttext::Vector(model->getDimension());
model->getSentenceVector(in, *vec);
free(sentence.data);
sentence.data = nullptr;
sentence.size = 0;
FREE_STRING(sentence);

return FastText_FloatVector_t{
vec->data(),
Expand All @@ -150,18 +160,18 @@ void FastText_FreeFloatVector(FastText_FloatVector_t vector)

/**
 * Destroys the Predictions vector referenced by `predict.data`.
 *
 * `predict.data` must be a pointer produced by FastText_Predict or
 * FastText_Analogy (both allocate the vector with `new`), or null. `predict` is
 * passed by value, so the caller's copy still holds the stale pointer afterwards
 * and must not be used again.
 */
void FastText_FreePredict(FastText_Predict_t predict)
{
    delete static_cast<Predictions *>(predict.data);
}

FastText_PredictItem_t FastText_PredictItemAt(FastText_Predict_t predict, size_t idx)
{
const auto vec = reinterpret_cast<std::vector<std::pair<fasttext::real, std::string>> *>(predict.data);
const auto vec = reinterpret_cast<Predictions *>(predict.data);
const auto &data = vec->at(idx);

auto str = FastText_String_t{
data.second.size() - sizeof("__label__") + 1,
(char *)(data.second.c_str() + sizeof("__label__") - 1),
data.second.size() - (LABEL_PREFIX_SIZE),
(char *)(data.second.c_str() + LABEL_PREFIX_SIZE),
};

return FastText_PredictItem_t{
Expand Down
33 changes: 16 additions & 17 deletions cbits.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,6 @@ extern "C"

typedef void *FastText_Handle_t;

typedef struct
{
enum
{
SUCCESS = 0,
ERROR = 1,
} status;

union {
FastText_Handle_t handle;
char *error;
};
} FastText_Result_t;

typedef struct
{
float *data;
Expand All @@ -48,16 +34,29 @@ extern "C"
void *data;
} FastText_Predict_t;

// Result of an operation that yields a model handle (returned by
// FastText_NewHandle): a status tag plus an anonymous union holding either the
// handle or an error string.
typedef struct
{
// Outcome tag for the union below.
enum
{
SUCCESS = 0,
ERROR = 1,
} status;

// NOTE(review): presumably `handle` is valid when status == SUCCESS and `error`
// when status == ERROR; who owns/frees `error` is not visible here — confirm
// against the FastText_NewHandle implementation.
union {
FastText_Handle_t handle;
char *error;
};
} FastText_Result_t;

FastText_Result_t FastText_NewHandle(const char *path);
void FastText_DeleteHandle(const FastText_Handle_t handle);
FastText_Predict_t FastText_Predict(const FastText_Handle_t handle, FastText_String_t query, int k,
float threshold);
FastText_Predict_t FastText_PredictOne(const FastText_Handle_t handle, FastText_String_t query, float threshold);

FastText_FloatVector_t FastText_Wordvec(const FastText_Handle_t handle, FastText_String_t word);
FastText_FloatVector_t FastText_Sentencevec(const FastText_Handle_t handle, FastText_String_t sentance);

// Runs an analogy query for word1, word2, word3 (forwarded in that order, together
// with k, to fastText's getAnalogies). Only the `data` pointers of the words are
// read, and the implementation free()s them before returning. Release the returned
// value with FastText_FreePredict.
FastText_Predict_t FastText_Analogy(const FastText_Handle_t handle, FastText_String_t word1,
FastText_String_t word2, FastText_String_t word3, int32_t k);

void FastText_FreeFloatVector(FastText_FloatVector_t vector);
void FastText_FreePredict(FastText_Predict_t predict);
Expand Down
74 changes: 49 additions & 25 deletions fasttext.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,13 @@ func (handle Model) MultiLinePredict(query string, k int32, threshoad float32) [
}

func (handle Model) PredictOne(query string, threshoad float32) Prediction {
r := C.FastText_PredictOne(handle.p, C.FastText_String_t{
data: C.CString(query),
size: C.size_t(len(query)),
}, C.float(threshoad))
r := C.FastText_PredictOne(
handle.p,
C.FastText_String_t{
data: C.CString(query),
},
C.float(threshoad),
)
defer C.FastText_FreePredict(r)

cPredic := C.FastText_PredictItemAt(r, C.size_t(0))
Expand All @@ -88,10 +91,15 @@ func (handle Model) PredictOne(query string, threshoad float32) Prediction {

// Perform model prediction
func (handle Model) Predict(query string, k int32, threshoad float32) Predictions {
r := C.FastText_Predict(handle.p, C.FastText_String_t{
data: C.CString(query),
size: C.size_t(len(query)),
}, C.int(k), C.float(threshoad))
r := C.FastText_Predict(
handle.p,
C.FastText_String_t{
data: C.CString(query),
size: C.size_t(len(query)),
},
C.int(k),
C.float(threshoad),
)
defer C.FastText_FreePredict(r)

predictions := make(Predictions, r.size)
Expand All @@ -108,28 +116,44 @@ func (handle Model) Predict(query string, k int32, threshoad float32) Prediction
return predictions
}

// func (handle Model) Analogy(query string) (Analogs, error) {
// cquery := C.CString(query)
// defer C.free(unsafe.Pointer(cquery))
// Analogy queries the model for analogies of word1, word2 and word3 and returns
// one Analog per entry of the C result. k is passed through to the C layer
// unchanged.
//
// NOTE(review): entries are read through FastText_PredictItemAt, which skips a
// "__label__"-sized prefix on every item; analogy words may not carry that
// prefix — confirm the C side accounts for it.
func (handle Model) Analogy(word1, word2, word3 string, k int32) Analogs {
	// Each C.CString buffer is handed over to FastText_Analogy, which frees it;
	// only the data pointer is populated, size stays zero.
	first := C.FastText_String_t{data: C.CString(word1)}
	second := C.FastText_String_t{data: C.CString(word2)}
	third := C.FastText_String_t{data: C.CString(word3)}

	result := C.FastText_Analogy(handle.p, first, second, third, C.int32_t(k))
	defer C.FastText_FreePredict(result)

	analogs := make(Analogs, result.size)
	for idx := 0; idx < len(analogs); idx++ {
		item := C.FastText_PredictItemAt(result, C.size_t(idx))

		// Copy the label out of C memory before the deferred free runs.
		analogs[idx] = Analog{
			Name:        C.GoStringN(item.label.data, C.int(item.label.size)),
			Probability: float32(item.probability),
		}
	}

	return analogs
}

func (handle Model) Wordvec(word string) []float32 {
r := C.FastText_Wordvec(handle.p, C.FastText_String_t{
data: C.CString(word),
size: C.size_t(len(word)),
})
r := C.FastText_Wordvec(
handle.p,
C.FastText_String_t{
data: C.CString(word),
},
)
defer C.FastText_FreeFloatVector(r)

vectors := make([]float32, r.size)
Expand Down

0 comments on commit 50fcf9f

Please sign in to comment.