Skip to content

Commit

Permalink
Added support for different prediction types - resolves #10
Browse files Browse the repository at this point in the history
Co-authored-by: Nuno Silva <nunosilva800@gmail.com>
  • Loading branch information
ankane and nunosilva800 committed Dec 16, 2024
1 parent fca59ef commit f500d5f
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
## 0.4.0 (unreleased)

- Added support for different prediction types
- Added support for hashes and Rover data frames to `predict` method
- Added support for hashes to `Dataset`
- Changed `Dataset` to use column names for feature names with Rover and Daru
Expand Down
51 changes: 46 additions & 5 deletions lib/lightgbm/booster.rb
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,7 @@ def num_trees
out.read_int
end

# TODO support different prediction types
def predict(input, start_iteration: nil, num_iteration: nil, **params)
def predict(input, start_iteration: nil, num_iteration: nil, raw_score: false, pred_leaf: false, pred_contrib: false, **params)
input =
if daru?(input)
input[*cached_feature_name].map_rows(&:to_a)
Expand All @@ -157,23 +156,59 @@ def predict(input, start_iteration: nil, num_iteration: nil, **params)
input.to_a
end

predict_type = FFI::C_API_PREDICT_NORMAL
if raw_score
predict_type = FFI::C_API_PREDICT_RAW_SCORE
end
if pred_leaf
predict_type = FFI::C_API_PREDICT_LEAF_INDEX
end
if pred_contrib
predict_type = FFI::C_API_PREDICT_CONTRIB
end

singular = !input.first.is_a?(Array)
input = [input] if singular

start_iteration ||= 0
num_iteration ||= best_iteration
num_class = self.num_class

n_preds =
num_preds(
start_iteration,
num_iteration,
input.count,
predict_type
)

flat_input = input.flatten
handle_missing(flat_input)
data = ::FFI::MemoryPointer.new(:double, input.count * input.first.count)
data.write_array_of_double(flat_input)

out_len = ::FFI::MemoryPointer.new(:int64)
out_result = ::FFI::MemoryPointer.new(:double, num_class * input.count)
check_result FFI.LGBM_BoosterPredictForMat(handle_pointer, data, 1, input.count, input.first.count, 1, 0, start_iteration, num_iteration, params_str(params), out_len, out_result)
out_result = ::FFI::MemoryPointer.new(:double, n_preds)
check_result FFI.LGBM_BoosterPredictForMat(handle_pointer, data, 1, input.count, input.first.count, 1, predict_type, start_iteration, num_iteration, params_str(params), out_len, out_result)

if n_preds != out_len.read_int64
raise Error, "Wrong length for predict results"
end

out = out_result.read_array_of_double(out_len.read_int64)
out = out.each_slice(num_class).to_a if num_class > 1

if pred_leaf
out = out.map(&:to_i)
end

nrow = input.count
if out.size != nrow
if out.size % nrow == 0
out = out.each_slice(out.size / input.count).to_a
else
raise Error, "Length of predict result (#{out.size}) cannot be divide nrow (#{nrow})"
end
end

singular ? out.first : out
end
Expand Down Expand Up @@ -250,6 +285,12 @@ def num_class
out.read_int
end

# Asks LightGBM how many values a prediction call will produce.
#
# Calls LGBM_BoosterCalcNumPredict with the booster handle, the row count,
# the prediction type, and the iteration window, then reads the int64 the
# library wrote into the output pointer.
#
# @param start_iteration [Integer] first iteration passed to the C API
# @param num_iteration [Integer] number of iterations passed to the C API
# @param nrow [Integer] number of input rows
# @param predict_type [Integer] prediction type code passed to the C API
# @return [Integer] value read back from the int64 output pointer
def num_preds(start_iteration, num_iteration, nrow, predict_type)
  count_ptr = ::FFI::MemoryPointer.new(:int64)
  status = FFI.LGBM_BoosterCalcNumPredict(
    handle_pointer,
    nrow,
    predict_type,
    start_iteration,
    num_iteration,
    count_ptr
  )
  # check_result receives the C API status code
  check_result(status)
  count_ptr.read_int64
end

# Returns the values of input_hash ordered by cached_feature_name.
#
# Keys are converted to strings before lookup, so symbol and string keys
# are both accepted. Raises KeyError when a feature name has no entry.
#
# @param input_hash [Hash] feature name => value
# @return [Array] values in cached_feature_name order
def sorted_feature_values(input_hash)
  values_by_name = input_hash.transform_keys(&:to_s)
  cached_feature_name.map { |feature| values_by_name.fetch(feature) }
end
Expand Down
6 changes: 6 additions & 0 deletions lib/lightgbm/ffi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ module FFI
# https://github.com/microsoft/LightGBM/blob/master/include/LightGBM/c_api.h
# keep same order

# prediction types
# (passed as the predict_type argument of LGBM_BoosterCalcNumPredict
# and LGBM_BoosterPredictForMat)
C_API_PREDICT_NORMAL = 0
C_API_PREDICT_RAW_SCORE = 1
C_API_PREDICT_LEAF_INDEX = 2
C_API_PREDICT_CONTRIB = 3

# error
attach_function :LGBM_GetLastError, %i[], :string

Expand Down Expand Up @@ -48,6 +53,7 @@ module FFI
attach_function :LGBM_BoosterGetFeatureNames, %i[pointer int pointer size_t pointer pointer], :int
attach_function :LGBM_BoosterGetNumFeature, %i[pointer pointer], :int
attach_function :LGBM_BoosterGetEval, %i[pointer int pointer pointer], :int
attach_function :LGBM_BoosterCalcNumPredict, %i[pointer int int int int pointer], :int
attach_function :LGBM_BoosterPredictForMat, %i[pointer pointer int int32 int32 int int int int string pointer pointer], :int
attach_function :LGBM_BoosterSaveModel, %i[pointer int int int string], :int
attach_function :LGBM_BoosterSaveModelToString, %i[pointer int int int int64 pointer pointer], :int
Expand Down
34 changes: 34 additions & 0 deletions test/booster_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,40 @@ def test_predict_rover
end
end

# predict with raw_score: true should return the expected raw scores,
# both for a batch of rows and for a single row.
def test_predict_raw_score
  rows = [[3.7, 1.2, 7.2, 9.0], [7.5, 0.5, 7.9, 0.0]]
  raw_scores = [0.9823112229173586, 0.9583143724610858]

  # batch input: one score per row
  batch = booster.predict(rows, raw_score: true)
  assert_elements_in_delta(raw_scores, batch.first(2))

  # single-row input: a bare score rather than an array
  single = booster.predict(rows.first, raw_score: true)
  assert_in_delta(raw_scores.first, single)
end

# predict with pred_leaf: true should return the expected integer leaf
# indices, both for a batch of rows and for a single row.
def test_predict_pred_leaf
  rows = [[3.7, 1.2, 7.2, 9.0], [7.5, 0.5, 7.9, 0.0]]
  leaf_indices = [[9, 8, 8, 11, 8, 6, 10, 12, 1, 10, 9, 10, 12, 5, 11, 9, 6, 4, 5, 12, 9, 11, 9, 11, 2, 10, 2, 10, 3, 5, 10, 6, 1, 5, 10, 10, 9, 4, 5, 4, 6, 5, 6, 6, 4, 6, 4, 10, 10, 3, 4, 4, 6, 3, 9, 11, 5, 4, 3, 6, 7, 3, 6, 7, 5, 10, 10, 6, 4, 5, 5, 9, 6, 6, 2, 2, 4, 9, 4, 3, 9, 4, 6, 11, 5, 5, 0, 9, 12, 10, 12, 4, 0, 8, 4, 8, 11, 0, 3, 10], [6, 1, 9, 7, 9, 8, 1, 7, 5, 1, 1, 1, 9, 10, 1, 1, 10, 9, 1, 11, 8, 2, 10, 3, 5, 10, 6, 0, 2, 5, 0, 0, 0, 0, 0, 0, 8, 1, 0, 0, 0, 10, 0, 0, 2, 0, 0, 9, 2, 9, 3, 1, 2, 2, 7, 9, 10, 1, 4, 4, 9, 10, 0, 1, 3, 11, 2, 5, 1, 1, 7, 8, 5, 1, 10, 10, 5, 4, 1, 10, 2, 1, 4, 2, 2, 2, 2, 10, 2, 9, 2, 11, 2, 5, 1, 11, 2, 9, 7, 7]]

  # batch input: one array of leaf indices per row
  batch = booster.predict(rows, pred_leaf: true)
  assert_equal(leaf_indices, batch.first(2))

  # single-row input: a flat array of leaf indices
  single = booster.predict(rows.first, pred_leaf: true)
  assert_equal(leaf_indices.first, single)
end

# predict with pred_contrib: true should return the expected contribution
# values, both for a batch of rows and for a single row.
def test_predict_pred_contrib
  rows = [[3.7, 1.2, 7.2, 9.0], [7.5, 0.5, 7.9, 0.0]]
  contribs = [[-0.0733949225678886, -0.24289592050101766, 0.24183795683166504, 0.063430775771174, 0.9933333333834246], [0.1094902954684793, -0.2810485083947154, 0.26691627597706397, -0.13037702397316747, 0.9933333333834246]]

  # batch input: compare row by row
  batch = booster.predict(rows, pred_contrib: true)
  contribs.each_with_index do |row_contribs, index|
    assert_elements_in_delta(row_contribs, batch[index])
  end

  # single-row input: a flat array of contributions
  single = booster.predict(rows.first, pred_contrib: true)
  assert_elements_in_delta(contribs.first, single)
end

# model_to_string should return a truthy value.
def test_model_to_string
  serialized = booster.model_to_string
  assert(serialized)
end
Expand Down
3 changes: 3 additions & 0 deletions test/support/booster.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,8 @@
bst = lgb.Booster(model_file='test/support/model.txt')
print('x', X_train[:2].to_numpy().tolist())
print('predict', bst.predict(X_train)[:2].tolist())
print('raw_score', bst.predict(X_train, raw_score=True)[:2].tolist())
print('pred_leaf', bst.predict(X_train, pred_leaf=True)[:2].tolist())
print('pred_contrib', bst.predict(X_train, pred_contrib=True)[:2].tolist())
print('feature_importance', bst.feature_importance().tolist())
print('feature_name', bst.feature_name())

0 comments on commit f500d5f

Please sign in to comment.