From 442db7ef48ccd0618332d0b8cd1b4bdf3c228097 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 21 Jun 2019 16:14:08 +0100 Subject: [PATCH 01/51] Some basic changes to the plot of the trees to make them readable. --- python-package/lightgbm/plotting.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 8b118abe776a..f9afed389c87 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -394,9 +394,9 @@ def add(root, parent=None, decision=None): label += r'\n{0}: {1}'.format(info, _float2str(root[info], precision)) elif info == 'internal_count': label += r'\n{0}: {1}'.format(info, root[info]) - graph.node(name, label=label) + graph.node(name, label=label, shape="rectangle") if root['decision_type'] == '<=': - l_dec, r_dec = '<=', '>' + l_dec, r_dec = '≤', '>' elif root['decision_type'] == '==': l_dec, r_dec = 'is', "isn't" else: @@ -416,6 +416,7 @@ def add(root, parent=None, decision=None): graph.edge(parent, name, decision) graph = Digraph(**kwargs) + graph.attr("graph", nodesep="0.05", ranksep="0.1", rankdir="LR") add(tree_info['tree_structure']) return graph @@ -497,7 +498,7 @@ def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None, def plot_tree(booster, ax=None, tree_index=0, figsize=None, old_graph_attr=None, old_node_attr=None, old_edge_attr=None, - show_info=None, precision=None, **kwargs): + show_info=None, precision=3, **kwargs): """Plot specified tree. Note From 0ade18f841c6a46a7ff57beb0c501792c6ce9f42 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 24 Jun 2019 13:53:55 +0100 Subject: [PATCH 02/51] Squeezed the information in the nodes. 
--- python-package/lightgbm/plotting.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index f9afed389c87..a22b07ad304b 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -385,10 +385,10 @@ def add(root, parent=None, decision=None): if 'split_index' in root: # non-leaf name = 'split{0}'.format(root['split_index']) if feature_names is not None: - label = 'split_feature_name: {0}'.format(feature_names[root['split_feature']]) + label = '{0} = '.format(feature_names[root['split_feature']]) else: - label = 'split_feature_index: {0}'.format(root['split_feature']) - label += r'\nthreshold: {0}'.format(_float2str(root['threshold'], precision)) + label = '{0} = '.format(root['split_feature']) + label += r'{0}'.format(_float2str(root['threshold'], precision)) for info in show_info: if info in {'split_gain', 'internal_value', 'internal_weight'}: label += r'\n{0}: {1}'.format(info, _float2str(root[info], precision)) From 36551993f07422ef51b7535dabda4f6c69ea0b98 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 24 Jun 2019 14:04:55 +0100 Subject: [PATCH 03/51] Added colouring when a dictionary mapping the features to the constraints is passed. --- python-package/lightgbm/plotting.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index a22b07ad304b..e8454055256c 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -369,7 +369,7 @@ def plot_metric(booster, metric=None, dataset_names=None, return ax -def _to_graphviz(tree_info, show_info, feature_names, precision=None, **kwargs): +def _to_graphviz(tree_info, show_info, feature_names, precision=None, constraints={}, **kwargs): """Convert specified tree to graphviz instance. 
See: @@ -394,7 +394,18 @@ def add(root, parent=None, decision=None): label += r'\n{0}: {1}'.format(info, _float2str(root[info], precision)) elif info == 'internal_count': label += r'\n{0}: {1}'.format(info, root[info]) - graph.node(name, label=label, shape="rectangle") + + fillcolor="white" + style = "" + if feature_names is not None and feature_names[root['split_feature']] in constraints: + if constraints[feature_names[root['split_feature']]] == 1: + fillcolor = "#ddffdd" + style = "filled" + style + if constraints[feature_names[root['split_feature']]] == -1: + fillcolor = "#ffdddd" + style = "filled" + style + + graph.node(name, label=label, shape="rectangle", style=style, fillcolor=fillcolor) if root['decision_type'] == '<=': l_dec, r_dec = '≤', '>' elif root['decision_type'] == '==': @@ -422,7 +433,7 @@ def add(root, parent=None, decision=None): return graph -def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None, +def create_tree_digraph(booster, tree_index=0, constraints = {}, show_info=None, precision=None, old_name=None, old_comment=None, old_filename=None, old_directory=None, old_format=None, old_engine=None, old_encoding=None, old_graph_attr=None, old_node_attr=None, old_edge_attr=None, old_body=None, old_strict=False, **kwargs): @@ -491,7 +502,7 @@ def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None, if show_info is None: show_info = [] - graph = _to_graphviz(tree_info, show_info, feature_names, precision, **kwargs) + graph = _to_graphviz(tree_info, show_info, feature_names, precision, constraints, **kwargs) return graph From 1a8ec2ca705b27ea6d12519f6d410afc68fdf945 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 24 Jun 2019 14:12:48 +0100 Subject: [PATCH 04/51] Fix spaces. 
--- python-package/lightgbm/plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index e8454055256c..0dccf193e91e 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -395,7 +395,7 @@ def add(root, parent=None, decision=None): elif info == 'internal_count': label += r'\n{0}: {1}'.format(info, root[info]) - fillcolor="white" + fillcolor = "white" style = "" if feature_names is not None and feature_names[root['split_feature']] in constraints: if constraints[feature_names[root['split_feature']]] == 1: From 7bef020a00c8d7ba9dc1b1aeabd73bba6c3c13ef Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 24 Jun 2019 14:25:31 +0100 Subject: [PATCH 05/51] Added data percentage as an option in the nodes. --- python-package/lightgbm/plotting.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 0dccf193e91e..e8d7a5466a7a 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -380,7 +380,7 @@ def _to_graphviz(tree_info, show_info, feature_names, precision=None, constraint else: raise ImportError('You must install graphviz to plot tree.') - def add(root, parent=None, decision=None): + def add(root, total_count, parent=None, decision=None): """Recursively add node or edge.""" if 'split_index' in root: # non-leaf name = 'split{0}'.format(root['split_index']) @@ -394,6 +394,8 @@ def add(root, parent=None, decision=None): label += r'\n{0}: {1}'.format(info, _float2str(root[info], precision)) elif info == 'internal_count': label += r'\n{0}: {1}'.format(info, root[info]) + elif info == "data_percentage": + label += r'\n{0}% of data'.format(_float2str(root['internal_count'] / total_count * 100, 2)) fillcolor = "white" style = "" @@ -412,8 +414,8 @@ def add(root, parent=None, decision=None): l_dec, 
r_dec = 'is', "isn't" else: raise ValueError('Invalid decision type in tree model.') - add(root['left_child'], name, l_dec) - add(root['right_child'], name, r_dec) + add(root['left_child'], total_count, name, l_dec) + add(root['right_child'], total_count, name, r_dec) else: # leaf name = 'leaf{0}'.format(root['leaf_index']) label = 'leaf_index: {0}'.format(root['leaf_index']) @@ -428,7 +430,7 @@ def add(root, parent=None, decision=None): graph = Digraph(**kwargs) graph.attr("graph", nodesep="0.05", ranksep="0.1", rankdir="LR") - add(tree_info['tree_structure']) + add(tree_info['tree_structure'], tree_info['tree_structure']["internal_count"]) return graph From 6d0de1ab8680f802c5ee0c994cced14242231b19 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 24 Jun 2019 14:28:34 +0100 Subject: [PATCH 06/51] Squeezed the information in the leaves. --- python-package/lightgbm/plotting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index e8d7a5466a7a..3172c795fc51 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -418,8 +418,8 @@ def add(root, total_count, parent=None, decision=None): add(root['right_child'], total_count, name, r_dec) else: # leaf name = 'leaf{0}'.format(root['leaf_index']) - label = 'leaf_index: {0}'.format(root['leaf_index']) - label += r'\nleaf_value: {0}'.format(_float2str(root['leaf_value'], precision)) + label = 'leaf {0}: '.format(root['leaf_index']) + label += r'{0}'.format(_float2str(root['leaf_value'], precision)) if 'leaf_count' in show_info: label += r'\nleaf_count: {0}'.format(root['leaf_count']) if 'leaf_weight' in show_info: From 7e1f705bbf414c7af067e29d35ce74b70d96de63 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 24 Jun 2019 14:37:50 +0100 Subject: [PATCH 07/51] Important information is now in bold. 
--- python-package/lightgbm/plotting.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 3172c795fc51..a01109c36b7f 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -385,17 +385,17 @@ def add(root, total_count, parent=None, decision=None): if 'split_index' in root: # non-leaf name = 'split{0}'.format(root['split_index']) if feature_names is not None: - label = '{0} = '.format(feature_names[root['split_feature']]) + label = '{0} = '.format(feature_names[root['split_feature']]) else: - label = '{0} = '.format(root['split_feature']) + label = '{0} = '.format(root['split_feature']) label += r'{0}'.format(_float2str(root['threshold'], precision)) for info in show_info: if info in {'split_gain', 'internal_value', 'internal_weight'}: - label += r'\n{0}: {1}'.format(info, _float2str(root[info], precision)) + label += r'
{0}: {1}'.format(info, _float2str(root[info], precision)) elif info == 'internal_count': - label += r'\n{0}: {1}'.format(info, root[info]) + label += r'
{0}: {1}'.format(info, root[info]) elif info == "data_percentage": - label += r'\n{0}% of data'.format(_float2str(root['internal_count'] / total_count * 100, 2)) + label += r'
{0}% of data'.format(_float2str(root['internal_count'] / total_count * 100, 2)) fillcolor = "white" style = "" @@ -406,7 +406,7 @@ def add(root, total_count, parent=None, decision=None): if constraints[feature_names[root['split_feature']]] == -1: fillcolor = "#ffdddd" style = "filled" + style - + label = "<" + label + ">" graph.node(name, label=label, shape="rectangle", style=style, fillcolor=fillcolor) if root['decision_type'] == '<=': l_dec, r_dec = '≤', '>' @@ -419,11 +419,12 @@ def add(root, total_count, parent=None, decision=None): else: # leaf name = 'leaf{0}'.format(root['leaf_index']) label = 'leaf {0}: '.format(root['leaf_index']) - label += r'{0}'.format(_float2str(root['leaf_value'], precision)) + label += r'{0}'.format(_float2str(root['leaf_value'], precision)) if 'leaf_count' in show_info: - label += r'\nleaf_count: {0}'.format(root['leaf_count']) + label += r'
leaf_count: {0}'.format(root['leaf_count']) if 'leaf_weight' in show_info: - label += r'\nleaf_weight: {0}'.format(_float2str(root['leaf_weight'], precision)) + label += r'
leaf_weight: {0}'.format(_float2str(root['leaf_weight'], precision)) + label = "<" + label + ">" graph.node(name, label=label) if parent is not None: graph.edge(parent, name, decision) From 0e713b7a4ced6f87c8edc7c8a21fa9aa661af8e7 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 24 Jun 2019 15:26:05 +0100 Subject: [PATCH 08/51] Added a legend for the color of monotone splits. --- python-package/lightgbm/plotting.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index a01109c36b7f..1bd946bd3e1f 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -432,6 +432,23 @@ def add(root, total_count, parent=None, decision=None): graph = Digraph(**kwargs) graph.attr("graph", nodesep="0.05", ranksep="0.1", rankdir="LR") add(tree_info['tree_structure'], tree_info['tree_structure']["internal_count"]) + if constraints: + legend = """< + + + + + + + + + + + + +
Legend
Monotonic increasing
Monotonic decreasing
+ >""" + graph.node("legend", label=legend, shape="rectangle", color = "white") return graph From 49ca769aec79874869c5f0a5a9921cbdc63fda62 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 24 Jun 2019 15:26:30 +0100 Subject: [PATCH 09/51] Changed "split_gain" to "gain" and "internal_value" to "value". --- python-package/lightgbm/plotting.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 1bd946bd3e1f..240b4e5d24e3 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -391,7 +391,12 @@ def add(root, total_count, parent=None, decision=None): label += r'{0}'.format(_float2str(root['threshold'], precision)) for info in show_info: if info in {'split_gain', 'internal_value', 'internal_weight'}: - label += r'
{0}: {1}'.format(info, _float2str(root[info], precision)) + output = info + if info == "split_gain": + output = "gain" + elif info == "internal_value": + output = "value" + label += r'
{0}: {1}'.format(output, _float2str(root[info], precision)) elif info == 'internal_count': label += r'
{0}: {1}'.format(info, root[info]) elif info == "data_percentage": From 8cb62a9431fc3381d1638bba943021e1cfd10700 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 24 Jun 2019 15:28:07 +0100 Subject: [PATCH 10/51] Sqeezed leaves a bit more. --- python-package/lightgbm/plotting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 240b4e5d24e3..51b0e701f015 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -423,8 +423,8 @@ def add(root, total_count, parent=None, decision=None): add(root['right_child'], total_count, name, r_dec) else: # leaf name = 'leaf{0}'.format(root['leaf_index']) - label = 'leaf {0}: '.format(root['leaf_index']) - label += r'{0}'.format(_float2str(root['leaf_value'], precision)) + label = 'leaf {0}:'.format(root['leaf_index']) + label += r'
{0}'.format(_float2str(root['leaf_value'], precision)) if 'leaf_count' in show_info: label += r'
leaf_count: {0}'.format(root['leaf_count']) if 'leaf_weight' in show_info: From 45f4d0ed63ed19e3f400e319f835822ed51d1799 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 24 Jun 2019 15:28:33 +0100 Subject: [PATCH 11/51] Changed description in the legend. --- python-package/lightgbm/plotting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 51b0e701f015..989306846fab 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -444,11 +444,11 @@ def add(root, total_count, parent=None, decision=None): Legend - Monotonic increasing + Monotone increasing - Monotonic decreasing + Monotone decreasing From 41bec87e31726ef4c315fddb9fa53735be61c183 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 28 Jun 2019 11:43:05 +0100 Subject: [PATCH 12/51] Revert "Sqeezed leaves a bit more." This reverts commit dd8bf14a3ba604b0dfae3b7bb1c64b6784d15e03. --- python-package/lightgbm/plotting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 989306846fab..db02b8554170 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -423,8 +423,8 @@ def add(root, total_count, parent=None, decision=None): add(root['right_child'], total_count, name, r_dec) else: # leaf name = 'leaf{0}'.format(root['leaf_index']) - label = 'leaf {0}:'.format(root['leaf_index']) - label += r'
{0}'.format(_float2str(root['leaf_value'], precision)) + label = 'leaf {0}: '.format(root['leaf_index']) + label += r'{0}'.format(_float2str(root['leaf_value'], precision)) if 'leaf_count' in show_info: label += r'
leaf_count: {0}'.format(root['leaf_count']) if 'leaf_weight' in show_info: From 4f7ec679cb9ebadd4523c5aebd1b3490d55e2fd8 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 28 Jun 2019 12:04:18 +0100 Subject: [PATCH 13/51] Increased the readability for the gain. --- python-package/lightgbm/plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index db02b8554170..d048b77efe49 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -396,7 +396,7 @@ def add(root, total_count, parent=None, decision=None): output = "gain" elif info == "internal_value": output = "value" - label += r'
{0}: {1}'.format(output, _float2str(root[info], precision)) + label += r'
{1} {0}'.format(output, _float2str(root[info], precision)) elif info == 'internal_count': label += r'
{0}: {1}'.format(info, root[info]) elif info == "data_percentage": From a8ba5521ca296fe81cb3f62224c3320936571ce0 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 28 Jun 2019 12:04:42 +0100 Subject: [PATCH 14/51] Tidied up the legend. --- python-package/lightgbm/plotting.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index d048b77efe49..d71adad4eaac 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -439,22 +439,18 @@ def add(root, total_count, parent=None, decision=None): add(tree_info['tree_structure'], tree_info['tree_structure']["internal_count"]) if constraints: legend = """< - +
- - - - + - +
Legend
Monotone increasingIncreasing
Monotone decreasingDecreasing
>""" graph.node("legend", label=legend, shape="rectangle", color = "white") - return graph From 8968b30197c0d36b50243e1a429ab6790f0fea94 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 28 Jun 2019 12:23:11 +0100 Subject: [PATCH 15/51] Added the data percentage in the leaves. --- python-package/lightgbm/plotting.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index d71adad4eaac..9298f4ffe403 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -429,6 +429,8 @@ def add(root, total_count, parent=None, decision=None): label += r'
leaf_count: {0}'.format(root['leaf_count']) if 'leaf_weight' in show_info: label += r'
leaf_weight: {0}'.format(_float2str(root['leaf_weight'], precision)) + if "data_percentage" in show_info: + label += r'
{0}% of data'.format(_float2str(root['leaf_count'] / total_count * 100, 2)) label = "<" + label + ">" graph.node(name, label=label) if parent is not None: From 6cb343dc924411621e0424266b39b1c44d27d80c Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 28 Jun 2019 15:30:18 +0100 Subject: [PATCH 16/51] Added the monotone constraints to the dumped model. --- include/LightGBM/utils/common.h | 40 ++++++++++++++++++++++++++++++++ src/boosting/gbdt.cpp | 1 + src/boosting/gbdt.h | 1 + src/boosting/gbdt_model_text.cpp | 24 ++++++++++++++++--- 4 files changed, 63 insertions(+), 3 deletions(-) diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index 8ad82245411e..dde1a1cb8829 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -162,6 +162,31 @@ inline static const char* Atoi(const char* p, T* out) { return p; } +template +inline void SplitToIntLike(const char *c_str, char delimiter, + std::vector &ret) { + CHECK(ret.empty()); + std::string str(c_str); + size_t i = 0; + size_t pos = 0; + while (pos < str.length()) { + if (str[pos] == delimiter) { + if (i < pos) { + ret.push_back(T()); + Atoi(str.substr(i, pos - i).c_str(), &ret.back()); + } + ++pos; + i = pos; + } else { + ++pos; + } + } + if (i < pos) { + ret.push_back(T()); + Atoi(str.substr(i).c_str(), &ret.back()); + } +} + template inline static double Pow(T base, int power) { if (power < 0) { @@ -551,6 +576,21 @@ inline static std::string Join(const std::vector& strs, const char* delimiter return str_buf.str(); } +template<> +inline std::string Join(const std::vector& strs, const char* delimiter) { + if (strs.empty()) { + return std::string(""); + } + std::stringstream str_buf; + str_buf << std::setprecision(std::numeric_limits::digits10 + 2); + str_buf << static_cast(strs[0]); + for (size_t i = 1; i < strs.size(); ++i) { + str_buf << delimiter; + str_buf << static_cast(strs[i]); + } + return str_buf.str(); +} + template inline static std::string 
Join(const std::vector& strs, size_t start, size_t end, const char* delimiter) { if (end - start <= 0) { diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp index f114e48f6e4f..f56dad808881 100644 --- a/src/boosting/gbdt.cpp +++ b/src/boosting/gbdt.cpp @@ -103,6 +103,7 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective // get feature names feature_names_ = train_data_->feature_names(); feature_infos_ = train_data_->feature_infos(); + monotone_constraints_ = config->monotone_constraints; // if need bagging, create buffer ResetBaggingConfig(config_.get(), true); diff --git a/src/boosting/gbdt.h b/src/boosting/gbdt.h index e3716c319402..60af787445cf 100644 --- a/src/boosting/gbdt.h +++ b/src/boosting/gbdt.h @@ -504,6 +504,7 @@ class GBDT : public GBDTBase { bool need_re_bagging_; bool balanced_bagging_; std::string loaded_parameter_; + std::vector monotone_constraints_; Json forced_splits_json_; }; diff --git a/src/boosting/gbdt_model_text.cpp b/src/boosting/gbdt_model_text.cpp index 8a4b255c2881..0e87da3844c5 100644 --- a/src/boosting/gbdt_model_text.cpp +++ b/src/boosting/gbdt_model_text.cpp @@ -31,9 +31,11 @@ std::string GBDT::DumpModel(int start_iteration, int num_iteration) const { str_buf << "\"objective\":\"" << objective_function_->ToString() << "\",\n"; } - str_buf << "\"feature_names\":[\"" - << Common::Join(feature_names_, "\",\"") << "\"]," - << '\n'; + str_buf << "\"feature_names\":[\"" << Common::Join(feature_names_, "\",\"") + << "\"]," << '\n'; + + str_buf << "\"monotone_constraints\":[" + << Common::Join(monotone_constraints_, ",") << "]," << '\n'; str_buf << "\"tree_info\":["; int num_used_model = static_cast(models_.size()); @@ -269,6 +271,8 @@ std::string GBDT::SaveModelToString(int start_iteration, int num_iteration) cons ss << "feature_names=" << Common::Join(feature_names_, " ") << '\n'; + ss << "monotone_constraints=" << Common::Join(monotone_constraints_, " ") << '\n'; + ss << "feature_infos=" << 
Common::Join(feature_infos_, " ") << '\n'; int num_used_model = static_cast(models_.size()); @@ -364,6 +368,8 @@ bool GBDT::LoadModelFromString(const char* buffer, size_t len) { } else if (strs.size() > 2) { if (strs[0] == "feature_names") { key_vals[strs[0]] = cur_line.substr(std::strlen("feature_names=")); + } else if (strs[0] == "monotone_constraints") { + key_vals[strs[0]] = cur_line.substr(std::strlen("monotone_constraints=")); } else { // Use first 128 chars to avoid exceed the message buffer. Log::Fatal("Wrong line at model file: %s", cur_line.substr(0, std::min(128, cur_line.size())).c_str()); @@ -424,6 +430,18 @@ bool GBDT::LoadModelFromString(const char* buffer, size_t len) { return false; } + // get monotone_constraints + if (key_vals.count("monotone_constraints")) { + Common::SplitToIntLike(key_vals["monotone_constraints"].c_str(), ' ', monotone_constraints_); + if (monotone_constraints_.size() != static_cast(max_feature_idx_ + 1)) { + Log::Fatal("Wrong size of monotone_constraints"); + return false; + } + } else { + Log::Fatal("Model file doesn't contain monotone_constraints"); + return false; + } + if (key_vals.count("feature_infos")) { feature_infos_ = Common::Split(key_vals["feature_infos"].c_str(), ' '); if (feature_infos_.size() != static_cast(max_feature_idx_ + 1)) { From f30cacc8a60d8e78e330cf71122cb1d67d70ddf9 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 28 Jun 2019 15:53:38 +0100 Subject: [PATCH 17/51] Monotone constraints are now specified automatically when plotting trees. 
--- python-package/lightgbm/plotting.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 9298f4ffe403..2a7c3bfc7df5 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -369,7 +369,7 @@ def plot_metric(booster, metric=None, dataset_names=None, return ax -def _to_graphviz(tree_info, show_info, feature_names, precision=None, constraints={}, **kwargs): +def _to_graphviz(tree_info, show_info, feature_names, precision=None, constraints=None, **kwargs): """Convert specified tree to graphviz instance. See: @@ -404,11 +404,11 @@ def add(root, total_count, parent=None, decision=None): fillcolor = "white" style = "" - if feature_names is not None and feature_names[root['split_feature']] in constraints: - if constraints[feature_names[root['split_feature']]] == 1: + if constraints: + if constraints[root['split_feature']] == 1: fillcolor = "#ddffdd" style = "filled" + style - if constraints[feature_names[root['split_feature']]] == -1: + if constraints[root['split_feature']] == -1: fillcolor = "#ffdddd" style = "filled" + style label = "<" + label + ">" @@ -456,7 +456,7 @@ def add(root, total_count, parent=None, decision=None): return graph -def create_tree_digraph(booster, tree_index=0, constraints = {}, show_info=None, precision=None, +def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None, old_name=None, old_comment=None, old_filename=None, old_directory=None, old_format=None, old_engine=None, old_encoding=None, old_graph_attr=None, old_node_attr=None, old_edge_attr=None, old_body=None, old_strict=False, **kwargs): @@ -517,6 +517,11 @@ def create_tree_digraph(booster, tree_index=0, constraints = {}, show_info=None, else: feature_names = None + if 'monotone_constraints' in model: + monotone_constraints = model['monotone_constraints'] + else: + monotone_constraints = None + if tree_index < 
len(tree_infos): tree_info = tree_infos[tree_index] else: @@ -525,7 +530,7 @@ def create_tree_digraph(booster, tree_index=0, constraints = {}, show_info=None, if show_info is None: show_info = [] - graph = _to_graphviz(tree_info, show_info, feature_names, precision, constraints, **kwargs) + graph = _to_graphviz(tree_info, show_info, feature_names, precision, monotone_constraints, **kwargs) return graph From 0e8e51d9254b59ad99a63fc14842a7642aa33e7f Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 28 Jun 2019 16:06:16 +0100 Subject: [PATCH 18/51] Raise an exception instead of the bug that was here before. --- python-package/lightgbm/plotting.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 2a7c3bfc7df5..b6617b8562b2 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -438,7 +438,11 @@ def add(root, total_count, parent=None, decision=None): graph = Digraph(**kwargs) graph.attr("graph", nodesep="0.05", ranksep="0.1", rankdir="LR") - add(tree_info['tree_structure'], tree_info['tree_structure']["internal_count"]) + if "internal_count" in tree_info['tree_structure']: + add(tree_info['tree_structure'], tree_info['tree_structure']["internal_count"]) + else: + raise Exception("Cannnot plot trees with no split") + if constraints: legend = """< From 59f369459793443dbd344b5b3d9e6e2cbaaf5aad Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 28 Jun 2019 16:19:04 +0100 Subject: [PATCH 19/51] Removed operators on the branches for a clearer design. 
--- python-package/lightgbm/plotting.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index b6617b8562b2..47d27e922db4 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -383,11 +383,19 @@ def _to_graphviz(tree_info, show_info, feature_names, precision=None, constraint def add(root, total_count, parent=None, decision=None): """Recursively add node or edge.""" if 'split_index' in root: # non-leaf + if root['decision_type'] == '<=': + operator = "≤" + l_dec, r_dec = "", "" + elif root['decision_type'] == '==': + operator = "=" + l_dec, r_dec = 'is', "isn't" + else: + raise ValueError('Invalid decision type in tree model.') name = 'split{0}'.format(root['split_index']) if feature_names is not None: - label = '{0} = '.format(feature_names[root['split_feature']]) + label = '{0} {1} '.format(feature_names[root['split_feature']], operator) else: - label = '{0} = '.format(root['split_feature']) + label = '{0} {1}; '.format(root['split_feature'], operator) label += r'{0}'.format(_float2str(root['threshold'], precision)) for info in show_info: if info in {'split_gain', 'internal_value', 'internal_weight'}: @@ -413,12 +421,6 @@ def add(root, total_count, parent=None, decision=None): style = "filled" + style label = "<" + label + ">" graph.node(name, label=label, shape="rectangle", style=style, fillcolor=fillcolor) - if root['decision_type'] == '<=': - l_dec, r_dec = '≤', '>' - elif root['decision_type'] == '==': - l_dec, r_dec = 'is', "isn't" - else: - raise ValueError('Invalid decision type in tree model.') add(root['left_child'], total_count, name, l_dec) add(root['right_child'], total_count, name, r_dec) else: # leaf @@ -437,7 +439,7 @@ def add(root, total_count, parent=None, decision=None): graph.edge(parent, name, decision) graph = Digraph(**kwargs) - graph.attr("graph", nodesep="0.05", ranksep="0.1", rankdir="LR") 
+ graph.attr("graph", nodesep="0.05", ranksep="0.3", rankdir="LR") if "internal_count" in tree_info['tree_structure']: add(tree_info['tree_structure'], tree_info['tree_structure']["internal_count"]) else: From 77bcccc3568f161cb25934bdd148d3a4e409a2ea Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 28 Jun 2019 16:25:27 +0100 Subject: [PATCH 20/51] Small cleaning of the code. --- include/LightGBM/utils/common.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index dde1a1cb8829..584ef7712a86 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -172,7 +172,7 @@ inline void SplitToIntLike(const char *c_str, char delimiter, while (pos < str.length()) { if (str[pos] == delimiter) { if (i < pos) { - ret.push_back(T()); + ret.push_back({}); Atoi(str.substr(i, pos - i).c_str(), &ret.back()); } ++pos; @@ -182,7 +182,7 @@ inline void SplitToIntLike(const char *c_str, char delimiter, } } if (i < pos) { - ret.push_back(T()); + ret.push_back({}); Atoi(str.substr(i).c_str(), &ret.back()); } } From a8f4496ae389169e4e9329890af9f1be46d84708 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Tue, 2 Jul 2019 09:33:25 +0100 Subject: [PATCH 21/51] Setting a monotone constraint on a categorical feature now returns an exception instead of doing nothing. 
--- docs/Parameters.rst | 2 ++ include/LightGBM/config.h | 1 + src/io/dataset_loader.cpp | 2 ++ 3 files changed, 5 insertions(+) diff --git a/docs/Parameters.rst b/docs/Parameters.rst index afd45797b31b..929437430207 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -651,6 +651,8 @@ IO Parameters - **Note**: all negative values will be treated as **missing values** + - **Note**: the output cannot be monotonically constrained with respect to a categorical feature + - ``predict_raw_score`` :raw-html:`🔗︎`, default = ``false``, type = bool, aliases: ``is_predict_raw_score``, ``predict_rawscore``, ``raw_score`` - used only in ``prediction`` task diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index 671c68aa4b76..d317f4d852cb 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -603,6 +603,7 @@ struct Config { // desc = **Note**: all values should be less than ``Int32.MaxValue`` (2147483647) // desc = **Note**: using large values could be memory consuming. 
Tree decision rule works best when categorical features are presented by consecutive integers starting from zero // desc = **Note**: all negative values will be treated as **missing values** + // desc = **Note**: the output cannot be monotonically constrained with respect to a categorical feature std::string categorical_feature = ""; // alias = is_predict_raw_score, predict_rawscore, raw_score diff --git a/src/io/dataset_loader.cpp b/src/io/dataset_loader.cpp index 1130d803ea36..09f1e14d98d6 100644 --- a/src/io/dataset_loader.cpp +++ b/src/io/dataset_loader.cpp @@ -580,6 +580,8 @@ Dataset* DatasetLoader::CostructFromSampleData(double** sample_values, BinType bin_type = BinType::NumericalBin; if (categorical_features_.count(i)) { bin_type = BinType::CategoricalBin; + bool categorical_feature_is_not_monotonically_constrained = (config_.monotone_constraints[i] == 0); + CHECK(categorical_feature_is_not_monotonically_constrained); } bin_mappers[i].reset(new BinMapper()); if (config_.max_bin_by_feature.empty()) { From 79c8cb7615ae4ad84ce90e8fe4e401203a2ef619 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Tue, 2 Jul 2019 09:55:34 +0100 Subject: [PATCH 22/51] Fix bug when monotone constraints are empty. 
--- src/boosting/gbdt_model_text.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/boosting/gbdt_model_text.cpp b/src/boosting/gbdt_model_text.cpp index 0e87da3844c5..8df8a7bf91c3 100644 --- a/src/boosting/gbdt_model_text.cpp +++ b/src/boosting/gbdt_model_text.cpp @@ -271,7 +271,10 @@ std::string GBDT::SaveModelToString(int start_iteration, int num_iteration) cons ss << "feature_names=" << Common::Join(feature_names_, " ") << '\n'; - ss << "monotone_constraints=" << Common::Join(monotone_constraints_, " ") << '\n'; + if (monotone_constraints_.size() != 0) { + ss << "monotone_constraints=" << Common::Join(monotone_constraints_, " ") + << '\n'; + } ss << "feature_infos=" << Common::Join(feature_infos_, " ") << '\n'; @@ -437,9 +440,6 @@ bool GBDT::LoadModelFromString(const char* buffer, size_t len) { Log::Fatal("Wrong size of monotone_constraints"); return false; } - } else { - Log::Fatal("Model file doesn't contain monotone_constraints"); - return false; } if (key_vals.count("feature_infos")) { From aa7b96ba7ef0d6b5fb024efd5ccb2ecc57535b79 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Tue, 2 Jul 2019 09:56:22 +0100 Subject: [PATCH 23/51] Fix another bug when monotone constraints are empty. 
--- src/io/dataset_loader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/dataset_loader.cpp b/src/io/dataset_loader.cpp index 09f1e14d98d6..76e413d0fdc7 100644 --- a/src/io/dataset_loader.cpp +++ b/src/io/dataset_loader.cpp @@ -580,7 +580,7 @@ Dataset* DatasetLoader::CostructFromSampleData(double** sample_values, BinType bin_type = BinType::NumericalBin; if (categorical_features_.count(i)) { bin_type = BinType::CategoricalBin; - bool categorical_feature_is_not_monotonically_constrained = (config_.monotone_constraints[i] == 0); + bool categorical_feature_is_not_monotonically_constrained = ((config_.monotone_constraints.size() == 0) || (config_.monotone_constraints[i] == 0)); CHECK(categorical_feature_is_not_monotonically_constrained); } bin_mappers[i].reset(new BinMapper()); From cea089bfe989da49a679b29dcd247ba7359599d9 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Tue, 2 Jul 2019 10:17:01 +0100 Subject: [PATCH 24/51] Variable name change. --- src/io/dataset_loader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/io/dataset_loader.cpp b/src/io/dataset_loader.cpp index 76e413d0fdc7..0c59ba257c58 100644 --- a/src/io/dataset_loader.cpp +++ b/src/io/dataset_loader.cpp @@ -580,8 +580,8 @@ Dataset* DatasetLoader::CostructFromSampleData(double** sample_values, BinType bin_type = BinType::NumericalBin; if (categorical_features_.count(i)) { bin_type = BinType::CategoricalBin; - bool categorical_feature_is_not_monotonically_constrained = ((config_.monotone_constraints.size() == 0) || (config_.monotone_constraints[i] == 0)); - CHECK(categorical_feature_is_not_monotonically_constrained); + bool feat_is_unconstrained = ((config_.monotone_constraints.size() == 0) || (config_.monotone_constraints[i] == 0)); + CHECK(feat_is_unconstrained); } bin_mappers[i].reset(new BinMapper()); if (config_.max_bin_by_feature.empty()) { From 0bb1533cdec727da5c7643fc1befc1a4b525fa62 Mon Sep 17 00:00:00 2001 From: Charles Auguste 
Date: Tue, 2 Jul 2019 10:30:22 +0100 Subject: [PATCH 25/51] Added is / isn't on every edge of the trees. --- python-package/lightgbm/plotting.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 47d27e922db4..49ea0c039a2c 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -383,12 +383,11 @@ def _to_graphviz(tree_info, show_info, feature_names, precision=None, constraint def add(root, total_count, parent=None, decision=None): """Recursively add node or edge.""" if 'split_index' in root: # non-leaf + l_dec, r_dec = 'is', "isn't" if root['decision_type'] == '<=': operator = "≤" - l_dec, r_dec = "", "" elif root['decision_type'] == '==': operator = "=" - l_dec, r_dec = 'is', "isn't" else: raise ValueError('Invalid decision type in tree model.') name = 'split{0}'.format(root['split_index']) From 04b0cd78b943ff7b5c360e2d84bc59bf94d67d61 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 2 Aug 2019 09:25:19 +0100 Subject: [PATCH 26/51] Fix test "tree_create_digraph". 
--- tests/python_package_test/test_plotting.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/python_package_test/test_plotting.py b/tests/python_package_test/test_plotting.py index f6503eb94c97..f0bfdb8ca3fe 100644 --- a/tests/python_package_test/test_plotting.py +++ b/tests/python_package_test/test_plotting.py @@ -131,13 +131,11 @@ def test_create_tree_digraph(self): self.assertEqual(len(graph.graph_attr), 0) self.assertEqual(len(graph.edge_attr), 0) graph_body = ''.join(graph.body) - self.assertIn('threshold', graph_body) - self.assertIn('split_feature_name', graph_body) - self.assertNotIn('split_feature_index', graph_body) - self.assertIn('leaf_index', graph_body) - self.assertIn('split_gain', graph_body) - self.assertIn('internal_value', graph_body) + self.assertIn('leaf', graph_body) + self.assertIn('gain', graph_body) + self.assertIn('value', graph_body) self.assertIn('internal_weight', graph_body) + self.assertNotIn('leaf_data', graph_body) self.assertNotIn('internal_count', graph_body) self.assertNotIn('leaf_count', graph_body) self.assertNotIn('leaf_weight', graph_body) From 340a254ed8f9a1495cd4747f7188d5395bcbf8fc Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 2 Aug 2019 10:33:24 +0100 Subject: [PATCH 27/51] Add new test for plotting trees with monotone constraints. 
--- tests/python_package_test/test_plotting.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/python_package_test/test_plotting.py b/tests/python_package_test/test_plotting.py index f0bfdb8ca3fe..ebd24dda3d32 100644 --- a/tests/python_package_test/test_plotting.py +++ b/tests/python_package_test/test_plotting.py @@ -140,6 +140,18 @@ def test_create_tree_digraph(self): self.assertNotIn('leaf_count', graph_body) self.assertNotIn('leaf_weight', graph_body) + @unittest.skipIf(not GRAPHVIZ_INSTALLED, 'graphviz is not installed') + def test_create_tree_digraph_with_monotone_constraints(self): + constraints = [-1, 1] * int(self.X_train.shape[1] / 2) + gbm = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, silent=True, monotone_constraints=constraints) + gbm.fit(self.X_train, self.y_train, verbose=False) + graph = lgb.create_tree_digraph(gbm, tree_index=3) + graph.render(view=False) + graph_body = ''.join(graph.body) + + self.assertIn('#ffdddd', graph_body) + self.assertIn('#ddffdd', graph_body) + @unittest.skipIf(not MATPLOTLIB_INSTALLED, 'matplotlib is not installed') def test_plot_metrics(self): test_data = lgb.Dataset(self.X_test, self.y_test, reference=self.train_data) From 87e7a88b50050c20e34f7f3e3b194a8c677fcb4a Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 2 Aug 2019 11:04:27 +0100 Subject: [PATCH 28/51] Typo. --- python-package/lightgbm/plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 49ea0c039a2c..2f15d8335119 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -457,7 +457,7 @@ def add(root, total_count, parent=None, decision=None):
>""" - graph.node("legend", label=legend, shape="rectangle", color = "white") + graph.node("legend", label=legend, shape="rectangle", color="white") return graph From 51523c76c4ed2fcbb6f9e26cff00cec58e016212 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 2 Aug 2019 17:13:29 +0100 Subject: [PATCH 29/51] Update documentation of categorical features. --- python-package/lightgbm/basic.py | 1 + python-package/lightgbm/engine.py | 2 ++ python-package/lightgbm/sklearn.py | 1 + 3 files changed, 4 insertions(+) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 34d9cf534fa2..55e036e139b3 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -690,6 +690,7 @@ def __init__(self, data, label=None, reference=None, All values in categorical features should be less than int32 max value (2147483647). Large values could be memory consuming. Consider using consecutive integers starting from zero. All negative values in categorical features will be treated as missing values. + The output cannot be monotonically constrained with respect to a categorical feature. params : dict or None, optional (default=None) Other parameters for Dataset. free_raw_data : bool, optional (default=True) diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 24b5403eebac..fef82f1cbe7f 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -88,6 +88,7 @@ def train(params, train_set, num_boost_round=100, All values in categorical features should be less than int32 max value (2147483647). Large values could be memory consuming. Consider using consecutive integers starting from zero. All negative values in categorical features will be treated as missing values. + The output cannot be monotonically constrained with respect to a categorical feature. early_stopping_rounds : int or None, optional (default=None) Activates early stopping. 
The model will train until the validation score stops improving. Validation score needs to improve at least every ``early_stopping_rounds`` round(s) @@ -451,6 +452,7 @@ def cv(params, train_set, num_boost_round=100, All values in categorical features should be less than int32 max value (2147483647). Large values could be memory consuming. Consider using consecutive integers starting from zero. All negative values in categorical features will be treated as missing values. + The output cannot be monotonically constrained with respect to a categorical feature. early_stopping_rounds : int or None, optional (default=None) Activates early stopping. CV score needs to improve at least every ``early_stopping_rounds`` round(s) diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 4147811480c8..a60e661c19c8 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -437,6 +437,7 @@ def fit(self, X, y, All values in categorical features should be less than int32 max value (2147483647). Large values could be memory consuming. Consider using consecutive integers starting from zero. All negative values in categorical features will be treated as missing values. + The output cannot be monotonically constrained with respect to a categorical feature. callbacks : list of callback functions or None, optional (default=None) List of callback functions that are applied at each iteration. See Callbacks in Python API for more information. From 8ab0ae5699565835632990d034d08d01903e9df7 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 2 Aug 2019 17:15:47 +0100 Subject: [PATCH 30/51] Typo. 
--- python-package/lightgbm/plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 2f15d8335119..22e8421bd4fd 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -394,7 +394,7 @@ def add(root, total_count, parent=None, decision=None): if feature_names is not None: label = '{0} {1} '.format(feature_names[root['split_feature']], operator) else: - label = '{0} {1}; '.format(root['split_feature'], operator) + label = '{0} {1} '.format(root['split_feature'], operator) label += r'{0}'.format(_float2str(root['threshold'], precision)) for info in show_info: if info in {'split_gain', 'internal_value', 'internal_weight'}: From 178f59309f259fab037a47123ca2dd039e88ad38 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 2 Aug 2019 17:19:20 +0100 Subject: [PATCH 31/51] Information in nodes more explicit. --- python-package/lightgbm/plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 22e8421bd4fd..018360c03b47 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -394,7 +394,7 @@ def add(root, total_count, parent=None, decision=None): if feature_names is not None: label = '{0} {1} '.format(feature_names[root['split_feature']], operator) else: - label = '{0} {1} '.format(root['split_feature'], operator) + label = 'feature {0} {1} '.format(root['split_feature'], operator) label += r'{0}'.format(_float2str(root['threshold'], precision)) for info in show_info: if info in {'split_gain', 'internal_value', 'internal_weight'}: From b025d5da9fb35f69a0ded6fbd01786f87d908f7d Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 2 Aug 2019 17:27:08 +0100 Subject: [PATCH 32/51] Used regular strings instead of raw strings. 
--- python-package/lightgbm/plotting.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 018360c03b47..6617111b9214 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -395,7 +395,7 @@ def add(root, total_count, parent=None, decision=None): label = '{0} {1} '.format(feature_names[root['split_feature']], operator) else: label = 'feature {0} {1} '.format(root['split_feature'], operator) - label += r'{0}'.format(_float2str(root['threshold'], precision)) + label += '{0}'.format(_float2str(root['threshold'], precision)) for info in show_info: if info in {'split_gain', 'internal_value', 'internal_weight'}: output = info @@ -403,11 +403,11 @@ def add(root, total_count, parent=None, decision=None): output = "gain" elif info == "internal_value": output = "value" - label += r'
{1} {0}'.format(output, _float2str(root[info], precision)) + label += '
{1} {0}'.format(output, _float2str(root[info], precision)) elif info == 'internal_count': - label += r'
{0}: {1}'.format(info, root[info]) + label += '
{0}: {1}'.format(info, root[info]) elif info == "data_percentage": - label += r'
{0}% of data'.format(_float2str(root['internal_count'] / total_count * 100, 2)) + label += '
{0}% of data'.format(_float2str(root['internal_count'] / total_count * 100, 2)) fillcolor = "white" style = "" @@ -425,13 +425,13 @@ def add(root, total_count, parent=None, decision=None): else: # leaf name = 'leaf{0}'.format(root['leaf_index']) label = 'leaf {0}: '.format(root['leaf_index']) - label += r'{0}'.format(_float2str(root['leaf_value'], precision)) + label += '{0}'.format(_float2str(root['leaf_value'], precision)) if 'leaf_count' in show_info: - label += r'
leaf_count: {0}'.format(root['leaf_count']) + label += '
leaf_count: {0}'.format(root['leaf_count']) if 'leaf_weight' in show_info: - label += r'
leaf_weight: {0}'.format(_float2str(root['leaf_weight'], precision)) + label += '
leaf_weight: {0}'.format(_float2str(root['leaf_weight'], precision)) if "data_percentage" in show_info: - label += r'
{0}% of data'.format(_float2str(root['leaf_count'] / total_count * 100, 2)) + label += '
{0}% of data'.format(_float2str(root['leaf_count'] / total_count * 100, 2)) label = "<" + label + ">" graph.node(name, label=label) if parent is not None: From afdb147f224fa3df0e09974b5839e46abd9cc985 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 5 Aug 2019 09:13:43 +0100 Subject: [PATCH 33/51] Small refactoring. --- python-package/lightgbm/plotting.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 6617111b9214..7c2ea9b7b8f5 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -398,11 +398,7 @@ def add(root, total_count, parent=None, decision=None): label += '{0}'.format(_float2str(root['threshold'], precision)) for info in show_info: if info in {'split_gain', 'internal_value', 'internal_weight'}: - output = info - if info == "split_gain": - output = "gain" - elif info == "internal_value": - output = "value" + output = info.split('_')[-1] label += '
{1} {0}'.format(output, _float2str(root[info], precision)) elif info == 'internal_count': label += '
{0}: {1}'.format(info, root[info]) From cf80160cfca0994736d91998d68aca34c49c377a Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 5 Aug 2019 09:26:12 +0100 Subject: [PATCH 34/51] Some cleaning. --- python-package/lightgbm/plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 7c2ea9b7b8f5..5ccf116fef19 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -399,7 +399,7 @@ def add(root, total_count, parent=None, decision=None): for info in show_info: if info in {'split_gain', 'internal_value', 'internal_weight'}: output = info.split('_')[-1] - label += '
{1} {0}'.format(output, _float2str(root[info], precision)) + label += '
{0} {1}'.format(_float2str(root[info], precision), output) elif info == 'internal_count': label += '
{0}: {1}'.format(info, root[info]) elif info == "data_percentage": From a791bf071878325aa61a1ad46af0dd3934995a69 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 5 Aug 2019 09:33:53 +0100 Subject: [PATCH 35/51] Added future statement. --- python-package/lightgbm/plotting.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 5ccf116fef19..cb38a5d7333b 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -2,6 +2,7 @@ # pylint: disable = C0103 """Plotting library.""" from __future__ import absolute_import +from __future__ import division import warnings from copy import deepcopy From 3fde4fab9e282f75fe5f0468a9da153afce04906 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 5 Aug 2019 09:34:27 +0100 Subject: [PATCH 36/51] Changed output for consistency. --- python-package/lightgbm/plotting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index cb38a5d7333b..863426231f9b 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -398,11 +398,11 @@ def add(root, total_count, parent=None, decision=None): label = 'feature {0} {1} '.format(root['split_feature'], operator) label += '{0}'.format(_float2str(root['threshold'], precision)) for info in show_info: + output = info.split('_')[-1] if info in {'split_gain', 'internal_value', 'internal_weight'}: - output = info.split('_')[-1] label += '
{0} {1}'.format(_float2str(root[info], precision), output) elif info == 'internal_count': - label += '
{0}: {1}'.format(info, root[info]) + label += '
{0}: {1}'.format(output, root[info]) elif info == "data_percentage": label += '
{0}% of data'.format(_float2str(root['internal_count'] / total_count * 100, 2)) From 62834e0f4dfb636367b1991f7585a9922aff4a6d Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 5 Aug 2019 09:37:15 +0100 Subject: [PATCH 37/51] Updated documentation. --- python-package/lightgbm/plotting.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 863426231f9b..0b616034bb3a 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -478,7 +478,8 @@ def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None, show_info : list of strings or None, optional (default=None) What information should be shown in nodes. Possible values of list items: - 'split_gain', 'internal_value', 'internal_count', 'internal_weight', 'leaf_count', 'leaf_weight'. + 'split_gain', 'internal_value', 'internal_count', 'internal_weight', + 'leaf_count', 'leaf_weight', 'data_percentage'. precision : int or None, optional (default=None) Used to restrict the display of floating point values to a certain precision. **kwargs @@ -561,7 +562,8 @@ def plot_tree(booster, ax=None, tree_index=0, figsize=None, show_info : list of strings or None, optional (default=None) What information should be shown in nodes. Possible values of list items: - 'split_gain', 'internal_value', 'internal_count', 'internal_weight', 'leaf_count', 'leaf_weight'. + 'split_gain', 'internal_value', 'internal_count', 'internal_weight', + 'leaf_count', 'leaf_weight', 'data_percentage'. precision : int or None, optional (default=None) Used to restrict the display of floating point values to a certain precision. **kwargs From 2688e19062d27f880077b503c48b55776f5a375b Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 5 Aug 2019 09:40:29 +0100 Subject: [PATCH 38/51] Added comments for colors. 
--- python-package/lightgbm/plotting.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 0b616034bb3a..ad7b28e2556a 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -410,10 +410,10 @@ def add(root, total_count, parent=None, decision=None): style = "" if constraints: if constraints[root['split_feature']] == 1: - fillcolor = "#ddffdd" + fillcolor = "#ddffdd" # light green style = "filled" + style if constraints[root['split_feature']] == -1: - fillcolor = "#ffdddd" + fillcolor = "#ffdddd" # light red style = "filled" + style label = "<" + label + ">" graph.node(name, label=label, shape="rectangle", style=style, fillcolor=fillcolor) @@ -442,6 +442,7 @@ def add(root, total_count, parent=None, decision=None): raise Exception("Cannnot plot trees with no split") if constraints: + # "#ddffdd" is light green, "#ffdddd" is light red legend = """< From 8a2c683cdf416b8999873658abadc12fc7d37538 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 5 Aug 2019 09:41:03 +0100 Subject: [PATCH 39/51] Changed text on edges for more clarity. 
--- python-package/lightgbm/plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index ad7b28e2556a..78f8d4082ffc 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -384,7 +384,7 @@ def _to_graphviz(tree_info, show_info, feature_names, precision=None, constraint def add(root, total_count, parent=None, decision=None): """Recursively add node or edge.""" if 'split_index' in root: # non-leaf - l_dec, r_dec = 'is', "isn't" + l_dec, r_dec = 'yes', "no" if root['decision_type'] == '<=': operator = "≤" elif root['decision_type'] == '==': From 0e9920913dc563a88d83769e1c0ad14e581f4285 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 5 Aug 2019 09:46:51 +0100 Subject: [PATCH 40/51] Small refactoring. --- python-package/lightgbm/plotting.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 78f8d4082ffc..a454c4e4f9cf 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -411,10 +411,9 @@ def add(root, total_count, parent=None, decision=None): if constraints: if constraints[root['split_feature']] == 1: fillcolor = "#ddffdd" # light green - style = "filled" + style if constraints[root['split_feature']] == -1: fillcolor = "#ffdddd" # light red - style = "filled" + style + style = "filled" label = "<" + label + ">" graph.node(name, label=label, shape="rectangle", style=style, fillcolor=fillcolor) add(root['left_child'], total_count, name, l_dec) From e45924cbdb088ccb1e5bc971b34c6a71394bd468 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 5 Aug 2019 10:00:39 +0100 Subject: [PATCH 41/51] Modified text in leaves for consistency with nodes. 
--- python-package/lightgbm/plotting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index a454c4e4f9cf..422bab850058 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -423,9 +423,9 @@ def add(root, total_count, parent=None, decision=None): label = 'leaf {0}: '.format(root['leaf_index']) label += '{0}'.format(_float2str(root['leaf_value'], precision)) if 'leaf_count' in show_info: - label += '
leaf_count: {0}'.format(root['leaf_count']) + label += '
count: {0}'.format(root['leaf_count']) if 'leaf_weight' in show_info: - label += '
leaf_weight: {0}'.format(_float2str(root['leaf_weight'], precision)) + label += '
{0} weight'.format(_float2str(root['leaf_weight'], precision)) if "data_percentage" in show_info: label += '
{0}% of data'.format(_float2str(root['leaf_count'] / total_count * 100, 2)) label = "<" + label + ">" From 2e7b0f7dce3fa327cea8c00d6208e569531e65b4 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 5 Aug 2019 10:04:07 +0100 Subject: [PATCH 42/51] Updated default values and documentaton for consistency. --- python-package/lightgbm/plotting.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 422bab850058..a90823a7ff2e 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -370,7 +370,7 @@ def plot_metric(booster, metric=None, dataset_names=None, return ax -def _to_graphviz(tree_info, show_info, feature_names, precision=None, constraints=None, **kwargs): +def _to_graphviz(tree_info, show_info, feature_names, precision=3, constraints=None, **kwargs): """Convert specified tree to graphviz instance. See: @@ -458,7 +458,7 @@ def add(root, total_count, parent=None, decision=None): return graph -def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None, +def create_tree_digraph(booster, tree_index=0, show_info=None, precision=3, old_name=None, old_comment=None, old_filename=None, old_directory=None, old_format=None, old_engine=None, old_encoding=None, old_graph_attr=None, old_node_attr=None, old_edge_attr=None, old_body=None, old_strict=False, **kwargs): @@ -480,7 +480,7 @@ def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None, Possible values of list items: 'split_gain', 'internal_value', 'internal_count', 'internal_weight', 'leaf_count', 'leaf_weight', 'data_percentage'. - precision : int or None, optional (default=None) + precision : int or None, optional (default=3) Used to restrict the display of floating point values to a certain precision. **kwargs Other parameters passed to ``Digraph`` constructor. 
@@ -564,7 +564,7 @@ def plot_tree(booster, ax=None, tree_index=0, figsize=None, Possible values of list items: 'split_gain', 'internal_value', 'internal_count', 'internal_weight', 'leaf_count', 'leaf_weight', 'data_percentage'. - precision : int or None, optional (default=None) + precision : int or None, optional (default=3) Used to restrict the display of floating point values to a certain precision. **kwargs Other parameters passed to ``Digraph`` constructor. From 34382fb3670b42577b29e1a9c8457762dc74ac61 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 5 Aug 2019 10:19:53 +0100 Subject: [PATCH 43/51] Replaced CHECK with Log::Fatal for user-friendliness. --- src/io/dataset_loader.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/io/dataset_loader.cpp b/src/io/dataset_loader.cpp index 0c59ba257c58..ee47bece8fa5 100644 --- a/src/io/dataset_loader.cpp +++ b/src/io/dataset_loader.cpp @@ -581,7 +581,9 @@ Dataset* DatasetLoader::CostructFromSampleData(double** sample_values, if (categorical_features_.count(i)) { bin_type = BinType::CategoricalBin; bool feat_is_unconstrained = ((config_.monotone_constraints.size() == 0) || (config_.monotone_constraints[i] == 0)); - CHECK(feat_is_unconstrained); + if (!feat_is_unconstrained) { + Log::Fatal("The output cannot be monotone with respect to categorical features"); + } } bin_mappers[i].reset(new BinMapper()); if (config_.max_bin_by_feature.empty()) { From 162db7d372b8150ec44901fe46fce8668a2bb4a4 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 5 Aug 2019 11:32:40 +0100 Subject: [PATCH 44/51] Updated tests. 
--- tests/python_package_test/test_plotting.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/tests/python_package_test/test_plotting.py b/tests/python_package_test/test_plotting.py index ebd24dda3d32..574bdf39a760 100644 --- a/tests/python_package_test/test_plotting.py +++ b/tests/python_package_test/test_plotting.py @@ -114,7 +114,8 @@ def test_plot_tree(self): @unittest.skipIf(not GRAPHVIZ_INSTALLED, 'graphviz is not installed') def test_create_tree_digraph(self): - gbm = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, silent=True) + constraints = [-1, 1] * int(self.X_train.shape[1] / 2) + gbm = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, silent=True, monotone_constraints=constraints) gbm.fit(self.X_train, self.y_train, verbose=False) self.assertRaises(IndexError, lgb.create_tree_digraph, gbm, tree_index=83) @@ -134,23 +135,11 @@ def test_create_tree_digraph(self): self.assertIn('leaf', graph_body) self.assertIn('gain', graph_body) self.assertIn('value', graph_body) - self.assertIn('internal_weight', graph_body) - self.assertNotIn('leaf_data', graph_body) - self.assertNotIn('internal_count', graph_body) - self.assertNotIn('leaf_count', graph_body) - self.assertNotIn('leaf_weight', graph_body) - - @unittest.skipIf(not GRAPHVIZ_INSTALLED, 'graphviz is not installed') - def test_create_tree_digraph_with_monotone_constraints(self): - constraints = [-1, 1] * int(self.X_train.shape[1] / 2) - gbm = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, silent=True, monotone_constraints=constraints) - gbm.fit(self.X_train, self.y_train, verbose=False) - graph = lgb.create_tree_digraph(gbm, tree_index=3) - graph.render(view=False) - graph_body = ''.join(graph.body) - + self.assertIn('weight', graph_body) self.assertIn('#ffdddd', graph_body) self.assertIn('#ddffdd', graph_body) + self.assertNotIn('data', graph_body) + self.assertNotIn('count', graph_body) @unittest.skipIf(not MATPLOTLIB_INSTALLED, 'matplotlib is not installed') 
def test_plot_metrics(self): From 4ca2c36b0492442cde59ebc791bc3f90d48c1e2d Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Mon, 5 Aug 2019 12:17:18 +0100 Subject: [PATCH 45/51] Typo. --- python-package/lightgbm/plotting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index a90823a7ff2e..7d41da92c12e 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -410,9 +410,9 @@ def add(root, total_count, parent=None, decision=None): style = "" if constraints: if constraints[root['split_feature']] == 1: - fillcolor = "#ddffdd" # light green + fillcolor = "#ddffdd" # light green if constraints[root['split_feature']] == -1: - fillcolor = "#ffdddd" # light red + fillcolor = "#ffdddd" # light red style = "filled" label = "<" + label + ">" graph.node(name, label=label, shape="rectangle", style=style, fillcolor=fillcolor) From 8978b748153b7d4c2f391c9d8918f0f44ddd2b5a Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Wed, 7 Aug 2019 09:17:08 +0100 Subject: [PATCH 46/51] Simplify imports. --- python-package/lightgbm/plotting.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 7d41da92c12e..afd6b43023fe 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -1,8 +1,7 @@ # coding: utf-8 # pylint: disable = C0103 """Plotting library.""" -from __future__ import absolute_import -from __future__ import division +from __future__ import absolute_import, division import warnings from copy import deepcopy From a44930bff79733b0a09b6bfdcab04bbdec35e015 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Thu, 8 Aug 2019 09:32:27 +0100 Subject: [PATCH 47/51] Swapped count and weight to improve readability of the leaves in the plotted trees.
--- python-package/lightgbm/plotting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index afd6b43023fe..3bbbe54c0e8a 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -421,10 +421,10 @@ def add(root, total_count, parent=None, decision=None): name = 'leaf{0}'.format(root['leaf_index']) label = 'leaf {0}: '.format(root['leaf_index']) label += '{0}'.format(_float2str(root['leaf_value'], precision)) - if 'leaf_count' in show_info: - label += '
count: {0}'.format(root['leaf_count']) if 'leaf_weight' in show_info: label += '
{0} weight'.format(_float2str(root['leaf_weight'], precision)) + if 'leaf_count' in show_info: + label += '
count: {0}'.format(root['leaf_count']) if "data_percentage" in show_info: label += '
{0}% of data'.format(_float2str(root['leaf_count'] / total_count * 100, 2)) label = "<" + label + ">" From b28b6238c9412978d25bf2a521dc639c7591f4cf Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Thu, 8 Aug 2019 10:05:51 +0100 Subject: [PATCH 48/51] Thresholds in bold. --- python-package/lightgbm/plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 3bbbe54c0e8a..25cd8e7fe2fd 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -395,7 +395,7 @@ def add(root, total_count, parent=None, decision=None): label = '{0} {1} '.format(feature_names[root['split_feature']], operator) else: label = 'feature {0} {1} '.format(root['split_feature'], operator) - label += '{0}'.format(_float2str(root['threshold'], precision)) + label += '{0}'.format(_float2str(root['threshold'], precision)) for info in show_info: output = info.split('_')[-1] if info in {'split_gain', 'internal_value', 'internal_weight'}: From 4df0a648f7fae1a33c07677b332ba2deab74cf89 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Thu, 8 Aug 2019 10:20:43 +0100 Subject: [PATCH 49/51] Made information in nodes written in a specific order. --- python-package/lightgbm/plotting.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 25cd8e7fe2fd..2debcb7a6c07 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -396,14 +396,15 @@ def add(root, total_count, parent=None, decision=None): else: label = 'feature {0} {1} '.format(root['split_feature'], operator) label += '{0}'.format(_float2str(root['threshold'], precision)) - for info in show_info: - output = info.split('_')[-1] - if info in {'split_gain', 'internal_value', 'internal_weight'}: - label += '
{0} {1}'.format(_float2str(root[info], precision), output) - elif info == 'internal_count': - label += '
{0}: {1}'.format(output, root[info]) - elif info == "data_percentage": - label += '
{0}% of data'.format(_float2str(root['internal_count'] / total_count * 100, 2)) + for info in ['split_gain', 'internal_value', 'internal_weight', "internal_count", "data_percentage"]: + if info in show_info: + output = info.split('_')[-1] + if info in {'split_gain', 'internal_value', 'internal_weight'}: + label += '
{0} {1}'.format(_float2str(root[info], precision), output) + elif info == 'internal_count': + label += '
{0}: {1}'.format(output, root[info]) + elif info == "data_percentage": + label += '
{0}% of data'.format(_float2str(root['internal_count'] / total_count * 100, 2)) fillcolor = "white" style = "" From 82c6ef0e0cafa86cdf30a30e867678f7ab3cba07 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Fri, 9 Aug 2019 09:20:56 +0100 Subject: [PATCH 50/51] Added information to clarify legend. --- python-package/lightgbm/plotting.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index 2debcb7a6c07..ebaf31ab9fe0 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -444,6 +444,9 @@ def add(root, total_count, parent=None, decision=None): # "#ddffdd" is light green, "#ffdddd" is light red legend = """<
+ + + From 388543b405a8c4d5e963a863ec9123ce83fc94f6 Mon Sep 17 00:00:00 2001 From: Charles Auguste Date: Thu, 5 Sep 2019 13:00:13 +0100 Subject: [PATCH 51/51] Code cleaning. --- python-package/lightgbm/plotting.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index ebaf31ab9fe0..f58c63b3c08e 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -383,9 +383,11 @@ def _to_graphviz(tree_info, show_info, feature_names, precision=3, constraints=N def add(root, total_count, parent=None, decision=None): """Recursively add node or edge.""" if 'split_index' in root: # non-leaf - l_dec, r_dec = 'yes', "no" + l_dec = 'yes' + r_dec = 'no' if root['decision_type'] == '<=': - operator = "≤" + lte_symbol = "≤" + operator = lte_symbol elif root['decision_type'] == '==': operator = "=" else: @@ -523,10 +525,7 @@ def create_tree_digraph(booster, tree_index=0, show_info=None, precision=3, else: feature_names = None - if 'monotone_constraints' in model: - monotone_constraints = model['monotone_constraints'] - else: - monotone_constraints = None + monotone_constraints = model.get('monotone_constraints', None) if tree_index < len(tree_infos): tree_info = tree_infos[tree_index]
Monotone constraints
Increasing