diff --git a/lib/exgboost/plotting.ex b/lib/exgboost/plotting.ex index 6caec2f..a86c4ac 100644 --- a/lib/exgboost/plotting.ex +++ b/lib/exgboost/plotting.ex @@ -413,13 +413,11 @@ defmodule EXGBoost.Plotting do align: :center, baseline: :middle, font_size: 13, - font: "Calibri", - fill: :black + font: "Calibri" ] @default_leaf_rect [ corner_radius: 2, - fill: :teal, opacity: 1 ] @@ -427,47 +425,38 @@ defmodule EXGBoost.Plotting do align: :center, baseline: :middle, font_size: 13, - font: "Calibri", - fill: :black + font: "Calibri" ] @default_split_rect [ corner_radius: 2, - fill: :teal, opacity: 1 ] @default_split_children [ align: :right, baseline: :middle, - fill: :black, font: "Calibri", font_size: 13 ] - @default_yes_path [ - stroke: :red - ] + @default_yes_path [] @default_yes_text [ align: :center, baseline: :middle, font_size: 13, font: "Calibri", - fill: :black, text: "yes" ] - @default_no_path [ - stroke: :black - ] + @default_no_path [] @default_no_text [ align: :center, baseline: :middle, font_size: 13, font: "Calibri", - fill: :black, text: "no" ] @@ -475,7 +464,7 @@ defmodule EXGBoost.Plotting do style: [ doc: "The style to use for the visualization. 
Refer to `EXGBoost.Plotting.Styles` for a list of available styles.", - default: Application.compile_env(:exgboost, [:plotting, :style], :solarized_light), + default: Application.compile_env(:exgboost, [:plotting, :style], :dracula), type: {:or, [{:in, Keyword.keys(@styles)}, {:in, [nil, false]}, :keyword_list]} ], rankdir: [ @@ -948,12 +937,13 @@ defmodule EXGBoost.Plotting do opts Keyword.keyword?(opts[:style]) -> - deep_merge_kw(opts, opts[:style]) + deep_merge_kw(opts[:style], opts, @defaults) true -> style = apply(__MODULE__, opts[:style], []) - deep_merge_kw(opts, style) + deep_merge_kw(style, opts, @defaults) end + |> then(&deep_merge_kw(@defaults, &1)) %{ "$schema" => "https://vega.github.io/schema/vega/v5.json", @@ -1278,14 +1268,13 @@ defmodule EXGBoost.Plotting do opts Keyword.keyword?(opts[:style]) -> - deep_merge_kw(opts, opts[:style]) + deep_merge_kw(opts[:style], opts, @defaults) true -> style = apply(__MODULE__, opts[:style], []) - deep_merge_kw(opts, style) + deep_merge_kw(style, opts, @defaults) end - - defaults = get_defaults() + |> then(&deep_merge_kw(@defaults, &1)) # Try to account for non-default node height / width to adjust spacing # between nodes and levels as a quality of life improvement @@ -1293,17 +1282,17 @@ defmodule EXGBoost.Plotting do opts = cond do opts[:rankdir] in [:lr, :rl] and - opts[:space_between][:levels] == defaults[:space_between][:levels] -> + opts[:space_between][:levels] == @defaults[:space_between][:levels] -> put_in( opts[:space_between][:levels], - defaults[:space_between][:levels] + (opts[:node_width] - defaults[:node_width]) + @defaults[:space_between][:levels] + (opts[:node_width] - @defaults[:node_width]) ) opts[:rankdir] in [:tb, :bt] and - opts[:space_between][:levels] == defaults[:space_between][:levels] -> + opts[:space_between][:levels] == @defaults[:space_between][:levels] -> put_in( opts[:space_between][:levels], - defaults[:space_between][:levels] + (opts[:node_height] - defaults[:node_height]) + 
@defaults[:space_between][:levels] + (opts[:node_height] - @defaults[:node_height]) ) true -> @@ -1313,17 +1302,17 @@ defmodule EXGBoost.Plotting do opts = cond do opts[:rankdir] in [:lr, :rl] and - opts[:space_between][:nodes] == defaults[:space_between][:nodes] -> + opts[:space_between][:nodes] == @defaults[:space_between][:nodes] -> put_in( opts[:space_between][:nodes], - defaults[:space_between][:nodes] + (opts[:node_height] - defaults[:node_height]) + @defaults[:space_between][:nodes] + (opts[:node_height] - @defaults[:node_height]) ) opts[:rankdir] in [:tb, :bt] and - opts[:space_between][:nodes] == defaults[:space_between][:nodes] -> + opts[:space_between][:nodes] == @defaults[:space_between][:nodes] -> put_in( opts[:space_between][:nodes], - defaults[:space_between][:nodes] + (opts[:node_width] - defaults[:node_width]) + @defaults[:space_between][:nodes] + (opts[:node_width] - @defaults[:node_width]) ) true -> @@ -1381,15 +1370,19 @@ defmodule EXGBoost.Plotting do %{ "encode" => %{ "update" => - Map.merge( + deep_merge_maps( %{ "x" => %{ "signal" => - "(scale('xscale', datum.source.x+(nodeWidth/3)) + scale('xscale', datum.target.x)) / 2" + "(scale('xscale', datum.source.x#{cond do + opts[:rankdir] in [:tb, :bt, :lr] -> ~c"-nodeWidth/4" + opts[:rankdir] in [:rl] -> ~c"+nodeWidth/4" + true -> ~c"" + end}) + scale('xscale', datum.target.x)) / 2" }, "y" => %{ "signal" => - "(scale('yscale', datum.source.y) + scale('yscale', datum.target.y)) / 2 - (scaledNodeHeight/2)" + "(scale('yscale', datum.source.y#{if opts[:rankdir] in [:lr, :rl], do: ~c"-nodeWidth/3", else: ~c""}) + scale('yscale', datum.target.y)) / 2 - (scaledNodeHeight/2)" } }, format_mark(opts[:yes][:text]) @@ -1405,7 +1398,11 @@ defmodule EXGBoost.Plotting do %{ "x" => %{ "signal" => - "(scale('xscale', datum.source.x+(nodeWidth/3)) + scale('xscale', datum.target.x)) / 2" + "(scale('xscale', datum.source.x#{cond do + opts[:rankdir] in [:tb, :bt, :lr] -> ~c"-nodeWidth/4" + opts[:rankdir] in [:rl] -> 
~c"+nodeWidth/4" + true -> ~c"" + end}) + scale('xscale', datum.target.x)) / 2" }, "y" => %{ "signal" => diff --git a/lib/exgboost/plotting/style.ex b/lib/exgboost/plotting/style.ex index 8019945..d181099 100644 --- a/lib/exgboost/plotting/style.ex +++ b/lib/exgboost/plotting/style.ex @@ -7,13 +7,31 @@ defmodule EXGBoost.Plotting.Style do end end - def deep_merge_kw(a, b) do + def deep_merge_kw(a, b, ignore_set \\ []) do Keyword.merge(a, b, fn _key, val_a, val_b when is_list(val_a) and is_list(val_b) -> deep_merge_kw(val_a, val_b) key, val_a, val_b -> if Keyword.has_key?(b, key) do + if Keyword.has_key?(ignore_set, key) and Keyword.get(ignore_set, key) == val_b do + val_a + else + val_b + end + else + val_a + end + end) + end + + def deep_merge_maps(b, a) do + Map.merge(a, b, fn + _key, val_a, val_b when is_map(val_a) and is_map(val_b) -> + deep_merge_maps(val_a, val_b) + + key, val_a, val_b -> + if Map.has_key?(b, key) do val_b else val_a diff --git a/notebooks/plotting.livemd b/notebooks/plotting.livemd index 9bb4b2e..22a18aa 100644 --- a/notebooks/plotting.livemd +++ b/notebooks/plotting.livemd @@ -113,13 +113,199 @@ booster = `EXGBoost.plot_tree/2` is the quickest way to customize the output of the plot. +This API uses [Vega `Mark`s](https://vega.github.io/vega/docs/marks/) to describe the plot. Each of the following `Mark` options accepts any of the valid keys from their respective `Mark` type as described in the Vega documentation. + +**Please note that these are passed as a `Keyword`, and as such the keys must be atoms rather than strings as the Vega docs show. 
Valid options for this API are `snake_cased` atoms as opposed to the `camelCased` strings the Vega docs describe, so if you wish to pass `"fontSize"` as the Vega docs show, you would instead pass it as `font_size:` in this API.** + +The plot is composed of the following parts: + +* Top-level keys: Options controlling parts of the plot outside of direct control of a `Mark`, such as `:padding`, `:autosize`, etc. Accepts any Vega [top-level key](https://vega.github.io/vega/docs/specification/) in addition to several specific to this API (such as `:style` and `:depth`). +* `:leaves`: `Mark` specifying the leaf nodes of the tree + * `:text`: [Text Mark](https://vega.github.io/vega/docs/marks/text/) + * `:rect`: [Rect Mark](https://vega.github.io/vega/docs/marks/rect/) +* `:splits`: `Mark` specifying the split (or inner / decision) nodes of the tree + * `:text`: [Text Mark](https://vega.github.io/vega/docs/marks/text/) + * `:rect`: [Rect Mark](https://vega.github.io/vega/docs/marks/rect/) + * `:children`: [Text Mark](https://vega.github.io/vega/docs/marks/text/) for the child count +* `:yes` + * `:path`: [Path Mark](https://vega.github.io/vega/docs/marks/path/) + * `:text`: [Text Mark](https://vega.github.io/vega/docs/marks/text/) +* `:no` + * `:path`: [Path Mark](https://vega.github.io/vega/docs/marks/path/) + * `:text`: [Text Mark](https://vega.github.io/vega/docs/marks/text/) + +`EXGBoost.plot_tree/2` defaults to outputting a `VegaLite` struct. If you pass the `:path` option it will save to a file instead. + +If you want to add any marks to the underlying plot you will have to use the lower-level `EXGBoost.Plotting` API, as the top-level API is only capable of customizing these marks. + +### Top-Level Keys + + + +`EXGBoost` supports changing the direction of the plots through the `:rankdir` option. Available directions are `[:tb, :bt, :lr, :rl]`, with top-to-bottom (`:tb`) being the default. 
+ +```elixir +EXGBoost.plot_tree(booster, rankdir: :bt) +``` + +By default, plotting only shows one (the first) tree, but seeing as a `Booster` is really an ensemble of trees you can choose which tree to plot through the `:index` option, or set to `nil` to have a dropdown box to select the tree. + +```elixir +EXGBoost.plot_tree(booster, rankdir: :lr, index: 4) +``` + +You'll also notice that the plot is interactive, with support for scrolling, zooming, and collapsing sections of the tree. If you click on a split node you will toggle the visibility of its descendants, and the rest of the tree will fill the canvas. + +You can also use the `:depth` option to programmatically set the max depth to display in the tree: + +```elixir +EXGBoost.plot_tree(booster, rankdir: :lr, index: 4, depth: 3) +``` + +One way to affect the canvas size is by controlling the padding. + +You can add padding to all sides by specifying an integer for the `:padding` option + +```elixir +EXGBoost.plot_tree(booster, rankdir: :rl, index: 4, depth: 3, padding: 50) +``` + +Or specify padding for each side: + +```elixir +EXGBoost.plot_tree(booster, + rankdir: :lr, + index: 4, + depth: 3, + padding: [top: 5, bottom: 25, left: 50, right: 10] +) +``` + +You can also specify the canvas size using the `:width` and `:height` options: + +```elixir +EXGBoost.plot_tree(booster, + rankdir: :lr, + index: 4, + depth: 3, + width: 500, + height: 500 +) +``` + +But do note that changing the padding of a canvas does change the size, even if you specify the size using `:height` and `:width` + +```elixir +EXGBoost.plot_tree(booster, + rankdir: :lr, + index: 4, + depth: 3, + width: 500, + height: 500, + padding: 10 +) +``` + +You can change the dimensions of all nodes through the `:node_height` and `:node_width` options: + +```elixir +EXGBoost.plot_tree(booster, rankdir: :lr, index: 4, depth: 3, node_width: 60, node_height: 60) +``` + +Or change the space between nodes using the `:space_between` option. 
+ +**Note that the size of the accompanying nodes and/or text will change to accommodate the new `:space_between` option while trying to maintain the canvas size.** + +```elixir +EXGBoost.plot_tree( + booster, + rankdir: :lr, + index: 4, + depth: 3, + space_between: [nodes: 200] +) +``` + +So if you want to add the space between while not changing the size of the nodes you might need to manually adjust the canvas size: + +```elixir +EXGBoost.plot_tree( + booster, + rankdir: :lr, + index: 4, + depth: 3, + space_between: [nodes: 200], + height: 800 +) +``` + +```elixir +EXGBoost.plot_tree( + booster, + rankdir: :lr, + index: 4, + depth: 3, + space_between: [levels: 200] +) +``` + +### Mark Options + +The options controlling the appearance of individual marks all conform to a similar API. You can refer to the options and pre-defined defaults for a subset of the allowed options, but you can also pass other options so long as they are allowed by the Vega Mark spec (as defined [here](#cell-y5oxrrri4daa6xt5)) + +```elixir +EXGBoost.plot_tree( + booster, + rankdir: :bt, + index: 4, + depth: 3, + space_between: [levels: 200], + yes: [ + text: [font_size: 18, fill: :teal] + ], + no: [ + text: [font_size: 20] + ], + node_width: 100 +) +``` + +Most marks accept an `:opacity` option that you can use to effectively hide the mark: + +```elixir +EXGBoost.plot_tree( + booster, + rankdir: :lr, + index: 4, + depth: 3, + splits: [ + text: [opacity: 0], + rect: [opacity: 0], + children: [opacity: 1] + ] +) +``` + +And `text` marks accept normal text options such as `:fill`, `:font_size`, and `:font`: + +```elixir +EXGBoost.plot_tree( + booster, + node_width: 250, + splits: [ + text: [font: "Helvetica Neue", font_size: 20, fill: "orange"] + ], + space_between: [levels: 20] +) +``` + ### Styles There are a set of provided pre-configured settings for the top-level API that you may optionally use. 
You can refer to the `EXGBoost.Plottings.Styles` docs to see a gallery of each style in action. You can specify a style with the `:style` option in `EXGBoost.plot_tree/2`. -You can still specify custom settings along with using a style. Most styles only specify a subset of the total possible settings, so you are free to specify any other allowed keys and they will be merged with the style. However, any options set by the style **do** take precedence over options. +You can still specify custom settings along with using a style. Most styles only specify a subset of the total possible settings, but you are free to specify any other allowed keys and they will be merged with the style. Any options passed explicitly **do** take precedence over the style options. For example, let's look at the `:solarized_dark` style: @@ -134,28 +320,21 @@ You can see that it defines a background color of `#002b36` but does not restric EXGBoost.plot_tree(booster, style: :solarized_dark, background: "white", height: 200) ``` -We specified both `:background` and `:height` here, but only `:height` was changed because it was not specified in the style. +We specified both `:background` and `:height` here, and the background specified in the option supersedes the one from the style. -If you want to leverage a style but have the flexibility to change something it defines, you can always get the style specification as a `Keyword` which can be passed to `EXGBoost.plot_tree/2` manually, making any needed changes yourself, like so: +You can also always get the style specification as a `Keyword` which can be passed to `EXGBoost.plot_tree/2` manually, making any needed changes yourself, like so: ```elixir custom_style = EXGBoost.Plotting.solarized_dark() |> Keyword.put(:background, "white") EXGBoost.plot_tree(booster, style: custom_style) ``` -The benefits of using a style is you still get to leverage all of the defaults provided by the API. 
Look at the difference between changing the background as we just did by specifying the style versus using the style as the new `opts` argument: +You can also programmatically check which styles are available: ```elixir -custom_style = EXGBoost.Plotting.solarized_dark() |> Keyword.put(:background, "white") -EXGBoost.plot_tree(booster, [style: nil] ++ custom_style) +EXGBoost.Plotting.get_styles() ``` -As you can see, it maintained the pieces that were **EXPLICITLY** set by the style, but lost some of the defaults that improve the plot appearance. - -Obviously, if you wish to specify all parameters yourself, this shouldn't be an issue. - - - ### Configuration You can also set defaults for the top-level API using an `Application` configuration for `EXGBoost` under the `:plotting` key. Since the defaults are collected from your configuration file at compile-time, anything you set during runtime, even if you set it to the Application environment, will not be registered as defaults. @@ -228,3 +407,9 @@ config: ``` **NOTE: When you specify a parameter in the configuration, it is merged with the defaults which is different from runtime behavior.** + +At any point, you can check what your default settings are by using `EXGBoost.Plotting.get_defaults/0` + +```elixir +EXGBoost.Plotting.get_defaults() +```