From 421d2468504e4ac93aa749b0b877d141a8bf28c2 Mon Sep 17 00:00:00 2001 From: DolphinDream Date: Thu, 13 Dec 2018 15:50:13 -0500 Subject: [PATCH] Add node to compute various statistical quantities, currently supporting the following: Sum Sum Of Squares Product Average Geometric Mean Harmonic Mean Standard Deviation Root Mean Square Skewness Kurtosis Minimum Maximum Median Percentile Histogram --- docs/nodes/list_main/statistics.rst | 66 +++++++++ index.md | 1 + nodes/list_main/statistics.py | 188 ++++++++++++++++++++++++++ utils/modules/statistics_functions.py | 121 +++++++++++++++++ 4 files changed, 376 insertions(+) create mode 100644 docs/nodes/list_main/statistics.rst create mode 100644 nodes/list_main/statistics.py create mode 100644 utils/modules/statistics_functions.py diff --git a/docs/nodes/list_main/statistics.rst b/docs/nodes/list_main/statistics.rst new file mode 100644 index 0000000000..c63a29f195 --- /dev/null +++ b/docs/nodes/list_main/statistics.rst @@ -0,0 +1,66 @@ +List Statistics +=============== + +Functionality +------------- + +The List Statistics node computes various statistical quantities for the values in a list. + +Inputs +------ + +The **Data** input is expected to be a list of integers / floats or list of lists of integers / floats. +All inputs are vectorized. + +Parameters +---------- + +The **Function** parameter selects the statistical function used to compute the corresponding statistical quantity for the input values. + ++----------------+---------------------+---------+------------------------------------------+ +| Param | Type | Default | Description | ++================+=====================+=========+==========================================+ +| **Function** | Enum | Average | The statistical function applied to | +| | All Statistics | | the input values. 
| +| | Sum | | | +| | Sum Of Squares | | For "All Statistics" selection the node | +| | Product | | computes and outputs the statistical | +| | Average | | quantities for all the statistical | +| | Geometric Mean | | functions along with their corresponding | +| | Harmonic Mean | | names. | +| | Standard Deviation | | | +| | Root Mean Square | | | +| | Skewness | | | +| | Kurtosis | | | +| | Minimum | | | +| | Maximum | | | +| | Median | | | +| | Percentile | | | +| | Histogram | | | ++----------------+---------------------+---------+------------------------------------------+ +| **Percentage** | Float | 0.75 | The percentage value for the | +| | | | percentile function. [1] | ++----------------+---------------------+---------+------------------------------------------+ +| **Normalize** | Boolean | False | Flag to normalize the histogram bins | +| | | | to the given normalize size. [2] | ++----------------+---------------------+---------+------------------------------------------+ +| **Bins** | Int | 10 | The number of bins in the histogram. [2] | ++----------------+---------------------+---------+------------------------------------------+ +| **Size** | Float | 10.00 | The normalized size of the histogram.[2] | ++----------------+---------------------+---------+------------------------------------------+ + +Notes: +[1] : The **Percentage** input socket is available only for the **Percentile** function. +[2] : The **Normalize** setting and the **Bins** and **Size** input sockets are available only for the **Histogram** function. + +Outputs +------- +**Name(s)** +The name(s) of the statistical value(s) computed corresponding to the selected statistical function. + +**Value(s)** +The statistical quantity of the input values corresponding to the selected function. For a vectorized input the output values are a series of quantities corresponding to the selected function. 
+ +When "All Statistics" is selected the **Names** and **Values** outputs will list the names and the corresponding values for all the statistical functions. + + diff --git a/index.md b/index.md index 12c4a67e88..eadce32285 100644 --- a/index.md +++ b/index.md @@ -146,6 +146,7 @@ ListMatchNode ListFuncNode SvListDecomposeNode + SvListStatisticsNode ## List Struct ShiftNodeMK2 diff --git a/nodes/list_main/statistics.py b/nodes/list_main/statistics.py new file mode 100644 index 0000000000..86b033d17b --- /dev/null +++ b/nodes/list_main/statistics.py @@ -0,0 +1,188 @@ +# ##### BEGIN GPL LICENSE BLOCK ##### +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
#
# ##### END GPL LICENSE BLOCK #####

import bpy
from bpy.props import EnumProperty, IntProperty, FloatProperty, BoolProperty

from sverchok.node_tree import SverchCustomTreeNode
from sverchok.data_structure import updateNode, match_long_repeat
from sverchok.utils.modules.statistics_functions import *

# Maps each function identifier to a (enum number, callable) pair.
# "ALL STATISTICS" carries the placeholder 0 instead of a callable: process()
# never looks it up, it expands it into every other entry of this table.
functions = {
    "ALL STATISTICS": (0, 0),
    "SUM": (10, get_sum),
    "SUM OF SQUARES": (11, get_sum_of_squares),
    "SUM OF INVERSIONS": (12, get_sum_of_inversions),
    "PRODUCT": (13, get_product),
    "AVERAGE": (14, get_average),
    "GEOMETRIC MEAN": (15, get_geometric_mean),
    "HARMONIC MEAN": (16, get_harmonic_mean),
    "STANDARD DEVIATION": (17, get_standard_deviation),
    "ROOT MEAN SQUARE": (18, get_root_mean_square),
    "SKEWNESS": (19, get_skewness),
    "KURTOSIS": (20, get_kurtosis),
    "MINIMUM": (21, get_minimum),
    "MAXIMUM": (22, get_maximum),
    "MEDIAN": (23, get_median),
    "PERCENTILE": (24, get_percentile),
    "HISTOGRAM": (25, get_histogram)
}


# Enum items for the output number type selector.
modeItems = [
    ("INT", "Int", "", "", 0),
    ("FLOAT", "Float", "", "", 1)]

# Enum items for the function selector, ordered by the number stored in
# `functions`, so "ALL STATISTICS" (0) is always the first item.
functionItems = [(k, k.title(), "", "", s[0]) for k, s in sorted(functions.items(), key=lambda k: k[1][0])]


# NOTE: the class docstring below is left untouched on purpose; its
# "Triggers:" / "Tooltip:" lines look like metadata read at runtime
# (presumably by the Sverchok node search - confirm before rewording).
class SvListStatisticsNode(bpy.types.Node, SverchCustomTreeNode):
    '''
    Triggers: Sum, Avg, Min, Max
    Tooltip: Statistical quantities: sum, average, standard deviation, min, max, product...
    '''
    bl_idname = 'SvListStatisticsNode'
    bl_label = 'List Statistics'
    bl_icon = 'OUTLINER_OB_EMPTY'

    def update_function(self, context):
        """Adjust socket visibility and output names to the selected function."""
        if self.function == "ALL STATISTICS":
            # every function runs, so every function-specific input is needed
            self.inputs["Percentage"].hide_safe = False
            self.inputs["Bins"].hide_safe = False
            self.inputs["Size"].hide_safe = not self.normalize
            self.outputs[0].name = "Names"
            self.outputs[1].name = "Values"
        else:
            # hide all function-specific inputs, then re-show the relevant ones
            for name in ["Percentage", "Bins", "Size"]:
                self.inputs[name].hide_safe = True
            if self.function == "PERCENTILE":
                self.inputs["Percentage"].hide_safe = False
            elif self.function == "HISTOGRAM":
                self.inputs["Bins"].hide_safe = False
                self.inputs["Size"].hide_safe = not self.normalize

            self.outputs[0].name = "Name"
            self.outputs[1].name = "Value"

        updateNode(self, context)

    def update_normalize(self, context):
        """Show the "Size" input only while histogram normalization is on."""
        socket = self.inputs["Size"]
        socket.hide_safe = not self.normalize

        updateNode(self, context)

    mode = EnumProperty(
        name="Mode", items=modeItems, default="FLOAT", update=updateNode)

    function = EnumProperty(
        name="Function", items=functionItems, update=update_function)

    percentage = FloatProperty(
        name="Percentage",
        default=0.75, min=0.0, max=1.0, update=updateNode)

    bins = IntProperty(
        name="Bins",
        default=10, min=1, update=updateNode)

    normalize = BoolProperty(
        name="Normalize", description="Normalize the bins to a normalize size",
        default=False, update=update_normalize)

    normalized_size = FloatProperty(
        name="Size", description="The normalized size of the bins",
        default=10.0, update=updateNode)

    def draw_buttons(self, context, layout):
        """Draw the mode / function selectors and, when relevant, the normalize toggle."""
        layout.prop(self, "mode", expand=True)
        layout.prop(self, "function", text="")
        if self.function in ["HISTOGRAM", "ALL STATISTICS"]:
            layout.prop(self, "normalize", toggle=True)

    def sv_init(self, context):
        """Create the node's sockets and select the default function."""
        self.width = 150
        self.inputs.new('StringsSocket', "Data")
        self.inputs.new('StringsSocket', "Percentage").prop_name = "percentage"
        self.inputs.new('StringsSocket', "Bins").prop_name = "bins"
        self.inputs.new('StringsSocket', "Size").prop_name = "normalized_size"
        self.outputs.new('StringsSocket', "Names")
        self.outputs.new('StringsSocket', "Values")
        # assigning the property runs update_function, which sets the
        # socket visibility and output names for the default selection
        self.function = "AVERAGE"

    def get_statistics_function(self):
        """Return the callable registered for the currently selected function."""
        return functions[self.function][1]

    def process(self):
        """Compute the selected statistic(s) for every input data list.

        Outputs one name per computed function and, per function, one value
        for each parameter combination produced by match_long_repeat.
        """
        outputs = self.outputs
        # return if no outputs are connected
        if not any(s.is_linked for s in outputs):
            return

        inputs = self.inputs
        input_D = inputs["Data"].sv_get()
        input_P = inputs["Percentage"].sv_get()[0]
        input_B = inputs["Bins"].sv_get()[0]
        input_S = inputs["Size"].sv_get()[0]

        # sanitize the inputs
        # The percentage stays a float in every mode: it is a fraction in
        # [0, 1], so truncating it to int (as was done for INT mode) turned
        # every value below 1 into 0 and made Percentile return the minimum.
        input_P = [max(0, min(1, p)) for p in input_P]
        # The bin count sizes a list, so it must be an integer >= 1 even when
        # a float arrives through the socket.
        input_B = [max(1, int(b)) for b in input_B]

        if self.function == "ALL STATISTICS":
            # skip the first item, which is "ALL STATISTICS" itself
            functionNames = [fn[0] for fn in functionItems[1:]]
        else:
            functionNames = [self.function]

        # presumably extends the shorter lists to the longest one by repeating
        # their last element - confirm against sverchok.data_structure
        params = match_long_repeat([input_D, input_P, input_B, input_S])

        allNames = []
        allValues = []
        for functionName in functionNames:
            statistics_function = functions[functionName][1]
            quantityList = []
            for d, p, b, s in zip(*params):
                if functionName == "PERCENTILE":
                    quantity = statistics_function(d, p)
                elif functionName == "HISTOGRAM":
                    quantity = statistics_function(d, b, self.normalize, s)
                else:
                    quantity = statistics_function(d)

                # a histogram is a list of bins, so it is never cast to int
                if functionName != "HISTOGRAM":
                    if self.mode == "INT":
                        quantity = int(quantity)

                quantityList.append(quantity)

            allNames.append(functionName)
            allValues.append(quantityList)

        outputs[0].sv_set(allNames)
        outputs[1].sv_set(allValues)


def register():
    bpy.utils.register_class(SvListStatisticsNode)


def unregister():
    bpy.utils.unregister_class(SvListStatisticsNode)

# diff --git a/utils/modules/statistics_functions.py b/utils/modules/statistics_functions.py
# new file mode 100644
# index 0000000000..193c5b61cb
# --- /dev/null
# +++ b/utils/modules/statistics_functions.py
# @@ -0,0 +1,121 @@
# ##### BEGIN GPL LICENSE BLOCK #####
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation; either version 2
#  of the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software Foundation,
#  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# ##### END GPL LICENSE BLOCK #####

from functools import reduce
from math import sqrt, floor
import sys


def get_sum(values):
    """Return the sum of ``values``."""
    return sum(values)


def get_sum_of_squares(values):
    """Return the sum of the squares of ``values``."""
    return sum(v * v for v in values)


def get_sum_of_inversions(values):
    """Return the sum of the reciprocals ``1 / v`` of ``values``.

    Raises ZeroDivisionError if any value is zero.
    """
    return sum(1.0 / v for v in values)


def get_product(values):
    """Return the product of ``values``.

    Raises TypeError for an empty sequence (``reduce`` has no initial value).
    """
    return reduce((lambda x, y: x * y), values)


def get_average(values):
    """Return the arithmetic mean of ``values``.

    Raises ZeroDivisionError for an empty sequence.
    """
    return sum(values) / len(values)


def get_geometric_mean(values):
    """Return the n-th root of the product of the ``n`` values."""
    return pow(get_product(values), 1.0 / len(values))


def get_harmonic_mean(values):
    """Return ``n`` divided by the sum of the reciprocals of the ``n`` values."""
    return len(values) / get_sum_of_inversions(values)


def get_standard_deviation(values):
    """Return the population standard deviation of ``values``.

    This is the square root of the mean squared deviation from the average.
    It divides by ``n`` (not ``n - 1``), matching the ``/ n`` moments used by
    get_skewness and get_kurtosis.
    """
    a = get_average(values)
    # the division by len(values) was missing, which returned the root of the
    # *sum* of squared deviations instead of a standard deviation
    return sqrt(sum((v - a) ** 2 for v in values) / len(values))


def get_root_mean_square(values):
    """Return the square root of the mean of the squared ``values``."""
    return sqrt(get_sum_of_squares(values) / len(values))


def get_skewness(values):
    """Return the third central moment divided by the standard deviation cubed.

    Raises ZeroDivisionError when all values are equal (zero deviation).
    """
    a = get_average(values)
    n = len(values)
    s = get_standard_deviation(values)
    return sum((v - a) ** 3 for v in values) / n / pow(s, 3)


def get_kurtosis(values):
    """Return the fourth central moment divided by the standard deviation to the 4th.

    Raises ZeroDivisionError when all values are equal (zero deviation).
    """
    a = get_average(values)
    n = len(values)
    s = get_standard_deviation(values)
    return sum((v - a) ** 4 for v in values) / n / pow(s, 4)


def get_minimum(values):
    """Return the smallest of ``values``."""
    return min(values)


def get_maximum(values):
    """Return the largest of ``values``."""
    return max(values)


def get_median(values):
    """Return the median of ``values``.

    For an even count this is the average of the two central values of the
    sorted data, for an odd count the central value itself.
    """
    ordered = sorted(values)
    middle = len(ordered) // 2
    if len(ordered) % 2 == 0:  # even number of values => average the central pair
        return (ordered[middle - 1] + ordered[middle]) / 2
    # odd number of values => take the central value
    return ordered[middle]


def get_percentile(values, percentage):
    """Return the value found at the ``percentage`` position of the sorted data.

    ``percentage`` is a fraction (0.75 selects the element at index
    ``floor(n * 0.75)``); the index is capped at the last element so that a
    fraction of 1.0 returns the maximum.
    """
    ordered = sorted(values)
    index = min(int(floor(len(ordered) * percentage)), len(ordered) - 1)
    return ordered[index]


def get_histogram(values, numBins, normalize=False, normalizedSize=10):
    """Return the counts of ``values`` falling into ``numBins`` equal-width bins.

    The bins span the range from the minimum to the maximum value; the maximum
    itself is counted in the last bin. ``numBins`` is converted with ``int``
    because it sizes the result list. When ``normalize`` is true the counts are
    rescaled so that the fullest bin equals ``normalizedSize`` (the result is
    then a list of floats instead of ints).
    """
    numBins = int(numBins)
    minValue = get_minimum(values)
    maxValue = get_maximum(values)

    # fall back to the smallest positive float so that identical values
    # (zero range) do not cause a division by zero below
    binSize = max((maxValue - minValue) / numBins, sys.float_info.min)

    # initialize the histogram bins
    histogram = [0] * numBins

    # populate the histogram bins
    for value in values:
        # cap the index so the maximum value lands in the last bin
        binIndex = min(int(floor((value - minValue) / binSize)), numBins - 1)
        histogram[binIndex] += 1

    # normalize histogram ?
    if normalize:
        binMax = max(histogram)
        histogram = [count / binMax * normalizedSize for count in histogram]

    return histogram