diff --git a/0.19.0/404.html b/0.19.0/404.html
new file mode 100644
index 0000000000..511f8d21a9
--- /dev/null
+++ b/0.19.0/404.html
@@ -0,0 +1,985 @@

River

404 - Not found

\ No newline at end of file

diff --git a/0.19.0/CNAME b/0.19.0/CNAME
new file mode 100644
index 0000000000..4e958e7a94
--- /dev/null
+++ b/0.19.0/CNAME
@@ -0,0 +1 @@
+riverml.xyz

diff --git a/0.19.0/api/active/EntropySampler/index.html b/0.19.0/api/active/EntropySampler/index.html
new file mode 100644
index 0000000000..1ced7015c5
--- /dev/null
+++ b/0.19.0/api/active/EntropySampler/index.html
@@ -0,0 +1,3502 @@

EntropySampler - River

EntropySampler

+

Active learning classifier based on entropy measures.

+

The entropy sampler selects samples for labeling based on the entropy of the prediction. The higher the entropy, the more likely the sample will be selected for labeling. The entropy measure is normalized to [0, 1] and then raised to the power of the discount factor.
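The rule just described can be sketched in a few lines. This is only an illustration of the behavior, not River's internal code; in particular, the normalization constant used here is an assumption.

import math
import random

def should_ask(probas: dict, discount_factor: float, rng: random.Random) -> bool:
    """Decide whether to request a label for a sample, per the rule above."""
    # Entropy of the predicted label distribution.
    entropy = -sum(p * math.log(p) for p in probas.values() if p > 0)
    # Normalize to [0, 1] using the maximum achievable entropy (assumed).
    max_entropy = math.log(len(probas)) if len(probas) > 1 else 1.0
    normalized = entropy / max_entropy
    # Raise to the power of the discount factor: 1 leaves the entropy
    # unchanged, while 0 turns the probability into 1, selecting every sample.
    return rng.random() < normalized ** discount_factor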

+

Parameters

+
  • classifier (base.Classifier): The classifier to wrap.

  • discount_factor (float, default: 3): The discount factor to apply to the entropy measure. A value of 1 won't affect the entropy. The higher the discount factor, the more the entropy will be discounted, and the less likely samples will be selected for labeling. A value of 0 will select all samples for labeling. The discount factor is thus a way to control how many samples are selected for labeling.

  • seed (default: None): Random number generator seed for reproducibility.

Examples

+

from river import active
+from river import datasets
+from river import feature_extraction
+from river import linear_model
+from river import metrics
+
+dataset = datasets.SMSSpam()
+metric = metrics.Accuracy()
+model = (
+    feature_extraction.TFIDF(on='body') |
+    linear_model.LogisticRegression()
+)
+model = active.EntropySampler(model, seed=42)
+
+n_samples_used = 0
+for x, y in dataset:
+    y_pred, ask = model.predict_one(x)
+    metric = metric.update(y, y_pred)
+    if ask:
+        n_samples_used += 1
+        model = model.learn_one(x, y)
+
+metric
+
+
Accuracy: 86.60%
+

+

dataset.n_samples, n_samples_used
+
+
(5574, 1921)
+

+

print(f"{n_samples_used / dataset.n_samples:.2%}")
+
+
34.46%
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • kwargs
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the label of x and indicate whether a label is needed.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

The predicted label, together with a flag indicating whether a label should be requested.

+
+

+
+predict_proba_one +

Predict the probability of each label for x and indicate whether a label is needed.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

\ No newline at end of file

diff --git a/0.19.0/api/active/base/ActiveLearningClassifier/index.html b/0.19.0/api/active/base/ActiveLearningClassifier/index.html
new file mode 100644
index 0000000000..2b08db1860
--- /dev/null
+++ b/0.19.0/api/active/base/ActiveLearningClassifier/index.html
@@ -0,0 +1,3475 @@

ActiveLearningClassifier - River

ActiveLearningClassifier

+

Base class for active learning classifiers.
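To give a feel for how a subclass plugs in, here is a minimal sketch. The hook name _ask_for_label is an assumption inferred from the concrete samplers rather than a documented API, so treat this as illustrative only.

import random
from river import active, base

class CoinFlipSampler(active.base.ActiveLearningClassifier):
    """Hypothetical sampler that asks for a label with a fixed probability p."""

    def __init__(self, classifier: base.Classifier, p: float, seed=None):
        super().__init__(classifier, seed=seed)
        self.p = p
        self._coin = random.Random(seed)

    def _ask_for_label(self, x, **kwargs) -> bool:  # assumed hook name
        return self._coin.random() < self.p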

+

Parameters

+
  • classifier (base.Classifier): The classifier to wrap.

  • seed (int | None, default: None): Random number generator seed for reproducibility.

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • kwargs
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the label of x and indicate whether a label is needed.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

The predicted label, together with a flag indicating whether a label should be requested.

+
+

+
+predict_proba_one +

Predict the probability of each label for x and indicate whether a label is needed.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

\ No newline at end of file

diff --git a/0.19.0/api/anomaly/GaussianScorer/index.html b/0.19.0/api/anomaly/GaussianScorer/index.html
new file mode 100644
index 0000000000..95a0a979f0
--- /dev/null
+++ b/0.19.0/api/anomaly/GaussianScorer/index.html
@@ -0,0 +1,3561 @@

GaussianScorer - River

GaussianScorer

+

Univariate Gaussian anomaly detector.

+

This is a supervised anomaly detector. It fits a Gaussian distribution to the target values. The anomaly score is then computed as follows:

+
\[\text{score} = 2 \left\lvert CDF(y) - 0.5 \right\rvert\]
+

This makes it so that the anomaly score is between 0 and 1.
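To make the formula concrete, here is a hand computation using the error function, independent of River's internals:

import math

def gaussian_score(y: float, mu: float, sigma: float) -> float:
    """score = 2 * |CDF(y) - 0.5| for a Gaussian with parameters mu and sigma."""
    cdf = 0.5 * (1.0 + math.erf((y - mu) / (sigma * math.sqrt(2.0))))
    return 2.0 * abs(cdf - 0.5)

gaussian_score(-3.0, 0.0, 1.0)  # ~0.9973: a value three sigmas out scores near 1
gaussian_score(0.0, 0.0, 1.0)   # 0.0: the mean itself is maximally normal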

+

Parameters

+
  • window_size (default: None): Set this to fit the Gaussian distribution over a window of recent values.

  • grace_period (default: 100): Number of samples before which a 0 is always returned. This is handy because the Gaussian distribution needs time to stabilize, and will likely produce overly high anomaly scores for the first samples.

Examples

+

import random
+from river import anomaly
+
+rng = random.Random(42)
+detector = anomaly.GaussianScorer()
+
+for y in (rng.gauss(0, 1) for _ in range(100)):
+    detector = detector.learn_one(None, y)
+
+detector.score_one(None, -3)
+
+
0.999477...
+

+

detector.score_one(None, 3)
+
+
0.999153...
+

+

detector.score_one(None, 0)
+
+
0.052665...
+

+

detector.score_one(None, 0.5)
+
+
0.383717...
+

+

Methods

+
+learn_one +

Update the model.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.Target'
  • +
+

Returns

+

SupervisedAnomalyDetector: self

+
+

+
+score_one +

Return an outlier score.

+

A high score is indicative of an anomaly. A low score corresponds to a normal observation.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.Target'
  • +
+

Returns

+

float: An anomaly score. A high score is indicative of an anomaly. A low score corresponds to a normal observation.

+
+

\ No newline at end of file

diff --git a/0.19.0/api/anomaly/HalfSpaceTrees/index.html b/0.19.0/api/anomaly/HalfSpaceTrees/index.html
new file mode 100644
index 0000000000..61bab12a26
--- /dev/null
+++ b/0.19.0/api/anomaly/HalfSpaceTrees/index.html
@@ -0,0 +1,3649 @@

HalfSpaceTrees - River

HalfSpaceTrees

+

Half-Space Trees (HST).

+

Half-space trees are an online variant of isolation forests. They work well when anomalies are spread out. However, they do not work well if anomalies are packed together in windows.

+

By default, this implementation assumes that each feature takes values between 0 and 1. If this isn't the case, you can manually specify the limits via the limits argument. If you do not know the limits in advance, you can use a preprocessing.MinMaxScaler as an initial preprocessing step.

+

The current implementation builds the trees the first time the learn_one method is called. Therefore, the first learn_one call might be slow, whereas subsequent calls will be very fast in comparison. In general, the computation time of both learn_one and score_one scales linearly with the number of trees, and exponentially with the height of each tree.

+

Note that high scores indicate anomalies, whereas low scores indicate normal observations.

+

Parameters

+
  • n_trees (default: 10): Number of trees to use.

  • height (default: 8): Height of each tree. Note that a tree of height h is made up of h + 1 levels and therefore contains 2 ** (h + 1) - 1 nodes. For instance, the default height of 8 gives 2 ** 9 - 1 = 511 nodes per tree.

  • window_size (default: 250): Number of observations to use for calculating the mass at each node in each tree.

  • limits (dict[base.typing.FeatureName, tuple[float, float]] | None, default: None): Specifies the range of each feature. By default each feature is assumed to be in the range [0, 1].

  • seed (int | None, default: None): Random number seed.

Attributes

+
  • size_limit: This is the threshold under which the node search stops during the scoring phase. The value .1 is a magic constant indicated in the original paper.

Examples

+

from river import anomaly
+
+X = [0.5, 0.45, 0.43, 0.44, 0.445, 0.45, 0.0]
+hst = anomaly.HalfSpaceTrees(
+    n_trees=5,
+    height=3,
+    window_size=3,
+    seed=42
+)
+
+for x in X[:3]:
+    hst = hst.learn_one({'x': x})  # Warming up
+
+for x in X:
+    features = {'x': x}
+    hst = hst.learn_one(features)
+    print(f'Anomaly score for x={x:.3f}: {hst.score_one(features):.3f}')
+
+
Anomaly score for x=0.500: 0.107
+Anomaly score for x=0.450: 0.071
+Anomaly score for x=0.430: 0.107
+Anomaly score for x=0.440: 0.107
+Anomaly score for x=0.445: 0.107
+Anomaly score for x=0.450: 0.071
+Anomaly score for x=0.000: 0.853
+

+

The feature values are all between 0 and 1. This is what the model assumes by default. In the following example, we construct a pipeline that scales the data online and ensures that the values of each feature are between 0 and 1.

+

from river import compose
+from river import datasets
+from river import metrics
+from river import preprocessing
+
+model = compose.Pipeline(
+    preprocessing.MinMaxScaler(),
+    anomaly.HalfSpaceTrees(seed=42)
+)
+
+auc = metrics.ROCAUC()
+
+for x, y in datasets.CreditCard().take(2500):
+    score = model.score_one(x)
+    model = model.learn_one(x)
+    auc = auc.update(y, score)
+
+auc
+
+
ROCAUC: 91.15%
+

+

You can also use the evaluate.progressive_val_score function to evaluate the model on a +data stream.

+

from river import evaluate
+
+model = model.clone()
+
+evaluate.progressive_val_score(
+    dataset=datasets.CreditCard().take(2500),
+    model=model,
+    metric=metrics.ROCAUC(),
+    print_every=1000
+)
+
+
[1,000] ROCAUC: 88.43%
+[2,000] ROCAUC: 89.28%
+[2,500] ROCAUC: 91.15%
+ROCAUC: 91.15%
+

+

Methods

+
+learn_one +

Update the model.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

AnomalyDetector: self

+
+

+
+score_one +

Return an outlier score.

+

A high score is indicative of an anomaly. A low score corresponds to a normal observation.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

float: An anomaly score. A high score is indicative of an anomaly. A low score corresponds to a normal observation.

+
+

\ No newline at end of file

diff --git a/0.19.0/api/anomaly/OneClassSVM/index.html b/0.19.0/api/anomaly/OneClassSVM/index.html
new file mode 100644
index 0000000000..31ab90d50a
--- /dev/null
+++ b/0.19.0/api/anomaly/OneClassSVM/index.html
@@ -0,0 +1,3613 @@

OneClassSVM - River

OneClassSVM

+

One-class SVM for anomaly detection.

+

This is a stochastic implementation of the one-class SVM algorithm, and will not exactly match its batch formulation.

+

It is encouraged to scale the data upstream with preprocessing.StandardScaler, as well as use feature_extraction.RBFSampler to capture non-linearities.
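Such a preprocessing chain might look as follows; the RBFSampler settings here are placeholders rather than recommendations:

from river import anomaly, compose, feature_extraction, preprocessing

model = compose.Pipeline(
    preprocessing.StandardScaler(),          # scale the data upstream
    feature_extraction.RBFSampler(seed=42),  # capture non-linearities
    anomaly.OneClassSVM(nu=0.2),
)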

+

Parameters

+
  • nu (default: 0.1): An upper bound on the fraction of training errors and a lower bound on the fraction of support vectors. You can think of it as the expected fraction of anomalies.

  • optimizer (optim.base.Optimizer | None, default: None): The sequential optimizer used for updating the weights.

  • intercept_lr (optim.base.Scheduler | float, default: 0.01): Learning rate scheduler used for updating the intercept. An optim.schedulers.Constant is used if a float is provided. The intercept is not updated when this is set to 0.

  • clip_gradient (default: 1000000000000.0): Clips the absolute value of each gradient value.

  • initializer (optim.base.Initializer | None, default: None): Weights initialization scheme.

Attributes

+
    +
  • weights
  • +
+

Examples

+

from river import anomaly
+from river import compose
+from river import datasets
+from river import metrics
+from river import preprocessing
+
+model = anomaly.QuantileFilter(
+    anomaly.OneClassSVM(nu=0.2),
+    q=0.995
+)
+
+auc = metrics.ROCAUC()
+
+for x, y in datasets.CreditCard().take(2500):
+    score = model.score_one(x)
+    is_anomaly = model.classify(score)
+    model = model.learn_one(x)
+    auc = auc.update(y, is_anomaly)
+
+auc
+
+
ROCAUC: 74.68%
+

+

You can also use the evaluate.progressive_val_score function to evaluate the model on a +data stream.

+

from river import evaluate
+
+model = model.clone()
+
+evaluate.progressive_val_score(
+    dataset=datasets.CreditCard().take(2500),
+    model=model,
+    metric=metrics.ROCAUC(),
+    print_every=1000
+)
+
+
[1,000] ROCAUC: 74.40%
+[2,000] ROCAUC: 74.60%
+[2,500] ROCAUC: 74.68%
+ROCAUC: 74.68%
+

+

Methods

+
+learn_many +
+
+learn_one +

Update the model.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

AnomalyDetector: self

+
+

+
+score_one +

Return an outlier score.

+

A high score is indicative of an anomaly. A low score corresponds to a normal observation.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

An anomaly score. A high score is indicative of an anomaly. A low score corresponds to a normal observation.

+
+

\ No newline at end of file

diff --git a/0.19.0/api/anomaly/QuantileFilter/index.html b/0.19.0/api/anomaly/QuantileFilter/index.html
new file mode 100644
index 0000000000..4551fbcdb2
--- /dev/null
+++ b/0.19.0/api/anomaly/QuantileFilter/index.html
@@ -0,0 +1,3601 @@

QuantileFilter - River

QuantileFilter

+

Quantile anomaly filter. An anomaly score is classified as anomalous when it exceeds a given quantile of the scores observed so far.
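A minimal sketch of that rule, assuming an online quantile tracker such as stats.Quantile (which is not necessarily what this class uses internally):

from river import stats

quantile = stats.Quantile(0.95)  # track the 95th percentile of past scores

def classify(score: float) -> bool:
    threshold = quantile.get()
    quantile.update(score)
    return threshold is not None and score > threshold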

+

Parameters

+
  • anomaly_detector: An anomaly detector.

  • q (float): The quantile level above which to classify an anomaly score as anomalous.

  • protect_anomaly_detector (default: True): Indicates whether or not the anomaly detector should be updated when the anomaly score is anomalous. If the data contains sporadic anomalies, then the anomaly detector should likely not be updated. Indeed, if it learns the anomaly score, then it will slowly start to consider anomalous anomaly scores as normal. This might be desirable, for instance in the case of drift.

Attributes

+
    +
  • q
  • +
+

Examples

+

from river import anomaly
+from river import compose
+from river import datasets
+from river import metrics
+from river import preprocessing
+
+model = compose.Pipeline(
+    preprocessing.MinMaxScaler(),
+    anomaly.QuantileFilter(
+        anomaly.HalfSpaceTrees(seed=42),
+        q=0.95
+    )
+)
+
+report = metrics.ClassificationReport()
+
+for x, y in datasets.CreditCard().take(2000):
+    score = model.score_one(x)
+    is_anomaly = model['QuantileFilter'].classify(score)
+    model = model.learn_one(x)
+    report = report.update(y, is_anomaly)
+
+report
+
+
               Precision   Recall   F1       Support
+<BLANKLINE>
+       0      99.95%   94.49%   97.14%      1998
+       1       0.90%   50.00%    1.77%         2
+<BLANKLINE>
+   Macro      50.42%   72.25%   49.46%
+   Micro      94.45%   94.45%   94.45%
+Weighted      99.85%   94.45%   97.05%
+<BLANKLINE>
+                 94.45% accuracy
+

+

Methods

+
+classify +

Classify an anomaly score as anomalous or not.

+

Parameters

+
    +
  • score'float'
  • +
+

Returns

+

bool: A boolean value indicating whether the anomaly score is anomalous or not.

+
+

+
+learn_one +

Update the anomaly filter and the underlying anomaly detector.

+

Parameters

+
    +
  • args
  • +
  • learn_kwargs
  • +
+

Returns

+

self

+
+

+
+score_one +

Return an outlier score.

+

A high score is indicative of an anomaly. A low score corresponds to a normal observation.

+

Parameters

+
    +
  • args
  • +
  • kwargs
  • +
+

Returns

+

An anomaly score. A high score is indicative of an anomaly. A low score corresponds to a normal observation.

+
+

\ No newline at end of file

diff --git a/0.19.0/api/anomaly/ThresholdFilter/index.html b/0.19.0/api/anomaly/ThresholdFilter/index.html
new file mode 100644
index 0000000000..8f927f240c
--- /dev/null
+++ b/0.19.0/api/anomaly/ThresholdFilter/index.html
@@ -0,0 +1,3608 @@

ThresholdFilter - River

ThresholdFilter

+

Threshold anomaly filter.

+

Parameters

+
  • anomaly_detector: An anomaly detector.

  • threshold (float): A threshold above which to classify an anomaly score as anomalous.

  • protect_anomaly_detector (default: True): Indicates whether or not the anomaly detector should be updated when the anomaly score is anomalous. If the data contains sporadic anomalies, then the anomaly detector should likely not be updated. Indeed, if it learns the anomaly score, then it will slowly start to consider anomalous anomaly scores as normal. This might be desirable, for instance in the case of drift.

Examples

+

Anomaly filters can be used as part of a pipeline. For instance, we might want to filter out +anomalous observations so as not to corrupt a supervised model. As an example, let's take +the datasets.WaterFlow dataset. Some of the samples have anomalous target variables because +of human interventions. We don't want our model to learn these values.

+

from river import anomaly
+from river import datasets
+from river import metrics
+from river import time_series
+
+dataset = datasets.WaterFlow()
+metric = metrics.SMAPE()
+
+period = 24  # 24 samples per day
+
+model = (
+    anomaly.ThresholdFilter(
+        anomaly.GaussianScorer(
+            window_size=period * 7,  # 7 days
+            grace_period=30
+        ),
+        threshold=0.995
+    ) |
+    time_series.HoltWinters(
+        alpha=0.3,
+        beta=0.1,
+        multiplicative=False
+    )
+)
+
+time_series.evaluate(
+    dataset,
+    model,
+    metric,
+    horizon=period
+)
+
+
+1  SMAPE: 4.220171
++2  SMAPE: 4.322648
++3  SMAPE: 4.418546
++4  SMAPE: 4.504986
++5  SMAPE: 4.57924
++6  SMAPE: 4.64123
++7  SMAPE: 4.694042
++8  SMAPE: 4.740753
++9  SMAPE: 4.777291
++10 SMAPE: 4.804558
++11 SMAPE: 4.828114
++12 SMAPE: 4.849823
++13 SMAPE: 4.865871
++14 SMAPE: 4.871972
++15 SMAPE: 4.866274
++16 SMAPE: 4.842614
++17 SMAPE: 4.806214
++18 SMAPE: 4.763355
++19 SMAPE: 4.713455
++20 SMAPE: 4.672062
++21 SMAPE: 4.659102
++22 SMAPE: 4.693496
++23 SMAPE: 4.773707
++24 SMAPE: 4.880654
+

+

Methods

+
+classify +

Classify an anomaly score as anomalous or not.

+

Parameters

+
    +
  • score'float'
  • +
+

Returns

+

bool: A boolean value indicating whether the anomaly score is anomalous or not.

+
+

+
+learn_one +

Update the anomaly filter and the underlying anomaly detector.

+

Parameters

+
    +
  • args
  • +
  • learn_kwargs
  • +
+

Returns

+

self

+
+

+
+score_one +

Return an outlier score.

+

A high score is indicative of an anomaly. A low score corresponds to a normal observation.

+

Parameters

+
    +
  • args
  • +
  • kwargs
  • +
+

Returns

+

An anomaly score. A high score is indicative of an anomaly. A low score corresponds to a normal observation.

+
+

\ No newline at end of file

diff --git a/0.19.0/api/anomaly/base/AnomalyDetector/index.html b/0.19.0/api/anomaly/base/AnomalyDetector/index.html
new file mode 100644
index 0000000000..691453ff95
--- /dev/null
+++ b/0.19.0/api/anomaly/base/AnomalyDetector/index.html
@@ -0,0 +1,3557 @@

AnomalyDetector - River

AnomalyDetector

+

An anomaly detector.

+

Methods

+
+learn_one +

Update the model.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

AnomalyDetector: self

+
+

+
+score_one +

Return an outlier score.

+

A high score is indicative of an anomaly. A low score corresponds to a normal observation.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

float: An anomaly score. A high score is indicative of an anomaly. A low score corresponds to a normal observation.

+
+

\ No newline at end of file

diff --git a/0.19.0/api/anomaly/base/AnomalyFilter/index.html b/0.19.0/api/anomaly/base/AnomalyFilter/index.html
new file mode 100644
index 0000000000..59570dba1b
--- /dev/null
+++ b/0.19.0/api/anomaly/base/AnomalyFilter/index.html
@@ -0,0 +1,3598 @@

AnomalyFilter - River

AnomalyFilter

+

Anomaly filter base class.

+

An anomaly filter has the ability to classify an anomaly score as anomalous or not. It can then be used to filter anomalies, in particular as part of a pipeline.

+

Parameters

+
  • anomaly_detector (AnomalyDetector): An anomaly detector wrapped by the anomaly filter.

  • protect_anomaly_detector (default: True): Indicates whether or not the anomaly detector should be updated when the anomaly score is anomalous. If the data contains sporadic anomalies, then the anomaly detector should likely not be updated. Indeed, if it learns the anomaly score, then it will slowly start to consider anomalous anomaly scores as normal. This might be desirable, for instance in the case of drift.

Methods

+
+classify +

Classify an anomaly score as anomalous or not.

+

Parameters

+
    +
  • score'float'
  • +
+

Returns

+

bool: A boolean value indicating whether the anomaly score is anomalous or not.

+
+

+
+learn_one +

Update the anomaly filter and the underlying anomaly detector.

+

Parameters

+
    +
  • args
  • +
  • learn_kwargs
  • +
+

Returns

+

self

+
+

+
+score_one +

Return an outlier score.

+

A high score is indicative of an anomaly. A low score corresponds to a normal observation.

+

Parameters

+
    +
  • args
  • +
  • kwargs
  • +
+

Returns

+

An anomaly score. A high score is indicative of an anomaly. A low score corresponds to a normal observation.

+
+

\ No newline at end of file

diff --git a/0.19.0/api/anomaly/base/SupervisedAnomalyDetector/index.html b/0.19.0/api/anomaly/base/SupervisedAnomalyDetector/index.html
new file mode 100644
index 0000000000..a10695dd2d
--- /dev/null
+++ b/0.19.0/api/anomaly/base/SupervisedAnomalyDetector/index.html
@@ -0,0 +1,3559 @@

SupervisedAnomalyDetector - River

SupervisedAnomalyDetector

+

A supervised anomaly detector.

+

Methods

+
+learn_one +

Update the model.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.Target'
  • +
+

Returns

+

SupervisedAnomalyDetector: self

+
+

+
+score_one +

Return an outlier score.

+

A high score is indicative of an anomaly. A low score corresponds to a normal observation.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.Target'
  • +
+

Returns

+

float: An anomaly score. A high score is indicative of an anomaly. A low score corresponds to a normal observation.

+
+

\ No newline at end of file

diff --git a/0.19.0/api/bandit/BayesUCB/index.html b/0.19.0/api/bandit/BayesUCB/index.html
new file mode 100644
index 0000000000..e8c02ff324
--- /dev/null
+++ b/0.19.0/api/bandit/BayesUCB/index.html
@@ -0,0 +1,3756 @@

BayesUCB - River

BayesUCB

+

Bayes-UCB bandit policy.

+

Bayes-UCB is a Bayesian algorithm for the multi-armed bandit problem. It uses the posterior distribution of the reward of each arm to compute an upper confidence bound (UCB) on the expected reward of each arm. The arm with the highest UCB is then pulled. The posterior distribution is updated after each pull. The algorithm is described in [^1].
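For binary rewards with a Beta posterior per arm, the index can be sketched as a high quantile of that posterior. The 1 - 1/t quantile below follows the original paper; whether this implementation uses exactly that schedule is an assumption.

from scipy import stats

def bayes_ucb_index(successes: int, failures: int, t: int) -> float:
    """Upper confidence bound: a high quantile of the Beta posterior."""
    posterior = stats.beta(1 + successes, 1 + failures)
    # The quantile level grows with the time step t (assumed schedule).
    return posterior.ppf(1 - 1 / max(t, 2))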

+

Parameters

+
  • reward_obj (default: None): The reward object that is used to update the posterior distribution.

  • burn_in (default: 0): Number of initial observations per arm before using the posterior distribution.

  • seed (int | None, default: None): Random number generator seed for reproducibility.

Attributes

+
    +
  • +

    ranking

    +

    Return the list of arms in descending order of performance.

    +
  • +
+

Examples

+

import gym
+from river import bandit
+from river import proba
+from river import stats
+
+env = gym.make(
+    'river_bandits/CandyCaneContest-v0'
+)
+_ = env.reset(seed=42)
+_ = env.action_space.seed(123)
+
+policy = bandit.BayesUCB(seed=123)
+
+metric = stats.Sum()
+while True:
+    action = policy.pull(range(env.action_space.n))
+    observation, reward, terminated, truncated, info = env.step(action)
+    policy = policy.update(action, reward)
+    metric = metric.update(reward)
+    if terminated or truncated:
+        break
+
+metric
+
+
Sum: 841.
+

+

Methods

+
+compute_index +

Compute the p-th quantile of the arm's Beta posterior distribution.

+

Parameters

+
    +
  • arm_id
  • +
+
+

+
+pull +

Pull arm(s).

+

This method is a generator that yields the arm(s) that should be pulled. During the burn-in phase, all the arms that have not been pulled enough times are yielded. Once the burn-in phase is over, the policy is allowed to choose the arm(s) that should be pulled. If you only want to pull one arm at a time during the burn-in phase, simply call next(policy.pull(arms)).

+

Parameters

+
    +
  • arm_ids'list[ArmID]'
  • +
+

Returns

+

ArmID: A single arm.

+
+

+
+update +

Update the posterior distribution of the given arm with the observed reward.

+

Parameters

+
    +
  • arm_id
  • +
  • reward_args
  • +
  • reward_kwargs
  • +
+
+

\ No newline at end of file

diff --git a/0.19.0/api/bandit/EpsilonGreedy/index.html b/0.19.0/api/bandit/EpsilonGreedy/index.html
new file mode 100644
index 0000000000..16d96d0cf5
--- /dev/null
+++ b/0.19.0/api/bandit/EpsilonGreedy/index.html
@@ -0,0 +1,3769 @@

EpsilonGreedy - River

EpsilonGreedy

+

\(\varepsilon\)-greedy bandit policy.

+

Performs arm selection by using an \(\varepsilon\)-greedy bandit strategy. An arm is selected at each step. The best arm is selected with probability \(1 - \varepsilon\); otherwise, an arm is picked at random.
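The decision rule itself fits in a few lines; this is a sketch of the strategy, not River's internal code:

import random

def epsilon_greedy_pull(avg_rewards: dict, epsilon: float, rng: random.Random):
    if rng.random() < epsilon:
        return rng.choice(list(avg_rewards))      # explore a random arm
    return max(avg_rewards, key=avg_rewards.get)  # exploit the best arm so far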

+

Selection bias is a common problem when using bandits. This bias can be mitigated by using a burn-in phase. Each model is given the chance to learn during the first burn_in steps.

+

Parameters

+
  • epsilon (float): The probability of exploring.

  • decay (default: 0.0): The decay rate of epsilon.

  • reward_obj (default: None): The reward object used to measure the performance of each arm. This can be a metric, a statistic, or a distribution.

  • burn_in (default: 0): The number of steps to use for the burn-in phase. Each arm is given the chance to be pulled during the burn-in phase. This is useful to mitigate selection bias.

  • seed (int | None, default: None): Random number generator seed for reproducibility.

Attributes

+
    +
  • +

    current_epsilon

    +

    The value of epsilon after factoring in the decay rate.

    +
  • +
  • +

    ranking

    +

    Return the list of arms in descending order of performance.

    +
  • +
+

Examples

+

import gym
+from river import bandit
+from river import stats
+
+env = gym.make(
+    'river_bandits/CandyCaneContest-v0'
+)
+_ = env.reset(seed=42)
+_ = env.action_space.seed(123)
+
+policy = bandit.EpsilonGreedy(epsilon=0.9, seed=101)
+
+metric = stats.Sum()
+while True:
+    arm = policy.pull(range(env.action_space.n))
+    observation, reward, terminated, truncated, info = env.step(arm)
+    policy = policy.update(arm, reward)
+    metric = metric.update(reward)
+    if terminated or truncated:
+        break
+
+metric
+
+
Sum: 775.
+

+

Methods

+
+pull +

Pull arm(s).

+

This method is a generator that yields the arm(s) that should be pulled. During the burn-in phase, all the arms that have not been pulled enough times are yielded. Once the burn-in phase is over, the policy is allowed to choose the arm(s) that should be pulled. If you only want to pull one arm at a time during the burn-in phase, simply call next(policy.pull(arms)).

+

Parameters

+
    +
  • arm_ids'list[ArmID]'
  • +
+

Returns

+

ArmID: A single arm.

+
+

+
+update +

Update an arm's state.

+

Parameters

+
    +
  • arm_id
  • +
  • reward_args
  • +
  • reward_kwargs
  • +
+
+

\ No newline at end of file

diff --git a/0.19.0/api/bandit/Exp3/index.html b/0.19.0/api/bandit/Exp3/index.html
new file mode 100644
index 0000000000..c0106daee5
--- /dev/null
+++ b/0.19.0/api/bandit/Exp3/index.html
@@ -0,0 +1,3768 @@

Exp3 - River

Exp3

+

Exp3 bandit policy.

+

This policy works by maintaining a weight for each arm. These weights are used to randomly decide which arm to pull. The weights are increased or decreased, depending on the reward. An egalitarianism factor \(\gamma \in [0, 1]\) is included, to tune the desire to pick an arm uniformly at random. That is, if \(\gamma = 1\), the arms are picked uniformly at random.
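This matches the standard Exp3 scheme of Auer et al. A sketch of those weights and probabilities follows, with the caveat that River's exact update may differ in detail:

import math

def exp3_probabilities(weights: list, gamma: float) -> list:
    total, k = sum(weights), len(weights)
    # gamma = 1 yields the uniform distribution, as noted above.
    return [(1 - gamma) * w / total + gamma / k for w in weights]

def exp3_update(weights: list, probs: list, arm: int, reward: float, gamma: float) -> None:
    x_hat = reward / probs[arm]  # importance-weighted reward estimate
    weights[arm] *= math.exp(gamma * x_hat / len(weights))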

+

Parameters

+
  • gamma (float): The egalitarianism factor. Setting this to 0 leads to what is called the EXP3 policy.

  • reward_obj (default: None): The reward object used to measure the performance of each arm. This can be a metric, a statistic, or a distribution.

  • reward_scaler (default: None): A reward scaler used to scale the rewards before they are fed to the reward object. This can be useful to scale the rewards to a (0, 1) range for instance.

  • burn_in (default: 0): The number of steps to use for the burn-in phase. Each arm is given the chance to be pulled during the burn-in phase. This is useful to mitigate selection bias.

  • seed (int | None, default: None): Random number generator seed for reproducibility.

Attributes

+
    +
  • +

    ranking

    +

    Return the list of arms in descending order of performance.

    +
  • +
+

Examples

+

import gym
+from river import bandit
+from river import proba
+from river import stats
+
+env = gym.make(
+    'river_bandits/CandyCaneContest-v0'
+)
+_ = env.reset(seed=42)
+_ = env.action_space.seed(123)
+
+policy = bandit.Exp3(gamma=0.5, seed=42)
+
+metric = stats.Sum()
+while True:
+    action = policy.pull(range(env.action_space.n))
+    observation, reward, terminated, truncated, info = env.step(action)
+    policy = policy.update(action, reward)
+    metric = metric.update(reward)
+    if terminated or truncated:
+        break
+
+metric
+
+
Sum: 799.
+

+

Methods

+
+pull +

Pull arm(s).

+

This method is a generator that yields the arm(s) that should be pulled. During the burn-in phase, all the arms that have not been pulled enough times are yielded. Once the burn-in phase is over, the policy is allowed to choose the arm(s) that should be pulled. If you only want to pull one arm at a time during the burn-in phase, simply call next(policy.pull(arms)).

+

Parameters

+
    +
  • arm_ids'list[ArmID]'
  • +
+

Returns

+

ArmID: A single arm.

+
+

+
+update +

Update an arm's state.

+

Parameters

+
    +
  • arm_id
  • +
  • reward_args
  • +
  • reward_kwargs
  • +
+
+

\ No newline at end of file

diff --git a/0.19.0/api/bandit/LinUCBDisjoint/index.html b/0.19.0/api/bandit/LinUCBDisjoint/index.html
new file mode 100644
index 0000000000..825041d5b0
--- /dev/null
+++ b/0.19.0/api/bandit/LinUCBDisjoint/index.html
@@ -0,0 +1,3736 @@

LinUCBDisjoint - River

LinUCBDisjoint

+

LinUCB, disjoint variant.

+

Although it works, as of yet it is too slow to realistically be used in practice.

+

The way this works is that each arm is assigned a linear_model.BayesianLinearRegression instance. This instance is updated every time the arm is pulled. The context is used as features for the regression. The reward is used as the target. The posterior distribution is used to compute the upper confidence bound. The arm with the highest upper confidence bound is pulled.
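A sketch of the pull step just described. The with_dist keyword and the way the bound combines the posterior mean and spread are assumptions made for illustration:

def linucb_pull(regressors: dict, context: dict, width: float = 1.0):
    """regressors maps each arm to its own BayesianLinearRegression."""
    def ucb(arm):
        # Hypothetical call: obtain the posterior predictive as a Gaussian.
        dist = regressors[arm].predict_one(context, with_dist=True)
        return dist.mu + width * dist.sigma
    return max(regressors, key=ucb)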

+

Parameters

+
  • alpha (float, default: 1.0): Parameter used in each Bayesian linear regression.

  • beta (float, default: 1.0): Parameter used in each Bayesian linear regression.

  • smoothing (float | None, default: None): Parameter used in each Bayesian linear regression.

  • reward_obj (default: None): The reward object used to measure the performance of each arm.

  • burn_in (default: 0): The number of time steps during which each arm is pulled once.

  • seed (int | None, default: None): Random number generator seed for reproducibility.

Attributes

+
    +
  • +

    ranking

    +

    Return the list of arms in descending order of performance.

    +
  • +
+

Methods

+
+pull +

Pull arm(s).

+

This method is a generator that yields the arm(s) that should be pulled. During the burn-in phase, all the arms that have not been pulled enough times are yielded. Once the burn-in phase is over, the policy is allowed to choose the arm(s) that should be pulled. If you only want to pull one arm at a time during the burn-in phase, simply call next(policy.pull(arms)).

+

Parameters

+
    +
  • arm_ids'list[ArmID]'
  • +
  • context'dict' — defaults to None
  • +
+

Returns

+

ArmID: A single arm.

+
+

+
+update +

Update the given arm's Bayesian linear regression with the observed context and reward.

+

Parameters

+
    +
  • arm_id
  • +
  • context
  • +
  • reward_args
  • +
  • reward_kwargs
  • +
+
+

\ No newline at end of file

diff --git a/0.19.0/api/bandit/RandomPolicy/index.html b/0.19.0/api/bandit/RandomPolicy/index.html
new file mode 100644
index 0000000000..63b3cb293b
--- /dev/null
+++ b/0.19.0/api/bandit/RandomPolicy/index.html
@@ -0,0 +1,3747 @@

RandomPolicy - River

RandomPolicy

+

Random bandit policy.

+

This policy simply pulls a random arm at each time step. It is useful as a baseline.

+

Parameters

+
  • reward_obj (default: None): The reward object that is used to update the posterior distribution.

  • burn_in (default: 0): Number of initial observations per arm before using the posterior distribution.

  • seed (int | None, default: None): Random number generator seed for reproducibility.
+

Attributes

+
    +
  • +

    ranking

    +

    Return the list of arms in descending order of performance.

    +
  • +
+

Examples

+

import gym
+from river import bandit
+from river import proba
+from river import stats
+
+env = gym.make(
+    'river_bandits/CandyCaneContest-v0'
+)
+_ = env.reset(seed=42)
+_ = env.action_space.seed(123)
+
+policy = bandit.RandomPolicy(seed=123)
+
+metric = stats.Sum()
+while True:
+    action = policy.pull(range(env.action_space.n))
+    observation, reward, terminated, truncated, info = env.step(action)
+    policy = policy.update(action, reward)
+    metric = metric.update(reward)
+    if terminated or truncated:
+        break
+
+metric
+
+
Sum: 755.
+

+

Methods

+
+pull +

Pull arm(s).

+

This method is a generator that yields the arm(s) that should be pulled. During the burn-in phase, all the arms that have not been pulled enough times are yielded. Once the burn-in phase is over, the policy is allowed to choose the arm(s) that should be pulled. If you only want to pull one arm at a time during the burn-in phase, simply call next(policy.pull(arms)).

+

Parameters

+
    +
  • arm_ids'list[ArmID]'
  • +
+

Returns

+

ArmID: A single arm.

+
+

+
+update +

Update an arm's state.

+

Parameters

+
    +
  • arm_id
  • +
  • reward_args
  • +
  • reward_kwargs
  • +
+
+

\ No newline at end of file

diff --git a/0.19.0/api/bandit/ThompsonSampling/index.html b/0.19.0/api/bandit/ThompsonSampling/index.html
new file mode 100644
index 0000000000..66e5babf7c
--- /dev/null
+++ b/0.19.0/api/bandit/ThompsonSampling/index.html
@@ -0,0 +1,3762 @@

ThompsonSampling - River

ThompsonSampling

+

Thompson sampling.

+

Thompson sampling is often used with a Beta distribution. However, any probability distribution can be used, as long it makes sense with the reward shape. For instance, a Beta distribution is meant to be used with binary rewards, while a Gaussian distribution is meant to be used with continuous rewards.
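The pull step itself is simple: draw one sample from each arm's distribution and pick the arm whose draw is largest. A sketch for the Beta case:

import random

def thompson_pull(posteriors: dict, rng: random.Random):
    """posteriors maps each arm to the (alpha, beta) parameters of a Beta."""
    return max(posteriors, key=lambda arm: rng.betavariate(*posteriors[arm]))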

+

The randomness of a distribution is controlled by its seed. The seed should not be set within the distribution, but should rather be defined in the policy parametrization. In other words, you should do this:

+
policy = ThompsonSampling(dist=proba.Beta(1, 1), seed=42) 
+
+

and not this:

+
policy = ThompsonSampling(dist=proba.Beta(1, 1, seed=42)) 
+
+

Parameters

+
  • reward_obj (proba.base.Distribution, default: None): A distribution to sample from.

  • burn_in (default: 0): The number of steps to use for the burn-in phase. Each arm is given the chance to be pulled during the burn-in phase. This is useful to mitigate selection bias.

  • seed (int | None, default: None): Random number generator seed for reproducibility.

Attributes

+
    +
  • +

    ranking

    +

    Return the list of arms in descending order of performance.

    +
  • +
+

Examples

+

import gym
+from river import bandit
+from river import proba
+from river import stats
+
+env = gym.make(
+    'river_bandits/CandyCaneContest-v0'
+)
+_ = env.reset(seed=42)
+_ = env.action_space.seed(123)
+
+policy = bandit.ThompsonSampling(reward_obj=proba.Beta(), seed=101)
+
+metric = stats.Sum()
+while True:
+    arm = policy.pull(range(env.action_space.n))
+    observation, reward, terminated, truncated, info = env.step(arm)
+    policy = policy.update(arm, reward)
+    metric = metric.update(reward)
+    if terminated or truncated:
+        break
+
+metric
+
+
Sum: 820.
+

+

Methods

+
+pull +

Pull arm(s).

+

This method is a generator that yields the arm(s) that should be pulled. During the burn-in phase, all the arms that have not been pulled enough times are yielded. Once the burn-in phase is over, the policy is allowed to choose the arm(s) that should be pulled. If you only want to pull one arm at a time during the burn-in phase, simply call next(policy.pull(arms)).

+

Parameters

+
    +
  • arm_ids'list[ArmID]'
  • +
+

Returns

+

ArmID: A single arm.

+
+

+
+update +

Update an arm's state.

+

Parameters

+
    +
  • arm_id
  • +
  • reward_args
  • +
  • reward_kwargs
  • +
+
+

\ No newline at end of file

diff --git a/0.19.0/api/bandit/UCB/index.html b/0.19.0/api/bandit/UCB/index.html
new file mode 100644
index 0000000000..42c903c16f
--- /dev/null
+++ b/0.19.0/api/bandit/UCB/index.html
@@ -0,0 +1,3775 @@

UCB - River

UCB

+

Upper Confidence Bound (UCB) bandit policy.

+

Due to the nature of this algorithm, it's recommended to scale the target so that it exhibits sub-gaussian properties. This can be done by passing a preprocessing.TargetStandardScaler instance to the reward_scaler argument.
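As a reference point, the classic UCB1 index adds an exploration bonus to each arm's running mean reward. Treating delta as a scaling of that bonus is an assumption, consistent with the parameter note below that delta = 1 recovers UCB1:

import math

def ucb_index(mean_reward: float, n_pulls: int, t: int, delta: float) -> float:
    if n_pulls == 0:
        return math.inf  # unpulled arms are tried first
    return mean_reward + delta * math.sqrt(2 * math.log(t) / n_pulls)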

+

Parameters

+
  • delta (float): The confidence level. Setting this to 1 leads to what is called the UCB1 policy.

  • reward_obj (default: None): The reward object used to measure the performance of each arm. This can be a metric, a statistic, or a distribution.

  • reward_scaler (default: None): A reward scaler used to scale the rewards before they are fed to the reward object. This can be useful to scale the rewards to a (0, 1) range for instance.

  • burn_in (default: 0): The number of steps to use for the burn-in phase. Each arm is given the chance to be pulled during the burn-in phase. This is useful to mitigate selection bias.

  • seed (int, default: None): Random number generator seed for reproducibility.

Attributes

+
    +
  • +

    ranking

    +

    Return the list of arms in descending order of performance.

    +
  • +
+

Examples

+

import gym
+from river import bandit
+from river import preprocessing
+from river import stats
+
+env = gym.make(
+    'river_bandits/CandyCaneContest-v0'
+)
+_ = env.reset(seed=42)
+_ = env.action_space.seed(123)
+
+policy = bandit.UCB(
+    delta=100,
+    reward_scaler=preprocessing.TargetStandardScaler(None),
+    seed=42
+)
+
+metric = stats.Sum()
+while True:
+    arm = policy.pull(range(env.action_space.n))
+    observation, reward, terminated, truncated, info = env.step(arm)
+    policy = policy.update(arm, reward)
+    metric = metric.update(reward)
+    if terminated or truncated:
+        break
+
+metric
+
+
Sum: 744.
+

+

Methods

+
+pull +

Pull arm(s).

+

This method is a generator that yields the arm(s) that should be pulled. During the burn-in phase, all the arms that have not been pulled enough times are yielded. Once the burn-in phase is over, the policy is allowed to choose the arm(s) that should be pulled. If you only want to pull one arm at a time during the burn-in phase, simply call next(policy.pull(arms)).

+

Parameters

+
    +
  • arm_ids'list[ArmID]'
  • +
+

Returns

+

ArmID: A single arm.

+
+

+
+update +

Update an arm's state.

+

Parameters

+
    +
  • arm_id
  • +
  • reward_args
  • +
  • reward_kwargs
  • +
+
+

\ No newline at end of file

diff --git a/0.19.0/api/bandit/base/ContextualPolicy/index.html b/0.19.0/api/bandit/base/ContextualPolicy/index.html
new file mode 100644
index 0000000000..9c1eeed0b6
--- /dev/null
+++ b/0.19.0/api/bandit/base/ContextualPolicy/index.html
@@ -0,0 +1,3756 @@

ContextualPolicy - River

ContextualPolicy

+

Contextual bandit policy base class.

+

Parameters

+
  • reward_obj (RewardObj | None, default: None): The reward object used to measure the performance of each arm. This can be a metric, a statistic, or a distribution.

  • reward_scaler (compose.TargetTransformRegressor | None, default: None): A reward scaler used to scale the rewards before they are fed to the reward object. This can be useful to scale the rewards to a (0, 1) range for instance.

  • burn_in (default: 0): The number of steps to use for the burn-in phase. Each arm is given the chance to be pulled during the burn-in phase. This is useful to mitigate selection bias.

Attributes

+
    +
  • +

    ranking

    +

    Return the list of arms in descending order of performance.

    +
  • +
+

Methods

+
+pull +

Pull arm(s).

+

This method is a generator that yields the arm(s) that should be pulled. During the burn-in phase, all the arms that have not been pulled enough times are yielded. Once the burn-in phase is over, the policy is allowed to choose the arm(s) that should be pulled. If you only want to pull one arm at a time during the burn-in phase, simply call next(policy.pull(arms)).

+

Parameters

+
    +
  • arm_ids'list[ArmID]'
  • +
  • context'dict' — defaults to None
  • +
+

Returns

+

ArmID: A single arm.

+
+

+
+update +

Update an arm's state.

+

Parameters

+
    +
  • arm_id
  • +
  • context
  • +
  • reward_args
  • +
  • reward_kwargs
  • +
+
+

\ No newline at end of file

diff --git a/0.19.0/api/bandit/base/Policy/index.html b/0.19.0/api/bandit/base/Policy/index.html
new file mode 100644
index 0000000000..cc4ee22631
--- /dev/null
+++ b/0.19.0/api/bandit/base/Policy/index.html
@@ -0,0 +1,3754 @@

Policy - River

Policy

+

Bandit policy base class.

+

Parameters

+
  • reward_obj (RewardObj | None, default: None): The reward object used to measure the performance of each arm. This can be a metric, a statistic, or a distribution.

  • reward_scaler (compose.TargetTransformRegressor | None, default: None): A reward scaler used to scale the rewards before they are fed to the reward object. This can be useful to scale the rewards to a (0, 1) range for instance.

  • burn_in (default: 0): The number of steps to use for the burn-in phase. Each arm is given the chance to be pulled during the burn-in phase. This is useful to mitigate selection bias.

Attributes

+
    +
  • +

    ranking

    +

    Return the list of arms in descending order of performance.

    +
  • +
+

Methods

+
+pull +

Pull arm(s).

+

This method is a generator that yields the arm(s) that should be pulled. During the burn-in phase, all the arms that have not been pulled enough times are yielded. Once the burn-in phase is over, the policy is allowed to choose the arm(s) that should be pulled. If you only want to pull one arm at a time during the burn-in phase, simply call next(policy.pull(arms)).

+

Parameters

+
    +
  • arm_ids'list[ArmID]'
  • +
+

Returns

+

ArmID: A single arm.

+
+

+
+update +

Update an arm's state.

+

Parameters

+
    +
  • arm_id
  • +
  • reward_args
  • +
  • reward_kwargs
  • +
+
+

\ No newline at end of file

diff --git a/0.19.0/api/bandit/datasets/BanditDataset/index.html b/0.19.0/api/bandit/datasets/BanditDataset/index.html
new file mode 100644
index 0000000000..1ae96478b6
--- /dev/null
+++ b/0.19.0/api/bandit/datasets/BanditDataset/index.html
@@ -0,0 +1,3751 @@

BanditDataset - River

BanditDataset

+

Base class for bandit datasets.

+

Parameters

+
  • n_features: Number of features in the dataset.

  • n_samples (default: None): Number of samples in the dataset.

  • n_classes (default: None): Number of classes in the dataset; only applies to classification datasets.

  • n_outputs (default: None): Number of outputs the target is made of; only applies to multi-output datasets.

  • sparse (default: False): Whether the dataset is sparse or not.

Attributes

+
    +
  • +

    arms

    +

    The list of arms that can be pulled.

    +
  • +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file

diff --git a/0.19.0/api/bandit/datasets/NewsArticles/index.html b/0.19.0/api/bandit/datasets/NewsArticles/index.html
new file mode 100644
index 0000000000..673e365b35
--- /dev/null
+++ b/0.19.0/api/bandit/datasets/NewsArticles/index.html
@@ -0,0 +1,3760 @@

NewsArticles - River

NewsArticles

+

News articles bandit dataset.

+

This is a personalization dataset. It contains 10000 observations. There are 10 arms, and the reward is binary. There are 100 features, which turns this into a contextual bandit problem.

+

Attributes

+
    +
  • +

    arms

    +

    The list of arms that can be pulled.

    +
  • +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    is_downloaded

    +

    Indicate whether or not the data has been correctly downloaded.

    +
  • +
  • +

    path

    +
  • +
+

Examples

+

from river import bandit
+
+dataset = bandit.datasets.NewsArticles()
+context, arm, reward = next(iter(dataset))
+
+len(context)
+
+
100
+

+

arm, reward
+
+
(2, False)
+

+

Methods

+
+download +
+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/bandit/envs/CandyCaneContest/index.html b/0.19.0/api/bandit/envs/CandyCaneContest/index.html new file mode 100644 index 0000000000..286a592436 --- /dev/null +++ b/0.19.0/api/bandit/envs/CandyCaneContest/index.html @@ -0,0 +1,3809 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CandyCaneContest - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

CandyCaneContest

+

Candy cane contest Kaggle competition.

+

Parameters

+
    +
  • +

    n_machines

    +

    Default100

    +

    Number of vending machines.

    +
  • +
  • +

    reward_decay

    +

    Default0.03

    +

    The multiplicative rate at which the expected reward of each vending machine decays.

    +
  • +
+

Attributes

+
    +
  • +

    np_random

    +

    Returns the environment's internal :attr:_np_random generator; if it is not yet set, it will be initialised with a random seed.

    +
  • +
  • +

    render_mode

    +
  • +
  • +

    spec

    +
  • +
  • +

    unwrapped

    +

    Returns the base non-wrapped environment, i.e. the underlying gym.Env instance.

    +
  • +
+

Examples

+

import gym
+from river import stats
+
+env = gym.make('river_bandits/CandyCaneContest-v0')
+_ = env.reset(seed=42)
+_ = env.action_space.seed(123)
+
+metric = stats.Sum()
+while True:
+    arm = env.action_space.sample()
+    observation, reward, terminated, truncated, info = env.step(arm)
+    metric = metric.update(reward)
+    if terminated or truncated:
+        break
+
+metric
+
+
Sum: 734.
+

+

Methods

+
+close +

Override close in your subclass to perform any necessary cleanup.

+

Environments will automatically :meth:close() themselves when garbage collected or when the program exits.

+
+

+
+render +

Compute the render frames as specified by render_mode attribute during initialization of the environment.

+

The set of supported modes varies per environment (and some third-party environments may not support rendering at all). By convention, if render_mode is:

  • None (default): no render is computed.
  • human: render returns None. The environment is continuously rendered in the current display or terminal, usually for human consumption.
  • rgb_array: return a single frame representing the current state of the environment. A frame is a numpy.ndarray with shape (x, y, 3) representing RGB values for an x-by-y pixel image.
  • rgb_array_list: return a list of frames representing the states of the environment since the last reset. Each frame is a numpy.ndarray with shape (x, y, 3), as with rgb_array.
  • ansi: return a string (str) or StringIO.StringIO containing a terminal-style text representation for each time step. The text can include newlines and ANSI escape sequences (e.g. for colors).

Note: make sure that your class's metadata 'render_modes' key includes the list of supported modes. It's recommended to call super() in implementations to use the functionality of this method.

+
+

+
+reset +

Resets the environment to an initial state and returns the initial observation.

+

This method can reset the environment's random number generator(s) if seed is an integer or if the environment has not yet initialized a random number generator. If the environment already has a random number generator and :meth:reset is called with seed=None, the RNG should not be reset. Moreover, :meth:reset should (in the typical use case) be called with an integer seed right after initialization and then never again.

Args:

  • seed (optional int): The seed that is used to initialize the environment's PRNG. If the environment does not already have a PRNG and seed=None (the default option) is passed, a seed will be chosen from some source of entropy (e.g. timestamp or /dev/urandom). However, if the environment already has a PRNG and seed=None is passed, the PRNG will not be reset. If you pass an integer, the PRNG will be reset even if it already exists. Usually, you want to pass an integer right after the environment has been initialized and then never again. Please refer to the minimal example above to see this paradigm in action.
  • options (optional dict): Additional information to specify how the environment is reset (optional, depending on the specific environment).

Returns:

  • observation (object): Observation of the initial state. This will be an element of :attr:observation_space (typically a numpy array) and is analogous to the observation returned by :meth:step.
  • info (dictionary): This dictionary contains auxiliary information complementing observation. It should be analogous to the info returned by :meth:step.

+

Parameters

+
    +
  • seedOptional[int] — defaults to None
  • +
  • optionsOptional[dict] — defaults to None
  • +
+
+

+
+step +

Run one timestep of the environment's dynamics.

+

When the end of an episode is reached, you are responsible for calling :meth:reset to reset this environment's state. Accepts an action and returns a tuple (observation, reward, terminated, truncated, info).

Args:

  • action (ActType): an action provided by the agent.

Returns:

  • observation (object): this will be an element of the environment's :attr:observation_space. This may, for instance, be a numpy array containing the positions and velocities of certain objects.
  • reward (float): The amount of reward returned as a result of taking the action.
  • terminated (bool): whether a terminal state (as defined under the MDP of the task) is reached. In this case further step() calls could return undefined results.
  • truncated (bool): whether a truncation condition outside the scope of the MDP is satisfied. Typically a timelimit, but could also be used to indicate an agent physically going out of bounds. Can be used to end the episode prematurely before a terminal state is reached.
  • info (dictionary): info contains auxiliary diagnostic information (helpful for debugging, learning, and logging). This might, for instance, contain: metrics that describe the agent's performance state, variables that are hidden from observations, or individual reward terms that are combined to produce the total reward. It also can contain information that distinguishes truncation and termination; however, this is deprecated in favour of returning two booleans, and will be removed in a future version.
  • (deprecated) done (bool): A boolean value for whether the episode has ended, in which case further :meth:step calls will return undefined results. A done signal may be emitted for different reasons: maybe the task underlying the environment was solved successfully, a certain timelimit was exceeded, or the physics simulation has entered an invalid state.

+

Parameters

+
    +
  • machine_index
  • +
+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/bandit/envs/KArmedTestbed/index.html b/0.19.0/api/bandit/envs/KArmedTestbed/index.html new file mode 100644 index 0000000000..729ba85698 --- /dev/null +++ b/0.19.0/api/bandit/envs/KArmedTestbed/index.html @@ -0,0 +1,3764 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + KArmedTestbed - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

KArmedTestbed

+

k-armed testbed.

+

This is a simple environment that can be used to test bandit algorithms. It is based on the 10 armed testbed described in the book "Reinforcement Learning: An Introduction" by Sutton and Barto.

+

Parameters

+
    +
  • +

    k

    +

    Typeint

    +

    Default10

    +

    Number of arms.

    +
  • +
+

Attributes

+
    +
  • +

    np_random

    +

    Returns the environment's internal :attr:_np_random generator; if it is not yet set, it will be initialised with a random seed.

    +
  • +
  • +

    render_mode

    +
  • +
  • +

    spec

    +
  • +
  • +

    unwrapped

    +

    Returns the base non-wrapped environment, i.e. the underlying gym.Env instance.

    +
  • +
+
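
Examples

A minimal usage sketch, mirroring the CandyCaneContest example; the environment id 'river_bandits/KArmedTestbed-v0' and the max_episode_steps argument are assumptions made here for illustration:

import gym
from river import stats

env = gym.make('river_bandits/KArmedTestbed-v0', max_episode_steps=100)
_ = env.reset(seed=42)
_ = env.action_space.seed(123)

metric = stats.Sum()
while True:
    arm = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(arm)
    metric = metric.update(reward)
    if terminated or truncated:
        break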

Methods

+
+close +

Override close in your subclass to perform any necessary cleanup.

+

Environments will automatically :meth:close() themselves when garbage collected or when the program exits.

+
+

+
+render +

Compute the render frames as specified by render_mode attribute during initialization of the environment.

+

The set of supported modes varies per environment (and some third-party environments may not support rendering at all). By convention, if render_mode is:

  • None (default): no render is computed.
  • human: render returns None. The environment is continuously rendered in the current display or terminal, usually for human consumption.
  • rgb_array: return a single frame representing the current state of the environment. A frame is a numpy.ndarray with shape (x, y, 3) representing RGB values for an x-by-y pixel image.
  • rgb_array_list: return a list of frames representing the states of the environment since the last reset. Each frame is a numpy.ndarray with shape (x, y, 3), as with rgb_array.
  • ansi: return a string (str) or StringIO.StringIO containing a terminal-style text representation for each time step. The text can include newlines and ANSI escape sequences (e.g. for colors).

Note: make sure that your class's metadata 'render_modes' key includes the list of supported modes. It's recommended to call super() in implementations to use the functionality of this method.

+
+

+
+reset +

Resets the environment to an initial state and returns the initial observation.

+

This method can reset the environment's random number generator(s) if seed is an integer or if the environment has not yet initialized a random number generator. If the environment already has a random number generator and :meth:reset is called with seed=None, the RNG should not be reset. Moreover, :meth:reset should (in the typical use case) be called with an integer seed right after initialization and then never again.

Args:

  • seed (optional int): The seed that is used to initialize the environment's PRNG. If the environment does not already have a PRNG and seed=None (the default option) is passed, a seed will be chosen from some source of entropy (e.g. timestamp or /dev/urandom). However, if the environment already has a PRNG and seed=None is passed, the PRNG will not be reset. If you pass an integer, the PRNG will be reset even if it already exists. Usually, you want to pass an integer right after the environment has been initialized and then never again. Please refer to the minimal example above to see this paradigm in action.
  • options (optional dict): Additional information to specify how the environment is reset (optional, depending on the specific environment).

Returns:

  • observation (object): Observation of the initial state. This will be an element of :attr:observation_space (typically a numpy array) and is analogous to the observation returned by :meth:step.
  • info (dictionary): This dictionary contains auxiliary information complementing observation. It should be analogous to the info returned by :meth:step.

+

Parameters

+
    +
  • seedOptional[int] — defaults to None
  • +
  • optionsOptional[dict] — defaults to None
  • +
+
+

+
+step +

Run one timestep of the environment's dynamics.

+

When the end of an episode is reached, you are responsible for calling :meth:reset to reset this environment's state. Accepts an action and returns a tuple (observation, reward, terminated, truncated, info).

Args:

  • action (ActType): an action provided by the agent.

Returns:

  • observation (object): this will be an element of the environment's :attr:observation_space. This may, for instance, be a numpy array containing the positions and velocities of certain objects.
  • reward (float): The amount of reward returned as a result of taking the action.
  • terminated (bool): whether a terminal state (as defined under the MDP of the task) is reached. In this case further step() calls could return undefined results.
  • truncated (bool): whether a truncation condition outside the scope of the MDP is satisfied. Typically a timelimit, but could also be used to indicate an agent physically going out of bounds. Can be used to end the episode prematurely before a terminal state is reached.
  • info (dictionary): info contains auxiliary diagnostic information (helpful for debugging, learning, and logging). This might, for instance, contain: metrics that describe the agent's performance state, variables that are hidden from observations, or individual reward terms that are combined to produce the total reward. It also can contain information that distinguishes truncation and termination; however, this is deprecated in favour of returning two booleans, and will be removed in a future version.
  • (deprecated) done (bool): A boolean value for whether the episode has ended, in which case further :meth:step calls will return undefined results. A done signal may be emitted for different reasons: maybe the task underlying the environment was solved successfully, a certain timelimit was exceeded, or the physics simulation has entered an invalid state.

+

Parameters

+
    +
  • arm
  • +
+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/bandit/evaluate-offline/index.html b/0.19.0/api/bandit/evaluate-offline/index.html new file mode 100644 index 0000000000..db8b0fefe7 --- /dev/null +++ b/0.19.0/api/bandit/evaluate-offline/index.html @@ -0,0 +1,3721 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + evaluate_offline - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

evaluate_offline

+

Evaluate a policy on historical logs using replay.

+

This is a high-level utility function for evaluating a policy using the replay methodology. Replay is an off-policy evaluation method: it does not require an environment, and is instead data-driven.

+

At each step, an arm is pulled from the provided policy. If the arm is the same as the arm that was pulled in the historical data, the reward is used to update the policy. If the arm is different, the reward is ignored. This is the off-policy aspect of the evaluation.
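
The following sketch spells out this replay loop; it is illustrative rather than the actual implementation, and assumes history yields (arm_ids, context, logged_arm, reward) tuples, as in the example below:

def replay(policy, history):
    total_reward = 0.0
    n_samples_used = 0
    for arm_ids, context, logged_arm, reward in history:
        arm = next(policy.pull(arm_ids))
        # Only the steps where the policy agrees with the log are usable.
        if arm == logged_arm:
            policy.update(arm, reward)
            total_reward += reward
            n_samples_used += 1
    return total_reward, n_samples_used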

+

Parameters

+ +

Examples

+

import random
+from river import bandit
+
+rng = random.Random(42)
+arms = ['A', 'B', 'C']
+clicks = [
+    (
+        arms,
+        # no context
+        None,
+        # random arm
+        rng.choice(arms),
+        # reward
+        rng.random() > 0.5
+    )
+    for _ in range(1000)
+]
+
+total_reward, n_samples_used = bandit.evaluate_offline(
+    policy=bandit.EpsilonGreedy(0.1, seed=42),
+    history=clicks,
+)
+
+total_reward
+
+
Sum: 172.
+

+

n_samples_used
+
+
321
+

+

This also works out of the box with datasets that inherit from river.bandit.BanditDataset.

+

news = bandit.datasets.NewsArticles()
+total_reward, n_samples_used = bandit.evaluate_offline(
+    policy=bandit.RandomPolicy(seed=42),
+    history=news,
+)
+
+total_reward, n_samples_used
+
+
(Sum: 105., 1027)
+

+

As expected, the random policy's chosen arm matches the logged arm in roughly 10% of cases. Indeed, there are 10 arms and 10000 samples, so the expected number of matched samples is 10000 / 10 = 1000.

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/bandit/evaluate/index.html b/0.19.0/api/bandit/evaluate/index.html new file mode 100644 index 0000000000..e2c4a410b5 --- /dev/null +++ b/0.19.0/api/bandit/evaluate/index.html @@ -0,0 +1,3737 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + evaluate - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

evaluate

+

Benchmark a list of policies on a given Gym environment.

+

This is a high-level utility function for benchmarking a list of policies on a given Gym environment. For example, it can be used to populate a pandas.DataFrame with the contents of each step of each episode.

+

Parameters

+
    +
  • +

    policies

    +

    Typelist[bandit.base.Policy]

    +

    A list of policies to evaluate. The policy will be reset before each episode.

    +
  • +
  • +

    env

    +

    Typegym.Env

    +

    The Gym environment to use. One copy will be made for each policy at the beginning of each episode.

    +
  • +
  • +

    reward_stat

    +

    Typestats.base.Univariate | None

    +

    DefaultNone

    +

    A univariate statistic to keep track of the rewards. This statistic will be reset before each episode. Note that this is not the same as the reward object used by the policies. It's just a statistic to keep track of each policy's performance. If None, stats.Sum is used.

    +
  • +
  • +

    n_episodes

    +

    Typeint

    +

    Default20

    +

    The number of episodes to run.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number generator seed for reproducibility. A random number generator will be used to seed differently the environment before each episode.

    +
  • +
+

Examples

+

import gym
+from river import bandit
+
+trace = bandit.evaluate(
+    policies=[
+        bandit.UCB(delta=1, seed=42),
+        bandit.EpsilonGreedy(epsilon=0.1, seed=42),
+    ],
+    env=gym.make(
+        'river_bandits/CandyCaneContest-v0',
+        max_episode_steps=100
+    ),
+    n_episodes=5,
+    seed=42
+)
+
+for step in trace:
+    print(step)
+    break
+
+
{'episode': 0, 'step': 0, 'policy_idx': 0, 'arm': 81, 'reward': 0.0, 'reward_stat': 0.0}
+

+

The return type of this function is a generator. Each step of the generator is a dictionary. +You can pass the generator to a pandas.DataFrame to get a nice representation of the results.

+

import pandas as pd
+
+trace = bandit.evaluate(
+    policies=[
+        bandit.UCB(delta=1, seed=42),
+        bandit.EpsilonGreedy(epsilon=0.1, seed=42),
+    ],
+    env=gym.make(
+        'river_bandits/CandyCaneContest-v0',
+        max_episode_steps=100
+    ),
+    n_episodes=5,
+    seed=42
+)
+
+trace_df = pd.DataFrame(trace)
+trace_df.sample(5, random_state=42)
+
+
     episode  step  policy_idx  arm  reward  reward_stat
+521        2    60           1   25     0.0         36.0
+737        3    68           1   40     1.0         20.0
+740        3    70           0   58     0.0         36.0
+660        3    30           0   31     1.0         16.0
+411        2     5           1   35     1.0          5.0
+

+

The length of the dataframe is the number of policies times the number of episodes times the +maximum number of steps per episode.

+

len(trace_df)
+
+
1000
+

+

(
+    trace_df.policy_idx.nunique() *
+    trace_df.episode.nunique() *
+    trace_df.step.nunique()
+)
+
+
1000
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/Base/index.html b/0.19.0/api/base/Base/index.html new file mode 100644 index 0000000000..9a3f605c62 --- /dev/null +++ b/0.19.0/api/base/Base/index.html @@ -0,0 +1,3755 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Base - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Base

+

Base class that is inherited by the majority of classes in River.

+

This base class allows us to handle the following tasks in a uniform manner:

+
    +
  • +

    Getting and setting parameters

    +
  • +
  • +

    Displaying information

    +
  • +
  • +

    Mutating/cloning

    +
  • +
+

Methods

+
+clone +

Return a fresh estimator with the same parameters.

+

The clone has the same parameters but has not been updated with any data. This works by looking at the parameters from the class signature. Each parameter is either:

  • recursively cloned if it's a class;
  • deep-copied via copy.deepcopy if not.

If the calling object is stochastic (i.e. it accepts a seed parameter) and has not been seeded, then the clone will not be idempotent. Indeed, this method's purpose is simply to return a new instance with the same input parameters; a short sketch is given below the parameter list.

+

Parameters

+
    +
  • new_params'dict | None' — defaults to None
  • +
  • include_attributes — defaults to False
  • +
+
+
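
A minimal sketch of what cloning guarantees, assuming linear_model.LogisticRegression, whose l2 parameter is used purely for illustration:

from river import linear_model

model = linear_model.LogisticRegression(l2=0.1)
model = model.learn_one({'x': 1.0}, True)

clone = model.clone()
# The clone keeps the input parameters but none of the learned state.
assert clone.l2 == model.l2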

+
+mutate +

Modify attributes.

+

This changes parameters inplace. Although you can change attributes yourself, this is the recommended way to proceed. By default, all attributes are immutable, meaning they shouldn't be mutated. Calling mutate on an immutable attribute raises a ValueError. Mutable attributes are specified via the _mutable_attributes property, and are thus specified on a per-estimator basis.

+

Parameters

+
    +
  • new_attrs'dict'
  • +
+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/BinaryDriftAndWarningDetector/index.html b/0.19.0/api/base/BinaryDriftAndWarningDetector/index.html new file mode 100644 index 0000000000..b5b5d9a9ea --- /dev/null +++ b/0.19.0/api/base/BinaryDriftAndWarningDetector/index.html @@ -0,0 +1,3758 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BinaryDriftAndWarningDetector - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

BinaryDriftAndWarningDetector

+

A binary drift detector that is also capable of issuing warnings.

+

Attributes

+
    +
  • +

    drift_detected

    +

    Whether or not a drift is detected following the last update.

    +
  • +
  • +

    warning_detected

    +

    Whether or not a warning is detected following the last update.

    +
  • +
+

Methods

+
+update +

Update the detector with a single boolean input.

+

Parameters

+
    +
  • x'bool'
  • +
+

Returns

+

BinaryDriftDetector: self

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/BinaryDriftDetector/index.html b/0.19.0/api/base/BinaryDriftDetector/index.html new file mode 100644 index 0000000000..b8688b6cd5 --- /dev/null +++ b/0.19.0/api/base/BinaryDriftDetector/index.html @@ -0,0 +1,3754 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BinaryDriftDetector - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

BinaryDriftDetector

+

A drift detector for binary data.

+

Attributes

+
    +
  • +

    drift_detected

    +

    Whether or not a drift is detected following the last update.

    +
  • +
+
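
A minimal usage sketch, assuming the DDM detector from river's drift.binary module (any concrete binary drift detector follows the same pattern):

from river import drift

detector = drift.binary.DDM()

# A stream of booleans, e.g. whether each prediction was wrong.
data = [False] * 1000 + [True] * 1000
for i, x in enumerate(data):
    detector = detector.update(x)
    if detector.drift_detected:
        print(f'Change detected at index {i}')
        break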

Methods

+
+update +

Update the detector with a single boolean input.

+

Parameters

+
    +
  • x'bool'
  • +
+

Returns

+

BinaryDriftDetector: self

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/Classifier/index.html b/0.19.0/api/base/Classifier/index.html new file mode 100644 index 0000000000..ff367e79b7 --- /dev/null +++ b/0.19.0/api/base/Classifier/index.html @@ -0,0 +1,3757 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Classifier - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Classifier

+

A classifier.

+
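
A minimal sketch of this interface, assuming linear_model.LogisticRegression as the concrete classifier (chosen purely for illustration):

from river import linear_model

model = linear_model.LogisticRegression()

x, y = {'x1': 1.0, 'x2': 0.5}, True
model = model.learn_one(x, y)

model.predict_one(x)        # a label, e.g. True
model.predict_proba_one(x)  # e.g. {False: 0.49..., True: 0.50...}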

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict[base.typing.ClfTarget, float]: A dictionary that associates a probability with each label.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/Clusterer/index.html b/0.19.0/api/base/Clusterer/index.html new file mode 100644 index 0000000000..558f05eae7 --- /dev/null +++ b/0.19.0/api/base/Clusterer/index.html @@ -0,0 +1,3744 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Clusterer - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Clusterer

+

A clustering model.

+

Methods

+
+learn_one +

Update the model with a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Clusterer: self

+
+

+
+predict_one +

Predicts the cluster number for a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

int: A cluster number.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/DriftAndWarningDetector/index.html b/0.19.0/api/base/DriftAndWarningDetector/index.html new file mode 100644 index 0000000000..c957155a00 --- /dev/null +++ b/0.19.0/api/base/DriftAndWarningDetector/index.html @@ -0,0 +1,3758 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DriftAndWarningDetector - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

DriftAndWarningDetector

+

A drift detector that is also capable of issuing warnings.

+

Attributes

+
    +
  • +

    drift_detected

    +

    Whether or not a drift is detected following the last update.

    +
  • +
  • +

    warning_detected

    +

    Whether or not a warning is detected following the last update.

    +
  • +
+

Methods

+
+update +

Update the detector with a single data point.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+

Returns

+

DriftDetector: self

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/DriftDetector/index.html b/0.19.0/api/base/DriftDetector/index.html new file mode 100644 index 0000000000..912a9f26dd --- /dev/null +++ b/0.19.0/api/base/DriftDetector/index.html @@ -0,0 +1,3754 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DriftDetector - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

DriftDetector

+

A drift detector.

+

Attributes

+
    +
  • +

    drift_detected

    +

    Whether or not a drift is detected following the last update.

    +
  • +
+
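
A minimal usage sketch, assuming the ADWIN detector (any concrete drift detector follows the same pattern):

from river import drift

detector = drift.ADWIN()

# A stream whose mean shifts abruptly halfway through.
data = [0.0] * 1000 + [1.0] * 1000
for i, x in enumerate(data):
    detector = detector.update(x)
    if detector.drift_detected:
        print(f'Change detected at index {i}')
        break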

Methods

+
+update +

Update the detector with a single data point.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+

Returns

+

DriftDetector: self

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/Ensemble/index.html b/0.19.0/api/base/Ensemble/index.html new file mode 100644 index 0000000000..b2260c4a75 --- /dev/null +++ b/0.19.0/api/base/Ensemble/index.html @@ -0,0 +1,3843 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Ensemble - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Ensemble

+

An ensemble is a model which is composed of a list of models.

+

Parameters

+
    +
  • +

    models

    +

    TypeIterator[Estimator]

    +
  • +
+

Attributes

+
    +
  • models
  • +
+
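
Since an ensemble is composed of a list of models, it supports the usual sequence operations listed below. A minimal sketch, assuming ensemble.BaggingClassifier and tree.HoeffdingTreeClassifier (chosen purely for illustration):

from river import ensemble, tree

model = ensemble.BaggingClassifier(
    model=tree.HoeffdingTreeClassifier(),
    n_models=3,
    seed=42
)

len(model)  # 3, one entry per copy of the tree
model[0]    # the first copy, a HoeffdingTreeClassifier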

Methods

+
+append +

S.append(value) -- append value to the end of the sequence

+

Parameters

+
    +
  • item
  • +
+
+

+
+clear +

S.clear() -> None -- remove all items from S

+
+

+
+copy +
+
+count +

S.count(value) -> integer -- return number of occurrences of value

+

Parameters

+
    +
  • item
  • +
+
+

+
+extend +

S.extend(iterable) -- extend sequence by appending elements from the iterable

+

Parameters

+
    +
  • other
  • +
+
+

+
+index +

S.index(value, [start, [stop]]) -> integer -- return first index of value. Raises ValueError if the value is not present.

+

Supporting start and stop arguments is optional, but recommended.

+

Parameters

+
    +
  • item
  • +
  • args
  • +
+
+

+
+insert +

S.insert(index, value) -- insert value before index

+

Parameters

+
    +
  • i
  • +
  • item
  • +
+
+

+
+pop +

S.pop([index]) -> item -- remove and return item at index (default last). Raise IndexError if list is empty or index is out of range.

+

Parameters

+
    +
  • i — defaults to -1
  • +
+
+

+
+remove +

S.remove(value) -- remove first occurrence of value. Raise ValueError if the value is not present.

+

Parameters

+
    +
  • item
  • +
+
+

+
+reverse +

S.reverse() -- reverse IN PLACE

+
+

+
+sort +
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/Estimator/index.html b/0.19.0/api/base/Estimator/index.html new file mode 100644 index 0000000000..aa8ad2308e --- /dev/null +++ b/0.19.0/api/base/Estimator/index.html @@ -0,0 +1,3722 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Estimator - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Estimator

+

An estimator.

+

Methods

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/MiniBatchClassifier/index.html b/0.19.0/api/base/MiniBatchClassifier/index.html new file mode 100644 index 0000000000..4709de2fb3 --- /dev/null +++ b/0.19.0/api/base/MiniBatchClassifier/index.html @@ -0,0 +1,3791 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MiniBatchClassifier - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

MiniBatchClassifier

+

A classifier that can operate on mini-batches.

+
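
A minimal sketch of the mini-batch API, assuming linear_model.LogisticRegression, which supports mini-batches (chosen purely for illustration):

import pandas as pd
from river import linear_model

model = linear_model.LogisticRegression()

X = pd.DataFrame({'x1': [1.0, 2.0, 3.0], 'x2': [0.5, 0.1, 0.9]})
y = pd.Series([True, False, True])

model = model.learn_many(X, y)
model.predict_many(X)        # a pd.Series of predicted labels
model.predict_proba_many(X)  # a pd.DataFrame with one column per label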

Methods

+
+learn_many +

Update the model with a mini-batch of features X and boolean targets y.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
  • y'pd.Series'
  • +
+

Returns

+

MiniBatchClassifier: self

+
+

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_many +

Predict the outcome for each given sample.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.Series: The predicted labels.

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_many +

Predict the outcome probabilities for each given sample.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.DataFrame: A dataframe with probabilities of True and False for each sample.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict[base.typing.ClfTarget, float]: A dictionary that associates a probability with each label.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/MiniBatchRegressor/index.html b/0.19.0/api/base/MiniBatchRegressor/index.html new file mode 100644 index 0000000000..9f6e2fede5 --- /dev/null +++ b/0.19.0/api/base/MiniBatchRegressor/index.html @@ -0,0 +1,3768 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MiniBatchRegressor - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

MiniBatchRegressor

+

A regressor that can operate on mini-batches.

+
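
A minimal sketch of the mini-batch API, assuming linear_model.LinearRegression, which supports mini-batches (chosen purely for illustration):

import pandas as pd
from river import linear_model

model = linear_model.LinearRegression()

X = pd.DataFrame({'x1': [1.0, 2.0, 3.0], 'x2': [0.5, 0.1, 0.9]})
y = pd.Series([1.5, 2.1, 3.9])

model = model.learn_many(X, y)
model.predict_many(X)  # a pd.Series of predictions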

Methods

+
+learn_many +

Update the model with a mini-batch of features X and real-valued targets y.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
  • y'pd.Series'
  • +
+

Returns

+

MiniBatchRegressor: self

+
+

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.RegTarget'
  • +
+

Returns

+

Regressor: self

+
+

+
+predict_many +

Predict the outcome for each given sample.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.Series: The predicted outcomes.

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

base.typing.RegTarget: The prediction.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/MiniBatchSupervisedTransformer/index.html b/0.19.0/api/base/MiniBatchSupervisedTransformer/index.html new file mode 100644 index 0000000000..12af1821b6 --- /dev/null +++ b/0.19.0/api/base/MiniBatchSupervisedTransformer/index.html @@ -0,0 +1,3768 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MiniBatchSupervisedTransformer - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

MiniBatchSupervisedTransformer

+

A supervised transformer that can operate on mini-batches.

+

Methods

+
+learn_many +

Update the model with a mini-batch of features X and targets y.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
  • y'pd.Series'
  • +
+

Returns

+

MiniBatchSupervisedTransformer: self

+
+

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do need to do something during learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_many +

Transform a mini-batch of features.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.DataFrame: A new DataFrame.

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/MiniBatchTransformer/index.html b/0.19.0/api/base/MiniBatchTransformer/index.html new file mode 100644 index 0000000000..4b6ac93b2f --- /dev/null +++ b/0.19.0/api/base/MiniBatchTransformer/index.html @@ -0,0 +1,3768 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MiniBatchTransformer - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

MiniBatchTransformer

+

A transformer that can operate on mini-batches.

+
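
A minimal sketch, assuming preprocessing.StandardScaler, which supports mini-batches (chosen purely for illustration):

import pandas as pd
from river import preprocessing

scaler = preprocessing.StandardScaler()

X = pd.DataFrame({'x1': [1.0, 2.0, 3.0], 'x2': [10.0, 20.0, 30.0]})

scaler = scaler.learn_many(X)
scaler.transform_many(X)  # a new DataFrame with standardised values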

Methods

+
+learn_many +

Update with a mini-batch of features.

+

A lot of transformers don't actually have to do anything during the learn_many step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do need to do something during learn_many can override this method.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

Transformer: self

+
+

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do need to do something during learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_many +

Transform a mini-batch of features.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.DataFrame: A new DataFrame.

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/MultiLabelClassifier/index.html b/0.19.0/api/base/MultiLabelClassifier/index.html new file mode 100644 index 0000000000..36b4dc128e --- /dev/null +++ b/0.19.0/api/base/MultiLabelClassifier/index.html @@ -0,0 +1,3758 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MultiLabelClassifier - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

MultiLabelClassifier

+

Multi-label classifier.

+

Methods

+
+learn_one +

Update the model with a set of features x and the labels y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'dict[FeatureName, bool]'
  • +
+

Returns

+

MultiLabelClassifier: self

+
+

+
+predict_one +

Predict the labels of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

dict[FeatureName, bool]: The predicted labels.

+
+

+
+predict_proba_one +

Predict the probability of each label appearing, given a dictionary of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

dict[FeatureName, dict[bool, float]]: A dictionary that associates a probability with each label.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/MultiTargetRegressor/index.html b/0.19.0/api/base/MultiTargetRegressor/index.html new file mode 100644 index 0000000000..a4301afa12 --- /dev/null +++ b/0.19.0/api/base/MultiTargetRegressor/index.html @@ -0,0 +1,3746 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MultiTargetRegressor - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

MultiTargetRegressor

+

Multi-target regressor.

+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'dict[FeatureName, RegTarget]'
  • +
  • kwargs
  • +
+

Returns

+

MultiTargetRegressor: self

+
+

+
+predict_one +

Predict the outputs of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict[FeatureName, RegTarget]: The predictions.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/Regressor/index.html b/0.19.0/api/base/Regressor/index.html new file mode 100644 index 0000000000..edc736c2fb --- /dev/null +++ b/0.19.0/api/base/Regressor/index.html @@ -0,0 +1,3745 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Regressor - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Regressor

+

A regressor.

+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.RegTarget'
  • +
+

Returns

+

Regressor: self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

base.typing.RegTarget: The prediction.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/SupervisedTransformer/index.html b/0.19.0/api/base/SupervisedTransformer/index.html new file mode 100644 index 0000000000..18c56a24ba --- /dev/null +++ b/0.19.0/api/base/SupervisedTransformer/index.html @@ -0,0 +1,3745 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SupervisedTransformer - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

SupervisedTransformer

+

A supervised transformer.

+

Methods

+
+learn_one +

Update with a set of features x and a target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.Target'
  • +
+

Returns

+

SupervisedTransformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/Transformer/index.html b/0.19.0/api/base/Transformer/index.html new file mode 100644 index 0000000000..a529290ec0 --- /dev/null +++ b/0.19.0/api/base/Transformer/index.html @@ -0,0 +1,3745 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Transformer - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Transformer

+

A transformer.

+
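
A minimal sketch of a custom transformer; the Doubler class below is hypothetical and purely illustrative:

from river import base

class Doubler(base.Transformer):

    # Stateless, so learn_one keeps the default no-op behaviour.
    def transform_one(self, x):
        return {k: 2 * v for k, v in x.items()}

Doubler().transform_one({'x': 3})  # {'x': 6}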

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do need to do something during learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/Wrapper/index.html b/0.19.0/api/base/Wrapper/index.html new file mode 100644 index 0000000000..74775ea739 --- /dev/null +++ b/0.19.0/api/base/Wrapper/index.html @@ -0,0 +1,3669 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Wrapper - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Wrapper

+

A wrapper model.

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/base/WrapperEnsemble/index.html b/0.19.0/api/base/WrapperEnsemble/index.html new file mode 100644 index 0000000000..ed58e38a70 --- /dev/null +++ b/0.19.0/api/base/WrapperEnsemble/index.html @@ -0,0 +1,3769 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + WrapperEnsemble - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

WrapperEnsemble

+

A wrapper ensemble is an ensemble composed of multiple copies of the same model.

+

Parameters

+
    +
  • +

    model

    +

    The model to copy.

    +
  • +
  • +

    n_models

    +

    The number of copies to make.

    +
  • +
  • +

    seed

    +

    Random number generator seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • models
  • +
+

Methods

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/cluster/CluStream/index.html b/0.19.0/api/cluster/CluStream/index.html new file mode 100644 index 0000000000..58300d3f24 --- /dev/null +++ b/0.19.0/api/cluster/CluStream/index.html @@ -0,0 +1,3634 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CluStream - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

CluStream

+

CluStream

+

The CluStream algorithm 1 maintains statistical information about the data using micro-clusters. These micro-clusters are temporal extensions of cluster feature vectors. The micro-clusters are stored at snapshots in time following a pyramidal pattern. This pattern makes it possible to recall summary statistics from different time horizons.

+

Training with a new point p is performed in two main steps:

+
    +
  • +

    Determine the closest micro-cluster to p.

    +
  • +
  • +

    Check whether p fits into the closest micro-cluster, given the memory constraints:

    +
      +
    • +

      if p fits, add it to that micro-cluster

      +
    • +
    • +

      if p does not fit, free some space to insert a new micro-cluster.

      +
    • +
    +

    This is done in one of two ways: deleting an old micro-cluster, or merging the two micro-clusters closest to each other.

    +
  • +
+

This implementation is an improved version of the original algorithm. Instead of calculating the traditional cluster feature vector (the number of observations, the linear sum, and the sum of squares of data points and time stamps), this implementation uses Welford's algorithm 2 to calculate the variance incrementally, via stats.Var, available within River.

+

Since River does not support an actual "off-line" phase of the clustering algorithm (as data points are assumed to arrive continuously, one at a time), a time_gap parameter is introduced. After each time_gap, an incremental K-Means clustering algorithm will be initialized and applied on currently available micro-clusters to form the final solution, i.e. macro-clusters.

+

Parameters

+
    +
  • +

    n_macro_clusters

    +

    Typeint

    +

    Default5

    +

    The number of clusters (k) for the k-means algorithm.

    +
  • +
  • +

    max_micro_clusters

    +

    Typeint

    +

    Default100

    +

    The maximum number of micro-clusters to use.

    +
  • +
  • +

    micro_cluster_r_factor

    +

    Typeint

    +

    Default2

    +

    Multiplier for the micro-cluster radius. When deciding whether to add a new data point to a micro-cluster, the maximum boundary is defined as micro_cluster_r_factor times the RMS deviation of the micro-cluster's data points from the centroid.

    +
  • +
  • +

    time_window

    +

    Typeint

    +

    Default1000

    +

    If the current time is T and the time window is h, we only consider the data that arrived within the period (T-h,T).

    +
  • +
  • +

    time_gap

    +

    Typeint

    +

    Default100

    +

    An incremental k-means is applied on the current set of micro-clusters after each time_gap to form the final macro-cluster solution.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed used for generating initial centroid positions.

    +
  • +
  • +

    kwargs

    +

    Other parameters passed to the incremental kmeans at cluster.KMeans.

    +
  • +
+

Attributes

+
    +
  • +

    centers (dict)

    +

    Central positions of each cluster.

    +
  • +
+

Examples

+

In the following example, max_micro_clusters is set relatively low due to the +limited number of training points. Moreover, all points are learnt before any predictions are made. +The halflife is set at 0.4, to show that you can pass cluster.KMeans parameters via keyword arguments.

+

from river import cluster
+from river import stream
+
+X = [
+    [1, 2],
+    [1, 4],
+    [1, 0],
+    [-4, 2],
+    [-4, 4],
+    [-4, 0],
+    [5, 0],
+    [5, 2],
+    [5, 4]
+]
+
+clustream = cluster.CluStream(
+    n_macro_clusters=3,
+    max_micro_clusters=5,
+    time_gap=3,
+    seed=0,
+    halflife=0.4
+)
+
+for x, _ in stream.iter_array(X):
+    clustream = clustream.learn_one(x)
+
+clustream.predict_one({0: 1, 1: 1})
+
+
1
+

+

clustream.predict_one({0: -4, 1: 3})
+
+
2
+

+

clustream.predict_one({0: 4, 1: 3.5})
+
+
0
+

+

Methods

+
+learn_one +

Update the model with a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • w — defaults to 1.0
  • +
+

Returns

+

Clusterer: self

+
+

+
+predict_one +

Predicts the cluster number for a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

int: A cluster number.

+
+

+
+
+
    +
  1. +

    Aggarwal, C.C., Philip, S.Y., Han, J. and Wang, J., 2003, A framework for clustering evolving data +streams. In Proceedings 2003 VLDB conference (pp. 81-92). Morgan Kaufmann. 

    +
  2. +
  3. +

    Chan, T.F., Golub, G.H. and LeVeque, R.J., 1982. Updating formulae and a pairwise algorithm for +computing sample variances. In COMPSTAT 1982 5th Symposium held at Toulouse 1982 (pp. 30-41). +Physica, Heidelberg. https://doi.org/10.1007/978-3-642-51461-6_3. 

    +
  4. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/cluster/DBSTREAM/index.html b/0.19.0/api/cluster/DBSTREAM/index.html new file mode 100644 index 0000000000..3caaf2d160 --- /dev/null +++ b/0.19.0/api/cluster/DBSTREAM/index.html @@ -0,0 +1,3635 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DBSTREAM - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

DBSTREAM

+

DBSTREAM

+

DBSTREAM 1 is a clustering algorithm for evolving data streams. It is the first micro-cluster-based online clustering component that explicitly captures the density between micro-clusters via a shared density graph. The density information in the graph is then exploited for reclustering, based on the actual density between adjacent micro-clusters.

+

The algorithm is divided into two parts:

+

Online micro-cluster maintenance (learning)

+

For a new point p:

+
    +
  • +

    Find all micro clusters for which p falls within the fixed radius (clustering threshold).

    +
  • +
  • +

    If no neighbor is found, a new micro cluster with a weight of 1 is created for p. If one or more neighbors of p are found, we update the micro clusters by applying the appropriate fading, increasing their weight and then we try to move them closer to p using the Gaussian neighborhood function.

    +
  • +
  • +

    Next, the shared density graph is updated. To prevent collapsing micro clusters, we will restrict the movement for micro clusters in case they come closer than \(r\) (clustering threshold) to each other. Finishing this process, the time stamp is also increased by 1.

    +
  • +
  • +

    Finally, the cleanup will be processed. It is executed every t_gap time steps, removing weak micro clusters and weak entries in the shared density graph to recover memory and improve the clustering algorithm's processing speed.

    +
  • +
+

Offline generation of macro clusters (clustering)

+

The offline generation of macro clusters is generated through the two following steps:

+
    +
  • +

    The connectivity graph C is constructed using shared density entries between strong micro clusters. The edges in this connectivity graph with a connectivity value greater than the intersection threshold (\(\alpha\)) are used to find connected components representing the final cluster.

    +
  • +
  • +

    After the connectivity graph is generated, a variant of the DBSCAN algorithm proposed by Ester et al. is applied to form all macro clusters from \(\alpha\)-connected micro clusters.

    +
  • +
+

Parameters

+
    +
  • +

    clustering_threshold

    +

    Typefloat

    +

    Default1.0

    +

    DBStream represents each micro cluster by a leader (a data point defining the micro cluster's center) and the density in an area of a user-specified radius \(r\) (clustering_threshold) around the center.

    +
  • +
  • +

    fading_factor

    +

    Typefloat

    +

    Default0.01

    +

    Parameter that controls the importance of historical data to the current cluster. Note that fading_factor has to be different from 0.

    +
  • +
  • +

    cleanup_interval

    +

    Typefloat

    +

    Default2

    +

    The time interval between two consecutive time points when the cleanup process is conducted.

    +
  • +
  • +

    intersection_factor

    +

    Typefloat

    +

    Default0.3

    +

    The intersection factor related to the area of the overlap of the micro clusters relative to the area covered by micro clusters. This parameter is used to determine whether a micro cluster or a shared density is weak.

    +
  • +
  • +

    minimum_weight

    +

    Typefloat

    +

    Default1.0

    +

    The minimum weight for a cluster not to be considered "noisy".

    +
  • +
+

Attributes

+
    +
  • +

    n_clusters

    +

    Number of clusters generated by the algorithm.

    +
  • +
  • +

    clusters

    +

    A set of final clusters of type DBStreamMicroCluster. These are either micro clusters, or macro clusters generated by merging all \(\alpha\)-connected micro clusters. This set is generated through the offline phase of the algorithm.

    +
  • +
  • +

    centers

    +

    Final clusters' centers.

    +
  • +
  • +

    micro_clusters

    +

    Micro clusters generated by the algorithm. Rather than directly assigning new instance points to the nearest micro cluster, at each iteration the weights and centers are modified so that the clusters move closer to the new points, using the Gaussian neighborhood function.

    +
  • +
+

Examples

+

from river import cluster
+from river import stream
+
+X = [
+    [1, 0.5], [1, 0.625], [1, 0.75], [1, 1.125], [1, 1.5], [1, 1.75],
+    [4, 1.5], [4, 2.25], [4, 2.5], [4, 3], [4, 3.25], [4, 3.5]
+]
+
+dbstream = cluster.DBSTREAM(
+    clustering_threshold=1.5,
+    fading_factor=0.05,
+    cleanup_interval=4,
+    intersection_factor=0.5,
+    minimum_weight=1
+)
+
+for x, _ in stream.iter_array(X):
+    dbstream = dbstream.learn_one(x)
+
+dbstream.predict_one({0: 1, 1: 2})
+
+
0
+

+

dbstream.predict_one({0: 5, 1: 2})
+
+
1
+

+

dbstream._n_clusters
+
+
2
+

+

Methods

+
+learn_one +

Update the model with a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • sample_weight — defaults to None
  • +
+

Returns

+

Clusterer: self

+
+

+
+predict_one +

Predicts the cluster number for a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • sample_weight — defaults to None
  • +
+

Returns

+

int: A cluster number.

+
+

+
+
+
    +
  1. +

    Michael Hahsler and Matthew Bolanos (2016). Clustering Data Streams Based on Shared Density between Micro-Clusters. IEEE Transactions on Knowledge and Data Engineering 28(6), pp 1449-1461. 

    +
  2. +
  3. +

    Ester et al (1996). A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases + with Noise. In KDD-96 Proceedings, AAAI. 

    +
  4. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/cluster/DenStream/index.html b/0.19.0/api/cluster/DenStream/index.html new file mode 100644 index 0000000000..c5bb6921d8 --- /dev/null +++ b/0.19.0/api/cluster/DenStream/index.html @@ -0,0 +1,3637 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DenStream - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

DenStream

+

DenStream

+

DenStream 1 is a clustering algorithm for evolving data streams. DenStream can discover clusters with arbitrary shape and is robust against noise (outliers).

+

"Dense" micro-clusters (named core-micro-clusters) summarise the clusters of arbitrary shape. A pruning strategy based on the concepts of potential and outlier micro-clusters guarantees the precision of the weights of the micro-clusters with limited memory.

+

The algorithm is divided into two parts:

+

Online micro-cluster maintenance (learning)

+

For a new point p:

+
    +
  • +

    Try to merge p into either the nearest p-micro-cluster (potential), o-micro-cluster (outlier), or create a new o-micro-cluster and insert it into the outlier buffer.

    +
  • +
  • +

    Every T_p iterations, the weights of all potential and outlier micro-clusters are checked. If a weight is smaller than a certain threshold (different for each type of micro-cluster), the micro-cluster is deleted (a sketch of the weight threshold follows the list).

    +
  • +
+
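To make the weight thresholds concrete, here is a minimal sketch of the promotion criterion, assuming the standard DenStream rule that a micro-cluster is "potential" once its faded weight reaches beta * mu; this is an illustration, not River's internal code.

def is_potential(weight, beta, mu):
    # A micro-cluster counts as a p-micro-cluster once its faded
    # weight reaches beta * mu; below that it stays in the outlier buffer.
    return weight >= beta * mu

is_potential(2.0, beta=0.75, mu=2)  # True, since 2.0 >= 1.5
is_potential(1.2, beta=0.75, mu=2)  # False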

Offline generation of clusters on-demand (clustering)

+

A variant of the DBSCAN algorithm 2 is used, such that all density-connected p-micro-clusters determine the final clusters. Moreover, in order for the algorithm to always be able to generate clusters, a certain number of points must be passed through the algorithm with a suitable streaming speed (number of points passed through within a unit time), indicated by n_samples_init and stream_speed.

+

Parameters

+
    +
  • +

    decaying_factor

    +

    Typefloat

    +

    Default0.25

    +

    Parameter that controls the importance of historical data to the current clusters. Note that decaying_factor has to be different from 0.

    +
  • +
  • +

    beta

    +

    Typefloat

    +

    Default0.75

    +

    Parameter to determine the threshold of outlier relative to core micro-clusters. The value of beta must be within the range (0,1].

    +
  • +
  • +

    mu

    +

    Typefloat

    +

    Default2

    +

    Parameter to determine the threshold of outliers relative to core micro-cluster. As beta * mu must be greater than 1, mu must be within the range (1/beta, inf).

    +
  • +
  • +

    epsilon

    +

    Typefloat

    +

    Default0.02

    +

    Defines the epsilon neighborhood

    +
  • +
  • +

    n_samples_init

    +

    Typeint

    +

    Default1000

    +

    Number of points used to initialize the online process

    +
  • +
  • +

    stream_speed

    +

    Typeint

    +

    Default100

    +

    Number of points that arrive per unit time

    +
  • +
+

Attributes

+
    +
  • +

    n_clusters

    +

    Number of clusters generated by the algorithm.

    +
  • +
  • +

    clusters

    +

    A set of final clusters of type MicroCluster. These clusters include all the required information: number of points, creation time, weight, (weighted) linear sum, (weighted) square sum, center and radius.

    +
  • +
  • +

    p_micro_clusters

    +

    The potential core-micro-clusters generated by the algorithm. When a cluster generation request arrives, these p-micro-clusters go through a variant of the DBSCAN algorithm to determine the final clusters.

    +
  • +
  • +

    o_micro_clusters

    +

    The outlier micro-clusters.

    +
  • +
+

Examples

+

The following example sets the parameters of the algorithm so that its functionality can be tested on a small dataset. The set of evolving points X is designed so that clusters are easily identifiable.

+

from river import cluster
+from river import stream
+
+X = [
+    [-1, -0.5], [-1, -0.625], [-1, -0.75], [-1, -1], [-1, -1.125],
+    [-1, -1.25], [-1.5, -0.5], [-1.5, -0.625], [-1.5, -0.75], [-1.5, -1],
+    [-1.5, -1.125], [-1.5, -1.25], [1, 1.5], [1, 1.75], [1, 2],
+    [4, 1.25], [4, 1.5], [4, 2.25], [4, 2.5], [4, 3],
+    [4, 3.25], [4, 3.5], [4, 3.75], [4, 4],
+]
+
+denstream = cluster.DenStream(decaying_factor=0.01,
+                              beta=0.5,
+                              mu=2.5,
+                              epsilon=0.5,
+                              n_samples_init=10)
+
+for x, _ in stream.iter_array(X):
+    denstream = denstream.learn_one(x)
+
+denstream.predict_one({0: -1, 1: -2})
+
+
0
+

+

denstream.predict_one({0: 5, 1: 4})
+
+
1
+

+

denstream.predict_one({0: 1, 1: 1})
+
+
0
+

+

denstream.n_clusters
+
+
2
+

+

Methods

+
+BufferItem +
+
+learn_one +

Update the model with a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • sample_weight — defaults to None
  • +
+

Returns

+

Clusterer: self

+
+

+
+predict_one +

Predicts the cluster number for a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • sample_weight — defaults to None
  • +
+

Returns

+

int: A cluster number.

+
+

+
+
+
    +
  1. +

    Feng et al (2006, pp 328-339). Density-Based Clustering over an Evolving Data Stream with + Noise. In Proceedings of the Sixth SIAM International Conference on Data Mining, + April 20–22, 2006, Bethesda, MD, USA. 

    +
  2. +
  3. +

    Ester et al (1996). A Density-Based Algorithm for Discovering Clusters in Large Spatial + Databases with Noise. In KDD-96 Proceedings, AAAI. 

    +
  4. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/cluster/KMeans/index.html b/0.19.0/api/cluster/KMeans/index.html new file mode 100644 index 0000000000..000c9b489c --- /dev/null +++ b/0.19.0/api/cluster/KMeans/index.html @@ -0,0 +1,3606 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + KMeans - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

KMeans

+

Incremental k-means.

+

The most common way to implement batch k-means is to use Lloyd's algorithm, which consists in assigning all the data points to a set of cluster centers and then moving the centers accordingly. This requires multiple passes over the data and thus isn't applicable in a streaming setting.

+

In this implementation we start by finding the cluster that is closest to the current observation. We then move the cluster's central position towards the new observation. The halflife parameter determines by how much to move the cluster toward the new observation. You will get better results if you scale your data appropriately.

+
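To make the role of halflife concrete, here is a minimal sketch of the kind of update described above; it is an illustration, not River's internal code.

def nudge_center(center, x, halflife):
    # Move a cluster center a fraction of the way towards the new
    # observation: the larger halflife, the bigger the step.
    return {i: c + halflife * (x[i] - c) for i, c in center.items()}

nudge_center({0: 0.0, 1: 0.0}, {0: 1.0, 1: 2.0}, halflife=0.5)
# {0: 0.5, 1: 1.0}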

Parameters

+
    +
  • +

    n_clusters

    +

    Default5

    +

    Maximum number of clusters to assign.

    +
  • +
  • +

    halflife

    +

    Default0.5

    +

    Amount by which to move the cluster centers; a reasonable value is between 0 and 1.

    +
  • +
  • +

    mu

    +

    Default0

    +

    Mean of the normal distribution used to instantiate cluster positions.

    +
  • +
  • +

    sigma

    +

    Default1

    +

    Standard deviation of the normal distribution used to instantiate cluster positions.

    +
  • +
  • +

    p

    +

    Default2

    +

    Power parameter for the Minkowski metric. When p=1, this corresponds to the Manhattan distance, while p=2 corresponds to the Euclidean distance.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed used for generating initial centroid positions.

    +
  • +
+

Attributes

+
    +
  • +

    centers (dict)

    +

    Central positions of each cluster.

    +
  • +
+

Examples

+

In the following example the cluster assignments are exactly the same as when using +sklearn's batch implementation. However changing the halflife parameter will +produce different outputs.

+

from river import cluster
+from river import stream
+
+X = [
+    [1, 2],
+    [1, 4],
+    [1, 0],
+    [-4, 2],
+    [-4, 4],
+    [-4, 0]
+]
+
+k_means = cluster.KMeans(n_clusters=2, halflife=0.1, sigma=3, seed=42)
+
+for i, (x, _) in enumerate(stream.iter_array(X)):
+    k_means = k_means.learn_one(x)
+    print(f'{X[i]} is assigned to cluster {k_means.predict_one(x)}')
+
+
[1, 2] is assigned to cluster 1
+[1, 4] is assigned to cluster 1
+[1, 0] is assigned to cluster 0
+[-4, 2] is assigned to cluster 1
+[-4, 4] is assigned to cluster 1
+[-4, 0] is assigned to cluster 0
+

+

k_means.predict_one({0: 0, 1: 0})
+
+
0
+

+

k_means.predict_one({0: 4, 1: 4})
+
+
1
+

+

Methods

+
+learn_one +

Update the model with a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Clusterer: self

+
+

+
+learn_predict_one +

Equivalent to k_means.learn_one(x).predict_one(x), but faster.

+

Parameters

+
    +
  • x
  • +
+
+

+
+predict_one +

Predicts the cluster number for a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

int: A cluster number.

+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/cluster/STREAMKMeans/index.html b/0.19.0/api/cluster/STREAMKMeans/index.html new file mode 100644 index 0000000000..579fb47a1c --- /dev/null +++ b/0.19.0/api/cluster/STREAMKMeans/index.html @@ -0,0 +1,3585 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + STREAMKMeans - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

STREAMKMeans

+

STREAMKMeans

+

STREAMKMeans is an alternative version of the original algorithm STREAMLSEARCH proposed by O'Callaghan et al. 1, by replacing the k-medians using LSEARCH by the k-means algorithm.

+

However, instead of using the traditional k-means, which requires a total reclustering each time the temporary chunk of data points is full, the implementation of this algorithm uses an incremental k-means.

+

At first, the cluster centers are initialized with a KMeans instance. For a new point p:

+
    +
  • +

    If the size of the chunk is less than the maximum size allowed, add the new point to the temporary chunk.

    +
  • +
  • +

    When the size of the chunk reaches the maximum size allowed (a sketch of this step follows the list):

    +
      +
    • A new incremental KMeans instance is created. The latter will process all points in the temporary chunk. The centers of this new instance then become the new centers.
    • +

    +
      +
    • All points are deleted from the temporary chunk so that new points can be added.
    • +
    +
  • +
  • +

    When a prediction request arrives, the centers of the algorithm will be exactly the same as the centers of the original KMeans at the time of retrieval.

    +
  • +
+
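The chunking logic can be sketched as follows, using the public cluster.KMeans class. This is a simplified illustration: it glosses over how the previous centers are carried over to each new instance.

from river import cluster
from river import stream

chunk, chunk_size = [], 3
model = cluster.KMeans(n_clusters=2, halflife=0.5, seed=0)

X = [[1, 0.5], [1, 0.75], [4, 2.5], [4, 3], [1, 1], [4, 3.5]]

for x, _ in stream.iter_array(X):
    chunk.append(x)
    if len(chunk) == chunk_size:
        # Recluster the buffered points with a fresh incremental KMeans,
        # adopt its centers, then empty the buffer.
        new_model = cluster.KMeans(n_clusters=2, halflife=0.5, seed=0)
        for p in chunk:
            new_model = new_model.learn_one(p)
        model = new_model
        chunk = []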

Parameters

+
    +
  • +

    chunk_size

    +

    Default10

    +

    Maximum size allowed for the temporary data chunk.

    +
  • +
  • +

    n_clusters

    +

    Default2

    +

    Number of clusters generated by the algorithm.

    +
  • +
  • +

    kwargs

    +

    Other parameters passed to the incremental k-means; see cluster.KMeans.

    +
  • +
+

Attributes

+
    +
  • +

    centers

    +

    Cluster centers generated from running the incremental KMeans algorithm through centers of each chunk.

    +
  • +
+

Examples

+

from river import cluster
+from river import stream
+
+X = [
+    [1, 0.5], [1, 0.625], [1, 0.75], [1, 1.125], [1, 1.5], [1, 1.75],
+    [4, 1.5], [4, 2.25], [4, 2.5], [4, 3], [4, 3.25], [4, 3.5]
+]
+
+streamkmeans = cluster.STREAMKMeans(chunk_size=3, n_clusters=2, halflife=0.5, sigma=1.5, seed=0)
+
+for x, _ in stream.iter_array(X):
+    streamkmeans = streamkmeans.learn_one(x)
+
+streamkmeans.predict_one({0: 1, 1: 0})
+
+
0
+

+

streamkmeans.predict_one({0: 5, 1: 2})
+
+
1
+

+

Methods

+
+learn_one +

Update the model with a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • sample_weight — defaults to None
  • +
+

Returns

+

Clusterer: self

+
+

+
+predict_one +

Predicts the cluster number for a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • sample_weight — defaults to None
  • +
+

Returns

+

int: A cluster number.

+
+

+
+
+
    +
  1. +

    O'Callaghan et al. (2002). Streaming-data algorithms for high-quality clustering. + In Proceedings 18th International Conference on Data Engineering, Feb 26 - March 1, + San Jose, CA, USA. DOI: 10.1109/ICDE.2002.994785. 

    +
  2. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/cluster/TextClust/index.html b/0.19.0/api/cluster/TextClust/index.html new file mode 100644 index 0000000000..8cd3246c14 --- /dev/null +++ b/0.19.0/api/cluster/TextClust/index.html @@ -0,0 +1,3652 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + TextClust - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

TextClust

+

textClust, a clustering algorithm for text data.

+

textClust 12 is a stream clustering algorithm for textual data that can identify and track topics over time in a stream of texts. The algorithm uses a widely popular two-phase clustering approach where the stream is first summarised in real-time.

+

The result is many small preliminary clusters in the stream called micro-clusters. Micro-clusters maintain enough information to update and efficiently calculate the cosine similarity between them over time, based on the TF-IDF vector of their texts. Upon request, the micro-clusters can be reclustered to generate the final result using any distance-based clustering algorithm, such as hierarchical clustering. To keep the micro-clusters up-to-date, the algorithm applies a fading strategy where micro-clusters that are not updated regularly lose relevance and are eventually removed.

+
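The fading follows the exponential decay that is customary in this family of stream clustering algorithms; a minimal sketch, assuming a weight of the form 2 ** (-fading_factor * dt):

def faded_weight(weight, fading_factor, dt):
    # A micro-cluster that has not been updated for dt time steps
    # sees its weight decay exponentially.
    return weight * 2 ** (-fading_factor * dt)

faded_weight(1.0, fading_factor=0.0005, dt=100)  # ~0.966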

Parameters

+
    +
  • +

    radius

    +

    Default0.3

    +

    Distance threshold to merge two micro-clusters. Must be within the range (0, 1]

    +
  • +
  • +

    fading_factor

    +

    Default0.0005

    +

    Fading factor of micro-clusters

    +
  • +
  • +

    tgap

    +

    Default100

    +

    Time between outlier removal

    +
  • +
  • +

    term_fading

    +

    DefaultTrue

    +

    Determines whether individual terms should also be faded

    +
  • +
  • +

    real_time_fading

    +

    DefaultTrue

    +

    Parameter that specifies whether natural time or the number of observations should be used for fading

    +
  • +
  • +

    micro_distance

    +

    Defaulttfidf_cosine_distance

    +

    Distance metric used for clustering micro-clusters

    +
  • +
  • +

    macro_distance

    +

    Defaulttfidf_cosine_distance

    +

    Distance metric used for clustering macro-clusters

    +
  • +
  • +

    num_macro

    +

    Default3

    +

    Number of macro clusters that should be identified during the reclustering phase

    +
  • +
  • +

    min_weight

    +

    Default0

    +

    Minimum weight of micro clusters to be used for reclustering

    +
  • +
  • +

    auto_r

    +

    DefaultFalse

    +

    Parameter that specifies whether the radius should be automatically updated

    +
  • +
  • +

    auto_merge

    +

    DefaultTrue

    +

    Determines whether close observations should be merged together

    +
  • +
  • +

    sigma

    +

    Default1

    +

    Parameter that influences the automated threshold adaptation technique

    +
  • +
+

Attributes

+
    +
  • +

    micro_clusters

    +

    Micro-clusters generated by the algorithm. Micro-clusters are of type textclust.microcluster

    +
  • +
+

Examples

+

from river import compose
+from river import feature_extraction
+from river import metrics
+from river import cluster
+
+corpus = [
+   {"text":'This is the first document.',"idd":1, "cluster": 1, "cluster":1},
+   {"text":'This document is the second document.',"idd":2,"cluster": 1},
+   {"text":'And this is super unrelated.',"idd":3,"cluster": 2},
+   {"text":'Is this the first document?',"idd":4,"cluster": 1},
+   {"text":'This is super unrelated as well',"idd":5,"cluster": 2},
+   {"text":'Test text',"idd":6,"cluster": 5}
+]
+
+stopwords = [ 'stop', 'the', 'to', 'and', 'a', 'in', 'it', 'is', 'I']
+
+metric = metrics.AdjustedRand()
+
+model = compose.Pipeline(
+    feature_extraction.BagOfWords(lowercase=True, ngram_range=(1, 2), stop_words=stopwords),
+    cluster.TextClust(real_time_fading=False, fading_factor=0.001, tgap=100, auto_r=True,
+    radius=0.9)
+)
+
+for x in corpus:
+    y_pred = model.predict_one(x["text"])
+    y = x["cluster"]
+    metric = metric.update(y,y_pred)
+    model = model.learn_one(x["text"])
+
+print(metric)
+
+
AdjustedRand: -0.17647058823529413
+

+

Methods

+
+distances +
+
+get_assignment +
+
+get_macroclusters +
+
+learn_one +

Update the model with a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • t — defaults to None
  • +
  • sample_weight — defaults to None
  • +
+

Returns

+

Clusterer: self

+
+

+
+microcluster +
+
+predict_one +

Predicts the cluster number for a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • sample_weight — defaults to None
  • +
  • type — defaults to micro
  • +
+

Returns

+

int: A cluster number.

+
+

+
+showclusters +
+
+tfcontainer +
+
+updateMacroClusters +
+
+
+
    +
  1. +

    Assenmacher, D. and Trautmann, H. (2022). Textual One-Pass Stream Clustering with Automated Distance Threshold Adaption. In: Asian Conference on Intelligent Information and Database Systems (Accepted) 

    +
  2. +
  3. +

    Carnein, M., Assenmacher, D., Trautmann, H. (2017). Stream Clustering of Chat Messages with +Applications to Twitch Streams. In: Advances in Conceptual Modeling. ER 2017. 

    +
  4. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compat/River2SKLClassifier/index.html b/0.19.0/api/compat/River2SKLClassifier/index.html new file mode 100644 index 0000000000..d11fb585f8 --- /dev/null +++ b/0.19.0/api/compat/River2SKLClassifier/index.html @@ -0,0 +1,3619 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + River2SKLClassifier - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

River2SKLClassifier

+

Compatibility layer from River to scikit-learn for classification.

+

Parameters

+ +
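A minimal usage sketch, assuming the wrapper is obtained through compat.convert_river_to_sklearn:

from river import compat
from river import linear_model
from river import preprocessing
from sklearn import datasets

X, y = datasets.load_breast_cancer(return_X_y=True)

model = compat.convert_river_to_sklearn(
    preprocessing.StandardScaler() | linear_model.LogisticRegression()
)
model = model.fit(X, y)
model.predict(X[:5])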

Methods

+
+fit +

Fits to an entire dataset contained in memory.

+

Parameters

+
    +
  • X
  • +
  • y
  • +
+

Returns

+

self

+
+

+
+get_metadata_routing +

Get metadata routing of this object.

+

Please check :ref:User Guide <metadata_routing> on how the routing mechanism works.

+

Returns

+

MetadataRequest

+
+

+
+get_params +

Get parameters for this estimator.

+

Parameters

+
    +
  • deep — defaults to True
  • +
+

Returns

+

dict

+
+

+
+partial_fit +

Fits incrementally on a portion of a dataset.

+

Parameters

+
    +
  • X
  • +
  • y
  • +
  • classes — defaults to None
  • +
+

Returns

+

self

+
+

+
+predict +

Predicts the target of an entire dataset contained in memory.

+

Parameters

+
    +
  • X
  • +
+

Returns

+

Predicted target values for each row of X.

+
+

+
+predict_proba +

Predicts the target probability of an entire dataset contained in memory.

+

Parameters

+
    +
  • X
  • +
+

Returns

+

Predicted target values for each row of X.

+
+

+
+score +

Return the mean accuracy on the given test data and labels.

+

In multi-label classification, this is the subset accuracy which is a harsh metric since you require for each sample that each label set be correctly predicted.

+

Parameters

+
    +
  • X
  • +
  • y
  • +
  • sample_weight — defaults to None
  • +
+

Returns

+

float

+
+

+
+set_params +

Set the parameters of this estimator.

+

The method works on simple estimators as well as on nested objects (such as :class:~sklearn.pipeline.Pipeline). The latter have parameters of the form <component>__<parameter> so that it's possible to update each component of a nested object.

+

Parameters

+
    +
  • params
  • +
+

Returns

+

estimator instance

+
+

+
+set_partial_fit_request +

Request metadata passed to the partial_fit method.

+

Note that this method is only relevant if enable_metadata_routing=True (see :func:sklearn.set_config). Please see :ref:User Guide <metadata_routing> on how the routing mechanism works. The options for each parameter are: - True: metadata is requested, and passed to partial_fit if provided. The request is ignored if metadata is not provided. - False: metadata is not requested and the meta-estimator will not pass it to partial_fit. - None: metadata is not requested, and the meta-estimator will raise an error if the user provides it. - str: metadata should be passed to the meta-estimator with this given alias instead of the original name. The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others. .. versionadded:: 1.3 .. note:: This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a :class:pipeline.Pipeline. Otherwise it has no effect.

+

Parameters

+
    +
  • classesUnion[bool, NoneType, str] — defaults to $UNCHANGED$
  • +
+

Returns

+

River2SKLClassifier: object

+
+

+
+set_score_request +

Request metadata passed to the score method.

+

Note that this method is only relevant if enable_metadata_routing=True (see :func:sklearn.set_config). Please see :ref:User Guide <metadata_routing> on how the routing mechanism works. The options for each parameter are: - True: metadata is requested, and passed to score if provided. The request is ignored if metadata is not provided. - False: metadata is not requested and the meta-estimator will not pass it to score. - None: metadata is not requested, and the meta-estimator will raise an error if the user provides it. - str: metadata should be passed to the meta-estimator with this given alias instead of the original name. The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others. .. versionadded:: 1.3 .. note:: This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a :class:pipeline.Pipeline. Otherwise it has no effect.

+

Parameters

+
    +
  • sample_weightUnion[bool, NoneType, str] — defaults to $UNCHANGED$
  • +
+

Returns

+

River2SKLClassifier: object

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compat/River2SKLClusterer/index.html b/0.19.0/api/compat/River2SKLClusterer/index.html new file mode 100644 index 0000000000..da91ca2428 --- /dev/null +++ b/0.19.0/api/compat/River2SKLClusterer/index.html @@ -0,0 +1,3581 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + River2SKLClusterer - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

River2SKLClusterer

+

Compatibility layer from River to scikit-learn for clustering.

+

Parameters

+ +

Methods

+
+fit +

Fits to an entire dataset contained in memory.

+

Parameters

+
    +
  • X
  • +
  • y — defaults to None
  • +
+

Returns

+

self

+
+

+
+fit_predict +

Perform clustering on X and returns cluster labels.

+

Parameters

+
    +
  • X
  • +
  • y — defaults to None
  • +
+

Returns

+

ndarray of shape (n_samples,), dtype=np.int64

+
+

+
+get_metadata_routing +

Get metadata routing of this object.

+

Please check :ref:User Guide <metadata_routing> on how the routing mechanism works.

+

Returns

+

MetadataRequest

+
+

+
+get_params +

Get parameters for this estimator.

+

Parameters

+
    +
  • deep — defaults to True
  • +
+

Returns

+

dict

+
+

+
+partial_fit +

Fits incrementally on a portion of a dataset.

+

Parameters

+
    +
  • X
  • +
  • y
  • +
+

Returns

+

self

+
+

+
+predict +

Predicts the target of an entire dataset contained in memory.

+

Parameters

+
    +
  • X
  • +
+

Returns

+

Transformed output.

+
+

+
+set_params +

Set the parameters of this estimator.

+

The method works on simple estimators as well as on nested objects (such as :class:~sklearn.pipeline.Pipeline). The latter have parameters of the form <component>__<parameter> so that it's possible to update each component of a nested object.

+

Parameters

+
    +
  • params
  • +
+

Returns

+

estimator instance

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compat/River2SKLRegressor/index.html b/0.19.0/api/compat/River2SKLRegressor/index.html new file mode 100644 index 0000000000..90ff2eec16 --- /dev/null +++ b/0.19.0/api/compat/River2SKLRegressor/index.html @@ -0,0 +1,3595 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + River2SKLRegressor - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

River2SKLRegressor

+

Compatibility layer from River to scikit-learn for regression.

+

Parameters

+ +

Methods

+
+fit +

Fits to an entire dataset contained in memory.

+

Parameters

+
    +
  • X
  • +
  • y
  • +
+

Returns

+

self

+
+

+
+get_metadata_routing +

Get metadata routing of this object.

+

Please check :ref:User Guide <metadata_routing> on how the routing mechanism works.

+

Returns

+

MetadataRequest

+
+

+
+get_params +

Get parameters for this estimator.

+

Parameters

+
    +
  • deep — defaults to True
  • +
+

Returns

+

dict

+
+

+
+partial_fit +

Fits incrementally on a portion of a dataset.

+

Parameters

+
    +
  • X
  • +
  • y
  • +
+

Returns

+

self

+
+

+
+predict +

Predicts the target of an entire dataset contained in memory.

+

Parameters

+
    +
  • X
  • +
+

Returns

+

np.ndarray: Predicted target values for each row of X.

+
+

+
+score +

Return the coefficient of determination of the prediction.

+

The coefficient of determination :math:R^2 is defined as :math:(1 - \frac{u}{v}), where :math:u is the residual sum of squares ((y_true - y_pred)** 2).sum() and :math:v is the total sum of squares ((y_true - y_true.mean()) ** 2).sum(). The best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). A constant model that always predicts the expected value of y, disregarding the input features, would get a :math:R^2 score of 0.0.

+

Parameters

+
    +
  • X
  • +
  • y
  • +
  • sample_weight — defaults to None
  • +
+

Returns

+

float

+
+

+
+set_params +

Set the parameters of this estimator.

+

The method works on simple estimators as well as on nested objects (such as :class:~sklearn.pipeline.Pipeline). The latter have parameters of the form <component>__<parameter> so that it's possible to update each component of a nested object.

+

Parameters

+
    +
  • params
  • +
+

Returns

+

estimator instance

+
+

+
+set_score_request +

Request metadata passed to the score method.

+

Note that this method is only relevant if enable_metadata_routing=True (see :func:sklearn.set_config). Please see :ref:User Guide <metadata_routing> on how the routing mechanism works. The options for each parameter are: - True: metadata is requested, and passed to score if provided. The request is ignored if metadata is not provided. - False: metadata is not requested and the meta-estimator will not pass it to score. - None: metadata is not requested, and the meta-estimator will raise an error if the user provides it. - str: metadata should be passed to the meta-estimator with this given alias instead of the original name. The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others. .. versionadded:: 1.3 .. note:: This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a :class:pipeline.Pipeline. Otherwise it has no effect.

+

Parameters

+
    +
  • sample_weightUnion[bool, NoneType, str] — defaults to $UNCHANGED$
  • +
+

Returns

+

River2SKLRegressor: object

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compat/River2SKLTransformer/index.html b/0.19.0/api/compat/River2SKLTransformer/index.html new file mode 100644 index 0000000000..33a123792a --- /dev/null +++ b/0.19.0/api/compat/River2SKLTransformer/index.html @@ -0,0 +1,3595 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + River2SKLTransformer - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

River2SKLTransformer

+

Compatibility layer from River to scikit-learn for transformation.

+

Parameters

+ +

Methods

+
+fit +

Fits to an entire dataset contained in memory.

+

Parameters

+
    +
  • X
  • +
  • y — defaults to None
  • +
+

Returns

+

self

+
+

+
+fit_transform +

Fit to data, then transform it.

+

Fits transformer to X and y with optional parameters fit_params and returns a transformed version of X.

+

Parameters

+
    +
  • X
  • +
  • y — defaults to None
  • +
  • fit_params
  • +
+

Returns

+

ndarray array of shape (n_samples, n_features_new)

+
+

+
+get_metadata_routing +

Get metadata routing of this object.

+

Please check :ref:User Guide <metadata_routing> on how the routing mechanism works.

+

Returns

+

MetadataRequest

+
+

+
+get_params +

Get parameters for this estimator.

+

Parameters

+
    +
  • deep — defaults to True
  • +
+

Returns

+

dict

+
+

+
+partial_fit +

Fits incrementally on a portion of a dataset.

+

Parameters

+
    +
  • X
  • +
  • y — defaults to None
  • +
+

Returns

+

self

+
+

+
+set_output +

Set output container.

+

See :ref:sphx_glr_auto_examples_miscellaneous_plot_set_output.py for an example on how to use the API.

+

Parameters

+
    +
  • transform — defaults to None
  • +
+

Returns

+

estimator instance

+
+

+
+set_params +

Set the parameters of this estimator.

+

The method works on simple estimators as well as on nested objects (such as :class:~sklearn.pipeline.Pipeline). The latter have parameters of the form <component>__<parameter> so that it's possible to update each component of a nested object.

+

Parameters

+
    +
  • params
  • +
+

Returns

+

estimator instance

+
+

+
+transform +

Predicts the target of an entire dataset contained in memory.

+

Parameters

+
    +
  • X
  • +
+

Returns

+

Transformed output.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compat/SKL2RiverClassifier/index.html b/0.19.0/api/compat/SKL2RiverClassifier/index.html new file mode 100644 index 0000000000..704deb80d7 --- /dev/null +++ b/0.19.0/api/compat/SKL2RiverClassifier/index.html @@ -0,0 +1,3596 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SKL2RiverClassifier - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

SKL2RiverClassifier

+

Compatibility layer from scikit-learn to River for classification.

+

Parameters

+
    +
  • +

    estimator

    +

    Typesklearn_base.ClassifierMixin

    +

    A scikit-learn classifier which has a partial_fit method.

    +
  • +
  • +

    classes

    +

    Typelist

    +
  • +
+

Examples

+

from river import compat
+from river import evaluate
+from river import metrics
+from river import preprocessing
+from river import stream
+from sklearn import linear_model
+from sklearn import datasets
+
+dataset = stream.iter_sklearn_dataset(
+    dataset=datasets.load_breast_cancer(),
+    shuffle=True,
+    seed=42
+)
+
+model = preprocessing.StandardScaler()
+model |= compat.convert_sklearn_to_river(
+    estimator=linear_model.SGDClassifier(
+        loss='log_loss',
+        eta0=0.01,
+        learning_rate='constant'
+    ),
+    classes=[False, True]
+)
+
+metric = metrics.LogLoss()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
LogLoss: 0.198029
+

+

Methods

+
+learn_many +
+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
+

Returns

+

self

+
+

+
+predict_many +
+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

The predicted label.

+
+

+
+predict_proba_many +
+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compat/SKL2RiverRegressor/index.html b/0.19.0/api/compat/SKL2RiverRegressor/index.html new file mode 100644 index 0000000000..7c10f1d241 --- /dev/null +++ b/0.19.0/api/compat/SKL2RiverRegressor/index.html @@ -0,0 +1,3572 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SKL2RiverRegressor - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

SKL2RiverRegressor

+

Compatibility layer from scikit-learn to River for regression.

+

Parameters

+
    +
  • +

    estimator

    +

    Typesklearn_base.BaseEstimator

    +

    A scikit-learn regressor which has a partial_fit method.

    +
  • +
+

Examples

+

from river import compat
+from river import evaluate
+from river import metrics
+from river import preprocessing
+from river import stream
+from sklearn import linear_model
+from sklearn import datasets
+
+dataset = stream.iter_sklearn_dataset(
+    dataset=datasets.load_diabetes(),
+    shuffle=True,
+    seed=42
+)
+
+scaler = preprocessing.StandardScaler()
+sgd_reg = compat.convert_sklearn_to_river(linear_model.SGDRegressor())
+model = scaler | sgd_reg
+
+metric = metrics.MAE()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MAE: 84.501421
+

+

Methods

+
+learn_many +
+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
+

Returns

+

self

+
+

+
+predict_many +
+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

The prediction.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compat/convert-river-to-sklearn/index.html b/0.19.0/api/compat/convert-river-to-sklearn/index.html new file mode 100644 index 0000000000..1d771f58ce --- /dev/null +++ b/0.19.0/api/compat/convert-river-to-sklearn/index.html @@ -0,0 +1,3488 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + convert_river_to_sklearn - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

convert_river_to_sklearn

+

Wraps a river estimator to make it compatible with scikit-learn.

+
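A minimal sketch of driving the wrapped estimator with scikit-learn utilities; this assumes the wrapper exposes the fit/predict/get_params interface described on the River2SKL* pages.

from river import compat
from river import linear_model
from river import preprocessing
from sklearn import datasets
from sklearn import model_selection

model = compat.convert_river_to_sklearn(
    preprocessing.StandardScaler() | linear_model.LinearRegression()
)

X, y = datasets.load_diabetes(return_X_y=True)
scores = model_selection.cross_val_score(model, X, y)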

Parameters

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compat/convert-sklearn-to-river/index.html b/0.19.0/api/compat/convert-sklearn-to-river/index.html new file mode 100644 index 0000000000..610c080843 --- /dev/null +++ b/0.19.0/api/compat/convert-sklearn-to-river/index.html @@ -0,0 +1,3494 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + convert_sklearn_to_river - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

convert_sklearn_to_river

+

Wraps a scikit-learn estimator to make it compatible with river.

+

Parameters

+
    +
  • +

    estimator

    +

    Typesklearn_base.BaseEstimator

    +
  • +
  • +

    classes

    +

    Typelist | None

    +

    DefaultNone

    +

    Class names necessary for classifiers.

    +
  • +
+ + + + + + + + +
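A minimal sketch of wrapping a scikit-learn classifier; classes is given upfront because partial_fit requires it.

import numpy as np
from river import compat
from river import stream
from sklearn import linear_model

model = compat.convert_sklearn_to_river(
    linear_model.SGDClassifier(loss='log_loss'),
    classes=[0, 1],
)

X = np.array([[0.0, 1.0], [1.0, 0.0]])
y = np.array([0, 1])

for x, yi in stream.iter_array(X, y):
    model = model.learn_one(x, yi)

model.predict_one({0: 0.9, 1: 0.1})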
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compose/Discard/index.html b/0.19.0/api/compose/Discard/index.html new file mode 100644 index 0000000000..130c789cd2 --- /dev/null +++ b/0.19.0/api/compose/Discard/index.html @@ -0,0 +1,3664 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Discard - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Discard

+

Removes features.

+

This can be used in a pipeline when you want to remove certain features. The transform_one method is pure, and therefore returns a fresh new dictionary instead of removing the specified keys from the input.

+

Parameters

+
    +
  • +

    keys

    +

    Typetuple[base.typing.FeatureName]

    +

    Key(s) to discard.

    +
  • +
+

Examples

+

from river import compose
+
+x = {'a': 42, 'b': 12, 'c': 13}
+compose.Discard('a', 'b').transform_one(x)
+
+
{'c': 13}
+

+

You can chain a discarder with any estimator in order to apply said estimator to the +desired features.

+

from river import feature_extraction as fx
+
+x = {'sales': 10, 'shop': 'Ikea', 'country': 'Sweden'}
+
+pipeline = (
+    compose.Discard('shop', 'country') |
+    fx.PolynomialExtender()
+)
+pipeline.transform_one(x)
+
+
{'sales': 10, 'sales*sales': 100}
+

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compose/FuncTransformer/index.html b/0.19.0/api/compose/FuncTransformer/index.html new file mode 100644 index 0000000000..0178fc91d6 --- /dev/null +++ b/0.19.0/api/compose/FuncTransformer/index.html @@ -0,0 +1,3726 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FuncTransformer - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

FuncTransformer

+

Wraps a function to make it usable in a pipeline.

+

There is often a need to apply an arbitrary transformation to a set of features. For instance, this could involve parsing a date and then extracting the hour from said date. If you're processing a stream of data, then you can do this yourself by calling the necessary code at your leisure. On the other hand, if you want to do this as part of a pipeline, then you need to follow a simple convention.

+

To use a function as part of a pipeline, take as input a dict of features and output a dict. Once you have initialized this class with your function, then you can use it like you would use any other (unsupervised) transformer.

+

It is up to you if you want your function to be pure or not. By pure we refer to a function that doesn't modify its input. However, we recommend writing pure functions because this reduces the chances of inserting bugs into your pipeline.

+

Parameters

+
    +
  • +

    func

    +

    Typetyping.Callable[[dict], dict]

    +

    A function that takes as input a dict and outputs a dict.

    +
  • +
+

Examples

+

from pprint import pprint
+import datetime as dt
+from river import compose
+
+x = {'date': '2019-02-14'}
+
+def parse_date(x):
+    date = dt.datetime.strptime(x['date'], '%Y-%m-%d')
+    x['is_weekend'] = date.weekday() in (5, 6)
+    x['hour'] = date.hour
+    return x
+
+t = compose.FuncTransformer(parse_date)
+pprint(t.transform_one(x))
+
+
{'date': '2019-02-14', 'hour': 0, 'is_weekend': False}
+

+

The above example is not pure because it modifies the input. The following example is pure +and produces the same output:

+

def parse_date(x):
+    date = dt.datetime.strptime(x['date'], '%Y-%m-%d')
+    return {'is_weekend': date.weekday() in (5, 6), 'hour': date.hour}
+
+t = compose.FuncTransformer(parse_date)
+pprint(t.transform_one(x))
+
+
{'hour': 0, 'is_weekend': False}
+

+

The previous example doesn't include the date feature because it returns a new dict. +However, a common usecase is to add a feature to an existing set of features. You can do +this in a pure way by unpacking the input dict into the output dict:

+

def parse_date(x):
+    date = dt.datetime.strptime(x['date'], '%Y-%m-%d')
+    return {'is_weekend': date.weekday() in (5, 6), 'hour': date.hour, **x}
+
+t = compose.FuncTransformer(parse_date)
+pprint(t.transform_one(x))
+
+
{'date': '2019-02-14', 'hour': 0, 'is_weekend': False}
+

+

You can add FuncTransformer to a pipeline just like you would with any other transformer.

+

from river import naive_bayes
+
+pipeline = compose.FuncTransformer(parse_date) | naive_bayes.MultinomialNB()
+pipeline
+
+
Pipeline (
+  FuncTransformer (
+    func="parse_date"
+  ),
+  MultinomialNB (
+    alpha=1.
+  )
+)
+

+

If you provide a function without wrapping it, then the pipeline will do it for you:

+
pipeline = parse_date | naive_bayes.MultinomialNB()
+
+

Methods

+
+learn_many +

Update with a mini-batch of features.

+

A lot of transformers don't actually have to do anything during the learn_many step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_many can override this method.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

Transformer: self

+
+

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_many +

Transform a mini-batch of features.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.DataFrame: A new DataFrame.

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compose/Grouper/index.html b/0.19.0/api/compose/Grouper/index.html new file mode 100644 index 0000000000..8a837dd672 --- /dev/null +++ b/0.19.0/api/compose/Grouper/index.html @@ -0,0 +1,3632 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Grouper - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Grouper

+

Applies a transformer within different groups.

+

This transformer allows you to split your data into groups and apply a transformer within each group. This happens in a streaming manner, which means that the groups are discovered online. A separate copy of the provided transformer is made whenever a new group appears. The groups are defined according to one or more keys.

+

Parameters

+
    +
  • +

    transformer

    +

    Typebase.Transformer

    +
  • +
  • +

    by

    +

    Typebase.typing.FeatureName | list[base.typing.FeatureName]

    +

    The field on which to group the data. This can either be a single value, or a list of values.

    +
  • +
+
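A minimal usage sketch, assuming the wrapped transformer receives the full feature dict; the numeric field is therefore selected before scaling, and the feature names here are made up.

from river import compose
from river import preprocessing

grouper = compose.Grouper(
    transformer=compose.Select('x') | preprocessing.StandardScaler(),
    by='shop',
)

X = [
    {'shop': 'Ikea', 'x': 10.0},
    {'shop': 'Leroy Merlin', 'x': 1.0},
    {'shop': 'Ikea', 'x': 20.0},
]

for x in X:
    grouper = grouper.learn_one(x)

grouper.transform_one({'shop': 'Ikea', 'x': 15.0})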

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compose/Pipeline/index.html b/0.19.0/api/compose/Pipeline/index.html new file mode 100644 index 0000000000..e6d4928813 --- /dev/null +++ b/0.19.0/api/compose/Pipeline/index.html @@ -0,0 +1,3855 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Pipeline - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Pipeline

+

A pipeline of estimators.

+

Pipelines allow you to chain different steps into a sequence. Typically, when doing supervised learning, a pipeline contains one or more transformation steps, whilst its final step is a regressor or a classifier. It is highly recommended to use pipelines with River. Indeed, in an online learning setting, it is very practical to have a model defined as a single object. Take a look at the user guide for further information and practical examples.

+

One special thing to take notice of is the way transformers are handled. It is usual to predict something for a sample and wait for the ground truth to arrive. In such a scenario, the features are seen before the ground truth arrives. Therefore, the unsupervised parts of the pipeline are updated when predict_one and predict_proba_one are called. Usually the unsupervised parts of the pipeline are all the steps that precede the final step, which is a supervised model. However, some transformers are supervised and are therefore also updated during calls to learn_one.

+
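A minimal sketch of this predict-then-learn pattern:

from river import linear_model
from river import preprocessing

model = preprocessing.StandardScaler() | linear_model.LogisticRegression()

x, y = {'x1': 1.0, 'x2': 2.0}, True

# The features are seen first: this call also updates the unsupervised
# parts of the pipeline (here, the scaler).
y_pred = model.predict_proba_one(x)

# Once the ground truth arrives, the supervised final step is updated.
model = model.learn_one(x, y)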

Parameters

+
    +
  • +

    steps

    +

    Ideally, a list of (name, estimator) tuples. A name is automatically inferred if none is provided.

    +
  • +
+

Examples

+

The recommended way to declare a pipeline is to use the | operator. The latter allows you +to chain estimators in a very terse manner:

+
from river import linear_model
+from river import preprocessing
+
+scaler = preprocessing.StandardScaler()
+log_reg = linear_model.LinearRegression()
+model = scaler | log_reg
+
+

This results in a pipeline that stores each step inside a dictionary.

+

model
+
+
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  LinearRegression (
+    optimizer=SGD (
+      lr=Constant (
+        learning_rate=0.01
+      )
+    )
+    loss=Squared ()
+    l2=0.
+    l1=0.
+    intercept_init=0.
+    intercept_lr=Constant (
+      learning_rate=0.01
+    )
+    clip_gradient=1e+12
+    initializer=Zeros ()
+  )
+)
+

+

You can access parts of a pipeline in the same manner as a dictionary:

+

model['LinearRegression']
+
+
LinearRegression (
+  optimizer=SGD (
+    lr=Constant (
+      learning_rate=0.01
+    )
+  )
+  loss=Squared ()
+  l2=0.
+  l1=0.
+  intercept_init=0.
+  intercept_lr=Constant (
+    learning_rate=0.01
+  )
+  clip_gradient=1e+12
+  initializer=Zeros ()
+)
+

+

Note that you can also declare a pipeline by using the compose.Pipeline constructor +method, which is slightly more verbose:

+
from river import compose
+
+model = compose.Pipeline(scaler, log_reg)
+
+

By using a compose.TransformerUnion, you can define complex pipelines that apply +different steps to different parts of the data. For instance, we can extract word counts +from text data, and extract polynomial features from numeric data.

+
from river import feature_extraction as fx
+
+tfidf = fx.TFIDF('text')
+counts = fx.BagOfWords('text')
+text_part = compose.Select('text') | (tfidf + counts)
+
+num_part = compose.Select('a', 'b') | fx.PolynomialExtender()
+
+model = text_part + num_part
+model |= preprocessing.StandardScaler()
+model |= linear_model.LinearRegression()
+
+

The following shows an example of using debug_one to visualize how the information +flows and changes throughout the pipeline.

+

from river import compose
+from river import naive_bayes
+
+dataset = [
+    ('A positive comment', True),
+    ('A negative comment', False),
+    ('A happy comment', True),
+    ('A lovely comment', True),
+    ('A harsh comment', False)
+]
+
+tfidf = fx.TFIDF() | compose.Prefixer('tfidf_')
+counts = fx.BagOfWords() | compose.Prefixer('count_')
+mnb = naive_bayes.MultinomialNB()
+model = (tfidf + counts) | mnb
+
+for x, y in dataset:
+    model = model.learn_one(x, y)
+
+x = dataset[0][0]
+report = model.debug_one(dataset[0][0])
+print(report)
+
+
0. Input
+--------
+A positive comment
+1. Transformer union
+--------------------
+    1.0 TFIDF | Prefixer
+    --------------------
+    tfidf_comment: 0.43017 (float)
+    tfidf_positive: 0.90275 (float)
+    1.1 BagOfWords | Prefixer
+    -------------------------
+    count_comment: 1 (int)
+    count_positive: 1 (int)
+count_comment: 1 (int)
+count_positive: 1 (int)
+tfidf_comment: 0.43017 (float)
+tfidf_positive: 0.90275 (float)
+2. MultinomialNB
+----------------
+False: 0.19221
+True: 0.80779
+

+

Methods

+
+debug_one +

Displays the state of a set of features as it goes through the pipeline.

+

Parameters

+
    +
  • x'dict'
  • +
  • show_types — defaults to True
  • +
  • n_decimals — defaults to 5
  • +
+
+

+
+forecast +

Return a forecast.

+

Only works if each estimator has a transform_one method and the final estimator has a forecast method. This is the case of time series models from the time_series module.

+

Parameters

+
    +
  • horizon'int'
  • +
  • xs'list[dict] | None' — defaults to None
  • +
+
+

+
+learn_many +

Fit to a mini-batch.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
  • y'pd.Series | None' — defaults to None
  • +
  • params
  • +
+
+

+
+learn_one +

Fit to a single instance.

+

Parameters

+
    +
  • x'dict'
  • +
  • y — defaults to None
  • +
  • params
  • +
+
+

+
+predict_many +

Call transform_many, and then predict_many on the final step.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+
+

+
+predict_one +

Call transform_one on the first steps and predict_one on the last step.

+

Parameters

+
    +
  • x'dict'
  • +
  • params
  • +
+
+

+
+predict_proba_many +

Call transform_many, and then predict_proba_many on the final step.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+
+

+
+predict_proba_one +

Call transform_one on the first steps and predict_proba_one on the last step.

+

Parameters

+
    +
  • x'dict'
  • +
  • params
  • +
+
+

+
+score_one +

Call transform_one on the first steps and score_one on the last step.

+

Parameters

+
    +
  • x'dict'
  • +
  • params
  • +
+
+

+
+transform_many +

Apply each transformer in the pipeline to some features.

+

The final step in the pipeline will be applied if it is a transformer. If not, then it will be ignored and the output from the penultimate step will be returned. Note that the steps that precede the final step are assumed to all be transformers.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+
+

+
+transform_one +

Apply each transformer in the pipeline to some features.

+

The final step in the pipeline will be applied if it is a transformer. If not, then it will be ignored and the output from the penultimate step will be returned. Note that the steps that precede the final step are assumed to all be transformers.

+

Parameters

+
    +
  • x'dict'
  • +
  • params
  • +
+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compose/Prefixer/index.html b/0.19.0/api/compose/Prefixer/index.html new file mode 100644 index 0000000000..d5f49a5d34 --- /dev/null +++ b/0.19.0/api/compose/Prefixer/index.html @@ -0,0 +1,3648 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Prefixer - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Prefixer

+

Prepends a prefix on features names.

+

Parameters

+
    +
  • +

    prefix

    +

    Typestr

    +
  • +
+

Examples

+

from river import compose
+
+x = {'a': 42, 'b': 12}
+compose.Prefixer('prefix_').transform_one(x)
+
+
{'prefix_a': 42, 'prefix_b': 12}
+
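A common pattern is to use Prefixer inside a transformer union, so that the outputs of the different transformers can be told apart; a sketch, assuming textual inputs:

+

from river import feature_extraction as fx
+
+tfidf = fx.TFIDF() | compose.Prefixer('tfidf_')
+counts = fx.BagOfWords() | compose.Prefixer('count_')
+union = tfidf + counts
+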

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do something during the learn_one step can, however, override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compose/Renamer/index.html b/0.19.0/api/compose/Renamer/index.html new file mode 100644 index 0000000000..c9943e91eb --- /dev/null +++ b/0.19.0/api/compose/Renamer/index.html @@ -0,0 +1,3650 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Renamer - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Renamer

+

Renames features following substitution rules.

+

Parameters

+
    +
  • +

    mapping

    +

    Typedict[str, str]

    +

    Dictionary describing substitution rules. Keys in mapping that do not match a feature name are silently ignored.

    +
  • +
+

Examples

+

from river import compose
+
+mapping = {'a': 'v', 'c': 'o'}
+x = {'a': 42, 'b': 12}
+compose.Renamer(mapping).transform_one(x)
+
+
{'b': 12, 'v': 42}
+
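Renamer can also be slotted into a pipeline like any other transformer; a minimal sketch:

+

pipeline = compose.Renamer({'a': 'v'}) | compose.Select('v')
+pipeline.transform_one({'a': 42, 'b': 12})
+
+
{'v': 42}
+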

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do something during the learn_one step can, however, override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compose/Select/index.html b/0.19.0/api/compose/Select/index.html new file mode 100644 index 0000000000..d53187b85f --- /dev/null +++ b/0.19.0/api/compose/Select/index.html @@ -0,0 +1,3717 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Select - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Select

+

Selects features.

+

This can be used in a pipeline when you want to select certain features. The transform_one method is pure: it returns a fresh dictionary instead of filtering the specified keys out of the input in place.

+

Parameters

+
    +
  • +

    keys

    +

    Typetuple[base.typing.FeatureName]

    +

    Key(s) to keep.

    +
  • +
+

Examples

+

from river import compose
+
+x = {'a': 42, 'b': 12, 'c': 13}
+compose.Select('c').transform_one(x)
+
+
{'c': 13}
+

+

You can chain a selector with any estimator in order to apply said estimator to the +desired features.

+

from river import feature_extraction as fx
+
+x = {'sales': 10, 'shop': 'Ikea', 'country': 'Sweden'}
+
+pipeline = (
+    compose.Select('sales') |
+    fx.PolynomialExtender()
+)
+pipeline.transform_one(x)
+
+
{'sales': 10, 'sales*sales': 100}
+

+

This transformer also supports mini-batch processing:

+

import random
+from river import compose
+
+random.seed(42)
+X = [{"x_1": random.uniform(8, 12), "x_2": random.uniform(8, 12)} for _ in range(6)]
+for x in X:
+    print(x)
+
+
{'x_1': 10.557707193831535, 'x_2': 8.100043020890668}
+{'x_1': 9.100117273476478, 'x_2': 8.892842952595291}
+{'x_1': 10.94588485665605, 'x_2': 10.706797949691644}
+{'x_1': 11.568718270819382, 'x_2': 8.347755330517664}
+{'x_1': 9.687687278741082, 'x_2': 8.119188877752281}
+{'x_1': 8.874551899214413, 'x_2': 10.021421152413449}
+

+
import pandas as pd
+X = pd.DataFrame.from_dict(X)
+
+

You can then call transform_many to transform a mini-batch of features:

+

compose.Select('x_2').transform_many(X)
+
+
    x_2
+0   8.100043
+1   8.892843
+2  10.706798
+3   8.347755
+4   8.119189
+5  10.021421
+

+

Methods

+
+learn_many +

Update with a mini-batch of features.

+

A lot of transformers don't actually have to do anything during the learn_many step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do something during the learn_many step can, however, override this method.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

Transformer: self

+
+

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do something during the learn_one step can, however, override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_many +

Transform a mini-batch of features.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.DataFrame: A new DataFrame.

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compose/SelectType/index.html b/0.19.0/api/compose/SelectType/index.html new file mode 100644 index 0000000000..c19b631dde --- /dev/null +++ b/0.19.0/api/compose/SelectType/index.html @@ -0,0 +1,3652 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SelectType - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

SelectType

+

Selects features based on their type.

+

This is practical when you want to apply different preprocessing steps to different kinds of features. For instance, a common use case is to apply a preprocessing.StandardScaler to numeric features and a preprocessing.OneHotEncoder to categorical features.

+

Parameters

+
    +
  • +

    types

    +

    Typetuple[type]

    +

    Python types which you want to select. Under the hood, the isinstance method will be used to check if a value is of a given type.

    +
  • +
+

Examples

+
import numbers
+from river import compose
+from river import linear_model
+from river import preprocessing
+
+num = compose.SelectType(numbers.Number) | preprocessing.StandardScaler()
+cat = compose.SelectType(str) | preprocessing.OneHotEncoder()
+model = (num + cat) | linear_model.LogisticRegression()
+
+
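To make the selection concrete, here is a small sketch with made-up features; numeric values are kept while strings are filtered out:

+

x = {'revenue': 42.0, 'place': 'Taco Bell'}
+compose.SelectType(numbers.Number).transform_one(x)
+
+
{'revenue': 42.0}
+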

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do something during the learn_one step can, however, override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compose/Suffixer/index.html b/0.19.0/api/compose/Suffixer/index.html new file mode 100644 index 0000000000..921ddca80d --- /dev/null +++ b/0.19.0/api/compose/Suffixer/index.html @@ -0,0 +1,3648 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Suffixer - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Suffixer

+

Appends a suffix on features names.

+

Parameters

+
    +
  • +

    suffix

    +

    Typestr

    +
  • +
+

Examples

+

from river import compose
+
+x = {'a': 42, 'b': 12}
+compose.Suffixer('_suffix').transform_one(x)
+
+
{'a_suffix': 42, 'b_suffix': 12}
+

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do something during the learn_one step can, however, override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compose/TargetTransformRegressor/index.html b/0.19.0/api/compose/TargetTransformRegressor/index.html new file mode 100644 index 0000000000..67a0d28064 --- /dev/null +++ b/0.19.0/api/compose/TargetTransformRegressor/index.html @@ -0,0 +1,3676 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + TargetTransformRegressor - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

TargetTransformRegressor

+

Modifies the target before training.

+

The user is expected to check that func and inverse_func are coherent with each other.

+
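A quick sanity check for coherence is to verify that the two functions round-trip a value; a minimal sketch:

+

import math
+
+y = 3.14
+math.isclose(math.exp(math.log(y)), y)
+
+
True
+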

Parameters

+
    +
  • +

    regressor

    +

    Typebase.Regressor

    +

    Regression model to wrap.

    +
  • +
  • +

    func

    +

    Typetyping.Callable

    +

    A function modifying the target before training.

    +
  • +
  • +

    inverse_func

    +

    Typetyping.Callable

    +

    A function to return to the target's original space.

    +
  • +
+

Examples

+

import math
+from river import compose
+from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import preprocessing
+
+dataset = datasets.TrumpApproval()
+model = (
+    preprocessing.StandardScaler() |
+    compose.TargetTransformRegressor(
+        regressor=linear_model.LinearRegression(intercept_lr=0.15),
+        func=math.log,
+        inverse_func=math.exp
+    )
+)
+metric = metrics.MSE()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MSE: 10.999752
+

+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

The prediction.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compose/TransformerProduct/index.html b/0.19.0/api/compose/TransformerProduct/index.html new file mode 100644 index 0000000000..ab71587f2c --- /dev/null +++ b/0.19.0/api/compose/TransformerProduct/index.html @@ -0,0 +1,3690 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + TransformerProduct - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

TransformerProduct

+

Computes interactions between the outputs of a set of transformers.

+

This is for when you want to add interaction terms between groups of features. It may also be used as an alternative to feature_extraction.PolynomialExtender when the latter is overkill.

+

Parameters

+
    +
  • +

    transformers

    +

    Ideally, a list of (name, estimator) tuples. A name is automatically inferred if none is provided.

    +
  • +
+

Examples

+

Let's say we have a certain set of features with two groups. In practice these may be different +namespaces, such as one for items and the other for users.

+
x = dict(
+    a=0, b=1,  # group 1
+    x=2, y=3   # group 2
+)
+
+

We might want to add interaction terms between groups ('a', 'b') and ('x', 'y'), as so:

+

from pprint import pprint
+from river.compose import Select, TransformerProduct
+
+product = TransformerProduct(
+    Select('a', 'b'),
+    Select('x', 'y')
+)
+pprint(product.transform_one(x))
+
+
{'a*x': 0, 'a*y': 0, 'b*x': 2, 'b*y': 3}
+

+

This can also be done with the following shorthand:

+

product = Select('a', 'b') * Select('x', 'y')
+pprint(product.transform_one(x))
+
+
{'a*x': 0, 'a*y': 0, 'b*x': 2, 'b*y': 3}
+

+

If you want to include the original terms, you can do something like this:

+

group_1 = Select('a', 'b')
+group_2 = Select('x', 'y')
+product = group_1 + group_2 + group_1 * group_2
+pprint(product.transform_one(x))
+
+
{'a': 0, 'a*x': 0, 'a*y': 0, 'b': 1, 'b*x': 2, 'b*y': 3, 'x': 2, 'y': 3}
+

+

Methods

+
+learn_many +

Update each transformer.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
  • y'pd.Series | None' — defaults to None
  • +
+
+

+
+learn_one +

Update each transformer.

+

Parameters

+
    +
  • x'dict'
  • +
  • y — defaults to None
  • +
+
+

+
+transform_many +

Passes the data through each transformer and packs the results together.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+
+

+
+transform_one +

Passes the data through each transformer and packs the results together.

+

Parameters

+
    +
  • x'dict'
  • +
+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compose/TransformerUnion/index.html b/0.19.0/api/compose/TransformerUnion/index.html new file mode 100644 index 0000000000..02c98f9b8a --- /dev/null +++ b/0.19.0/api/compose/TransformerUnion/index.html @@ -0,0 +1,3771 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + TransformerUnion - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

TransformerUnion

+

Packs multiple transformers into a single one.

+

Pipelines allow you to apply steps sequentially. Therefore, the output of a step becomes the input of the next one. In many cases, you may want to pass the output of a step to multiple steps. This simple transformer allows you to do so. In other words, it enables you to apply particular steps to different parts of an input. A typical example is when you want to scale numeric features and one-hot encode categorical features.

+

This transformer is essentially a list of transformers. Whenever it is updated, it loops through each transformer and updates them. Meanwhile, calling transform_one collects the output of each transformer and merges them into a single dictionary.

+

Parameters

+
    +
  • +

    transformers

    +

    Ideally, a list of (name, estimator) tuples. A name is automatically inferred if none is provided.

    +
  • +
+

Examples

+

Take the following dataset:

+
X = [
+    {'place': 'Taco Bell', 'revenue': 42},
+    {'place': 'Burger King', 'revenue': 16},
+    {'place': 'Burger King', 'revenue': 24},
+    {'place': 'Taco Bell', 'revenue': 58},
+    {'place': 'Burger King', 'revenue': 20},
+    {'place': 'Taco Bell', 'revenue': 50}
+]
+
+

As an example, let's assume we want to compute two aggregates of a dataset. We therefore +define two feature_extraction.Aggs and initialize a TransformerUnion with them:

+
from river import compose
+from river import feature_extraction
+from river import stats
+
+mean = feature_extraction.Agg(
+    on='revenue', by='place',
+    how=stats.Mean()
+)
+count = feature_extraction.Agg(
+    on='revenue', by='place',
+    how=stats.Count()
+)
+agg = compose.TransformerUnion(mean, count)
+
+

We can now update each transformer and obtain their output with a single function call:

+

from pprint import pprint
+for x in X:
+    agg = agg.learn_one(x)
+    pprint(agg.transform_one(x))
+
+
{'revenue_count_by_place': 1, 'revenue_mean_by_place': 42.0}
+{'revenue_count_by_place': 1, 'revenue_mean_by_place': 16.0}
+{'revenue_count_by_place': 2, 'revenue_mean_by_place': 20.0}
+{'revenue_count_by_place': 2, 'revenue_mean_by_place': 50.0}
+{'revenue_count_by_place': 3, 'revenue_mean_by_place': 20.0}
+{'revenue_count_by_place': 3, 'revenue_mean_by_place': 50.0}
+

+

Note that you can use the + operator as a shorthand notation:

+

agg = mean + count

+

This allows you to build complex pipelines in a very terse manner. For instance, we can +create a pipeline that scales each feature and fits a logistic regression as so:

+
from river import linear_model as lm
+from river import preprocessing as pp
+
+model = (
+    (mean + count) |
+    pp.StandardScaler() |
+    lm.LogisticRegression()
+)
+
+

Which is equivalent to the following code:

+
model = compose.Pipeline(
+    compose.TransformerUnion(mean, count),
+    pp.StandardScaler(),
+    lm.LogisticRegression()
+)
+
+

Note that you can access any part of a TransformerUnion by name:

+

model['TransformerUnion']['Agg']
+
+
Agg (
+    on="revenue"
+    by=['place']
+    how=Mean ()
+)
+

+

model['TransformerUnion']['Agg1']
+
+
Agg (
+    on="revenue"
+    by=['place']
+    how=Count ()
+)
+

+

You can also manually provide a name for each step:

+
agg = compose.TransformerUnion(
+    ('Mean revenue by place', mean),
+    ('# by place', count)
+)
+
+

Mini-batch example:

+
import pandas as pd
+
+X = pd.DataFrame([
+    {"place": 2, "revenue": 42},
+    {"place": 3, "revenue": 16},
+    {"place": 3, "revenue": 24},
+    {"place": 2, "revenue": 58},
+    {"place": 3, "revenue": 20},
+    {"place": 2, "revenue": 50},
+])
+
+

Since we need a transformer with mini-batch support to demonstrate, we shall use +a StandardScaler.

+

from river import compose
+from river import preprocessing
+
+agg = (
+    compose.Select("place") +
+    (compose.Select("revenue") | preprocessing.StandardScaler())
+)
+
+_ = agg.learn_many(X)
+agg.transform_many(X)
+
+
   place   revenue
+0      2  0.441250
+1      3 -1.197680
+2      3 -0.693394
+3      2  1.449823
+4      3 -0.945537
+5      2  0.945537
+

+

Methods

+
+learn_many +

Update each transformer.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
  • y'pd.Series | None' — defaults to None
  • +
+
+

+
+learn_one +

Update each transformer.

+

Parameters

+
    +
  • x'dict'
  • +
  • y — defaults to None
  • +
+
+

+
+transform_many +

Passes the data through each transformer and packs the results together.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+
+

+
+transform_one +

Passes the data through each transformer and packs the results together.

+

Parameters

+
    +
  • x'dict'
  • +
+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/compose/learn-during-predict/index.html b/0.19.0/api/compose/learn-during-predict/index.html new file mode 100644 index 0000000000..240ae9b6e6 --- /dev/null +++ b/0.19.0/api/compose/learn-during-predict/index.html @@ -0,0 +1,3662 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + learn_during_predict - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

learn_during_predict

+

A context manager for fitting unsupervised steps during prediction.

+

Usually, the unsupervised parts of a pipeline are updated during learn_one. However, in the case of online learning, it is also possible to update them earlier, during the prediction step. This context manager allows you to do so.

+

This usually brings a slight performance improvement. But it is not done by default because it is not intuitive and is more difficult to test. It also means that you have to call predict_one before learn_one in order for the whole pipeline to be updated.

+
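In other words, the intended usage pattern looks like the following sketch, where model and dataset are placeholders:

+

with compose.learn_during_predict():
+    for x, y in dataset:
+        y_pred = model.predict_one(x)  # unsupervised steps get updated here
+        model = model.learn_one(x, y)  # supervised steps get updated here
+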

Examples

+

Let's first see what methods are called if we just call predict_one.

+

import io
+import logging
+from river import compose
+from river import datasets
+from river import linear_model
+from river import preprocessing
+from river import utils
+
+model = compose.Pipeline(
+    preprocessing.StandardScaler(),
+    linear_model.LinearRegression()
+)
+
+class_condition = lambda x: x.__class__.__name__ in ('StandardScaler', 'LinearRegression')
+
+logger = logging.getLogger()
+logger.setLevel(logging.DEBUG)
+
+logs = io.StringIO()
+sh = logging.StreamHandler(logs)
+sh.setLevel(logging.DEBUG)
+logger.addHandler(sh)
+
+with utils.log_method_calls(class_condition):
+    for x, y in datasets.TrumpApproval().take(1):
+        _ = model.predict_one(x)
+
+print(logs.getvalue())
+
+
StandardScaler.transform_one
+LinearRegression.predict_one
+

+

Now let's use the context manager and see what methods get called.

+

logs = io.StringIO()
+sh = logging.StreamHandler(logs)
+sh.setLevel(logging.DEBUG)
+logger.addHandler(sh)
+
+with utils.log_method_calls(class_condition), compose.learn_during_predict():
+    for x, y in datasets.TrumpApproval().take(1):
+        _ = model.predict_one(x)
+
+print(logs.getvalue())
+
+
StandardScaler.learn_one
+StandardScaler.transform_one
+LinearRegression.predict_one
+

+

We can see that this time the scaler got updated before transforming the data.

+

This also applies when working with mini-batches.

+

import pandas as pd
+
+logs = io.StringIO()
+sh = logging.StreamHandler(logs)
+sh.setLevel(logging.DEBUG)
+logger.addHandler(sh)
+
+with utils.log_method_calls(class_condition):
+    for x, y in datasets.TrumpApproval().take(1):
+        _ = model.predict_many(pd.DataFrame([x]))
+print(logs.getvalue())
+
+
StandardScaler.transform_many
+LinearRegression.predict_many
+

+

logs = io.StringIO()
+sh = logging.StreamHandler(logs)
+sh.setLevel(logging.DEBUG)
+logger.addHandler(sh)
+
+with utils.log_method_calls(class_condition), compose.learn_during_predict():
+    for x, y in datasets.TrumpApproval().take(1):
+        _ = model.predict_many(pd.DataFrame([x]))
+print(logs.getvalue())
+
+
StandardScaler.learn_many
+StandardScaler.transform_many
+LinearRegression.predict_many
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/conf/Interval/index.html b/0.19.0/api/conf/Interval/index.html new file mode 100644 index 0000000000..d4da34b2af --- /dev/null +++ b/0.19.0/api/conf/Interval/index.html @@ -0,0 +1,3400 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Interval - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Interval

+

An object to represent a (prediction) interval.

+

Users are not expected to use this class as-is. Instead, they should use the with_interval parameter of the predict_one method of any regressor or classifier wrapped with a conformal prediction method.

+

Parameters

+
    +
  • +

    lower

    +

    Typefloat

    +

    The lower bound of the interval.

    +
  • +
  • +

    upper

    +

    Typefloat

    +

    The upper bound of the interval.

    +
  • +
+

Attributes

+
    +
  • +

    center

    +

    The center of the interval.

    +
  • +
  • +

    width

    +

    The width of the interval.

    +
  • +
+ + + + + + + + +
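As an illustration, here is a sketch of how an interval might be consumed, assuming model is a regressor wrapped with conf.RegressionJackknife, and x and y are a sample's features and target:

+

interval = model.predict_one(x, with_interval=True)
+interval.lower, interval.upper  # the interval's bounds
+y in interval                   # membership check, as used to measure validity
+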
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/conf/RegressionJackknife/index.html b/0.19.0/api/conf/RegressionJackknife/index.html new file mode 100644 index 0000000000..daac1491e5 --- /dev/null +++ b/0.19.0/api/conf/RegressionJackknife/index.html @@ -0,0 +1,3486 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RegressionJackknife - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

RegressionJackknife

+

Jackknife method for regression.

+

This is a conformal prediction method for regression. It is based on the jackknife method. The idea is to compute the quantiles of the residuals of the regressor. The prediction interval is then computed as the prediction of the regressor plus the quantiles of the residuals.

+

This works naturally online, as the quantiles of the residuals are updated at each iteration. Each residual is produced before the regressor is updated, which ensures the predicted intervals are not optimistic.

+

Note that the produced intervals are marginal and not conditional. This means that the intervals are not adjusted for the features x. This is a limitation of the jackknife method. However, the jackknife method is very simple and efficient. It is also very robust to outliers.

+

Parameters

+
    +
  • +

    regressor

    +

    Typebase.Regressor

    +

    The regressor to be wrapped.

    +
  • +
  • +

    confidence_level

    +

    Typefloat

    +

    Default0.95

    +

    The confidence level of the prediction intervals.

    +
  • +
  • +

    window_size

    +

    Typeint | None

    +

    DefaultNone

    +

    The size of the window used to compute the quantiles of the residuals. If None, the quantiles are computed over the whole history. It is advised to set this if you expect the model's performance to change over time.

    +
  • +
+

Examples

+
from river import conf
+from river import datasets
+from river import linear_model
+from river import metrics
+from river import preprocessing
+from river import stats
+
+dataset = datasets.TrumpApproval()
+
+model = conf.RegressionJackknife(
+    (
+        preprocessing.StandardScaler() |
+        linear_model.LinearRegression(intercept_lr=.1)
+    ),
+    confidence_level=0.9
+)
+
+validity = stats.Mean()
+efficiency = stats.Mean()
+
+for x, y in dataset:
+    interval = model.predict_one(x, with_interval=True)
+    validity = validity.update(y in interval)
+    efficiency = efficiency.update(interval.width)
+    model = model.learn_one(x, y)
+
+

The interval's validity is the proportion of times the true value is within the interval. We +specified a confidence level of 90%, so we expect the validity to be around 90%.

+

validity
+
+
Mean: 0.939061
+

+

The interval's efficiency is the average width of the intervals.

+

efficiency
+
+
Mean: 4.078361
+

+

Lowering the confidence level will mechanically improve the efficiency.

+
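For instance, the following sketch wraps the same pipeline with a lower confidence level, which should yield narrower (more efficient) intervals at the cost of some validity:

+

model = conf.RegressionJackknife(
+    (
+        preprocessing.StandardScaler() |
+        linear_model.LinearRegression(intercept_lr=.1)
+    ),
+    confidence_level=0.8
+)
+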

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • kwargs
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
  • with_interval — defaults to False
  • +
  • kwargs
  • +
+

Returns

+

The prediction.

+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/covariance/EmpiricalCovariance/index.html b/0.19.0/api/covariance/EmpiricalCovariance/index.html new file mode 100644 index 0000000000..dfb5f8dfb3 --- /dev/null +++ b/0.19.0/api/covariance/EmpiricalCovariance/index.html @@ -0,0 +1,3492 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + EmpiricalCovariance - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

EmpiricalCovariance

+

Empirical covariance matrix.

+

Parameters

+
    +
  • +

    ddof

    +

    Default1

    +

    Delta Degrees of Freedom.

    +
  • +
+

Attributes

+
    +
  • matrix
  • +
+

Examples

+

import numpy as np
+import pandas as pd
+from river import covariance
+
+np.random.seed(42)
+X = pd.DataFrame(np.random.random((8, 3)), columns=["red", "green", "blue"])
+X
+
+
        red     green      blue
+0  0.374540  0.950714  0.731994
+1  0.598658  0.156019  0.155995
+2  0.058084  0.866176  0.601115
+3  0.708073  0.020584  0.969910
+4  0.832443  0.212339  0.181825
+5  0.183405  0.304242  0.524756
+6  0.431945  0.291229  0.611853
+7  0.139494  0.292145  0.366362
+

+

cov = covariance.EmpiricalCovariance()
+for x in X.to_dict(orient="records"):
+    cov = cov.update(x)
+cov
+
+
        blue     green    red
+ blue    0.076    0.020   -0.010
+green    0.020    0.113   -0.053
+  red   -0.010   -0.053    0.079
+

+

There is also an update_many method to process mini-batches. The results are identical.

+

cov = covariance.EmpiricalCovariance()
+cov = cov.update_many(X)
+cov
+
+
        blue     green    red
+ blue    0.076    0.020   -0.010
+green    0.020    0.113   -0.053
+  red   -0.010   -0.053    0.079
+

+

The covariances are stored in a dictionary, meaning any one of them can be accessed as such:

+

cov["blue", "green"]
+
+
Cov: 0.020292
+

+

Diagonal entries are variances:

+

cov["blue", "blue"]
+
+
Var: 0.076119
+
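The revert method removes a sample's contribution, which can be used to maintain covariances over a sliding window; a minimal sketch:

+

sample = X.to_dict(orient="records")[0]
+cov = cov.update(sample)
+cov = cov.revert(sample)  # downdates the matrix, removing the sample's contribution
+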

+

Methods

+
+revert +

Downdate with a single sample.

+

Parameters

+
    +
  • x'dict'
  • +
+
+

+
+update +

Update with a single sample.

+

Parameters

+
    +
  • x'dict'
  • +
+
+

+
+update_many +

Update with a dataframe of samples.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/covariance/EmpiricalPrecision/index.html b/0.19.0/api/covariance/EmpiricalPrecision/index.html new file mode 100644 index 0000000000..a57c2a6b11 --- /dev/null +++ b/0.19.0/api/covariance/EmpiricalPrecision/index.html @@ -0,0 +1,3462 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + EmpiricalPrecision - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

EmpiricalPrecision

+

Empirical precision matrix.

+

The precision matrix is the inverse of the covariance matrix.

+

This implementation leverages the Sherman-Morrison formula. The resulting inverse covariance matrix is not guaranteed to be identical to a batch computation. However, the difference shrinks with the number of observations.

+

Attributes

+
    +
  • matrix
  • +
+

Examples

+

import numpy as np
+import pandas as pd
+from river import covariance
+
+np.random.seed(42)
+X = pd.DataFrame(np.random.random((1000, 3)))
+X.head()
+
+
          0         1         2
+0  0.374540  0.950714  0.731994
+1  0.598658  0.156019  0.155995
+2  0.058084  0.866176  0.601115
+3  0.708073  0.020584  0.969910
+4  0.832443  0.212339  0.181825
+

+

prec = covariance.EmpiricalPrecision()
+for x in X.to_dict(orient="records"):
+    prec = prec.update(x)
+
+prec
+
+
    0        1        2
+0   12.026   -0.122   -0.214
+1   -0.122   11.276   -0.026
+2   -0.214   -0.026   11.632
+

+

pd.DataFrame(np.linalg.inv(np.cov(X.T, ddof=1)))
+
+
           0          1          2
+0  12.159791  -0.124966  -0.218671
+1  -0.124966  11.393394  -0.026662
+2  -0.218671  -0.026662  11.756907
+

+

Methods

+
+update +

Update with a single sample.

+

Parameters

+
    +
  • x
  • +
+
+

+
+update_many +

Update with a dataframe of samples.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/datasets/AirlinePassengers/index.html b/0.19.0/api/datasets/AirlinePassengers/index.html new file mode 100644 index 0000000000..83c4032103 --- /dev/null +++ b/0.19.0/api/datasets/AirlinePassengers/index.html @@ -0,0 +1,3946 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AirlinePassengers - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

AirlinePassengers

+

Monthly number of international airline passengers.

+

The stream contains 144 items and a single feature, which is the month. The goal is to predict the number of passengers each month by capturing the trend and the seasonality of the data.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+
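As an illustration, take can be used to peek at the first few samples; a minimal sketch:

+

from river import datasets
+
+for x, y in datasets.AirlinePassengers().take(2):
+    print(x, y)
+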

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/datasets/Bananas/index.html b/0.19.0/api/datasets/Bananas/index.html new file mode 100644 index 0000000000..652157fef9 --- /dev/null +++ b/0.19.0/api/datasets/Bananas/index.html @@ -0,0 +1,3946 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Bananas - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Bananas

+

Bananas dataset.

+

An artificial dataset where instances belong to several clusters with a banana shape. There are two attributes that correspond to the x and y axes, respectively.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+
+
+
    +
  1. +

    OpenML page 

    +
  2. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/datasets/Bikes/index.html b/0.19.0/api/datasets/Bikes/index.html new file mode 100644 index 0000000000..4883810eeb --- /dev/null +++ b/0.19.0/api/datasets/Bikes/index.html @@ -0,0 +1,3953 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Bikes - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Bikes

+

Bike sharing station information from the city of Toulouse.

+

The goal is to predict the number of bikes in 5 different bike stations from the city of Toulouse.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    is_downloaded

    +

    Indicate whether or not the data has been correctly downloaded.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+download +
+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/datasets/ChickWeights/index.html b/0.19.0/api/datasets/ChickWeights/index.html new file mode 100644 index 0000000000..f4123069ad --- /dev/null +++ b/0.19.0/api/datasets/ChickWeights/index.html @@ -0,0 +1,3946 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ChickWeights - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

ChickWeights

+

Chick weights along time.

+

The stream contains 578 items and 3 features. The goal is to predict the weight of each chick along time, according to the diet the chick is on. The data is ordered by time and then by chick.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/datasets/CreditCard/index.html b/0.19.0/api/datasets/CreditCard/index.html new file mode 100644 index 0000000000..f61b53a59a --- /dev/null +++ b/0.19.0/api/datasets/CreditCard/index.html @@ -0,0 +1,3975 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CreditCard - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

CreditCard

+

Credit card frauds.

+

The dataset contains transactions made by credit cards in September 2013 by European cardholders. It presents transactions that occurred over two days, with 492 frauds out of 284,807 transactions. The dataset is highly unbalanced: the positive class (frauds) accounts for 0.172% of all transactions.

+

It contains only numerical input variables which are the result of a PCA transformation. Unfortunately, due to confidentiality issues, we cannot provide the original features and more background information about the data. Features V1, V2, ... V28 are the principal components obtained with PCA; the only features which have not been transformed with PCA are 'Time' and 'Amount'. Feature 'Time' contains the seconds elapsed between each transaction and the first transaction in the dataset. Feature 'Amount' is the transaction amount; it can be used for example-dependent cost-sensitive learning. Feature 'Class' is the response variable and it takes value 1 in case of fraud and 0 otherwise.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    is_downloaded

    +

    Indicate whether or not the data has been correctly downloaded.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+download +
+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+
+
+
    +
  1. +

    Andrea Dal Pozzolo, Olivier Caelen, Reid A. Johnson and Gianluca Bontempi. Calibrating Probability with Undersampling for Unbalanced Classification. In Symposium on Computational Intelligence and Data Mining (CIDM), IEEE, 2015 

    +
  2. +
  3. +

    Dal Pozzolo, Andrea; Caelen, Olivier; Le Borgne, Yann-Ael; Waterschoot, Serge; Bontempi, Gianluca. Learned lessons in credit card fraud detection from a practitioner perspective, Expert systems with applications,41,10,4915-4928,2014, Pergamon 

    +
  4. +
  5. +

    Dal Pozzolo, Andrea; Boracchi, Giacomo; Caelen, Olivier; Alippi, Cesare; Bontempi, Gianluca. Credit card fraud detection: a realistic modeling and a novel learning strategy, IEEE transactions on neural networks and learning systems,29,8,3784-3797,2018,IEEE 

    +
  6. +
  7. +

    Dal Pozzolo, Andrea Adaptive Machine learning for credit card fraud detection ULB MLG PhD thesis (supervised by G. Bontempi) 

    +
  8. +
  9. +

    Carcillo, Fabrizio; Dal Pozzolo, Andrea; Le Borgne, Yann-Ael; Caelen, Olivier; Mazzer, Yannis; Bontempi, Gianluca. Scarff: a scalable framework for streaming credit card fraud detection with Spark, Information fusion,41, 182-194,2018,Elsevier 

    +
  10. +
  11. +

    Carcillo, Fabrizio; Le Borgne, Yann-Ael; Caelen, Olivier; Bontempi, Gianluca. Streaming active learning strategies for real-life credit card fraud detection: assessment and visualization, International Journal of Data Science and Analytics, 5,4,285-300,2018,Springer International Publishing 

    +
  12. +
  13. +

    Bertrand Lebichot, Yann-Ael Le Borgne, Liyun He, Frederic Oble, Gianluca Bontempi Deep-Learning Domain Adaptation Techniques for Credit Cards Fraud Detection, INNSBDDL 2019: Recent Advances in Big Data and Deep Learning, pp 78-88, 2019 

    +
  14. +
  15. +

    Fabrizio Carcillo, Yann-Ael Le Borgne, Olivier Caelen, Frederic Oble, Gianluca Bontempi Combining Unsupervised and Supervised Learning in Credit Card Fraud Detection Information Sciences, 2019 

    +
  16. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/datasets/Elec2/index.html b/0.19.0/api/datasets/Elec2/index.html new file mode 100644 index 0000000000..ac4ec9a711 --- /dev/null +++ b/0.19.0/api/datasets/Elec2/index.html @@ -0,0 +1,3957 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Elec2 - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Elec2

+

Electricity prices in New South Wales.

+

This is a binary classification task, where the goal is to predict if the price of electricity will go up or down.

+

This data was collected from the Australian New South Wales Electricity Market. In this market, prices are not fixed and are affected by demand and supply of the market. They are set every five minutes. Electricity transfers to/from the neighboring state of Victoria were done to alleviate fluctuations.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    is_downloaded

    +

    Indicate whether or not the data has been correctly downloaded.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+download +
+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/datasets/HTTP/index.html b/0.19.0/api/datasets/HTTP/index.html new file mode 100644 index 0000000000..d39ddf149b --- /dev/null +++ b/0.19.0/api/datasets/HTTP/index.html @@ -0,0 +1,3953 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + HTTP - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

HTTP

+

HTTP dataset of the KDD 1999 cup.

+

The goal is to predict whether or not an HTTP connection is anomalous. The dataset only contains 2,211 (0.4%) positive labels.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    is_downloaded

    +

    Indicate whether or not the data has been correctly downloaded.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+download +
+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/datasets/Higgs/index.html b/0.19.0/api/datasets/Higgs/index.html new file mode 100644 index 0000000000..d85d12eaed --- /dev/null +++ b/0.19.0/api/datasets/Higgs/index.html @@ -0,0 +1,3953 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Higgs - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Higgs

+

Higgs dataset.

+

The data has been produced using Monte Carlo simulations. The first 21 features (columns 2-22) are kinematic properties measured by the particle detectors in the accelerator. The last seven features are functions of the first 21 features; these are high-level features derived by physicists to help discriminate between the two classes.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    is_downloaded

    +

    Indicate whether or not the data has been correctly downloaded.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+download +
+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+
+
+
    +
  1. +

    UCI page 

    +
  2. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/datasets/ImageSegments/index.html b/0.19.0/api/datasets/ImageSegments/index.html new file mode 100644 index 0000000000..d2194eb7bf --- /dev/null +++ b/0.19.0/api/datasets/ImageSegments/index.html @@ -0,0 +1,3946 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ImageSegments - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

ImageSegments

+

Image segments classification.

+

This dataset contains features that describe image segments into 7 classes: brickface, sky, foliage, cement, window, path, and grass.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+
+
+
    +
  1. +

    UCI page 

    +
  2. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/datasets/Insects/index.html b/0.19.0/api/datasets/Insects/index.html new file mode 100644 index 0000000000..1e6154877e --- /dev/null +++ b/0.19.0/api/datasets/Insects/index.html @@ -0,0 +1,4014 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Insects - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Insects

+

Insects dataset.

+

This dataset has different variants, which are:

+
    +
  • +

    abrupt_balanced

    +
  • +
  • +

    abrupt_imbalanced

    +
  • +
  • +

    gradual_balanced

    +
  • +
  • +

    gradual_imbalanced

    +
  • +
  • +

    incremental-abrupt_balanced

    +
  • +
  • +

    incremental-abrupt_imbalanced

    +
  • +
  • +

    incremental-reoccurring_balanced

    +
  • +
  • +

    incremental-reoccurring_imbalanced

    +
  • +
  • +

    incremental_balanced

    +
  • +
  • +

    incremental_imbalanced

    +
  • +
  • +

    out-of-control

    +
  • +
+

The number of samples and the difficulty change from one variant to another. The number of classes is always the same (6), except for the last variant (24).

+

Parameters

+
    +
  • +

    variant

    +

    Defaultabrupt_balanced

    +

    Indicates which variant of the dataset to load.

    +
  • +
+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    is_downloaded

    +

    Indicate whether or not the data has been correctly downloaded.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+download +
+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/datasets/Keystroke/index.html b/0.19.0/api/datasets/Keystroke/index.html new file mode 100644 index 0000000000..f61ed2ac4d --- /dev/null +++ b/0.19.0/api/datasets/Keystroke/index.html @@ -0,0 +1,3954 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Keystroke - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Keystroke

+

CMU keystroke dataset.

+

Users are tasked with typing in a password. The goal is to determine which user is typing in the password.

+

The only difference with the original dataset is that the "sessionIndex" and "rep" attributes have been dropped.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    is_downloaded

    +

    Indicate whether or not the data has been correctly downloaded.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+download +
+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/datasets/MaliciousURL/index.html b/0.19.0/api/datasets/MaliciousURL/index.html new file mode 100644 index 0000000000..abc9cf1903 --- /dev/null +++ b/0.19.0/api/datasets/MaliciousURL/index.html @@ -0,0 +1,3956 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MaliciousURL - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

MaliciousURL

+

Malicious URLs dataset.

+

This dataset contains features about URLs that are classified as malicious or not.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    is_downloaded

    +

    Indicate whether or not the data has been correctly downloaded.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+download +
+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/datasets/MovieLens100K/index.html b/0.19.0/api/datasets/MovieLens100K/index.html new file mode 100644 index 0000000000..2912d890f5 --- /dev/null +++ b/0.19.0/api/datasets/MovieLens100K/index.html @@ -0,0 +1,3975 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MovieLens100K - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

MovieLens100K

+

MovieLens 100K dataset.

+

MovieLens datasets were collected by the GroupLens Research Project at the University of Minnesota. This dataset consists of 100,000 ratings (1-5) from 943 users on 1682 movies. Each user has rated at least 20 movies. User and movie information are provided. The data was collected through the MovieLens web site (movielens.umn.edu) during the seven-month period from September 19th, 1997 through April 22nd, 1998.

+

Parameters

+
    +
  • +

    unpack_user_and_item

    +

    DefaultFalse

    +

    Whether or not the user and item should be extracted from the context and included as extra keyword arguments.

    +
  • +
+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    is_downloaded

    +

    Indicate whether or not the data has been correctly downloaded.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+download +
+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/datasets/Music/index.html b/0.19.0/api/datasets/Music/index.html new file mode 100644 index 0000000000..713293519c --- /dev/null +++ b/0.19.0/api/datasets/Music/index.html @@ -0,0 +1,3953 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Music - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Music

+

Multi-label music mood prediction.

+

The goal is to predict which kinds of moods a song pertains to.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    is_downloaded

    +

    Indicate whether or the data has been correctly downloaded.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+download +
+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/datasets/Phishing/index.html b/0.19.0/api/datasets/Phishing/index.html new file mode 100644 index 0000000000..60c836804b --- /dev/null +++ b/0.19.0/api/datasets/Phishing/index.html @@ -0,0 +1,3946 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Phishing - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Phishing

+

Phishing websites.

+

This dataset contains features from web pages that are classified as phishing or not.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/datasets/Restaurants/index.html b/0.19.0/api/datasets/Restaurants/index.html
new file mode 100644
index 0000000000..58116a437b
--- /dev/null
+++ b/0.19.0/api/datasets/Restaurants/index.html
@@ -0,0 +1,3953 @@

Restaurants - River

Restaurants

+

Data from the Kaggle Recruit Restaurants challenge.

+

The goal is to predict the number of visitors in each of 829 Japanese restaurants over a period of roughly 16 weeks. The data is ordered by date and then by restaurant ID.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    is_downloaded

    +

    Indicate whether or not the data has been correctly downloaded.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+download +
+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/datasets/SMSSpam/index.html b/0.19.0/api/datasets/SMSSpam/index.html
new file mode 100644
index 0000000000..f9212480d7
--- /dev/null
+++ b/0.19.0/api/datasets/SMSSpam/index.html
@@ -0,0 +1,3953 @@

SMSSpam - River

SMSSpam

+

SMS Spam Collection dataset.

+

The data contains 5,574 items and 1 feature (i.e. the SMS body). Spam messages represent 13.4% of the dataset. The goal is to predict whether an SMS is spam or not.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    is_downloaded

    +

    Indicate whether or not the data has been correctly downloaded.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+download +
+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/datasets/SMTP/index.html b/0.19.0/api/datasets/SMTP/index.html
new file mode 100644
index 0000000000..759e069a95
--- /dev/null
+++ b/0.19.0/api/datasets/SMTP/index.html
@@ -0,0 +1,3953 @@

SMTP - River

SMTP

+

SMTP dataset from the KDD 1999 cup.

+

The goal is to predict whether or not an SMTP connection is anomalous. The dataset only contains 2,211 (0.4%) positive labels.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    is_downloaded

    +

    Indicate whether or not the data has been correctly downloaded.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+download +
+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/datasets/SolarFlare/index.html b/0.19.0/api/datasets/SolarFlare/index.html
new file mode 100644
index 0000000000..e5e441b072
--- /dev/null
+++ b/0.19.0/api/datasets/SolarFlare/index.html
@@ -0,0 +1,3945 @@

SolarFlare - River

SolarFlare

+

Solar flare multi-output regression.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/datasets/TREC07/index.html b/0.19.0/api/datasets/TREC07/index.html
new file mode 100644
index 0000000000..830d6af749
--- /dev/null
+++ b/0.19.0/api/datasets/TREC07/index.html
@@ -0,0 +1,3957 @@

TREC07 - River

TREC07

+

TREC's 2007 Spam Track dataset.

+

The data contains 75,419 chronologically ordered items, i.e. 3 months of emails delivered to a particular server in 2007. Spam messages represent 66.6% of the dataset. The goal is to predict whether an email is spam or not.

+

The available raw features are: sender, recipients, date, subject, body.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    is_downloaded

    +

    Indicate whether or not the data has been correctly downloaded.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+download +
+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/datasets/Taxis/index.html b/0.19.0/api/datasets/Taxis/index.html
new file mode 100644
index 0000000000..6b5ca35dff
--- /dev/null
+++ b/0.19.0/api/datasets/Taxis/index.html
@@ -0,0 +1,3953 @@

Taxis - River

Taxis

+

Taxi ride durations in New York City.

+

The goal is to predict the duration of taxi rides in New York City.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    is_downloaded

    +

    Indicate whether or not the data has been correctly downloaded.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+download +
+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/datasets/TrumpApproval/index.html b/0.19.0/api/datasets/TrumpApproval/index.html
new file mode 100644
index 0000000000..db22a6bc9f
--- /dev/null
+++ b/0.19.0/api/datasets/TrumpApproval/index.html
@@ -0,0 +1,3946 @@

TrumpApproval - River

TrumpApproval

+

Donald Trump approval ratings.

+

This dataset was obtained by reshaping the data used by FiveThirtyEight for analyzing Donald Trump's approval ratings. It contains 5 features, which are approval ratings collected by 5 polling agencies. The target is the approval rating from FiveThirtyEight's model. The goal of this task is to see if we can reproduce FiveThirtyEight's model.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/datasets/WaterFlow/index.html b/0.19.0/api/datasets/WaterFlow/index.html
new file mode 100644
index 0000000000..e4a18f6122
--- /dev/null
+++ b/0.19.0/api/datasets/WaterFlow/index.html
@@ -0,0 +1,3943 @@

WaterFlow - River

WaterFlow

+

Water flow through a pipeline branch.

+

The series includes hourly values for about 2 months, March 2022 to May 2022. The values are expressed in liters per second. There are four anomalous segments in the series:

+
    +
  • 3 "low value moments": this is due to water losses or human intervention for maintenance * A small peak in the water inflow after the first 2 segments: this is due to a pumping operation into the main pipeline, when more water pressure is needed
  • +
+

This dataset is well suited for time series forecasting models, as well as anomaly detection methods. Ideally, the goal is to build a time series forecasting model that is robust to the anomalous segments.

+

This data has been kindly donated by the Tecnojest s.r.l. company (www.invidea.it) from Italy.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/datasets/base/Dataset/index.html b/0.19.0/api/datasets/base/Dataset/index.html
new file mode 100644
index 0000000000..6cd5f15c20
--- /dev/null
+++ b/0.19.0/api/datasets/base/Dataset/index.html
@@ -0,0 +1,4068 @@

Dataset - River

Dataset

+

Base class for all datasets.

+

All datasets inherit from this class, be they stored in a file or generated on the fly.

+

Parameters

+
    +
  • +

    task

    +

    Type of task the dataset is meant for. Should be one of the following: - "Regression" - "Binary classification" - "Multi-class classification" - "Multi-output binary classification" - "Multi-output regression"

    +
  • +
  • +

    n_features

    +

    Number of features in the dataset.

    +
  • +
  • +

    n_samples

    +

    DefaultNone

    +

    Number of samples in the dataset.

    +
  • +
  • +

    n_classes

    +

    DefaultNone

    +

    Number of classes in the dataset, only applies to classification datasets.

    +
  • +
  • +

    n_outputs

    +

    DefaultNone

    +

    Number of outputs the target is made of, only applies to multi-output datasets.

    +
  • +
  • +

    sparse

    +

    DefaultFalse

    +

    Whether the dataset is sparse or not.

    +
  • +
+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/datasets/base/FileDataset/index.html b/0.19.0/api/datasets/base/FileDataset/index.html
new file mode 100644
index 0000000000..42baf69318
--- /dev/null
+++ b/0.19.0/api/datasets/base/FileDataset/index.html
@@ -0,0 +1,4056 @@

FileDataset - River

FileDataset

+

Base class for datasets that are stored in a local file.

+

Small datasets that are part of the river package inherit from this class.

+

Parameters

+
    +
  • +

    filename

    +

    The file's name.

    +
  • +
  • +

    directory

    +

    DefaultNone

    +

    The directory where the file is contained. Defaults to the location of the datasets module.

    +
  • +
  • +

    desc

    +

    Extra dataset parameters to pass as keyword arguments.

    +
  • +
+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/datasets/base/RemoteDataset/index.html b/0.19.0/api/datasets/base/RemoteDataset/index.html
new file mode 100644
index 0000000000..f7f224e25c
--- /dev/null
+++ b/0.19.0/api/datasets/base/RemoteDataset/index.html
@@ -0,0 +1,4073 @@

RemoteDataset - River

RemoteDataset

+

Base class for datasets that are stored in a remote file.

+

Medium and large datasets that are not part of the river package inherit from this class.

+

The filename doesn't have to be provided if unpack is False. In that case, the filename will be inferred from the URL.

+

Parameters

+
    +
  • +

    url

    +

    The URL the dataset is located at.

    +
  • +
  • +

    size

    +

    The expected download size.

    +
  • +
  • +

    unpack

    +

    DefaultTrue

    +

    Whether to unpack the download or not.

    +
  • +
  • +

    filename

    +

    DefaultNone

    +

    An optional name to give to the file if it is unpacked.

    +
  • +
  • +

    desc

    +

    Extra dataset parameters to pass as keyword arguments.

    +
  • +
+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    is_downloaded

    +

    Indicate whether or not the data has been correctly downloaded.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+download +
+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/datasets/base/SyntheticDataset/index.html b/0.19.0/api/datasets/base/SyntheticDataset/index.html
new file mode 100644
index 0000000000..abcb0564bc
--- /dev/null
+++ b/0.19.0/api/datasets/base/SyntheticDataset/index.html
@@ -0,0 +1,4067 @@

SyntheticDataset - River

SyntheticDataset

+

A synthetic dataset.

+

Parameters

+
    +
  • +

    task

    +

    Type of task the dataset is meant for. Should be one of: - "Regression" - "Binary classification" - "Multi-class classification" - "Multi-output binary classification" - "Multi-output regression"

    +
  • +
  • +

    n_features

    +

    Number of features in the dataset.

    +
  • +
  • +

    n_samples

    +

    DefaultNone

    +

    Number of samples in the dataset.

    +
  • +
  • +

    n_classes

    +

    DefaultNone

    +

    Number of classes in the dataset, only applies to classification datasets.

    +
  • +
  • +

    n_outputs

    +

    DefaultNone

    +

    Number of outputs the target is made of, only applies to multi-output datasets.

    +
  • +
  • +

    sparse

    +

    DefaultFalse

    +

    Whether the dataset is sparse or not.

    +
  • +
+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/datasets/synth/Agrawal/index.html b/0.19.0/api/datasets/synth/Agrawal/index.html
new file mode 100644
index 0000000000..f7a73ceb2f
--- /dev/null
+++ b/0.19.0/api/datasets/synth/Agrawal/index.html
@@ -0,0 +1,4480 @@

Agrawal - River

Agrawal

+

Agrawal stream generator.

+

The generator was introduced by Agrawal et al. 1, and was a common source of data for early work on scaling up decision tree learners. The generator produces a stream containing nine features, six numeric and three categorical. There are 10 functions defined for generating binary class labels from the features. Presumably these determine whether the loan should be approved. Classification functions are listed in the original paper 1.

+

Feature | Description | Values

+
    +
  • +

    salary | salary | uniformly distributed from 20k to 150k

    +
  • +
  • +

    commission | commission | 0 if salary < 75k else uniformly distributed from 10k to 75k

    +
  • +
  • +

    age | age | uniformly distributed from 20 to 80

    +
  • +
  • +

    elevel | education level | uniformly chosen from 0 to 4

    +
  • +
  • +

    car | car maker | uniformly chosen from 1 to 20

    +
  • +
  • +

    zipcode | zip code of the town | uniformly chosen from 0 to 8

    +
  • +
  • +

    hvalue | house value | uniformly distributed from 50k x zipcode to 100k x zipcode

    +
  • +
  • +

    hyears | years house owned | uniformly distributed from 1 to 30

    +
  • +
  • +

    loan | total loan amount | uniformly distributed from 0 to 500k

    +
  • +
+

Parameters

+
    +
  • +

    classification_function

    +

    Typeint

    +

    Default0

    +

    The classification function to use for the generation. Valid values are from 0 to 9.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
  • +

    balance_classes

    +

    Typebool

    +

    DefaultFalse

    +

    If True, the class distribution will converge to a uniform distribution.

    +
  • +
  • +

    perturbation

    +

    Typefloat

    +

    Default0.0

    +

    The probability that noise will happen in the generation. Each new sample will be perturbed by the magnitude of perturbation. Valid values are in the range [0.0, 1.0].

    +
  • +
+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
+

Examples

+

from river.datasets import synth
+
+dataset = synth.Agrawal(
+    classification_function=0,
+    seed=42
+)
+
+dataset
+
+
Synthetic data generator
+<BLANKLINE>
+    Name  Agrawal
+    Task  Binary classification
+ Samples  ∞
+Features  9
+ Outputs  1
+ Classes  2
+  Sparse  False
+<BLANKLINE>
+Configuration
+-------------
+classification_function  0
+                   seed  42
+        balance_classes  False
+           perturbation  0.0
+

+

for x, y in dataset.take(5):
+    print(list(x.values()), y)
+
+
[103125.4837, 0, 21, 2, 8, 3, 319768.9642, 4, 338349.7437] 1
+[135983.3438, 0, 25, 4, 14, 0, 423837.7755, 7, 116330.4466] 1
+[98262.4347, 0, 55, 1, 18, 6, 144088.1244, 19, 139095.3541] 0
+[133009.0417, 0, 68, 1, 14, 5, 233361.4025, 7, 478606.5361] 1
+[63757.2908, 16955.9382, 26, 2, 12, 4, 522851.3093, 24, 229712.4398] 1
+

+

Methods

+
+generate_drift +

Generate drift by switching the classification function randomly.

+
+

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+

Notes

+

The sample generation works as follows: The 9 features are generated with the random generator, initialized with the seed passed by the user. Then, the classification function decides, as a function of all the attributes, whether to classify the instance as class 0 or class 1. The next step is to verify if the classes should be balanced, and if so, balance the classes. Finally, add noise if perturbation > 0.0.

  1. Rakesh Agrawal, Tomasz Imielinski, and Arun Swami. "Database Mining: A Performance Perspective", IEEE Transactions on Knowledge and Data Engineering, 5(6), December 1993.
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
\ No newline at end of file
diff --git a/0.19.0/api/datasets/synth/AnomalySine/index.html b/0.19.0/api/datasets/synth/AnomalySine/index.html
new file mode 100644
index 0000000000..8a2a6460c6
--- /dev/null
+++ b/0.19.0/api/datasets/synth/AnomalySine/index.html
@@ -0,0 +1,4424 @@

AnomalySine - River

AnomalySine

+

Simulate a stream with anomalies in sine waves.

+

The amount of data generated by this generator is finite.

+

The data generated corresponds to sine and cosine functions. Anomalies are induced by replacing the cosine values with values from a different sine function. The contextual flag can be used to introduce contextual anomalies, which are values in the normal global range, but abnormal compared to the seasonal pattern. Contextual attributes are introduced by replacing cosine entries with sine values.

+

The target indicates whether or not the instances are anomalous.

+

Parameters

+
    +
  • +

    n_samples

    +

    Typeint

    +

    Default10000

    +

    The number of samples to generate. This generator creates a batch of data affected by contextual anomalies and noise.

    +
  • +
  • +

    n_anomalies

    +

    Typeint

    +

    Default2500

    +

    Number of anomalies. Can't be larger than n_samples.

    +
  • +
  • +

    contextual

    +

    Typebool

    +

    DefaultFalse

    +

    If True, will add contextual anomalies.

    +
  • +
  • +

    n_contextual

    +

    Typeint

    +

    Default2500

    +

    Number of contextual anomalies. Can't be larger than n_samples.

    +
  • +
  • +

    shift

    +

    Typeint

    +

    Default4

    +

    Shift in number of samples applied when retrieving contextual anomalies.

    +
  • +
  • +

    noise

    +

    Typefloat

    +

    Default0.5

    +

    Amount of noise.

    +
  • +
  • +

    replace

    +

    Typebool

    +

    DefaultTrue

    +

    If True, anomalies are randomly sampled with replacement.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
+

Examples

+

from river.datasets import synth
+
+dataset = synth.AnomalySine(
+    seed=12345,
+    n_samples=100,
+    n_anomalies=25,
+    contextual=True,
+    n_contextual=10
+)
+
+for x, y in dataset.take(5):
+    print(x, y)
+
+
{'sine': -0.7119, 'cosine': 0.8777} False
+{'sine': 0.8792, 'cosine': -0.0290} False
+{'sine': 0.0440, 'cosine': 3.0852} True
+{'sine': 0.5520, 'cosine': 3.4515} True
+{'sine': 0.8037, 'cosine': 0.4027} False
+

+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/datasets/synth/ConceptDriftStream/index.html b/0.19.0/api/datasets/synth/ConceptDriftStream/index.html
new file mode 100644
index 0000000000..c16216fd8c
--- /dev/null
+++ b/0.19.0/api/datasets/synth/ConceptDriftStream/index.html
@@ -0,0 +1,4445 @@

ConceptDriftStream - River

ConceptDriftStream

+

Generates a stream with concept drift.

+

A stream generator that adds concept drift or change by joining two streams. This is done by building a weighted combination of two pure distributions that characterizes the target concepts before and after the change.

+

The sigmoid function is an elegant and practical solution to define the probability that each new instance of the stream belongs to the new concept after the drift. The sigmoid function introduces a gradual, smooth transition whose duration is controlled with two parameters:

+
    +
  • +

    \(p\), the position of the change.

    +
  • +
  • +

    \(w\), the width of the transition.

    +
  • +
+

The sigmoid function at sample \(t\) is

+
\[f(t) = 1/(1+e^{-4(t-p)/w})\]
+

Parameters

+
    +
  • +

    stream

    +

    Typedatasets.base.SyntheticDataset | None

    +

    DefaultNone

    +

    Original stream

    +
  • +
  • +

    drift_stream

    +

    Typedatasets.base.SyntheticDataset | None

    +

    DefaultNone

    +

    Drift stream

    +
  • +
  • +

    position

    +

    Typeint

    +

    Default5000

    +

    Central position of the concept drift change.

    +
  • +
  • +

    width

    +

    Typeint

    +

    Default1000

    +

    Width of concept drift change.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
  • +

    alpha

    +

    Typefloat | None

    +

    DefaultNone

    +

    Angle of change used to estimate the width of concept drift change. If set, it will override the width parameter. Valid values are in the range (0.0, 90.0].

    +
  • +
+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
+

Examples

+

from river.datasets import synth
+
+dataset = synth.ConceptDriftStream(
+    stream=synth.SEA(seed=42, variant=0),
+    drift_stream=synth.SEA(seed=42, variant=1),
+    seed=1, position=5, width=2
+)
+
+for x, y in dataset.take(10):
+    print(x, y)
+
+
{0: 6.3942, 1: 0.2501, 2: 2.7502} False
+{0: 2.2321, 1: 7.3647, 2: 6.7669} True
+{0: 8.9217, 1: 0.8693, 2: 4.2192} True
+{0: 0.2979, 1: 2.1863, 2: 5.0535} False
+{0: 6.3942, 1: 0.2501, 2: 2.7502} False
+{0: 2.2321, 1: 7.3647, 2: 6.7669} True
+{0: 8.9217, 1: 0.8693, 2: 4.2192} True
+{0: 0.2979, 1: 2.1863, 2: 5.0535} False
+{0: 0.2653, 1: 1.9883, 2: 6.4988} False
+{0: 5.4494, 1: 2.2044, 2: 5.8926} False
+

+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+

Notes

+

An optional way to estimate the width of the transition \(w\) is based on the angle \(\alpha\), \(w = 1/\tan(\alpha)\). Since width corresponds to the number of samples for the transition, the width is rounded to the nearest smaller integer. Notice that larger values of \(\alpha\) result in smaller widths. For \(\alpha > 45.0\), the width is smaller than 1 so values are rounded to 1 to avoid division by zero errors.

\ No newline at end of file
diff --git a/0.19.0/api/datasets/synth/Friedman/index.html b/0.19.0/api/datasets/synth/Friedman/index.html
new file mode 100644
index 0000000000..81550be8c5
--- /dev/null
+++ b/0.19.0/api/datasets/synth/Friedman/index.html
@@ -0,0 +1,4384 @@

Friedman - River

Friedman

+

Friedman synthetic dataset.

+

Each observation is composed of 10 features. Each feature value is sampled uniformly in [0, 1]. The target is defined by the following function:

+
\[y = 10 \sin(\pi x_0 x_1) + 20 (x_2 - 0.5)^2 + 10 x_3 + 5 x_4 + \epsilon\]
+

In the last expression, \(\epsilon \sim \mathcal{N}(0, 1)\) is the noise. Therefore, only the first 5 features are relevant.

+

Parameters

+
    +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed number used for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
+

Examples

+

from river.datasets import synth
+
+dataset = synth.Friedman(seed=42)
+
+for x, y in dataset.take(5):
+    print(list(x.values()), y)
+
+
[0.63, 0.02, 0.27, 0.22, 0.73, 0.67, 0.89, 0.08, 0.42, 0.02] 7.66
+[0.02, 0.19, 0.64, 0.54, 0.22, 0.58, 0.80, 0.00, 0.80, 0.69] 8.33
+[0.34, 0.15, 0.95, 0.33, 0.09, 0.09, 0.84, 0.60, 0.80, 0.72] 7.04
+[0.37, 0.55, 0.82, 0.61, 0.86, 0.57, 0.70, 0.04, 0.22, 0.28] 18.16
+[0.07, 0.23, 0.10, 0.27, 0.63, 0.36, 0.37, 0.20, 0.26, 0.93] 8.90
+

+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/datasets/synth/FriedmanDrift/index.html b/0.19.0/api/datasets/synth/FriedmanDrift/index.html
new file mode 100644
index 0000000000..7b72db6966
--- /dev/null
+++ b/0.19.0/api/datasets/synth/FriedmanDrift/index.html
@@ -0,0 +1,4437 @@

FriedmanDrift - River

FriedmanDrift

+

Friedman synthetic dataset with concept drifts.

+

Each observation is composed of 10 features. Each feature value is sampled uniformly in [0, 1]. Only the first 5 features are relevant. The target is defined by different functions depending on the type of the drift.

+

The three available modes of operation of the data generator are described in 1.

+

Parameters

+
    +
  • +

    drift_type

    +

    Typestr

    +

    Defaultlea

    +

    The variant of concept drift.
    - 'lea': Local Expanding Abrupt drift. The concept drift appears in two distinct regions of the instance space, while the remaining regions are left unaltered. There are three points of abrupt change in the training dataset. At every consecutive change the regions of drift are expanded.
    - 'gra': Global Recurring Abrupt drift. The concept drift appears over the whole instance space. There are two points of concept drift. At the second point of drift the old concept reoccurs.
    - 'gsg': Global and Slow Gradual drift. The concept drift affects all the instance space. However, the change is gradual and not abrupt. After each one of the two change points covered by this variant, and during a window of length transition_window, examples from both old and the new concepts are generated with equal probability. After the transition period, only the examples from the new concept are generated.

    +
  • +
  • +

    position

    +

    Typetuple[int, ...]

    +

    Default(50000, 100000, 150000)

    +

    The number of monitored instances after which each concept drift occurs. A tuple with at least two elements must be passed, where each number is greater than the preceding one. If drift_type='lea', then the tuple must have three elements.

    +
  • +
  • +

    transition_window

    +

    Typeint

    +

    Default10000

    +

    The length of the transition window between two concepts. Only applicable when drift_type='gsg'. If set to zero, the drifts will be abrupt. Anytime transition_window > 0, it defines a window in which instances of the new concept are gradually introduced among the examples from the old concept. During this transition phase, both old and new concepts appear with equal probability.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed number used for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
+

Examples

+

from river.datasets import synth
+
+dataset = synth.FriedmanDrift(
+    drift_type='lea',
+    position=(1, 2, 3),
+    seed=42
+)
+
+for x, y in dataset.take(5):
+    print(list(x.values()), y)
+
+
[0.63, 0.02, 0.27, 0.22, 0.73, 0.67, 0.89, 0.08, 0.42, 0.02] 7.66
+[0.02, 0.19, 0.64, 0.54, 0.22, 0.58, 0.80, 0.00, 0.80, 0.69] 8.33
+[0.34, 0.15, 0.95, 0.33, 0.09, 0.09, 0.84, 0.60, 0.80, 0.72] 7.04
+[0.37, 0.55, 0.82, 0.61, 0.86, 0.57, 0.70, 0.04, 0.22, 0.28] 18.16
+[0.07, 0.23, 0.10, 0.27, 0.63, 0.36, 0.37, 0.20, 0.26, 0.93] -2.65
+

+

dataset = synth.FriedmanDrift(
+    drift_type='gra',
+    position=(2, 3),
+    seed=42
+)
+
+for x, y in dataset.take(5):
+    print(list(x.values()), y)
+
+
[0.63, 0.02, 0.27, 0.22, 0.73, 0.67, 0.89, 0.08, 0.42, 0.02] 7.66
+[0.02, 0.19, 0.64, 0.54, 0.22, 0.58, 0.80, 0.00, 0.80, 0.69] 8.33
+[0.34, 0.15, 0.95, 0.33, 0.09, 0.09, 0.84, 0.60, 0.80, 0.72] 8.96
+[0.37, 0.55, 0.82, 0.61, 0.86, 0.57, 0.70, 0.04, 0.22, 0.28] 18.16
+[0.07, 0.23, 0.10, 0.27, 0.63, 0.36, 0.37, 0.20, 0.26, 0.93] 8.90
+

+

dataset = synth.FriedmanDrift(
+    drift_type='gsg',
+    position=(1, 4),
+    transition_window=2,
+    seed=42
+)
+
+for x, y in dataset.take(5):
+    print(list(x.values()), y)
+
+
[0.63, 0.02, 0.27, 0.22, 0.73, 0.67, 0.89, 0.08, 0.42, 0.02] 7.66
+[0.02, 0.19, 0.64, 0.54, 0.22, 0.58, 0.80, 0.00, 0.80, 0.69] 8.33
+[0.34, 0.15, 0.95, 0.33, 0.09, 0.09, 0.84, 0.60, 0.80, 0.72] 8.92
+[0.37, 0.55, 0.82, 0.61, 0.86, 0.57, 0.70, 0.04, 0.22, 0.28] 17.32
+[0.07, 0.23, 0.10, 0.27, 0.63, 0.36, 0.37, 0.20, 0.26, 0.93] 6.05
+

+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

  1. Ikonomovska, E., Gama, J. and Džeroski, S., 2011. Learning model trees from evolving data streams. Data Mining and Knowledge Discovery, 23(1), pp.128-168.
\ No newline at end of file
diff --git a/0.19.0/api/datasets/synth/Hyperplane/index.html b/0.19.0/api/datasets/synth/Hyperplane/index.html
new file mode 100644
index 0000000000..7d7bbf3870
--- /dev/null
+++ b/0.19.0/api/datasets/synth/Hyperplane/index.html
@@ -0,0 +1,4446 @@

Hyperplane - River

Hyperplane

+

Hyperplane stream generator.

+

Generates the problem of predicting the class of a rotating hyperplane. It was used as a testbed for CVFDT and VFDT in 1.

+

A hyperplane in d-dimensional space is the set of points \(x\) that satisfy

+
\[\sum^{d}_{i=1} w_i x_i = w_0 = \sum^{d}_{i=1} w_i\]
+

where \(x_i\) is the i-th coordinate of \(x\).

+
    +
  • +

    Examples for which \(\sum^{d}_{i=1} w_i x_i > w_0\), are labeled positive.

    +
  • +
  • +

    Examples for which \(\sum^{d}_{i=1} w_i x_i \leq w_0\), are labeled negative.

    +
  • +
+

Hyperplanes are useful for simulating time-changing concepts because we can change the orientation and position of the hyperplane in a smooth manner by changing the relative size of the weights. We introduce change to this dataset by adding drift to each weighted feature \(w_i = w_i + d \sigma\), where \(\sigma\) is the probability that the direction of change is reversed and \(d\) is the change applied to each example.

+

Parameters

+
    +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
  • +

    n_features

    +

    Typeint

    +

    Default10

    +

    The number of attributes to generate. Must be higher than 2.

    +
  • +
  • +

    n_drift_features

    +

    Typeint

    +

    Default2

    +

    The number of attributes with drift. Must be higher than 2.

    +
  • +
  • +

    mag_change

    +

    Typefloat

    +

    Default0.0

    +

    Magnitude of the change for every example. From 0.0 to 1.0.

    +
  • +
  • +

    noise_percentage

    +

    Typefloat

    +

    Default0.05

    +

    Percentage of noise to add to the data. From 0.0 to 1.0.

    +
  • +
  • +

    sigma

    +

    Typefloat

    +

    Default0.1

    +

    Probability that the direction of change is reversed. From 0.0 to 1.0.

    +
  • +
+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
+

Examples

+

from river.datasets import synth
+
+dataset = synth.Hyperplane(seed=42, n_features=2)
+
+for x, y in dataset.take(5):
+    print(x, y)
+
+
{0: 0.2750, 1: 0.2232} 0
+{0: 0.0869, 1: 0.4219} 1
+{0: 0.0265, 1: 0.1988} 0
+{0: 0.5892, 1: 0.8094} 0
+{0: 0.3402, 1: 0.1554} 0
+

+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+

Notes

+

The sample generation works as follows: The features are generated with the random number generator, initialized with the seed passed by the user. Then the classification function decides, as a function of the sum of the weighted features and the sum of the weights, whether the instance belongs to class 0 or class 1. The last step is to add noise and generate drift.

+
  1. G. Hulten, L. Spencer, and P. Domingos. Mining time-changing data streams. In KDD'01, pages 97-106, San Francisco, CA, 2001. ACM Press.
\ No newline at end of file
diff --git a/0.19.0/api/datasets/synth/LED/index.html b/0.19.0/api/datasets/synth/LED/index.html
new file mode 100644
index 0000000000..4ba0fc50f0
--- /dev/null
+++ b/0.19.0/api/datasets/synth/LED/index.html
@@ -0,0 +1,4420 @@

LED - River

LED

+

LED stream generator.

+

This data source originates from the CART book 1. An implementation in C was donated to the UCI 2 machine learning repository by David Aha. The goal is to predict the digit displayed on a seven-segment LED display, where each attribute has a 10% chance of being inverted. It has an optimal Bayes classification rate of 74%. The particular configuration of the generator used for experiments (LED) produces 24 binary attributes, 17 of which are irrelevant.

+

Parameters

+
    +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
  • +

    noise_percentage

    +

    Typefloat

    +

    Default0.0

    +

    The probability that noise will happen in the generation. For each new sample, a random number is drawn, and if it is less than or equal to noise_percentage, the LED value will be switched.

    +
  • +
  • +

    irrelevant_features

    +

    Typebool

    +

    DefaultFalse

    +

    Adds 17 non-relevant attributes to the stream.

    +
  • +
+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
+

Examples

+

from river.datasets import synth
+
+dataset = synth.LED(seed = 112, noise_percentage = 0.28, irrelevant_features= False)
+
+for x, y in dataset.take(5):
+    print(x, y)
+
+
{0: 1, 1: 0, 2: 1, 3: 0, 4: 0, 5: 1, 6: 0} 7
+{0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 0} 8
+{0: 1, 1: 1, 2: 1, 3: 1, 4: 0, 5: 1, 6: 0} 9
+{0: 0, 1: 0, 2: 1, 3: 0, 4: 0, 5: 1, 6: 0} 1
+{0: 0, 1: 1, 2: 1, 3: 0, 4: 0, 5: 0, 6: 0} 1
+

+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+

Notes

+

An instance is generated based on the parameters passed. If irrelevant_features is set, the total number of attributes will be 24; otherwise there will be 7 attributes.

+
  1. Leo Breiman, Jerome Friedman, R. Olshen, and Charles J. Stone. Classification and Regression Trees. Wadsworth and Brooks, Monterey, CA, 1984.
  2. A. Asuncion and D. J. Newman. UCI Machine Learning Repository [http://www.ics.uci.edu/~mlearn/mlrepository.html]. University of California, Irvine, School of Information and Computer Sciences, 2007.
\ No newline at end of file
diff --git a/0.19.0/api/datasets/synth/LEDDrift/index.html b/0.19.0/api/datasets/synth/LEDDrift/index.html
new file mode 100644
index 0000000000..04012f9f78
--- /dev/null
+++ b/0.19.0/api/datasets/synth/LEDDrift/index.html
@@ -0,0 +1,4411 @@

LEDDrift - River

LEDDrift

+

LED stream generator with concept drift.

+

This class is an extension of the LED generator whose purpose is to add concept drift to the stream.

+

Parameters

+
    +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
  • +

    noise_percentage

    +

    Typefloat

    +

    Default0.0

    +

    The probability that noise will happen in the generation. For each new sample, a random number is drawn, and if it is less than or equal to noise_percentage, the LED value will be switched.

    +
  • +
  • +

    irrelevant_features

    +

    Typebool

    +

    DefaultFalse

    +

    Adds 17 non-relevant attributes to the stream.

    +
  • +
  • +

    n_drift_features

    +

    Typeint

    +

    Default0

    +

    The number of attributes that have drift.

    +
  • +
+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
+

Examples

+

from river.datasets import synth
+
+dataset = synth.LEDDrift(seed = 112, noise_percentage = 0.28,
+                         irrelevant_features= True, n_drift_features=4)
+
+for x, y in dataset.take(5):
+    print(list(x.values()), y)
+
+
[1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1] 7
+[1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0] 6
+[0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1] 1
+[1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1] 6
+[1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0] 7
+

+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+

Notes

+

An instance is generated based on the parameters passed. If irrelevant_features is set, the total number of attributes will be 24; otherwise there will be 7 attributes.

\ No newline at end of file
diff --git a/0.19.0/api/datasets/synth/Logical/index.html b/0.19.0/api/datasets/synth/Logical/index.html
new file mode 100644
index 0000000000..599956ad5d
--- /dev/null
+++ b/0.19.0/api/datasets/synth/Logical/index.html
@@ -0,0 +1,4387 @@

Logical - River

Logical

+

Logical functions stream generator.

+

Make a toy dataset with three labels that represent the logical functions: OR, XOR, AND (functions of the 2D input).

+

Data is generated in 'tiles' which contain the complete set of logical operations results. The tiles are repeated n_tiles times. Optionally, the generated data can be shuffled.

+

Parameters

+
    +
  • +

    n_tiles

    +

    Typeint

    +

    Default1

    +

    Number of tiles to generate.

    +
  • +
  • +

    shuffle

    +

    Typebool

    +

    DefaultTrue

    +

    If set, generated data will be shuffled.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
+

Examples

+

from river.datasets import synth
+
+dataset = synth.Logical(n_tiles=2, shuffle=True, seed=42)
+
+for x, y in dataset.take(5):
+    print(x, y)
+
+
{'A': 1, 'B': 1} {'OR': 1, 'XOR': 0, 'AND': 1}
+{'A': 0, 'B': 0} {'OR': 0, 'XOR': 0, 'AND': 0}
+{'A': 1, 'B': 0} {'OR': 1, 'XOR': 1, 'AND': 0}
+{'A': 1, 'B': 1} {'OR': 1, 'XOR': 0, 'AND': 1}
+{'A': 1, 'B': 0} {'OR': 1, 'XOR': 1, 'AND': 0}
+

+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/datasets/synth/Mixed/index.html b/0.19.0/api/datasets/synth/Mixed/index.html
new file mode 100644
index 0000000000..3e3b4d8a9d
--- /dev/null
+++ b/0.19.0/api/datasets/synth/Mixed/index.html
@@ -0,0 +1,4433 @@

Mixed - River

Mixed

+

Mixed data stream generator.

+

This generator is an implementation of a data stream with abrupt concept drift and boolean noise-free examples as described in 1.

+

It has four relevant attributes, two boolean attributes \(v, w\) and two numeric attributes \(x, y\) uniformly distributed from 0 to 1. The examples are labeled depending on the classification function chosen from below.

+
    +
  • +

    function 0: if \(v\) and \(w\) are true or \(v\) and \(z\) are true or \(w\) and \(z\) are true then 0 else 1, where \(z\) is \(y < 0.5 + 0.3 \sin(3 \pi x)\)

    +
  • +
  • +

    function 1: The opposite of function 0.

    +
  • +
+

Concept drift can be introduced by changing the classification function. This can be done manually or using ConceptDriftStream.

+

Parameters

+
    +
  • +

    classification_function

    +

    Typeint

    +

    Default0

    +

    Which of the two classification functions to use for the generation. Valid options are 0 or 1.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
  • +

    balance_classes

    +

    Typebool

    +

    DefaultFalse

    +

    Whether to balance classes or not. If balanced, the class distribution will converge to a uniform distribution.

    +
  • +
+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
+

Examples

+

from river.datasets import synth
+dataset = synth.Mixed(seed = 42, classification_function=1, balance_classes = True)
+for x, y in dataset.take(5):
+    print(x, y)
+
+
{0: True, 1: False, 2: 0.2750, 3: 0.2232} 1
+{0: False, 1: False, 2: 0.2186, 3: 0.5053} 0
+{0: False, 1: True, 2: 0.8094, 3: 0.0064} 1
+{0: False, 1: False, 2: 0.1010, 3: 0.2779} 0
+{0: True, 1: False, 2: 0.37018, 3: 0.2095} 1
+

+

Methods

+
+generate_drift +

Generate drift by switching the classification function.

+
+

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+

Notes

+

The sample generation works as follows: The two numeric attributes are generated with the random generator, initialized with the seed passed by the user (optional). The boolean attributes are set to 0 or 1 depending on whether a draw from the random number generator exceeds 0.5. The classification function then decides whether to classify the instance as class 0 or class 1. The next step is to verify if the classes should be balanced, and if so, balance the classes.

+

The generated sample will have 4 relevant features and 1 label (it is a binary-classification task).

  1. Gama, João, et al. "Learning with drift detection." Advances in Artificial Intelligence - SBIA 2004. Springer Berlin Heidelberg, 2004. 286-295.
\ No newline at end of file
diff --git a/0.19.0/api/datasets/synth/Mv/index.html b/0.19.0/api/datasets/synth/Mv/index.html
new file mode 100644
index 0000000000..95325c5dc7
--- /dev/null
+++ b/0.19.0/api/datasets/synth/Mv/index.html
@@ -0,0 +1,4452 @@

Mv - River

Mv

+

Mv artificial dataset.

+

Artificial dataset composed of both nominal and numeric features, whose features present co-dependencies. Originally described in 1.

+

The features are generated using the following expressions:

+
    +
  • +

    \(x_1\): uniformly distributed over [-5, 5].

    +
  • +
  • +

    \(x_2\): uniformly distributed over [-15, -10].

    +
  • +
  • +

    \(x_3\):

    +
      +
    • +

      if \(x_1 > 0\), \(x_3 \leftarrow\) 'green'

      +
    • +
    • +

      else \(x_3 \leftarrow\) 'red' with probability \(0.4\) and \(x_3 \leftarrow\) 'brown' with probability \(0.6\).

      +
    • +
    +
  • +
  • +

    \(x_4\):

    +
      +
    • +

      if \(x_3 =\) 'green', \(x_4 \leftarrow x_1 + 2 x_2\)

      +
    • +
    • +

      else \(x_4 = \frac{x_1}{2}\) with probability \(0.3\) and \(x_4 = \frac{x_2}{2}\) with probability \(0.7\).

      +
    • +
    +
  • +
  • +

    \(x_5\): uniformly distributed over [-1, 1].

    +
  • +
  • +

    \(x_6 \leftarrow x_4 \times \epsilon\), where \(\epsilon\) is uniformly distributed

    +
  • +
+

over [0, 5].

+
    +
  • +

    \(x_7\): 'yes' with probability \(0.3\), and 'no' with probability \(0.7\).

    +
  • +
  • +

    \(x_8\): 'normal' if \(x_5 < 0.5\) else 'large'.

    +
  • +
  • +

    \(x_9\): uniformly distributed over [100, 500].

    +
  • +
  • +

    \(x_{10}\): uniformly distributed integer over the interval [1000, 1200].

    +
  • +
+

The target value is generated using the following rules:

+
    +
  • +

    if \(x_2 > 2\), \(y \leftarrow 35 - 0.5 x_4\)

    +
  • +
  • +

    else if \(-2 \le x_4 \le 2\), \(y \leftarrow 10 - 2 x_1\)

    +
  • +
  • +

    else if \(x_7 =\) 'yes', \(y \leftarrow 3 - \frac{x_1}{x_4}\)

    +
  • +
  • +

    else if \(x_8 =\) 'normal', \(y \leftarrow x_6 + x_1\)

    +
  • +
  • +

    else \(y \leftarrow \frac{x_1}{2}\).

    +
  • +
+

Parameters

+
    +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed number used for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
+

Examples

+

from river.datasets import synth
+
+dataset = synth.Mv(seed=42)
+
+for x, y in dataset.take(5):
+    print(list(x.values()), y)
+
+
[1.39, -14.87, 'green', -28.35, -0.44, -31.64, 'no', 'normal', 370.67, 1178.43] -30.25
+[-4.13, -12.89, 'red', -2.06, 0.01, -0.27, 'yes', 'normal', 359.95, 1108.98] 1.00
+[-2.79, -12.05, 'brown', -1.39, 0.61, -4.87, 'no', 'large', 162.19, 1191.44] 15.59
+[-1.63, -14.53, 'red', -7.26, 0.20, -29.33, 'no', 'normal', 314.49, 1194.62] -30.96
+[-1.21, -12.23, 'brown', -6.11, 0.72, -17.66, 'no', 'large', 118.32, 1045.57] -0.60
+

+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/datasets/synth/Planes2D/index.html b/0.19.0/api/datasets/synth/Planes2D/index.html
new file mode 100644
index 0000000000..7f4436c5b0
--- /dev/null
+++ b/0.19.0/api/datasets/synth/Planes2D/index.html
@@ -0,0 +1,4392 @@

Planes2D - River

Planes2D

+

2D Planes synthetic dataset.

+

This dataset is described in 1 and was adapted from 2. The features are generated using the following probabilities:

+
\[P(x_1 = -1) = P(x_1 = 1) = \frac{1}{2}\]
+
\[P(x_m = -1) = P(x_m = 0) = P(x_m = 1) = \frac{1}{3}, m=2,\ldots, 10\]
+

The target value is defined by the following rule:

+
\[\text{if}~x_1 = 1, y \leftarrow 3 + 3x_2 + 2x_3 + x_4 + \epsilon\]
+
\[\text{if}~x_1 = -1, y \leftarrow -3 + 3x_5 + 2x_6 + x_7 + \epsilon\]
+

In the expressions, \(\epsilon \sim \mathcal{N}(0, 1)\) is the noise.

+

Parameters

+
    +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed number used for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
+

Examples

+

from river.datasets import synth
+
+dataset = synth.Planes2D(seed=42)
+
+for x, y in dataset.take(5):
+    print(list(x.values()), y)
+
+
[-1, -1, 1, 0, -1, -1, -1, 1, -1, 1] -9.07
+[1, -1, -1, -1, -1, -1, 1, 1, -1, 1] -4.25
+[-1, 1, 1, 1, 1, 0, -1, 0, 1, 0] -0.95
+[-1, 1, 0, 0, 0, -1, -1, 0, -1, -1] -6.10
+[1, -1, 0, 0, 1, 0, -1, 1, 0, 1] 1.60
+

+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

  1. 2DPlanes in Luís Torgo's regression datasets.
  2. Breiman, L., Friedman, J., Stone, C.J. and Olshen, R.A., 1984. Classification and Regression Trees. CRC Press.
diff --git a/0.19.0/api/datasets/synth/RandomRBF/index.html b/0.19.0/api/datasets/synth/RandomRBF/index.html

RandomRBF

Random Radial Basis Function generator.

Produces a radial basis function stream. A number of centroids are generated, each with a random central position, a standard deviation, a class label and a weight. A new sample is created by choosing one of the centroids at random (taking the weights into account) and offsetting the attributes in a random direction from the centroid's centre. The offset length is drawn from a Gaussian distribution.

This process creates a normally distributed hypersphere of samples around each centroid. A minimal sketch of this sampling step is given below.
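The following sketch illustrates the sampling step for a single, hypothetical centroid. The variable names are invented for the example and the library's internals may differ in detail.

import math
import random

rng = random.Random(42)

# A hypothetical centroid: a centre, a per-centroid standard deviation,
# a class label and a selection weight.
centre, std_dev, label = [0.5, 0.5, 0.5], 0.1, 1

# Offset the sample in a random direction from the centroid's centre,
# with the offset length drawn from a Gaussian distribution.
direction = [rng.gauss(0, 1) for _ in centre]
norm = math.sqrt(sum(d * d for d in direction))
length = rng.gauss(0, std_dev)
x = [c + d / norm * length for c, d in zip(centre, direction)]
y = label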

Parameters

  • seed_model (int | None; default: None): Model's random seed, used to generate the centroids.
  • seed_sample (int | None; default: None): Sample's random seed.
  • n_classes (int; default: 2): The number of class labels to generate.
  • n_features (int; default: 10): The number of numerical features to generate.
  • n_centroids (int; default: 50): The number of centroids to generate.

Attributes

  • desc: Return the description from the docstring.

Examples

from river.datasets import synth

dataset = synth.RandomRBF(seed_model=42, seed_sample=42,
                          n_classes=4, n_features=4, n_centroids=20)

for x, y in dataset.take(5):
    print(x, y)

{0: 1.0989, 1: 0.3840, 2: 0.7759, 3: 0.6592} 2
{0: 0.2366, 1: 1.3233, 2: 0.5691, 3: 0.2083} 0
{0: 1.3540, 1: -0.3306, 2: 0.1683, 3: 0.8865} 0
{0: 0.2585, 1: -0.2217, 2: 0.4739, 3: 0.6522} 0
{0: 0.1295, 1: 0.5953, 2: 0.1774, 3: 0.6673} 1

Methods

  • take: Iterate over the k samples. Parameters: k (int).

diff --git a/0.19.0/api/datasets/synth/RandomRBFDrift/index.html b/0.19.0/api/datasets/synth/RandomRBFDrift/index.html

RandomRBFDrift

Random Radial Basis Function generator with concept drift.

This class is an extension of the RandomRBF generator. Concept drift can be introduced in instances of this class.

The drift is created by assigning a "speed" to certain centroids. As samples are generated, each moving centroid's centre is displaced by an amount determined by its speed, as sketched below.
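A minimal sketch of the drift step, assuming each drifting centroid moves along a fixed random unit direction at every generated sample; the names are invented for the example and the library's bookkeeping may differ.

import math
import random

rng = random.Random(42)

centre = [0.5, 0.5, 0.5]
change_speed = 0.87

# Pick a fixed random unit direction for this centroid.
direction = [rng.gauss(0, 1) for _ in centre]
norm = math.sqrt(sum(d * d for d in direction))
direction = [d / norm for d in direction]

def drift_centroid(centre):
    """Move the centroid's centre one step along its drift direction."""
    return [c + d * change_speed for c, d in zip(centre, direction)]

centre = drift_centroid(centre)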

Parameters

  • seed_model (int | None; default: None): Model's random seed, used to generate the centroids.
  • seed_sample (int | None; default: None): Sample's random seed.
  • n_classes (int; default: 2): The number of class labels to generate.
  • n_features (int; default: 10): The number of numerical features to generate.
  • n_centroids (int; default: 50): The number of centroids to generate.
  • change_speed (float; default: 0.0): The concept drift speed.
  • n_drift_centroids (int; default: 50): The number of centroids that will drift.

Attributes

  • desc: Return the description from the docstring.

Examples

from river.datasets import synth

dataset = synth.RandomRBFDrift(seed_model=42, seed_sample=42,
                               n_classes=4, n_features=4, n_centroids=20,
                               change_speed=0.87, n_drift_centroids=10)

for x, y in dataset.take(5):
    print(x, y)

{0: 1.0989, 1: 0.3840, 2: 0.7759, 3: 0.6592} 2
{0: 1.1496, 1: 1.9014, 2: 1.5393, 3: 0.3210} 0
{0: 0.7146, 1: -0.2414, 2: 0.8933, 3: 1.6633} 0
{0: 0.3797, 1: -0.1027, 2: 0.8717, 3: 1.1635} 0
{0: 0.1295, 1: 0.5953, 2: 0.1774, 3: 0.6673} 1

Methods

  • take: Iterate over the k samples. Parameters: k (int).

diff --git a/0.19.0/api/datasets/synth/RandomTree/index.html b/0.19.0/api/datasets/synth/RandomTree/index.html

RandomTree

Random Tree generator.

This generator is based on [1]. It creates a random tree by splitting features at random and assigning labels to its leaves.

The tree structure is composed of node objects, which can be either inner nodes or leaf nodes. The choice between the two is a function of the parameters passed to the initializer.

Since the concepts are generated and classified according to a tree structure, the stream should, in theory, favour decision tree learners.

Parameters

  • seed_tree (int | None; default: None): Seed for the random generation of the tree.
  • seed_sample (int | None; default: None): Seed for the random generation of instances.
  • n_classes (int; default: 2): The number of classes to generate.
  • n_num_features (int; default: 5): The number of numerical features to generate.
  • n_cat_features (int; default: 5): The number of categorical features to generate.
  • n_categories_per_feature (int; default: 5): The number of values to generate per categorical feature.
  • max_tree_depth (int; default: 5): The maximum depth of the tree concept.
  • first_leaf_level (int; default: 3): The first level of the tree above max_tree_depth that can have leaves.
  • fraction_leaves_per_level (float; default: 0.15): The fraction of leaves per level from first_leaf_level onwards.

Attributes

  • desc: Return the description from the docstring.

Examples

from river.datasets import synth

dataset = synth.RandomTree(seed_tree=42, seed_sample=42, n_classes=2,
                           n_num_features=2, n_cat_features=2,
                           n_categories_per_feature=2, max_tree_depth=6,
                           first_leaf_level=3, fraction_leaves_per_level=0.15)

for x, y in dataset.take(5):
    print(x, y)

{'x_num_0': 0.6394, 'x_num_1': 0.0250, 'x_cat_0': 1, 'x_cat_1': 0} 0
{'x_num_0': 0.2232, 'x_num_1': 0.7364, 'x_cat_0': 0, 'x_cat_1': 1} 1
{'x_num_0': 0.0317, 'x_num_1': 0.0936, 'x_cat_0': 0, 'x_cat_1': 0} 0
{'x_num_0': 0.5612, 'x_num_1': 0.7160, 'x_cat_0': 1, 'x_cat_1': 0} 0
{'x_num_0': 0.4492, 'x_num_1': 0.2781, 'x_cat_0': 0, 'x_cat_1': 0} 0

Methods

  • take: Iterate over the k samples. Parameters: k (int).

References

  1. Domingos, Pedro, and Geoff Hulten. "Mining high-speed data streams." In Proceedings of the sixth ACM SIGKDD international conference on Knowledge discovery and data mining, pp. 71-80. 2000.
diff --git a/0.19.0/api/datasets/synth/SEA/index.html b/0.19.0/api/datasets/synth/SEA/index.html

SEA

SEA synthetic dataset.

Implementation of the data stream with abrupt drift described in [1]. Each observation is composed of 3 features, of which only the first two are relevant. The target is binary, and is positive if the sum of the first two features exceeds a certain threshold. There are 4 thresholds to choose from, and concept drift can be introduced by switching the threshold at any point during the stream (see the sketch after this list):

  • Variant 0: True if \(att1 + att2 > 8\)
  • Variant 1: True if \(att1 + att2 > 9\)
  • Variant 2: True if \(att1 + att2 > 7\)
  • Variant 3: True if \(att1 + att2 > 9.5\)
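A minimal sketch of the labelling rule, assuming features uniform over [0, 10] as in the original paper (the example output below is consistent with that range); this mirrors the rule, not the library's code.

import random

rng = random.Random(42)
THRESHOLDS = {0: 8, 1: 9, 2: 7, 3: 9.5}
variant = 0

# Three features, of which only the first two matter.
x = [rng.uniform(0, 10) for _ in range(3)]
y = x[0] + x[1] > THRESHOLDS[variant]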

Parameters

  • variant (default: 0): Determines the classification function to use. Possible choices are 0, 1, 2, 3.
  • noise (default: 0.0): Determines the proportion of observations for which the target will be flipped.
  • seed (int | None; default: None): Random seed number used for reproducibility.

Attributes

  • desc: Return the description from the docstring.

Examples

from river.datasets import synth

dataset = synth.SEA(variant=0, seed=42)

for x, y in dataset.take(5):
    print(x, y)

{0: 6.39426, 1: 0.25010, 2: 2.75029} False
{0: 2.23210, 1: 7.36471, 2: 6.76699} True
{0: 8.92179, 1: 0.86938, 2: 4.21921} True
{0: 0.29797, 1: 2.18637, 2: 5.05355} False
{0: 0.26535, 1: 1.98837, 2: 6.49884} False

Methods

  • take: Iterate over the k samples. Parameters: k (int).

diff --git a/0.19.0/api/datasets/synth/STAGGER/index.html b/0.19.0/api/datasets/synth/STAGGER/index.html

STAGGER

STAGGER concepts stream generator.

This generator is an implementation of the data stream with abrupt concept drift described in [1].

The STAGGER concepts are boolean functions f over three features describing objects: size (small, medium and large), shape (circle, square and triangle) and colour (red, blue and green).

The options for f are (a sketch follows this list):

  1. True if the size is small and the colour is red.
  2. True if the colour is green or the shape is a circle.
  3. True if the size is medium or large.
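A minimal sketch of the three concepts. The library encodes size, colour and shape as integers (as the example output below shows); string values are used here purely for readability.

def stagger_concept(x, classification_function):
    """Evaluate one of the three STAGGER concepts on a sample
    x = {'size': ..., 'colour': ..., 'shape': ...} with string values."""
    if classification_function == 0:
        return x["size"] == "small" and x["colour"] == "red"
    if classification_function == 1:
        return x["colour"] == "green" or x["shape"] == "circle"
    return x["size"] in ("medium", "large")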

Concept drift can be introduced by changing the classification function. This can be done manually or by using datasets.synth.ConceptDriftStream.

One important feature is the possibility to balance classes, in which case the class distribution will tend towards a uniform one.

Parameters

  • classification_function (int; default: 0): The classification function to use, from 0 to 2.
  • seed (int | None; default: None): Random seed for reproducibility.
  • balance_classes (bool; default: False): Whether or not to balance classes. If balanced, the class distribution will converge to a uniform distribution.

Attributes

  • desc: Return the description from the docstring.

Examples

from river.datasets import synth

dataset = synth.STAGGER(classification_function=2, seed=112,
                        balance_classes=False)

for x, y in dataset.take(5):
    print(x, y)

{'size': 1, 'color': 2, 'shape': 2} 1
{'size': 2, 'color': 1, 'shape': 2} 1
{'size': 1, 'color': 1, 'shape': 2} 1
{'size': 0, 'color': 1, 'shape': 0} 0
{'size': 2, 'color': 1, 'shape': 0} 1

Methods

  • generate_drift: Generate drift by switching the classification function at random.
  • take: Iterate over the k samples. Parameters: k (int).

Notes

The sample generation works as follows: the 3 attributes are generated with the random number generator. The classification function defines whether to classify the instance as class 0 or class 1. Finally, the data is balanced, if that option is set by the user.

References

  1. Schlimmer, J. C., & Granger, R. H. (1986). Incremental learning from noisy data. Machine learning, 1(3), 317-354.
diff --git a/0.19.0/api/datasets/synth/Sine/index.html b/0.19.0/api/datasets/synth/Sine/index.html

Sine

Sine generator.

This generator is an implementation of the data stream with abrupt concept drift described in Gama, Joao, et al. [1].

It generates up to 4 numerical features that vary from 0 to 1, of which only 2 are relevant to the classification task; the other 2 are optionally added as noise. A classification function is chosen among four options (see the sketch after this list):

  1. SINE1. Abrupt concept drift, noise-free examples. It has two relevant attributes, each with values uniformly distributed in [0, 1]. In the first context, all points below the curve \(y = sin(x)\) are classified as positive.
  2. Reversed SINE1. The reversed classification of SINE1.
  3. SINE2. The same two relevant attributes. The classification function is \(y < 0.5 + 0.3 sin(3 \pi x)\).
  4. Reversed SINE2. The reversed classification of SINE2.
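A minimal sketch of the four classification functions on a point \((x, y) \in [0, 1]^2\). Whether "positive" maps to 1 or 0 in the library is an assumption here; only the curves themselves are taken from the list above.

import math

def sine_concept(x, y, classification_function):
    """Label a point (x, y) according to the four SINE concepts."""
    if classification_function in (0, 1):        # SINE1 and its reverse
        below = y < math.sin(x)
    else:                                        # SINE2 and its reverse
        below = y < 0.5 + 0.3 * math.sin(3 * math.pi * x)
    # Odd-numbered functions reverse the classification.
    return below if classification_function % 2 == 0 else not below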

Concept drift can be introduced by changing the classification function. This can be done manually or by using ConceptDriftStream.

Two important features are the possibility to balance classes, in which case the class distribution will tend towards a uniform one, and the possibility to add noise, which adds two non-relevant attributes.

Parameters

  • classification_function (int; default: 0): The classification function to use, from 0 to 3.
  • seed (int | None; default: None): Random seed for reproducibility.
  • balance_classes (bool; default: False): Whether or not to balance classes. If balanced, the class distribution will converge to a uniform distribution.
  • has_noise (bool; default: False): Adds 2 non-relevant features to the stream.

Attributes

  • desc: Return the description from the docstring.

Examples

from river.datasets import synth

dataset = synth.Sine(classification_function=2, seed=112,
                     balance_classes=False, has_noise=True)

for x, y in dataset.take(5):
    print(x, y)

{0: 0.4812, 1: 0.6660, 2: 0.6198, 3: 0.6994} 1
{0: 0.9022, 1: 0.7518, 2: 0.1625, 3: 0.2209} 0
{0: 0.4547, 1: 0.3901, 2: 0.9629, 3: 0.7287} 0
{0: 0.4683, 1: 0.3515, 2: 0.2273, 3: 0.6027} 0
{0: 0.9238, 1: 0.1673, 2: 0.4522, 3: 0.3447} 0

Methods

  • generate_drift: Generate drift by switching the classification function at random.
  • take: Iterate over the k samples. Parameters: k (int).

Notes

The sample generation works as follows: the two attributes are generated with the random number generator. The classification function defines whether to classify the instance as class 0 or class 1. Finally, the data is balanced and noise is added, if these options are set by the user.

The generated sample will have 2 relevant features, plus an additional two noise features if has_noise is set.

References

  1. Gama, Joao, et al. "Learning with drift detection." Advances in artificial intelligence - SBIA 2004. Springer Berlin Heidelberg, 2004. 286-295.
diff --git a/0.19.0/api/datasets/synth/Waveform/index.html b/0.19.0/api/datasets/synth/Waveform/index.html

Waveform

Waveform stream generator.

Generates samples with 21 numeric features and 3 classes, based on a random differentiation of some base waveforms. Noise addition is supported, in which case the samples will have 40 features.

Parameters

  • seed (int | None; default: None): Random seed for reproducibility.
  • has_noise (bool; default: False): Adds 19 unrelated features to the stream.

Attributes

  • desc: Return the description from the docstring.

Examples

from river.datasets import synth

dataset = synth.Waveform(seed=42, has_noise=True)

for x, y in dataset:
    break

x

{0: -0.0397, 1: -0.7484, 2: 0.2974, 3: 0.3574, 4: -0.0735, 5: -0.3647, 6: 1.5631, 7: 2.5291, 8: 4.1599, 9: 4.9587, 10: 4.52587, 11: 4.0097, 12: 3.6705, 13: 1.7033, 14: 1.4898, 15: 1.9743, 16: 0.0898, 17: 2.319, 18: 0.2552, 19: -0.4775, 20: -0.71339, 21: 0.3770, 22: 0.3671, 23: 1.6579, 24: 0.7828, 25: 0.5855, 26: -0.5807, 27: 0.7112, 28: -0.0271, 29: 0.2968, 30: -0.4997, 31: 0.1302, 32: 0.3578, 33: -0.1900, 34: -0.3771, 35: 1.3560, 36: 0.7124, 37: -0.6245, 38: 0.1346, 39: 0.3550}

y

2

Methods

  • take: Iterate over the k samples. Parameters: k (int).

Notes

An instance is generated based on the parameters passed. The generator randomly chooses one of the hard-coded waveforms, as well as random multipliers. For each feature, the actual value generated is a combination of the hard-coded functions, the multipliers and a random value.

If noise is added, then features 21 to 40 are replaced with random normal values.

diff --git a/0.19.0/api/drift/ADWIN/index.html b/0.19.0/api/drift/ADWIN/index.html

ADWIN

Adaptive Windowing method for concept drift detection.

ADWIN (ADaptive WINdowing) is a popular drift detection method with mathematical guarantees. ADWIN efficiently keeps a variable-length window of recent items, chosen such that there has been no change in the data distribution within it. The window is further divided into two sub-windows \((W_0, W_1)\) that are used to determine whether a change has happened. ADWIN compares the averages of \(W_0\) and \(W_1\) to check that they correspond to the same distribution. Concept drift is detected when the distribution equality no longer holds. Upon detecting a drift, \(W_0\) is replaced by \(W_1\) and a new \(W_1\) is initialized. ADWIN uses a significance value \(\delta \in (0, 1)\) to determine whether the two sub-windows correspond to the same distribution.
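For reference, the split test from the original paper [1] can be summarised as follows; this is a paraphrase of the published bound, not a transcription of the library's code. A change is reported when the two sub-window means differ by more than a threshold \(\epsilon_{cut}\):

\[|\hat{\mu}_{W_0} - \hat{\mu}_{W_1}| \geq \epsilon_{cut}, \qquad \epsilon_{cut} = \sqrt{\frac{1}{2m} \ln \frac{4}{\delta'}}\]

where \(m = 1 / (1/|W_0| + 1/|W_1|)\) and \(\delta'\) is \(\delta\) scaled by the number of split points tested.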

Parameters

  • delta (default: 0.002): Significance value.
  • clock (default: 32): How often ADWIN should check for a change. 1 means every new data point. Higher values speed up processing, but may also increase the delay in change detection.
  • max_buckets (default: 5): The maximum number of buckets of each size that ADWIN should keep before merging buckets.
  • min_window_length (default: 5): The minimum length of each subwindow. Lower values may decrease the delay in change detection but may also lead to more false positives.
  • grace_period (default: 10): ADWIN does not perform any change detection until at least this many data points have arrived.

Attributes

  • drift_detected: Whether or not a drift is detected following the last update.
  • estimation: Estimate of the mean value in the window.
  • n_detections
  • total
  • variance
  • width: Window size.

Examples

import random
from river import drift

rng = random.Random(12345)
adwin = drift.ADWIN()

data_stream = rng.choices([0, 1], k=1000) + rng.choices(range(4, 8), k=1000)

for i, val in enumerate(data_stream):
    _ = adwin.update(val)
    if adwin.drift_detected:
        print(f"Change detected at index {i}, input value: {val}")

Change detected at index 1023, input value: 4

Methods

  • update: Update the change detector with a single data point. Apart from adding the element value to the window, by inserting it in the correct bucket, it also updates the relevant statistics: the total sum of all values, the window width and the total variance. Parameters: x (numbers.Number). Returns: DriftDetector: self.

References

  1. Albert Bifet and Ricard Gavalda. "Learning from time-changing data with adaptive windowing." In Proceedings of the 2007 SIAM international conference on data mining, pp. 443-448. Society for Industrial and Applied Mathematics, 2007.
diff --git a/0.19.0/api/drift/DriftRetrainingClassifier/index.html b/0.19.0/api/drift/DriftRetrainingClassifier/index.html

DriftRetrainingClassifier

Drift retraining classifier.

This classifier is a wrapper for any classifier. It monitors the incoming data for concept drifts and for warnings about the model's accuracy. When a warning is detected, a background model starts training. If a drift is then detected, the main model is replaced by the background model, and the background model is reset. A minimal sketch of this logic is given below.
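A rough sketch of the control flow described above, assuming a river-style classifier exposing clone/learn_one/predict_one and a binary drift detector exposing warning_detected/drift_detected; the library's actual bookkeeping differs in detail.

def update_with_retraining(model, bg_model, detector, x, y):
    """One learning step of the drift-retraining scheme."""
    y_pred = model.predict_one(x)
    detector.update(y_pred != y)              # 1 = error, 0 = correct
    if detector.warning_detected and bg_model is None:
        bg_model = model.clone()              # start a fresh background model
    if bg_model is not None:
        bg_model.learn_one(x, y)              # train it alongside the main model
    if detector.drift_detected and bg_model is not None:
        model, bg_model = bg_model, None      # promote the background model
    model.learn_one(x, y)
    return model, bg_model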

Parameters

  • model (base.Classifier): The classifier and background classifier class.
  • drift_detector (base.DriftAndWarningDetector | base.BinaryDriftAndWarningDetector | None; default: None): Algorithm to track warnings and concept drifts. Attention! If the parameter train_in_background is True, the drift_detector must have a warning tracker.
  • train_in_background (bool; default: True): Parameter that determines whether a background model will be used.

Examples

from river import datasets
from river import evaluate
from river import drift
from river import metrics
from river import tree

dataset = datasets.Elec2().take(3000)

model = drift.DriftRetrainingClassifier(
    model=tree.HoeffdingTreeClassifier(),
    drift_detector=drift.binary.DDM()
)

metric = metrics.Accuracy()

evaluate.progressive_val_score(dataset, model, metric)

Accuracy: 86.46%

Methods

  • learn_one: Update the model with a set of features x and a label y. Parameters: x, y, kwargs. Returns: self.
  • predict_one: Predict the label of a set of features x. Parameters: x (dict), kwargs. Returns: base.typing.ClfTarget | None, the predicted label.
  • predict_proba_one: Predict the probability of each label for a dictionary of features x. Parameters: x, kwargs. Returns: a dictionary that associates a probability with each label.

diff --git a/0.19.0/api/drift/DummyDriftDetector/index.html b/0.19.0/api/drift/DummyDriftDetector/index.html

DummyDriftDetector

Baseline drift detector that generates pseudo drift detection signals.

There are two approaches [1]:

  • fixed, where the drift signal is generated every t_0 samples.
  • random, which corresponds to a pseudo-random drift detection strategy.

Parameters

  • trigger_method (str; default: "fixed"): The trigger method to use, either "fixed" or "random".
  • t_0 (int; default: 300): Reference point to define triggers.
  • w (int; default: 0): Auxiliary parameter whose purpose is twofold:
    - if trigger_method="fixed", the periodic drift signals will only start after an initial warm-up period randomly defined between [0, w]. Useful to avoid all ensemble members being reset at the same time when periodic triggers are used as the adaptation strategy.
    - if trigger_method="random", w defines the probability bounds of triggering a drift. The chance of triggering a drift is \(0.5\) after observing t_0 instances and becomes \(1\) after monitoring t_0 + w / 2 instances. A sigmoid function is used to produce values between [0, 1] that are used as the reset probabilities (see the sketch after this list).
  • dynamic_cloning (bool; default: False): Whether to change the seed and w values each time clone() is called.
  • seed (int | None; default: None): Random seed for reproducibility.
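As an illustration of the random trigger, here is one way the reset probability could be shaped. The exact slope of the sigmoid used by the library is an assumption here; it is chosen so the probability is 0.5 at t_0 observations and close to 1 at t_0 + w / 2, as described above.

import math

def reset_probability(n_seen, t_0, w):
    # 0.5 when n_seen == t_0, ~0.99 when n_seen == t_0 + w / 2 (w must be > 0).
    return 1.0 / (1.0 + math.exp(-10.0 * (n_seen - t_0) / w))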

Attributes

  • drift_detected: Whether or not a drift is detected following the last update.

Examples

import random
from river import drift

rng = random.Random(42)

The observed values will not affect the periodic triggers.

data = [rng.gauss(0, 1) for _ in range(1000)]

Let's start with the fixed drift signals:

ptrigger = DummyDriftDetector(t_0=500, seed=42)
for i, v in enumerate(data):
    _ = ptrigger.update(v)
    if ptrigger.drift_detected:
        print(f"Drift detected at instance {i}.")

Drift detected at instance 499.
Drift detected at instance 999.

Now, the random drift signals:

rtrigger = DummyDriftDetector(
    trigger_method="random",
    t_0=500,
    w=100,
    dynamic_cloning=True,
    seed=42
)
for i, v in enumerate(data):
    _ = rtrigger.update(v)
    if rtrigger.drift_detected:
        print(f"Drift detected at instance {i}.")

Drift detected at instance 368.
Drift detected at instance 817.

Remember to set a w > 0 value if random triggers are used:

try:
    DummyDriftDetector(trigger_method="random")
except ValueError as ve:
    print(ve)

The 'w' value must be greater than zero when 'trigger_method' is 'random'.

Since we set dynamic_cloning to True, a clone of the periodic trigger will have its internal parameters changed:

rtrigger = rtrigger.clone()
for i, v in enumerate(data):
    _ = rtrigger.update(v)
    if rtrigger.drift_detected:
        print(f"Drift detected at instance {i}.")

Drift detected at instance 429.
Drift detected at instance 728.

Methods

  • update: Update the detector with a single data point. Parameters: x (numbers.Number). Returns: DriftDetector: self.

Notes

When used in ensembles, a naive implementation of periodic drift signals would make all ensemble members reset at the same time. To avoid that, the dynamic_cloning parameter can be set to True. In this case, every time the clone method of this detector is called in an ensemble, a new seed is defined. If dynamic_cloning=True and trigger_method="fixed", a new w between [0, t_0] will also be created for the new cloned instance.

References

  1. Heitor Gomes, Jacob Montiel, Saulo Martiello Mastelini, Bernhard Pfahringer, and Albert Bifet. On Ensemble Techniques for Data Stream Regression. IJCNN'20. International Joint Conference on Neural Networks. 2020.
diff --git a/0.19.0/api/drift/KSWIN/index.html b/0.19.0/api/drift/KSWIN/index.html

KSWIN

Kolmogorov-Smirnov Windowing method for concept drift detection.

Parameters

  • alpha (float; default: 0.005): Probability for the test statistic of the Kolmogorov-Smirnov test. The alpha parameter is very sensitive and should therefore be set below 0.01.
  • window_size (int; default: 100): Size of the sliding window.
  • stat_size (int; default: 30): Size of the statistic window.
  • seed (int | None; default: None): Random seed for reproducibility.
  • window (typing.Iterable | None; default: None): Already-collected data, used to avoid a cold start.

Attributes

  • drift_detected: Whether or not a drift is detected following the last update.

Examples

import random
from river import drift

rng = random.Random(12345)
kswin = drift.KSWIN(alpha=0.0001, seed=42)

data_stream = rng.choices([0, 1], k=1000) + rng.choices(range(4, 8), k=1000)

for i, val in enumerate(data_stream):
    _ = kswin.update(val)
    if kswin.drift_detected:
        print(f"Change detected at index {i}, input value: {val}")

Change detected at index 1016, input value: 6

Methods

  • update: Update the change detector with a single data point. Adds an element on top of the sliding window and removes the oldest one from the window. Afterwards, the KS-test is performed. Parameters: x (numbers.Number). Returns: DriftDetector: self.

Notes

KSWIN (Kolmogorov-Smirnov Windowing) is a concept change detection method based on the Kolmogorov-Smirnov (KS) statistical test. The KS-test is a statistical test that makes no assumption about the underlying data distribution. KSWIN can monitor data or performance distributions. Note that the detector accepts one-dimensional input.

KSWIN maintains a sliding window \(\Psi\) of fixed size \(n\) (window_size). The last \(r\) (stat_size) samples of \(\Psi\) are assumed to represent the most recent concept, \(R\). From the first \(n - r\) samples of \(\Psi\), \(r\) samples are uniformly drawn, representing an approximation of the preceding concept, \(W\).

The KS-test is performed on the windows \(R\) and \(W\), which have the same size. The KS-test compares the distance between the empirical cumulative distributions, \(dist(R, W)\).

A concept drift is detected by KSWIN if:
\[dist(R, W) > \sqrt{-\frac{\ln \alpha}{r}}\]

i.e. if the distance between the empirical distributions of \(R\) and \(W\) is larger than what would be expected if \(R\) and \(W\) came from the same distribution.
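For intuition, the detection threshold above can be computed directly; with the default alpha=0.005 and stat_size=30 it is roughly 0.42. This mirrors the formula, not the library's internals.

import math

def kswin_threshold(alpha: float, r: int) -> float:
    # Critical KS distance above which KSWIN reports a drift.
    return math.sqrt(-math.log(alpha) / r)

print(kswin_threshold(0.005, 30))  # ~0.4202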

References

  1. Christoph Raab, Moritz Heusinger, Frank-Michael Schleif. Reactive Soft Prototype Computing for Concept Drift Streams. Neurocomputing, 2020.
diff --git a/0.19.0/api/drift/PageHinkley/index.html b/0.19.0/api/drift/PageHinkley/index.html

PageHinkley

Page-Hinkley method for concept drift detection.

This change detection method works by computing the observed values and their mean up to the current moment. Page-Hinkley does not signal warning zones, only change detections.

This detector implements the CUSUM control chart for detecting changes. The implementation also supports the two-sided Page-Hinkley test, to detect increasing and decreasing changes in the mean of the input values. A minimal sketch of the one-sided statistic is given below.
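A simplified sketch of the one-sided (increase-only) Page-Hinkley statistic. It omits the forgetting factor (alpha) and the min_instances warm-up that the library applies, so it illustrates the test rather than reproducing the implementation.

class PageHinkleySketch:
    """One-sided Page-Hinkley test: drift when m_t - min(m_t) > threshold."""

    def __init__(self, delta=0.005, threshold=50.0):
        self.delta = delta
        self.threshold = threshold
        self.n = 0
        self.mean = 0.0
        self.cumulative = 0.0   # m_t = sum of (x_t - mean_t - delta)
        self.minimum = 0.0      # M_t = minimum of m_t so far

    def update(self, x):
        self.n += 1
        self.mean += (x - self.mean) / self.n
        self.cumulative += x - self.mean - self.delta
        self.minimum = min(self.minimum, self.cumulative)
        return self.cumulative - self.minimum > self.threshold  # drift?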

Parameters

  • min_instances (int; default: 30): The minimum number of instances before change can be detected.
  • delta (float; default: 0.005): The delta factor for the Page-Hinkley test.
  • threshold (float; default: 50.0): The change detection threshold (lambda).
  • alpha (float; default: 0.9999): The forgetting factor, used to weight the observed value and the mean.
  • mode (str; default: "both"): Whether to consider increases ("up"), decreases ("down") or both ("both") when monitoring the fading mean.

Attributes

  • drift_detected: Whether or not a drift is detected following the last update.

Examples

import random
from river import drift

rng = random.Random(12345)
ph = drift.PageHinkley()

data_stream = rng.choices([0, 1], k=1000) + rng.choices(range(4, 8), k=1000)

for i, val in enumerate(data_stream):
    _ = ph.update(val)
    if ph.drift_detected:
        print(f"Change detected at index {i}, input value: {val}")

Change detected at index 1006, input value: 5

Methods

  • update: Update the detector with a single data point. Parameters: x (numbers.Number). Returns: DriftDetector: self.

References

  1. E. S. Page. 1954. Continuous Inspection Schemes. Biometrika 41, 1/2 (1954), 100-115.
  2. Sebastião, R., & Fernandes, J. M. (2017, June). Supporting the Page-Hinkley test with empirical mode decomposition for change detection. In International Symposium on Methodologies for Intelligent Systems (pp. 492-498). Springer, Cham.
diff --git a/0.19.0/api/drift/binary/DDM/index.html b/0.19.0/api/drift/binary/DDM/index.html

DDM

Drift Detection Method.

DDM (Drift Detection Method) is a concept change detection method based on the PAC learning model's premise that the learner's error rate will decrease as the number of analysed samples increases, as long as the data distribution is stationary.

If the algorithm detects an increase in the error rate that surpasses a calculated threshold, either a change is detected, or the algorithm warns the user that a change may occur in the near future (the warning zone).

The detection threshold is calculated as a function of two statistics, obtained when \((p_i + s_i)\) reaches its minimum:

  • \(p_{min}\): The minimum recorded error rate.
  • \(s_{min}\): The minimum recorded standard deviation.

At instant \(i\), the detection algorithm uses:

  • \(p_i\): The error rate at instant \(i\).
  • \(s_i\): The standard deviation at instant \(i\).

The conditions for entering the warning zone and detecting a change are as follows (see the sketch below):

  • if \(p_i + s_i \geq p_{min} + w_l \cdot s_{min}\) -> Warning zone
  • if \(p_i + s_i \geq p_{min} + d_l \cdot s_{min}\) -> Change detected

In the above expressions, \(w_l\) and \(d_l\) represent, respectively, the warning and drift thresholds.
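A compact sketch of the two tests, assuming the usual Bernoulli estimate \(s_i = \sqrt{p_i (1 - p_i) / i}\) from the original paper; the library additionally tracks the running minima and a warm-up period.

import math

def ddm_check(n, p_i, p_min, s_min, w_l=2.0, d_l=3.0):
    """Evaluate the DDM conditions at instant i.
    p_i is the current error rate over n samples; p_min / s_min are the
    minima recorded so far. Returns 'drift', 'warning' or 'normal'."""
    s_i = math.sqrt(p_i * (1 - p_i) / n)  # std. dev. of a Bernoulli error rate
    if p_i + s_i >= p_min + d_l * s_min:
        return "drift"
    if p_i + s_i >= p_min + w_l * s_min:
        return "warning"
    return "normal"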

Input: x is an entry in a stream of bits, where 1 indicates an error/failure and 0 represents a correct/normal value.

For example, if a classifier's prediction \(y'\) is right or wrong w.r.t. the true target label \(y\):

  • 0: Correct, \(y = y'\)
  • 1: Error, \(y \neq y'\)

Parameters

  • warm_start (int; default: 30): The minimum required number of analysed samples before a change can be detected. Warm-start parameter for the drift detector.
  • warning_threshold (float; default: 2.0): Threshold to decide if the detector is in a warning zone. The default value gives a 95% confidence level to the warning assessment.
  • drift_threshold (float; default: 3.0): Threshold to decide if a drift was detected. The default value gives a 99% confidence level to the drift assessment.

Attributes

  • drift_detected: Whether or not a drift is detected following the last update.
  • warning_detected: Whether or not a warning is detected following the last update.

Examples

import random
from river import drift

rng = random.Random(42)
ddm = drift.binary.DDM()

data_stream = rng.choices([0, 1], k=1000)
data_stream = data_stream + rng.choices([0, 1], k=1000, weights=[0.3, 0.7])

print_warning = True
for i, x in enumerate(data_stream):
    _ = ddm.update(x)
    if ddm.warning_detected and print_warning:
        print(f"Warning detected at index {i}")
        print_warning = False
    if ddm.drift_detected:
        print(f"Change detected at index {i}")
        print_warning = True

Warning detected at index 1084
Change detected at index 1334
Warning detected at index 1492

Methods

  • update: Update the detector with a single boolean input. Parameters: x (bool). Returns: BinaryDriftDetector: self.

References

  1. João Gama, Pedro Medas, Gladys Castillo, Pedro Pereira Rodrigues: Learning with Drift Detection. SBIA 2004: 286-295.
diff --git a/0.19.0/api/drift/binary/EDDM/index.html b/0.19.0/api/drift/binary/EDDM/index.html

EDDM

Early Drift Detection Method.

EDDM (Early Drift Detection Method) aims to improve the detection rate of gradual concept drift in DDM, while keeping good performance against abrupt concept drift.

This method works by keeping track of the average distance between two errors instead of only the error rate. To do so, it tracks the running average distance and the running standard deviation, as well as the maximum distance and the maximum standard deviation.

The algorithm works similarly to the DDM algorithm, by keeping track of statistics only. It works with the running average distance (\(p_i'\)) and the running standard deviation (\(s_i'\)), as well as \(p'_{max}\) and \(s'_{max}\), which are the values of \(p_i'\) and \(s_i'\) when \((p_i' + 2 s_i')\) reaches its maximum.

Like DDM, two threshold values define the borderlines between no change, the warning zone, and drift detected (see the sketch below):

  • if \((p_i' + 2 s_i') / (p'_{max} + 2 s'_{max}) < \alpha\) -> Warning zone
  • if \((p_i' + 2 s_i') / (p'_{max} + 2 s'_{max}) < \beta\) -> Change detected

\(\alpha\) and \(\beta\) are set to 0.95 and 0.9, respectively.
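A minimal sketch of the ratio test above; the library additionally tracks the distances between errors and the running statistics themselves.

def eddm_check(p_i, s_i, p_max, s_max, alpha=0.95, beta=0.9):
    """Evaluate the EDDM ratio test. p_i / s_i are the running mean and
    standard deviation of the distance between consecutive errors;
    p_max / s_max are their values when (p' + 2s') peaked."""
    ratio = (p_i + 2 * s_i) / (p_max + 2 * s_max)
    if ratio < beta:
        return "drift"
    if ratio < alpha:
        return "warning"
    return "normal"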

Input: x is an entry in a stream of bits, where 1 indicates an error/failure and 0 represents a correct/normal value.

For example, if a classifier's prediction \(y'\) is right or wrong w.r.t. the true target label \(y\):

  • 0: Correct, \(y = y'\)
  • 1: Error, \(y \neq y'\)

Parameters

  • warm_start (int; default: 30): The minimum required number of monitored errors/failures before a change can be detected. Warm-start parameter for the drift detector.
  • alpha (float; default: 0.95): Threshold for triggering a warning. Must be between 0 and 1. The smaller the value, the more conservative the detector becomes.
  • beta (float; default: 0.9): Threshold for triggering a drift. Must be between 0 and 1. The smaller the value, the more conservative the detector becomes.

Attributes

  • drift_detected: Whether or not a drift is detected following the last update.
  • warning_detected: Whether or not a warning is detected following the last update.

Examples

import random
from river import drift

rng = random.Random(42)
eddm = drift.binary.EDDM(alpha=0.8, beta=0.75)

data_stream = rng.choices([0, 1], k=1000)
data_stream = data_stream + rng.choices([0, 1], k=1000, weights=[0.3, 0.7])

print_warning = True
for i, x in enumerate(data_stream):
    _ = eddm.update(x)
    if eddm.warning_detected and print_warning:
        print(f"Warning detected at index {i}")
        print_warning = False
    if eddm.drift_detected:
        print(f"Change detected at index {i}")
        print_warning = True

Warning detected at index 1059
Change detected at index 1278

Methods

  • update: Update the change detector with a single data point. Parameters: x (bool). Returns: BinaryDriftDetector: self.

References

  1. Early Drift Detection Method. Manuel Baena-Garcia, Jose Del Campo-Avila, Raúl Fidalgo, Albert Bifet, Ricard Gavalda, Rafael Morales-Bueno. In Fourth International Workshop on Knowledge Discovery from Data Streams, 2006.
diff --git a/0.19.0/api/drift/binary/HDDM-A/index.html b/0.19.0/api/drift/binary/HDDM-A/index.html

HDDM_A

Drift Detection Method based on Hoeffding's bounds with a moving average-test.

HDDM_A is a drift detection method based on Hoeffding's inequality, which uses the input average as its estimator.
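For context, Hoeffding's inequality bounds how far the empirical mean of \(n\) independent observations in [0, 1] can stray from its expectation: with confidence \(1 - \delta\),

\[|\hat{\mu} - \mu| \leq \sqrt{\frac{1}{2n} \ln \frac{2}{\delta}}\]

The detector compares averages computed over different portions of the stream and signals a drift when their difference exceeds a bound of this form; the exact test follows [1], of which this is only a summary.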

Input: x is an entry in a stream of bits, where 1 indicates an error/failure and 0 represents a correct/normal value.

For example, if a classifier's prediction \(y'\) is right or wrong w.r.t. the true target label \(y\):

  • 0: Correct, \(y = y'\)
  • 1: Error, \(y \neq y'\)

+

Parameters

+
    +
  • +

    drift_confidence

    +

    Default0.001

    +

    Confidence to the drift

    +
  • +
  • +

    warning_confidence

    +

    Default0.005

    +

    Confidence to the warning

    +
  • +
  • +

    two_sided_test

    +

    DefaultFalse

    +

    If True, will monitor error increments and decrements (two-sided). By default will only monitor increments (one-sided).

    +
  • +
+

Attributes

+
    +
  • +

    drift_detected

    +

    Whether or not a drift is detected following the last update.

    +
  • +
  • +

    warning_detected

    +

    Whether or not a drift is detected following the last update.

    +
  • +
+

Examples

import random
from river import drift

rng = random.Random(42)
hddm_a = drift.binary.HDDM_A()

data_stream = rng.choices([0, 1], k=1000)
data_stream = data_stream + rng.choices([0, 1], k=1000, weights=[0.3, 0.7])

print_warning = True
for i, x in enumerate(data_stream):
    _ = hddm_a.update(x)
    if hddm_a.warning_detected and print_warning:
        print(f"Warning detected at index {i}")
        print_warning = False
    if hddm_a.drift_detected:
        print(f"Change detected at index {i}")
        print_warning = True

Warning detected at index 451
Change detected at index 1206

Methods

  • update: Update the change detector with a single data point. Parameters: x (bool). Returns: BinaryDriftDetector: self.

References

  1. Frías-Blanco I, del Campo-Ávila J, Ramos-Jimenez G, et al. Online and non-parametric drift detection methods based on Hoeffding's bounds. IEEE Transactions on Knowledge and Data Engineering, 2014, 27(3): 810-823.
  2. Albert Bifet, Geoff Holmes, Richard Kirkby, Bernhard Pfahringer. MOA: Massive Online Analysis; Journal of Machine Learning Research 11: 1601-1604, 2010.
diff --git a/0.19.0/api/drift/binary/HDDM-W/index.html b/0.19.0/api/drift/binary/HDDM-W/index.html

HDDM_W

Drift Detection Method based on Hoeffding's bounds with a moving weighted average-test.

HDDM_W is an online drift detection method based on McDiarmid's bounds. It uses the Exponentially Weighted Moving Average (EWMA) statistic as its estimator.

Input: x is an entry in a stream of bits, where 1 indicates an error/failure and 0 represents a correct/normal value.

For example, if a classifier's prediction \(y'\) is right or wrong w.r.t. the true target label \(y\):

  • 0: Correct, \(y = y'\)
  • 1: Error, \(y \neq y'\)

Implementation based on MOA.

Parameters

  • drift_confidence (default: 0.001): Confidence level for the drift.
  • warning_confidence (default: 0.005): Confidence level for the warning.
  • lambda_val (default: 0.05): The weight given to recent data. Smaller values mean less weight is given to recent data.
  • two_sided_test (default: False): If True, both error increments and decrements are monitored (two-sided). By default only increments are monitored (one-sided).

Attributes

  • drift_detected: Whether or not a drift is detected following the last update.
  • warning_detected: Whether or not a warning is detected following the last update.

Examples

import random
from river import drift

rng = random.Random(42)
hddm_w = drift.binary.HDDM_W()

data_stream = rng.choices([0, 1], k=1000)
data_stream = data_stream + rng.choices([0, 1], k=1000, weights=[0.3, 0.7])

print_warning = True
for i, x in enumerate(data_stream):
    _ = hddm_w.update(x)
    if hddm_w.warning_detected and print_warning:
        print(f"Warning detected at index {i}")
        print_warning = False
    if hddm_w.drift_detected:
        print(f"Change detected at index {i}")
        print_warning = True

Warning detected at index 451
Change detected at index 1077

Methods

  • update: Update the change detector with a single data point. Parameters: x (bool). Returns: BinaryDriftDetector: self.

References

  1. Frías-Blanco I, del Campo-Ávila J, Ramos-Jimenez G, et al. Online and non-parametric drift detection methods based on Hoeffding's bounds. IEEE Transactions on Knowledge and Data Engineering, 2014, 27(3): 810-823.
  2. Albert Bifet, Geoff Holmes, Richard Kirkby, Bernhard Pfahringer. MOA: Massive Online Analysis; Journal of Machine Learning Research 11: 1601-1604, 2010.
diff --git a/0.19.0/api/drift/datasets/AirlinePassengers/index.html b/0.19.0/api/drift/datasets/AirlinePassengers/index.html

AirlinePassengers

JFK Airline Passengers

This dataset gives the number of passengers arriving and departing at JFK. The data is obtained from New York State's official Kaggle page for this dataset.

Attributes

  • desc: Return the description from the docstring.
  • path

Methods

  • take: Iterate over the k samples. Parameters: k (int).

References

  1. https://www.kaggle.com/new-york-state/nys-air-passenger-traffic,-port-authority-of-ny-nj#air-passenger-traffic-per-month-port-authority-of-ny-nj-beginning-1977.csv
diff --git a/0.19.0/api/drift/datasets/Apple/index.html b/0.19.0/api/drift/datasets/Apple/index.html

Apple

+

Apple Stock

+

This dataset concerns the daily close price and volume of Apple stock around the year 2000. The dataset is sampled every 3 observations to reduce the length of the time series. This dataset is retrieved from Yahoo Finance.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+
+
+
    +
  1. +

    https://finance.yahoo.com/quote/AAPL/history?period1=850348800&period2=1084579200&interval=1d&filter=history&frequency=1d 

    +
  2. +
+
\ No newline at end of file
diff --git a/0.19.0/api/drift/datasets/Bitcoin/index.html b/0.19.0/api/drift/datasets/Bitcoin/index.html new file mode 100644 index 0000000000..ab9d885412 --- /dev/null +++ b/0.19.0/api/drift/datasets/Bitcoin/index.html @@ -0,0 +1,3710 @@

Bitcoin

+

Bitcoin Market Price

+

This is a regression task, where the goal is to predict the average USD market price across major bitcoin exchanges. This data was collected from the official Blockchain website. There is only one feature given, the day of exchange, which is in increments of three. The first 500 lines have been removed because they are not interesting.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+
+
+
    +
  1. +

    https://www.blockchain.com/fr/explorer/charts/market-price?timespan=all 

    +
  2. +
+
\ No newline at end of file
diff --git a/0.19.0/api/drift/datasets/BrentSpotPrice/index.html b/0.19.0/api/drift/datasets/BrentSpotPrice/index.html new file mode 100644 index 0000000000..133420c0b1 --- /dev/null +++ b/0.19.0/api/drift/datasets/BrentSpotPrice/index.html @@ -0,0 +1,3715 @@

BrentSpotPrice

+

Brent Spot Price

+

This is the USD price for Brent Crude oil, measured daily. We include the time series from 2000 onwards. The data is sampled at every 10 original observations to reduce the length of the series.

+

The data is obtained from the U.S. Energy Information Administration. Since the data is in the public domain, we distribute it as part of this repository.

+

Since the original data has observations only on trading days, there are arguably gaps in this time series (on non-trading days). However, we consider these observations to be consecutive, and thus also consider the sampled time series to have consecutive observations.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+
+
+
    +
  1. +

    U.S. Energy Information Administration (Sep. 2019) 

    +
  2. +
  3. +

    https://www.eia.gov/opendata/v1/qb.php?sdid=PET.RBRTE.D 

    +
  4. +
+
\ No newline at end of file
diff --git a/0.19.0/api/drift/datasets/Occupancy/index.html b/0.19.0/api/drift/datasets/Occupancy/index.html new file mode 100644 index 0000000000..3dd001406f --- /dev/null +++ b/0.19.0/api/drift/datasets/Occupancy/index.html @@ -0,0 +1,3704 @@

Occupancy

+

Room occupancy data.

+

Dataset on detecting room occupancy based on several variables. The dataset contains temperature, humidity, light, and CO2 variables.

+

The data is sampled at every 16 observations to reduce the length of the series.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+Candanedo, Luis M., and Véronique Feldheim. "Accurate occupancy detection of an office room from light, temperature, humidity and CO2 measurements using statistical learning models." Energy and Buildings 112 (2016): 28-39.

\ No newline at end of file
diff --git a/0.19.0/api/drift/datasets/RunLog/index.html b/0.19.0/api/drift/datasets/RunLog/index.html new file mode 100644 index 0000000000..abdf8e065c --- /dev/null +++ b/0.19.0/api/drift/datasets/RunLog/index.html @@ -0,0 +1,3702 @@

RunLog

+

Interval Training Running Pace.

+

This dataset shows the pace of a runner during an interval training session, where a mobile application provides instructions on when to run and when to walk.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/drift/datasets/UKCoalEmploy/index.html b/0.19.0/api/drift/datasets/UKCoalEmploy/index.html new file mode 100644 index 0000000000..118be0590e --- /dev/null +++ b/0.19.0/api/drift/datasets/UKCoalEmploy/index.html @@ -0,0 +1,3710 @@

UKCoalEmploy

+

Historic Employment in UK Coal Mines

+

This is historic data obtained from the UK government. We use the employment column for the number of workers employed in the British coal mines. Missing values in the data are replaced with the value of the preceding year.

+

Attributes

+
    +
  • +

    desc

    +

    Return the description from the docstring.

    +
  • +
  • +

    path

    +
  • +
+

Methods

+
+take +

Iterate over the k samples.

+

Parameters

+
    +
  • k'int'
  • +
+
+

+
+
+
    +
  1. +

    https://www.gov.uk/government/statistical-data-sets/historical-coal-data-coal-production-availability-and-consumption 

    +
  2. +
+
\ No newline at end of file
diff --git a/0.19.0/api/dummy/NoChangeClassifier/index.html b/0.19.0/api/dummy/NoChangeClassifier/index.html new file mode 100644 index 0000000000..418baaa04e --- /dev/null +++ b/0.19.0/api/dummy/NoChangeClassifier/index.html @@ -0,0 +1,3483 @@

NoChangeClassifier

+

Dummy classifier which returns the last class seen.

+

The predict_one method will output the last class seen whilst predict_proba_one will return 1 for the last class seen and 0 for the others.

+

Attributes

+
    +
  • +

    last_class

    +

    The last class seen.

    +
  • +
  • +

    classes

    +

    The set of classes seen.

    +
  • +
+

Examples

+

Taken from example 2.1 from this page.

+

import pprint
+from river import dummy
+
+sentences = [
+    ('glad happy glad', '+'),
+    ('glad glad joyful', '+'),
+    ('glad pleasant', '+'),
+    ('miserable sad glad', '−')
+]
+
+model = dummy.NoChangeClassifier()
+
+for sentence, label in sentences:
+    model = model.learn_one(sentence, label)
+
+new_sentence = 'glad sad miserable pleasant glad'
+model.predict_one(new_sentence)
+
+
'−'
+

+

pprint.pprint(model.predict_proba_one(new_sentence))
+
+
{'+': 0, '−': 1}
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict[base.typing.ClfTarget, float]: A dictionary that associates a probability with each label.

+
+

\ No newline at end of file
diff --git a/0.19.0/api/dummy/PriorClassifier/index.html b/0.19.0/api/dummy/PriorClassifier/index.html new file mode 100644 index 0000000000..0f3b296688 --- /dev/null +++ b/0.19.0/api/dummy/PriorClassifier/index.html @@ -0,0 +1,3491 @@

PriorClassifier

+

Dummy classifier which uses the prior distribution.

+

The predict_one method will output the most common class whilst predict_proba_one will return the normalized class counts.

+

Attributes

+
    +
  • +

    counts (collections.Counter)

    +

    Class counts.

    +
  • +
  • +

    n (int)

    +

    Total number of seen instances.

    +
  • +
+

Examples

+

Taken from example 2.1 from this page.

+

from river import dummy
+
+sentences = [
+    ('glad happy glad', '+'),
+    ('glad glad joyful', '+'),
+    ('glad pleasant', '+'),
+    ('miserable sad glad', '−')
+]
+
+model = dummy.PriorClassifier()
+
+for sentence, label in sentences:
+    model = model.learn_one(sentence, label)
+
+new_sentence = 'glad sad miserable pleasant glad'
+model.predict_one(new_sentence)
+
+
'+'
+
+
model.predict_proba_one(new_sentence)
+
+
{'+': 0.75, '−': 0.25}
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict[base.typing.ClfTarget, float]: A dictionary that associates a probability with each label.

+
+

\ No newline at end of file
diff --git a/0.19.0/api/dummy/StatisticRegressor/index.html b/0.19.0/api/dummy/StatisticRegressor/index.html new file mode 100644 index 0000000000..00c89ceb6c --- /dev/null +++ b/0.19.0/api/dummy/StatisticRegressor/index.html @@ -0,0 +1,3462 @@

StatisticRegressor

+

Dummy regressor that uses a univariate statistic to make predictions.

+

Parameters

    +
  • +

    statistic

    +

    Type — stats.base.Univariate

    +

    The univariate statistic used to make predictions, e.g. stats.Mean().

    +
  • +
+

Examples

+

from pprint import pprint
+from river import dummy
+from river import stats
+
+sentences = [
+    ('glad happy glad', 3),
+    ('glad glad joyful', 3),
+    ('glad pleasant', 2),
+    ('miserable sad glad', -3)
+]
+
+model = dummy.StatisticRegressor(stats.Mean())
+
+for sentence, score in sentences:
+    model = model.learn_one(sentence, score)
+
+new_sentence = 'glad sad miserable pleasant glad'
+model.predict_one(new_sentence)
+
+
1.25
+

+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.RegTarget'
  • +
+

Returns

+

Regressor: self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

base.typing.RegTarget: The prediction.

+
+

\ No newline at end of file
diff --git a/0.19.0/api/ensemble/ADWINBaggingClassifier/index.html b/0.19.0/api/ensemble/ADWINBaggingClassifier/index.html new file mode 100644 index 0000000000..c09f7bf327 --- /dev/null +++ b/0.19.0/api/ensemble/ADWINBaggingClassifier/index.html @@ -0,0 +1,3705 @@

ADWINBaggingClassifier

+

ADWIN Bagging classifier.

+

ADWIN Bagging 1 is the online bagging method of Oza and Russell 2 with the addition of the ADWIN algorithm as a change detector. If concept drift is detected, the worst member of the ensemble (based on the error estimation by ADWIN) is replaced by a new (empty) classifier.

+

Parameters

+
    +
  • +

    model

    +

    Typebase.Classifier

    +

    The classifier to bag.

    +
  • +
  • +

    n_models

    +

    Default10

    +

    The number of models in the ensemble.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number generator seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • models
  • +
+

Examples

+

from river import datasets
+from river import ensemble
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.Phishing()
+
+model = ensemble.ADWINBaggingClassifier(
+    model=(
+        preprocessing.StandardScaler() |
+        linear_model.LogisticRegression()
+    ),
+    n_models=3,
+    seed=42
+)
+
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 87.65%
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • kwargs
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Averages the predictions of each classifier.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+
+

+
+
+
    +
  1. +

    Albert Bifet, Geoff Holmes, Bernhard Pfahringer, Richard Kirkby, and Ricard Gavaldà. "New ensemble methods for evolving data streams." In 15th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, 2009.

    +
  2. +
  3. +

    Oza, N., Russell, S. "Online bagging and boosting." In: Artificial Intelligence and Statistics 2001, pp. 105–112. Morgan Kaufmann, 2001.

    +
  4. +
+
\ No newline at end of file
diff --git a/0.19.0/api/ensemble/ADWINBoostingClassifier/index.html b/0.19.0/api/ensemble/ADWINBoostingClassifier/index.html new file mode 100644 index 0000000000..8472d6dfb3 --- /dev/null +++ b/0.19.0/api/ensemble/ADWINBoostingClassifier/index.html @@ -0,0 +1,3704 @@

ADWINBoostingClassifier

+

ADWIN Boosting classifier.

+

ADWIN Boosting 1 is the online boosting method of Oza and Russell 2 with the addition of the ADWIN algorithm as a change detector. If concept drift is detected, the worst member of the ensemble (based on the error estimation by ADWIN) is replaced by a new (empty) classifier.

+

Parameters

+
    +
  • +

    model

    +

    Typebase.Classifier

    +

    The classifier to boost.

    +
  • +
  • +

    n_models

    +

    Default10

    +

    The number of models in the ensemble.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number generator seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • models
  • +
+

Examples

+

from river import datasets
+from river import ensemble
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import preprocessing
+
+dataset = datasets.Phishing()
+model = ensemble.ADWINBoostingClassifier(
+    model=(
+        preprocessing.StandardScaler() |
+        linear_model.LogisticRegression()
+    ),
+    n_models=3,
+    seed=42
+)
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 87.61%
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • kwargs
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

+
+
+
    +
  1. +

    Albert Bifet, Geoff Holmes, Bernhard Pfahringer, Richard Kirkby, and Ricard Gavaldà. "New ensemble methods for evolving data streams." In 15th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, 2009.

    +
  2. +
  3. +

    Oza, N., Russell, S. "Online bagging and boosting." In: Artificial Intelligence and Statistics 2001, pp. 105–112. Morgan Kaufmann, 2001.

    +
  4. +
+
\ No newline at end of file
diff --git a/0.19.0/api/ensemble/AdaBoostClassifier/index.html b/0.19.0/api/ensemble/AdaBoostClassifier/index.html new file mode 100644 index 0000000000..b6cd2b2558 --- /dev/null +++ b/0.19.0/api/ensemble/AdaBoostClassifier/index.html @@ -0,0 +1,3706 @@

AdaBoostClassifier

+

Boosting for classification.

+

For each incoming observation, each model's learn_one method is called k times, where k is sampled from a Poisson distribution of parameter lambda. The lambda parameter is updated as the weak learners successively fit the same observation, as sketched below.

+
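A rough sketch of that training loop, in the spirit of Oza's online AdaBoost. The bookkeeping shown here (the poisson helper and the weight lists) is illustrative and may differ from River's exact implementation:

import math
import random

rng = random.Random(42)

def poisson(lam):
    """Sample k ~ Poisson(lam) using Knuth's method."""
    threshold, k, p = math.exp(-lam), 0, 1.0
    while True:
        p *= rng.random()
        if p <= threshold:
            return k
        k += 1

def boost_learn_one(models, correct_weight, wrong_weight, x, y):
    """One online boosting step over the whole ensemble (sketch)."""
    lam = 1.0
    for i, model in enumerate(models):
        # Train the model k times, with k ~ Poisson(lam).
        for _ in range(poisson(lam)):
            model.learn_one(x, y)
        # Raise lam for the following models when this one errs, and lower
        # it otherwise, so that later members focus on hard observations.
        if model.predict_one(x) == y:
            correct_weight[i] += lam
            lam *= (correct_weight[i] + wrong_weight[i]) / (2 * correct_weight[i])
        else:
            wrong_weight[i] += lam
            lam *= (correct_weight[i] + wrong_weight[i]) / (2 * wrong_weight[i])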

Parameters

+
    +
  • +

    model

    +

    Typebase.Classifier

    +

    The classifier to boost.

    +
  • +
  • +

    n_models

    +

    Default10

    +

    The number of models in the ensemble.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number generator seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • models
  • +
+

Examples

+

In the following example three tree classifiers are boosted together. The performance is slightly better than when using a single tree.

+

from river import datasets
+from river import ensemble
+from river import evaluate
+from river import metrics
+from river import tree
+
+dataset = datasets.Phishing()
+
+metric = metrics.LogLoss()
+
+model = ensemble.AdaBoostClassifier(
+    model=(
+        tree.HoeffdingTreeClassifier(
+            split_criterion='gini',
+            delta=1e-5,
+            grace_period=2000
+        )
+    ),
+    n_models=5,
+    seed=42
+)
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
LogLoss: 0.370805
+

+

print(model)
+
+
AdaBoostClassifier(HoeffdingTreeClassifier)
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • kwargs
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

\ No newline at end of file
diff --git a/0.19.0/api/ensemble/BOLEClassifier/index.html b/0.19.0/api/ensemble/BOLEClassifier/index.html new file mode 100644 index 0000000000..6bdba42188 --- /dev/null +++ b/0.19.0/api/ensemble/BOLEClassifier/index.html @@ -0,0 +1,3706 @@

BOLEClassifier

+

Boosting Online Learning Ensemble (BOLE).

+

A modified version of Oza's Online Boosting Algorithm 1. For each incoming observation, each model's learn_one method is called k times, where k is sampled from a Poisson distribution of parameter lambda. The first model to be trained will be the one with the worst correct_weight / (correct_weight + wrong_weight). The worst models not yet trained will receive lambda values for training from the models that incorrectly classified an instance, and the best models not yet trained will receive lambda values for training from the models that correctly classified an instance. For more details, see 2.

+

Parameters

+
    +
  • +

    model

    +

    Typebase.Classifier

    +

    The classifier to boost.

    +
  • +
  • +

    n_models

    +

    Default10

    +

    The number of models in the ensemble.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number generator seed for reproducibility.

    +
  • +
  • +

    error_bound

    +

    Default0.5

    +

    Error bound percentage for allowing models to vote.

    +
  • +
+

Attributes

+
    +
  • models
  • +
+

Examples

+

from river import datasets
+from river import ensemble
+from river import evaluate
+from river import drift
+from river import metrics
+from river import tree
+
+dataset = datasets.Elec2().take(3000)
+
+model = ensemble.BOLEClassifier(
+    model=drift.DriftRetrainingClassifier(
+        model=tree.HoeffdingTreeClassifier(),
+        drift_detector=drift.binary.DDM()
+    ),
+    n_models=10,
+    seed=42
+)
+
+metric = metrics.Accuracy()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
Accuracy: 93.63%
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • kwargs
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

+
+
+
    +
  1. +

    Oza, N.C., 2005, October. Online bagging and boosting. In 2005 IEEE International Conference on Systems, Man and Cybernetics (Vol. 3, pp. 2340-2345). IEEE.

    +
  2. +
  3. +

    R. S. M. d. Barros, S. G. T. de Carvalho Santos and P. M. Gonçalves Júnior, "A Boosting-like Online Learning Ensemble," 2016 International Joint Conference on Neural Networks (IJCNN), 2016, pp. 1871-1878, doi: 10.1109/IJCNN.2016.7727427.

    +
  4. +
+
\ No newline at end of file
diff --git a/0.19.0/api/ensemble/BaggingClassifier/index.html b/0.19.0/api/ensemble/BaggingClassifier/index.html new file mode 100644 index 0000000000..4a16d29da5 --- /dev/null +++ b/0.19.0/api/ensemble/BaggingClassifier/index.html @@ -0,0 +1,3693 @@

BaggingClassifier

+

Online bootstrap aggregation for classification.

+

For each incoming observation, each model's learn_one method is called k times, where k is sampled from a Poisson distribution of parameter 1. k thus has a 37% chance of being equal to 0, a 37% chance of being equal to 1, an 18% chance of being equal to 2, a 6% chance of being equal to 3, a 1.5% chance of being equal to 4, etc. You can run scipy.stats.poisson(1).pmf(k) to obtain the exact values, as shown below.

+
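A quick check of these probabilities; this assumes SciPy is installed, which River itself does not require:

from scipy import stats

# Pmf of a Poisson distribution with rate 1: the number of times each
# model's learn_one method is called for a single observation.
poisson_1 = stats.poisson(1)
for k in range(5):
    print(k, round(poisson_1.pmf(k), 3))

0 0.368
1 0.368
2 0.184
3 0.061
4 0.015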

Parameters

+
    +
  • +

    model

    +

    Typebase.Classifier

    +

    The classifier to bag.

    +
  • +
  • +

    n_models

    +

    Default10

    +

    The number of models in the ensemble.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number generator seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • models
  • +
+

Examples

+

In the following example three logistic regressions are bagged together. The performance is slightly better than when using a single logistic regression.

+

from river import datasets
+from river import ensemble
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.Phishing()
+
+model = ensemble.BaggingClassifier(
+    model=(
+        preprocessing.StandardScaler() |
+        linear_model.LogisticRegression()
+    ),
+    n_models=3,
+    seed=42
+)
+
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 87.65%
+

+

print(model)
+
+
BaggingClassifier(StandardScaler | LogisticRegression)
+

+

Methods

+
+learn_one +
+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Averages the predictions of each classifier.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/ensemble/BaggingRegressor/index.html b/0.19.0/api/ensemble/BaggingRegressor/index.html new file mode 100644 index 0000000000..b8e72597ba --- /dev/null +++ b/0.19.0/api/ensemble/BaggingRegressor/index.html @@ -0,0 +1,3675 @@

BaggingRegressor

+

Online bootstrap aggregation for regression.

+

For each incoming observation, each model's learn_one method is called k times, where k is sampled from a Poisson distribution of parameter 1. k thus has a 37% chance of being equal to 0, a 37% chance of being equal to 1, an 18% chance of being equal to 2, a 6% chance of being equal to 3, a 1.5% chance of being equal to 4, etc. You can run scipy.stats.poisson(1).pmf(k) for the exact values.

+

Parameters

+
    +
  • +

    model

    +

    Typebase.Regressor

    +

    The regressor to bag.

    +
  • +
  • +

    n_models

    +

    Default10

    +

    The number of models in the ensemble.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number generator seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • models
  • +
+

Examples

+

In the following example three linear regressions are bagged together. The performance is slightly better than when using a single linear regression.

+

from river import datasets
+from river import ensemble
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.TrumpApproval()
+
+model = preprocessing.StandardScaler()
+model |= ensemble.BaggingRegressor(
+    model=linear_model.LinearRegression(intercept_lr=0.1),
+    n_models=3,
+    seed=42
+)
+
+metric = metrics.MAE()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MAE: 0.677586
+

+

Methods

+
+learn_one +
+
+predict_one +

Averages the predictions of each regressor.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/ensemble/EWARegressor/index.html b/0.19.0/api/ensemble/EWARegressor/index.html new file mode 100644 index 0000000000..ac163ac75c --- /dev/null +++ b/0.19.0/api/ensemble/EWARegressor/index.html @@ -0,0 +1,3718 @@

EWARegressor

+

Exponentially Weighted Average regressor.

+

Parameters

+
    +
  • +

    models

    +

    Typelist[base.Regressor]

    +

    The regressors to hedge.

    +
  • +
  • +

    loss

    +

    Typeoptim.losses.RegressionLoss | None

    +

    DefaultNone

    +

    The loss function that has to be minimized. Defaults to optim.losses.Squared.

    +
  • +
  • +

    learning_rate

    +

    Default0.5

    +

    The learning rate by which the model weights are multiplied at each iteration (see the sketch after this list).

    +
  • +
+
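A minimal sketch of the weighting scheme, assuming the standard exponentially weighted average (Hedge) update; the exact implementation details may differ:

import math

def ewa_learn_predict_one(models, weights, loss, learning_rate, x, y):
    """One EWA step (sketch): predict, re-weight, and train each model."""
    y_pred = sum(w * model.predict_one(x) for w, model in zip(weights, models))
    # Shrink each model's weight exponentially with its individual loss.
    for i, model in enumerate(models):
        weights[i] *= math.exp(-learning_rate * loss(y, model.predict_one(x)))
        model.learn_one(x, y)
    # Normalize so the weights keep forming a convex combination.
    total = sum(weights)
    for i in range(len(weights)):
        weights[i] /= total
    return y_pred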

Attributes

+
    +
  • models
  • +
+

Examples

+

from river import datasets
+from river import ensemble
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+from river import stream
+
+optimizers = [
+    optim.SGD(0.01),
+    optim.RMSProp(),
+    optim.AdaGrad()
+]
+
+for optimizer in optimizers:
+
+    dataset = datasets.TrumpApproval()
+    metric = metrics.MAE()
+    model = (
+        preprocessing.StandardScaler() |
+        linear_model.LinearRegression(
+            optimizer=optimizer,
+            intercept_lr=.1
+        )
+    )
+
+    print(optimizer, evaluate.progressive_val_score(dataset, model, metric))
+
+
SGD MAE: 0.558735
+RMSProp MAE: 0.522449
+AdaGrad MAE: 0.477289
+

+

dataset = datasets.TrumpApproval()
+metric = metrics.MAE()
+hedge = (
+    preprocessing.StandardScaler() |
+    ensemble.EWARegressor(
+        [
+            linear_model.LinearRegression(optimizer=o, intercept_lr=.1)
+            for o in optimizers
+        ],
+        learning_rate=0.005
+    )
+)
+
+evaluate.progressive_val_score(dataset, hedge, metric)
+
+
MAE: 0.496298
+

+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
+

Returns

+

self

+
+

+
+learn_predict_one +
+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

The prediction.

+
+

\ No newline at end of file
diff --git a/0.19.0/api/ensemble/LeveragingBaggingClassifier/index.html b/0.19.0/api/ensemble/LeveragingBaggingClassifier/index.html new file mode 100644 index 0000000000..705072bdcd --- /dev/null +++ b/0.19.0/api/ensemble/LeveragingBaggingClassifier/index.html @@ -0,0 +1,3715 @@

LeveragingBaggingClassifier

+

Leveraging Bagging ensemble classifier.

+

Leveraging Bagging 1 is an improvement over the Oza Bagging algorithm. The bagging performance is leveraged by increasing the re-sampling, which is simulated with a Poisson distribution. To increase re-sampling it uses a higher value of the Poisson parameter w (the average number of events), 6 by default, which increases input space diversity by attributing a different range of weights to the data samples.

+

To deal with concept drift, Leveraging Bagging uses the ADWIN algorithm to monitor the performance of each member of the ensemble. If concept drift is detected, the worst member of the ensemble (based on the error estimation by ADWIN) is replaced by a new (empty) classifier.

+

Parameters

+
    +
  • +

    model

    +

    Typebase.Classifier

    +

    The classifier to bag.

    +
  • +
  • +

    n_models

    +

    Typeint

    +

    Default10

    +

    The number of models in the ensemble.

    +
  • +
  • +

    w

    +

    Typefloat

    +

    Default6

    +

    Indicates the average number of events. This is the lambda parameter of the Poisson distribution used to compute the re-sampling weight.

    +
  • +
  • +

    adwin_delta

    +

    Typefloat

    +

    Default0.002

    +

    The delta parameter for the ADWIN change detector.

    +
  • +
  • +

    bagging_method

    +

    Typestr

    +

    Defaultbag

    +

    The bagging method to use. Can be one of the following:
    * 'bag' - Leveraging Bagging using ADWIN.
    * 'me' - Assigns \(weight=1\) if sample is misclassified, otherwise \(weight=error/(1-error)\).
    * 'half' - Use resampling without replacement for half of the instances.
    * 'wt' - Resample without taking out all instances.
    * 'subag' - Resampling without replacement.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number generator seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    bagging_methods

    +

    Valid bagging_method options.

    +
  • +
  • +

    models

    +
  • +
+

Examples

+

from river import datasets
+from river import ensemble
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.Phishing()
+
+model = ensemble.LeveragingBaggingClassifier(
+    model=(
+        preprocessing.StandardScaler() |
+        linear_model.LogisticRegression()
+    ),
+    n_models=3,
+    seed=42
+)
+
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 88.55%
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • kwargs
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Averages the predictions of each classifier.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/ensemble/SRPClassifier/index.html b/0.19.0/api/ensemble/SRPClassifier/index.html new file mode 100644 index 0000000000..d32a54b7d8 --- /dev/null +++ b/0.19.0/api/ensemble/SRPClassifier/index.html @@ -0,0 +1,3763 @@

SRPClassifier

+

Streaming Random Patches ensemble classifier.

+

The Streaming Random Patches (SRP) 1 is an ensemble method that simulates bagging or random subspaces. The default algorithm uses both bagging and random subspaces, namely Random Patches. The default base estimator is a Hoeffding Tree, but other base estimators can be used (unlike in random forest variations).

+

Parameters

+
    +
  • +

    model

    +

    Typebase.Estimator | None

    +

    DefaultNone

    +

    The base estimator.

    +
  • +
  • +

    n_models

    +

    Typeint

    +

    Default10

    +

    Number of members in the ensemble.

    +
  • +
  • +

    subspace_size

    +

    Typeint | float | str

    +

    Default0.6

    +

    Number of features per subset for each classifier where M is the total number of features.
    A negative value means M - subspace_size.
    Only applies when using random subspaces or random patches.
    * If int, indicates the number of features to use. Valid range: [2, M].
    * If float, indicates the percentage of features to use. Valid range: (0., 1.].
    * 'sqrt' - sqrt(M)+1
    * 'rmsqrt' - Residual from M-(sqrt(M)+1)

    +
  • +
  • +

    training_method

    +

    Typestr

    +

    Defaultpatches

    +

    The training method to use.
    * 'subspaces' - Random subspaces.
    * 'resampling' - Resampling.
    * 'patches' - Random patches.

    +
  • +
  • +

    lam

    +

    Typeint

    +

    Default6

    +

    Lambda value for resampling.

    +
  • +
  • +

    drift_detector

    +

    Typebase.DriftDetector | None

    +

    DefaultNone

    +

    Drift detector.

    +
  • +
  • +

    warning_detector

    +

    Typebase.DriftDetector | None

    +

    DefaultNone

    +

    Warning detector.

    +
  • +
  • +

    disable_detector

    +

    Typestr

    +

    Defaultoff

    +

    Option to disable drift detectors:
    * If 'off', detectors are enabled.
    * If 'drift', disables concept drift detection and the background learner.
    * If 'warning', disables the background learner and ensemble members are reset if drift is detected.

    +
  • +
  • +

    disable_weighted_vote

    +

    Typebool

    +

    DefaultFalse

    +

    If True, disables weighted voting.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number generator seed for reproducibility.

    +
  • +
  • +

    metric

    +

    TypeClassificationMetric | None

    +

    DefaultNone

    +

    The metric to track members' performance within the ensemble. This implementation assumes that larger values are better when using weighted votes.

    +
  • +
+

Attributes

+
    +
  • models
  • +
+

Examples

+

from river import ensemble
+from river import evaluate
+from river import metrics
+from river.datasets import synth
+from river import tree
+
+dataset = synth.ConceptDriftStream(
+    seed=42,
+    position=500,
+    width=50
+).take(1000)
+
+base_model = tree.HoeffdingTreeClassifier(
+    grace_period=50, delta=0.01,
+    nominal_attributes=['age', 'car', 'zipcode']
+)
+model = ensemble.SRPClassifier(
+    model=base_model, n_models=3, seed=42,
+)
+
+metric = metrics.Accuracy()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
Accuracy: 72.77%
+

+

Methods

+
+learn_one +
+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

+
+reset +
+

Notes

+

This implementation uses n_models=10 as default given the impact on processing time. The optimal number of models depends on the data and resources available.

+
+
+
    +
  1. +

    Heitor Murilo Gomes, Jesse Read, Albert Bifet. Streaming Random Patches for Evolving Data Stream Classification. IEEE International Conference on Data Mining (ICDM), 2019.

    +
  2. +
+
\ No newline at end of file
diff --git a/0.19.0/api/ensemble/SRPRegressor/index.html b/0.19.0/api/ensemble/SRPRegressor/index.html new file mode 100644 index 0000000000..a6d5e5e83f --- /dev/null +++ b/0.19.0/api/ensemble/SRPRegressor/index.html @@ -0,0 +1,3770 @@

SRPRegressor

+

Streaming Random Patches ensemble regressor.

+

The Streaming Random Patches 1 ensemble method for regression trains each base learner on a subset of features and instances from the original data, namely a random patch. This strategy to enforce diverse base models is similar to the one in random forests, yet it is not restricted to using decision trees as the base learner.

+

This method is an adaptation of 2 for regression.

+

Parameters

+
    +
  • +

    model

    +

    Typebase.Regressor | None

    +

    DefaultNone

    +

    The base estimator.

    +
  • +
  • +

    n_models

    +

    Typeint

    +

    Default10

    +

    Number of members in the ensemble.

    +
  • +
  • +

    subspace_size

    +

    Typeint | float | str

    +

    Default0.6

    +

    Number of features per subset for each classifier where M is the total number of features.
    A negative value means M - subspace_size.
    Only applies when using random subspaces or random patches.
    * If int, indicates the number of features to use. Valid range: [2, M].
    * If float, indicates the percentage of features to use. Valid range: (0., 1.].
    * 'sqrt' - sqrt(M)+1
    * 'rmsqrt' - Residual from M-(sqrt(M)+1)

    +
  • +
  • +

    training_method

    +

    Typestr

    +

    Defaultpatches

    +

    The training method to use.
    * 'subspaces' - Random subspaces.
    * 'resampling' - Resampling.
    * 'patches' - Random patches.

    +
  • +
  • +

    lam

    +

    Typeint

    +

    Default6

    +

    Lambda value for bagging.

    +
  • +
  • +

    drift_detector

    +

    Typebase.DriftDetector | None

    +

    DefaultNone

    +

    Drift detector.

    +
  • +
  • +

    warning_detector

    +

    Typebase.DriftDetector | None

    +

    DefaultNone

    +

    Warning detector.

    +
  • +
  • +

    disable_detector

    +

    Typestr

    +

    Defaultoff

    +

    Option to disable drift detectors:
    * If 'off', detectors are enabled.
    * If 'drift', disables concept drift detection and the background learner.
    * If 'warning', disables the background learner and ensemble members are reset if drift is detected.

    +
  • +
  • +

    disable_weighted_vote

    +

    Typebool

    +

    DefaultTrue

    +

    If True, disables weighted voting.

    +
  • +
  • +

    drift_detection_criteria

    +

    Typestr

    +

    Defaulterror

    +

    The criteria used to track drifts.
    * 'error' - absolute error.
    * 'prediction' - predicted target values.

    +
  • +
  • +

    aggregation_method

    +

    Typestr

    +

    Defaultmean

    +

    The method to use to aggregate predictions in the ensemble.
    * 'mean'
    * 'median'

    +
  • +
  • +

    seed

    +

    DefaultNone

    +

    Random number generator seed for reproducibility.

    +
  • +
  • +

    metric

    +

    TypeRegressionMetric | None

    +

    DefaultNone

    +

    The metric to track members' performance within the ensemble.

    +
  • +
+

Attributes

+
    +
  • models
  • +
+

Examples

+

from river import ensemble
+from river import evaluate
+from river import metrics
+from river.datasets import synth
+from river import tree
+
+dataset = synth.FriedmanDrift(
+    drift_type='gsg',
+    position=(350, 750),
+    transition_window=200,
+    seed=42
+).take(1000)
+
+base_model = tree.HoeffdingTreeRegressor(grace_period=50)
+model = ensemble.SRPRegressor(
+    model=base_model,
+    training_method="patches",
+    n_models=3,
+    seed=42
+)
+
+metric = metrics.R2()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
R2: 0.571117
+

+

Methods

+
+learn_one +
+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

The prediction.

+
+

+
+reset +
+

Notes

+

This implementation uses n_models=10 as default given the impact on processing time. The optimal number of models depends on the data and resources available.

+
+
+
    +
  1. +

    Heitor Gomes, Jacob Montiel, Saulo Martiello Mastelini, Bernhard Pfahringer, and Albert Bifet. On Ensemble Techniques for Data Stream Regression. IJCNN'20: International Joint Conference on Neural Networks, 2020.

    +
  2. +
  3. +

    Heitor Murilo Gomes, Jesse Read, Albert Bifet. Streaming Random Patches for Evolving Data Stream Classification. IEEE International Conference on Data Mining (ICDM), 2019.

    +
  4. +
+
\ No newline at end of file
diff --git a/0.19.0/api/ensemble/StackingClassifier/index.html b/0.19.0/api/ensemble/StackingClassifier/index.html new file mode 100644 index 0000000000..546452dcff --- /dev/null +++ b/0.19.0/api/ensemble/StackingClassifier/index.html @@ -0,0 +1,3696 @@

StackingClassifier

+

Stacking for binary classification.

+

Parameters

+
    +
  • +

    models

    +

    Typelist[base.Classifier]

    +
  • +
  • +

    meta_classifier

    +

    Typebase.Classifier

    +
  • +
  • +

    include_features

    +

    DefaultTrue

    +

    Indicates whether or not the original features should be provided to the meta-model along with the predictions from each model (see the sketch after this list).

    +
  • +
+
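How the meta-classifier's input is assembled can be sketched as follows; the feature names used here are illustrative, not River's internal ones:

def build_meta_features(models, x, include_features=True):
    """Assemble the meta-classifier's input for one observation (sketch)."""
    meta_x = dict(x) if include_features else {}
    for i, model in enumerate(models):
        # Each base model contributes its predicted probabilities as features.
        for label, proba in model.predict_proba_one(x).items():
            meta_x[f"model_{i}_proba_{label}"] = proba
    return meta_x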

Attributes

+
    +
  • models
  • +
+

Examples

+

from river import compose
+from river import datasets
+from river import ensemble
+from river import evaluate
+from river import linear_model as lm
+from river import metrics
+from river import preprocessing as pp
+
+dataset = datasets.Phishing()
+
+model = compose.Pipeline(
+    ('scale', pp.StandardScaler()),
+    ('stack', ensemble.StackingClassifier(
+        [
+            lm.LogisticRegression(),
+            lm.PAClassifier(mode=1, C=0.01),
+            lm.PAClassifier(mode=2, C=0.01),
+        ],
+        meta_classifier=lm.LogisticRegression()
+    ))
+)
+
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 88.14%
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

\ No newline at end of file
diff --git a/0.19.0/api/ensemble/VotingClassifier/index.html b/0.19.0/api/ensemble/VotingClassifier/index.html new file mode 100644 index 0000000000..fcfb11c493 --- /dev/null +++ b/0.19.0/api/ensemble/VotingClassifier/index.html @@ -0,0 +1,3683 @@

VotingClassifier

+

Voting classifier.

+

A classification is made by aggregating the predictions of each model in the ensemble. The probabilities for each class are summed up if use_probabilities is set to True. If not, the probabilities are ignored and each prediction is weighted the same. In this case, it's important that you use an odd number of classifiers. A random class will be picked if the number of classifiers is even.

+
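The aggregation rule can be sketched as follows; this illustrates the voting logic rather than River's exact code:

import collections

def vote(models, x, use_probabilities=True):
    """Aggregate the ensemble's predictions for one observation (sketch)."""
    totals = collections.Counter()
    for model in models:
        if use_probabilities:
            # Soft voting: sum each class's predicted probability.
            totals.update(model.predict_proba_one(x))
        else:
            # Hard voting: each model casts a single vote for its prediction.
            totals.update([model.predict_one(x)])
    return max(totals, key=totals.get) if totals else None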

Parameters

+
    +
  • +

    models

    +

    Typelist[base.Classifier]

    +

    The classifiers.

    +
  • +
  • +

    use_probabilities

    +

    DefaultTrue

    +

    Whether or not to weight each prediction with its associated probability.

    +
  • +
+

Attributes

+
    +
  • models
  • +
+

Examples

+

from river import datasets
+from river import ensemble
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import naive_bayes
+from river import preprocessing
+from river import tree
+
+dataset = datasets.Phishing()
+
+model = (
+    preprocessing.StandardScaler() |
+    ensemble.VotingClassifier([
+        linear_model.LogisticRegression(),
+        tree.HoeffdingTreeClassifier(),
+        naive_bayes.GaussianNB()
+    ])
+)
+
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 86.94%
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict[base.typing.ClfTarget, float]: A dictionary that associates a probability with each label.

+
+

\ No newline at end of file
diff --git a/0.19.0/api/evaluate/BinaryClassificationTrack/index.html b/0.19.0/api/evaluate/BinaryClassificationTrack/index.html new file mode 100644 index 0000000000..a4a306d829 --- /dev/null +++ b/0.19.0/api/evaluate/BinaryClassificationTrack/index.html @@ -0,0 +1,3445 @@

BinaryClassificationTrack

+

This track evaluates a model's performance on binary classification tasks. These do not include synthetic datasets.

+

Methods

+
+run +
\ No newline at end of file
diff --git a/0.19.0/api/evaluate/MultiClassClassificationTrack/index.html b/0.19.0/api/evaluate/MultiClassClassificationTrack/index.html new file mode 100644 index 0000000000..436815fa80 --- /dev/null +++ b/0.19.0/api/evaluate/MultiClassClassificationTrack/index.html @@ -0,0 +1,3445 @@

MultiClassClassificationTrack

+

This track evaluates a model's performance on multi-class classification tasks. These do not include synthetic datasets.

+

Methods

+
+run +
\ No newline at end of file
diff --git a/0.19.0/api/evaluate/RegressionTrack/index.html b/0.19.0/api/evaluate/RegressionTrack/index.html new file mode 100644 index 0000000000..000f1cefc2 --- /dev/null +++ b/0.19.0/api/evaluate/RegressionTrack/index.html @@ -0,0 +1,3445 @@

RegressionTrack

+

This track evaluates a model's performance on regression tasks. These do not include synthetic datasets.

+

Methods

+
+run +
\ No newline at end of file
diff --git a/0.19.0/api/evaluate/Track/index.html b/0.19.0/api/evaluate/Track/index.html new file mode 100644 index 0000000000..420657f9f1 --- /dev/null +++ b/0.19.0/api/evaluate/Track/index.html @@ -0,0 +1,3485 @@

Track

+

A track evaluates a model's performance.

+

The following metrics are recorded:

+
  • Time, which should be interpreted with caution: it depends on the architecture and the local resource situation, so comparison via FLOPS should be preferred.
  • The model's memory footprint.
  • The model's predictive performance on the track's dataset.
+

Parameters

+
    +
  • +

    name

    +

    Typestr

    +

    The name of the track.

    +
  • +
  • +

    datasets

    +

    The datasets that compose the track.

    +
  • +
  • +

    metric

    +

    The metric(s) used to track performance.

    +
  • +
+

Methods

+
+run +
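The run method's exact signature is not documented on this page. As a minimal sketch, assuming the datasets passed at construction are exposed through the datasets attribute, a track can also be consumed by hand with evaluate.progressive_val_score (the pipeline and metric below are illustrative choices, not prescribed by this page):

from river import evaluate
from river import linear_model
from river import metrics
from river import preprocessing

track = evaluate.BinaryClassificationTrack()

for dataset in track.datasets:
    # A fresh model and metric per dataset, since both are stateful.
    model = preprocessing.StandardScaler() | linear_model.LogisticRegression()
    score = evaluate.progressive_val_score(dataset, model, metrics.ROCAUC())
    print(dataset.__class__.__name__, score)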
\ No newline at end of file
diff --git a/0.19.0/api/evaluate/iter-progressive-val-score/index.html b/0.19.0/api/evaluate/iter-progressive-val-score/index.html
new file mode 100644
index 0000000000..0b9c73cb2d
--- /dev/null
+++ b/0.19.0/api/evaluate/iter-progressive-val-score/index.html
@@ -0,0 +1,3544 @@

iter_progressive_val_score

+

Evaluates the performance of a model on a streaming dataset and yields results.

+

This does exactly the same as evaluate.progressive_val_score. The only difference is that this function returns an iterator, yielding results at every step. This can be useful if you want to have control over what you do with the results. For instance, you might want to plot the results.

+

Parameters

+
    +
  • +

    dataset

    +

    Typebase.typing.Dataset

    +

    The stream of observations against which the model will be evaluated.

    +
  • +
  • +

    model

    +

    The model to evaluate.

    +
  • +
  • +

    metric

    +

    Typemetrics.base.Metric

    +

    The metric used to evaluate the model's predictions.

    +
  • +
  • +

    moment

    +

    Typestr | typing.Callable | None

    +

    DefaultNone

    +

    The attribute used for measuring time. If a callable is passed, then it is expected to take as input a dict of features. If None, then the observations are implicitly timestamped in the order in which they arrive.

    +
  • +
  • +

    delay

    +

    Typestr | int | dt.timedelta | typing.Callable | None

    +

    DefaultNone

    +

    The amount to wait before revealing the target associated with each observation to the model. This value is expected to be able to sum with the moment value. For instance, if moment is a datetime.date, then delay is expected to be a datetime.timedelta. If a callable is passed, then it is expected to take as input a dict of features and the target. If a str is passed, then it will be used to access the relevant field from the features. If None is passed, then no delay will be used, which leads to doing standard online validation.

    +
  • +
  • +

    step

    +

    Default1

    +

    Iteration number at which to yield results. This only takes into account the predictions, and not the training steps.

    +
  • +
  • +

    measure_time

    +

    DefaultFalse

    +

    Whether or not to measure the elapsed time.

    +
  • +
  • +

    measure_memory

    +

    DefaultFalse

    +

    Whether or not to measure the memory usage of the model.

    +
  • +
+

Examples

+

Take the following model:

+
from river import linear_model
+from river import preprocessing
+
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LogisticRegression()
+)
+
+

We can evaluate it on the Phishing dataset as so:

+

from river import datasets
+from river import evaluate
+from river import metrics
+
+steps = evaluate.iter_progressive_val_score(
+    model=model,
+    dataset=datasets.Phishing(),
+    metric=metrics.ROCAUC(),
+    step=200
+)
+
+for step in steps:
+    print(step)
+
+
{'ROCAUC': ROCAUC: 90.20%, 'Step': 200}
+{'ROCAUC': ROCAUC: 92.25%, 'Step': 400}
+{'ROCAUC': ROCAUC: 93.23%, 'Step': 600}
+{'ROCAUC': ROCAUC: 94.05%, 'Step': 800}
+{'ROCAUC': ROCAUC: 94.79%, 'Step': 1000}
+{'ROCAUC': ROCAUC: 95.07%, 'Step': 1200}
+{'ROCAUC': ROCAUC: 95.07%, 'Step': 1250}
+
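Since each step is a plain dictionary, the results are easy to collect and plot. Here is a minimal sketch using matplotlib (not a River dependency), reusing the imports above and assuming each yielded metric exposes its current value through its get method:

import matplotlib.pyplot as plt

model = (
    preprocessing.StandardScaler() |
    linear_model.LogisticRegression()
)

steps, scores = [], []
for step in evaluate.iter_progressive_val_score(
    model=model,
    dataset=datasets.Phishing(),
    metric=metrics.ROCAUC(),
    step=200
):
    steps.append(step['Step'])
    scores.append(step['ROCAUC'].get())  # get() returns the metric's raw float value

plt.plot(steps, scores)
plt.xlabel('Step')
plt.ylabel('ROCAUC')
plt.show()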

\ No newline at end of file
diff --git a/0.19.0/api/evaluate/progressive-val-score/index.html b/0.19.0/api/evaluate/progressive-val-score/index.html
new file mode 100644
index 0000000000..15c777cfc5
--- /dev/null
+++ b/0.19.0/api/evaluate/progressive-val-score/index.html
@@ -0,0 +1,3597 @@

progressive_val_score

+

Evaluates the performance of a model on a streaming dataset.

+

This method is the canonical way to evaluate a model's performance. When used correctly, it allows you to exactly assess how a model would have performed in a production scenario.

+

dataset is converted into a stream of questions and answers. At each step the model is either asked to predict an observation, or is updated. The target is only revealed to the model after a certain amount of time, which is determined by the delay parameter. Note that under the hood this uses the stream.simulate_qa function to go through the data in arrival order.

+

By default, there is no delay, which means that the samples are processed one after the other. When there is no delay, this function essentially performs progressive validation. When there is a delay, then we refer to it as delayed progressive validation.

+

It is recommended to use this method when you want to determine a model's performance on a dataset. In particular, it is advised to use the delay parameter in order to get a reliable assessment. Indeed, in a production scenario, it is often the case that ground truths are made available after a certain amount of time. By using this method, you can reproduce this scenario and therefore truthfully assess what would have been the performance of a model on a given dataset.

+

Parameters

+
    +
  • +

    dataset

    +

    Typebase.typing.Dataset

    +

    The stream of observations against which the model will be evaluated.

    +
  • +
  • +

    model

    +

    The model to evaluate.

    +
  • +
  • +

    metric

    +

    Typemetrics.base.Metric

    +

    The metric used to evaluate the model's predictions.

    +
  • +
  • +

    moment

    +

    Typestr | typing.Callable | None

    +

    DefaultNone

    +

    The attribute used for measuring time. If a callable is passed, then it is expected to take as input a dict of features. If None, then the observations are implicitly timestamped in the order in which they arrive.

    +
  • +
  • +

    delay

    +

    Typestr | int | dt.timedelta | typing.Callable | None

    +

    DefaultNone

    +

    The amount to wait before revealing the target associated with each observation to the model. This value is expected to be able to sum with the moment value. For instance, if moment is a datetime.date, then delay is expected to be a datetime.timedelta. If a callable is passed, then it is expected to take as input a dict of features and the target. If a str is passed, then it will be used to access the relevant field from the features. If None is passed, then no delay will be used, which leads to doing standard online validation.

    +
  • +
  • +

    print_every

    +

    Default0

    +

    Iteration number at which to print the current metric. This only takes into account the predictions, and not the training steps.

    +
  • +
  • +

    show_time

    +

    DefaultFalse

    +

    Whether or not to display the elapsed time.

    +
  • +
  • +

    show_memory

    +

    DefaultFalse

    +

    Whether or not to display the memory usage of the model.

    +
  • +
  • +

    print_kwargs

    +

    Extra keyword arguments are passed to the print function. For instance, this allows providing a file argument, which indicates where to output progress.

    +
  • +
+

Examples

+

Take the following model:

+
from river import linear_model
+from river import preprocessing
+
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LogisticRegression()
+)
+
+

We can evaluate it on the Phishing dataset as so:

+

from river import datasets
+from river import evaluate
+from river import metrics
+
+evaluate.progressive_val_score(
+    model=model,
+    dataset=datasets.Phishing(),
+    metric=metrics.ROCAUC(),
+    print_every=200
+)
+
+
[200] ROCAUC: 90.20%
+[400] ROCAUC: 92.25%
+[600] ROCAUC: 93.23%
+[800] ROCAUC: 94.05%
+[1,000] ROCAUC: 94.79%
+[1,200] ROCAUC: 95.07%
+[1,250] ROCAUC: 95.07%
+ROCAUC: 95.07%
+

+

We haven't specified a delay, therefore this is strictly equivalent to the following piece of code:

+

model = (
+    preprocessing.StandardScaler() |
+    linear_model.LogisticRegression()
+)
+
+metric = metrics.ROCAUC()
+
+for x, y in datasets.Phishing():
+    y_pred = model.predict_proba_one(x)
+    metric = metric.update(y, y_pred)
+    model = model.learn_one(x, y)
+
+metric
+
+
ROCAUC: 95.07%
+

+

When print_every is specified, the current state is printed at regular intervals. Under the hood, Python's print function is being used. You can pass extra keyword arguments to modify its behavior. For instance, you may use the file argument if you want to log the progress to a file of your choice.

+

with open('progress.log', 'w') as f:
+    metric = evaluate.progressive_val_score(
+        model=model,
+        dataset=datasets.Phishing(),
+        metric=metrics.ROCAUC(),
+        print_every=200,
+        file=f
+    )
+
+with open('progress.log') as f:
+    for line in f.read().splitlines():
+        print(line)
+
+
[200] ROCAUC: 94.00%
+[400] ROCAUC: 94.70%
+[600] ROCAUC: 95.17%
+[800] ROCAUC: 95.42%
+[1,000] ROCAUC: 95.82%
+[1,200] ROCAUC: 96.00%
+[1,250] ROCAUC: 96.04%
+

+

Note that the performance is slightly better than above because we haven't used a fresh copy of the model. Instead, we've reused the existing model which has already done a full pass on the data.

+
import os; os.remove('progress.log')
+
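Finally, here is a minimal sketch of delayed progressive validation. Because moment is left as None, observations carry implicit integer timestamps, so an integer delay means each ground truth is revealed that many observations after the corresponding prediction (the value 10 is arbitrary):

model = (
    preprocessing.StandardScaler() |
    linear_model.LogisticRegression()
)

evaluate.progressive_val_score(
    model=model,
    dataset=datasets.Phishing(),
    metric=metrics.ROCAUC(),
    delay=10  # each target is revealed 10 observations after the prediction
)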
\ No newline at end of file
diff --git a/0.19.0/api/facto/FFMClassifier/index.html b/0.19.0/api/facto/FFMClassifier/index.html
new file mode 100644
index 0000000000..104a2fb2f6
--- /dev/null
+++ b/0.19.0/api/facto/FFMClassifier/index.html
@@ -0,0 +1,3708 @@

FFMClassifier

+

Field-aware Factorization Machine for binary classification.

+

The model equation is defined by:

+
\[\hat{y}(x) = w_{0} + \sum_{j=1}^{p} w_{j} x_{j} + \sum_{j=1}^{p} \sum_{j'=j+1}^{p} \langle \mathbf{v}_{j, f_{j'}}, \mathbf{v}_{j', f_j} \rangle x_{j} x_{j'}\]
+

Where \(\mathbf{v}_{j, f_{j'}}\) is the latent vector corresponding to \(j\) feature for \(f_{j'}\) field, and \(\mathbf{v}_{j', f_j}\) is the latent vector corresponding to \(j'\) feature for \(f_j\) field.

+
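To make the field-aware pairwise term concrete, here is a toy computation with made-up latent values: for the features user_Bob and item_Superman, the latent vector of user_Bob indexed by the item field is paired with the latent vector of item_Superman indexed by the user field.

v = {
    ('user_Bob', 'item'): [0.1, -0.2],
    ('item_Superman', 'user'): [0.3, 0.05],
}
x_j = x_jp = 1.0  # one-hot encoded categorical features

interaction = sum(
    a * b for a, b in zip(v['user_Bob', 'item'], v['item_Superman', 'user'])
) * x_j * x_jp
# 0.1 * 0.3 + (-0.2) * 0.05 = 0.02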

For more efficiency, this model automatically one-hot encodes string features, treating them as categorical variables. Field names are inferred from feature names by taking everything before the first underscore: feature_name.split('_')[0].

+

Parameters

+
    +
  • +

    n_factors

    +

    Default10

    +

    Dimensionality of the factorization or number of latent factors.

    +
  • +
  • +

    weight_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the feature weights. Note that the intercept is handled separately.

    +
  • +
  • +

    latent_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the latent factors.

    +
  • +
  • +

    loss

    +

    Typeoptim.losses.BinaryLoss | None

    +

    DefaultNone

    +

    The loss function to optimize for.

    +
  • +
  • +

    sample_normalization

    +

    DefaultFalse

    +

    Whether to divide each element of x by x's L2-norm.

    +
  • +
  • +

    l1_weight

    +

    Default0.0

    +

    Amount of L1 regularization used to push weights towards 0.

    +
  • +
  • +

    l2_weight

    +

    Default0.0

    +

    Amount of L2 regularization used to push weights towards 0.

    +
  • +
  • +

    l1_latent

    +

    Default0.0

    +

    Amount of L1 regularization used to push latent weights towards 0.

    +
  • +
  • +

    l2_latent

    +

    Default0.0

    +

    Amount of L2 regularization used to push latent weights towards 0.

    +
  • +
  • +

    intercept

    +

    Default0.0

    +

    Initial intercept value.

    +
  • +
  • +

    intercept_lr

    +

    Typeoptim.base.Scheduler | float

    +

    Default0.01

    +

    Learning rate scheduler used for updating the intercept. An instance of optim.schedulers.Constant is used if a float is passed. No intercept will be used if this is set to 0.

    +
  • +
  • +

    weight_initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

Weights initialization scheme. Defaults to optim.initializers.Zeros().

    +
  • +
  • +

    latent_initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

Latent factors initialization scheme. Defaults to optim.initializers.Normal(mu=.0, sigma=.1, random_state=self.random_state).

    +
  • +
  • +

    clip_gradient

    +

    Default1000000000000.0

    +

    Clips the absolute value of each gradient value.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Randomization seed used for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    weights

    +

    The current weights assigned to the features.

    +
  • +
  • +

    latents

    +

    The current latent weights assigned to the features.

    +
  • +
+

Examples

+

from river import facto
+
+dataset = (
+    ({'user': 'Alice', 'item': 'Superman', 'time': .12}, True),
+    ({'user': 'Alice', 'item': 'Terminator', 'time': .13}, True),
+    ({'user': 'Alice', 'item': 'Star Wars', 'time': .14}, True),
+    ({'user': 'Alice', 'item': 'Notting Hill', 'time': .15}, False),
+    ({'user': 'Alice', 'item': 'Harry Potter ', 'time': .16}, True),
+    ({'user': 'Bob', 'item': 'Superman', 'time': .13}, True),
+    ({'user': 'Bob', 'item': 'Terminator', 'time': .12}, True),
+    ({'user': 'Bob', 'item': 'Star Wars', 'time': .16}, True),
+    ({'user': 'Bob', 'item': 'Notting Hill', 'time': .10}, False)
+)
+
+model = facto.FFMClassifier(
+    n_factors=10,
+    intercept=.5,
+    seed=42,
+)
+
+for x, y in dataset:
+    model = model.learn_one(x, y)
+
+model.predict_one({'user': 'Bob', 'item': 'Harry Potter', 'time': .14})
+
+
True
+

+

Methods

+
+debug_one +

Debugs the output of the FM regressor.

+

Parameters

+
    +
  • x'dict'
  • +
  • decimals'int' — defaults to 5
  • +
+

Returns

+

str: A table which explains the output.

+
+

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
  • sample_weight — defaults to 1.0
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

\ No newline at end of file
diff --git a/0.19.0/api/facto/FFMRegressor/index.html b/0.19.0/api/facto/FFMRegressor/index.html
new file mode 100644
index 0000000000..8bb51cc05e
--- /dev/null
+++ b/0.19.0/api/facto/FFMRegressor/index.html
@@ -0,0 +1,3709 @@

FFMRegressor

+

Field-aware Factorization Machine for regression.

+

The model equation is defined by:

+
\[\hat{y}(x) = w_{0} + \sum_{j=1}^{p} w_{j} x_{j} + \sum_{j=1}^{p} \sum_{j'=j+1}^{p} \langle \mathbf{v}_{j, f_{j'}}, \mathbf{v}_{j', f_j} \rangle x_{j} x_{j'}\]
+

Where \(\mathbf{v}_{j, f_{j'}}\) is the latent vector corresponding to \(j\) feature for \(f_{j'}\) field, and \(\mathbf{v}_{j', f_j}\) is the latent vector corresponding to \(j'\) feature for \(f_j\) field.

+

For more efficiency, this model automatically one-hot encodes string features, treating them as categorical variables. Field names are inferred from feature names by taking everything before the first underscore: feature_name.split('_')[0].

+
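Concretely, the field of a feature follows directly from its name:

'user_Bob'.split('_')[0]  # the field is 'user'
'item_Harry Potter'.split('_')[0]  # the field is 'item'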

Parameters

+
    +
  • +

    n_factors

    +

    Default10

    +

    Dimensionality of the factorization or number of latent factors.

    +
  • +
  • +

    weight_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the feature weights. Note that the intercept is handled separately.

    +
  • +
  • +

    latent_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the latent factors.

    +
  • +
  • +

    loss

    +

    Typeoptim.losses.RegressionLoss | None

    +

    DefaultNone

    +

    The loss function to optimize for.

    +
  • +
  • +

    sample_normalization

    +

    DefaultFalse

    +

    Whether to divide each element of x by x's L2-norm.

    +
  • +
  • +

    l1_weight

    +

    Default0.0

    +

    Amount of L1 regularization used to push weights towards 0.

    +
  • +
  • +

    l2_weight

    +

    Default0.0

    +

    Amount of L2 regularization used to push weights towards 0.

    +
  • +
  • +

    l1_latent

    +

    Default0.0

    +

    Amount of L1 regularization used to push latent weights towards 0.

    +
  • +
  • +

    l2_latent

    +

    Default0.0

    +

    Amount of L2 regularization used to push latent weights towards 0.

    +
  • +
  • +

    intercept

    +

    Default0.0

    +

    Initial intercept value.

    +
  • +
  • +

    intercept_lr

    +

    Typeoptim.base.Scheduler | float

    +

    Default0.01

    +

    Learning rate scheduler used for updating the intercept. An instance of optim.schedulers.Constant is used if a float is passed. No intercept will be used if this is set to 0.

    +
  • +
  • +

    weight_initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

Weights initialization scheme. Defaults to optim.initializers.Zeros().

    +
  • +
  • +

    latent_initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

Latent factors initialization scheme. Defaults to optim.initializers.Normal(mu=.0, sigma=.1, random_state=self.random_state).

    +
  • +
  • +

    clip_gradient

    +

    Default1000000000000.0

    +

    Clips the absolute value of each gradient value.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Randomization seed used for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    weights

    +

    The current weights assigned to the features.

    +
  • +
  • +

    latents

    +

    The current latent weights assigned to the features.

    +
  • +
+

Examples

+

from river import facto
+
+dataset = (
+    ({'user': 'Alice', 'item': 'Superman', 'time': .12}, 8),
+    ({'user': 'Alice', 'item': 'Terminator', 'time': .13}, 9),
+    ({'user': 'Alice', 'item': 'Star Wars', 'time': .14}, 8),
+    ({'user': 'Alice', 'item': 'Notting Hill', 'time': .15}, 2),
+    ({'user': 'Alice', 'item': 'Harry Potter ', 'time': .16}, 5),
+    ({'user': 'Bob', 'item': 'Superman', 'time': .13}, 8),
+    ({'user': 'Bob', 'item': 'Terminator', 'time': .12}, 9),
+    ({'user': 'Bob', 'item': 'Star Wars', 'time': .16}, 8),
+    ({'user': 'Bob', 'item': 'Notting Hill', 'time': .10}, 2)
+)
+
+model = facto.FFMRegressor(
+    n_factors=10,
+    intercept=5,
+    seed=42,
+)
+
+for x, y in dataset:
+    model = model.learn_one(x, y)
+
+model.predict_one({'user': 'Bob', 'item': 'Harry Potter', 'time': .14})
+
+
5.319945
+

+

report = model.debug_one({'user': 'Bob', 'item': 'Harry Potter', 'time': .14})
+
+print(report)
+
+
Name                                       Value      Weight     Contribution
+                               Intercept    1.00000    5.23501        5.23501
+                                user_Bob    1.00000    0.11438        0.11438
+                                    time    0.14000    0.03186        0.00446
+    item_Harry Potter(time) - time(item)    0.14000    0.03153        0.00441
+             user_Bob(time) - time(user)    0.14000    0.02864        0.00401
+                       item_Harry Potter    1.00000    0.00000        0.00000
+user_Bob(item) - item_Harry Potter(user)    1.00000   -0.04232       -0.04232
+

+

Methods

+
+debug_one +

Debugs the output of the FM regressor.

+

Parameters

+
    +
  • x'dict'
  • +
  • decimals'int' — defaults to 5
  • +
+

Returns

+

str: A table which explains the output.

+
+

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.RegTarget'
  • +
  • sample_weight — defaults to 1.0
  • +
+

Returns

+

Regressor: self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

The prediction.

+
+

\ No newline at end of file
diff --git a/0.19.0/api/facto/FMClassifier/index.html b/0.19.0/api/facto/FMClassifier/index.html
new file mode 100644
index 0000000000..5c89c084b9
--- /dev/null
+++ b/0.19.0/api/facto/FMClassifier/index.html
@@ -0,0 +1,3710 @@

FMClassifier

+

Factorization Machine for binary classification.

+

The model equation is defined as:

+
\[\hat{y}(x) = w_{0} + \sum_{j=1}^{p} w_{j} x_{j} + \sum_{j=1}^{p} \sum_{j'=j+1}^{p} \langle \mathbf{v}_j, \mathbf{v}_{j'} \rangle x_{j} x_{j'}\]
+

Where \(\mathbf{v}_j\) and \(\mathbf{v}_{j'}\) are \(j\) and \(j'\) latent vectors, respectively.

+
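As a toy illustration of the pairwise term with made-up latent values, the contribution of a feature pair is the dot product of their latent vectors, scaled by both feature values:

v_j = [0.1, -0.2]
v_jp = [0.3, 0.05]
x_j, x_jp = 1.0, 2.0

interaction = sum(a * b for a, b in zip(v_j, v_jp)) * x_j * x_jp
# (0.1 * 0.3 + (-0.2) * 0.05) * 1.0 * 2.0 = 0.04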

For more efficiency, this model automatically one-hot encodes string features, treating them as categorical variables.

+

Parameters

+
    +
  • +

    n_factors

    +

    Default10

    +

    Dimensionality of the factorization or number of latent factors.

    +
  • +
  • +

    weight_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the feature weights. Note that the intercept is handled separately.

    +
  • +
  • +

    latent_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the latent factors.

    +
  • +
  • +

    loss

    +

    Typeoptim.losses.BinaryLoss | None

    +

    DefaultNone

    +

    The loss function to optimize for.

    +
  • +
  • +

    sample_normalization

    +

    DefaultFalse

    +

    Whether to divide each element of x by x's L2-norm.

    +
  • +
  • +

    l1_weight

    +

    Default0.0

    +

    Amount of L1 regularization used to push weights towards 0.

    +
  • +
  • +

    l2_weight

    +

    Default0.0

    +

    Amount of L2 regularization used to push weights towards 0.

    +
  • +
  • +

    l1_latent

    +

    Default0.0

    +

    Amount of L1 regularization used to push latent weights towards 0.

    +
  • +
  • +

    l2_latent

    +

    Default0.0

    +

    Amount of L2 regularization used to push latent weights towards 0.

    +
  • +
  • +

    intercept

    +

    Default0.0

    +

    Initial intercept value.

    +
  • +
  • +

    intercept_lr

    +

    Typeoptim.base.Scheduler | float

    +

    Default0.01

    +

    Learning rate scheduler used for updating the intercept. An instance of optim.schedulers.Constant is used if a float is passed. No intercept will be used if this is set to 0.

    +
  • +
  • +

    weight_initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

Weights initialization scheme. Defaults to optim.initializers.Zeros().

    +
  • +
  • +

    latent_initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

Latent factors initialization scheme. Defaults to optim.initializers.Normal(mu=.0, sigma=.1, random_state=self.random_state).

    +
  • +
  • +

    clip_gradient

    +

    Default1000000000000.0

    +

    Clips the absolute value of each gradient value.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Randomization seed used for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    weights

    +

    The current weights assigned to the features.

    +
  • +
  • +

    latents

    +

    The current latent weights assigned to the features.

    +
  • +
+

Examples

+

from river import facto
+
+dataset = (
+    ({'user': 'Alice', 'item': 'Superman'}, True),
+    ({'user': 'Alice', 'item': 'Terminator'}, True),
+    ({'user': 'Alice', 'item': 'Star Wars'}, True),
+    ({'user': 'Alice', 'item': 'Notting Hill'}, False),
+    ({'user': 'Alice', 'item': 'Harry Potter '}, True),
+    ({'user': 'Bob', 'item': 'Superman'}, True),
+    ({'user': 'Bob', 'item': 'Terminator'}, True),
+    ({'user': 'Bob', 'item': 'Star Wars'}, True),
+    ({'user': 'Bob', 'item': 'Notting Hill'}, False)
+)
+
+model = facto.FMClassifier(
+    n_factors=10,
+    seed=42,
+)
+
+for x, y in dataset:
+    _ = model.learn_one(x, y)
+
+model.predict_one({'Bob': 1, 'Harry Potter': 1})
+
+
True
+

+

Methods

+
+debug_one +

Debugs the output of the FM regressor.

+

Parameters

+
    +
  • x'dict'
  • +
  • decimals'int' — defaults to 5
  • +
+

Returns

+

str: A table which explains the output.

+
+

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
  • sample_weight — defaults to 1.0
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

\ No newline at end of file
diff --git a/0.19.0/api/facto/FMRegressor/index.html b/0.19.0/api/facto/FMRegressor/index.html
new file mode 100644
index 0000000000..1cf778906b
--- /dev/null
+++ b/0.19.0/api/facto/FMRegressor/index.html
@@ -0,0 +1,3709 @@

FMRegressor

+

Factorization Machine for regression.

+

The model equation is defined as:

+
\[\hat{y}(x) = w_{0} + \sum_{j=1}^{p} w_{j} x_{j} + \sum_{j=1}^{p} \sum_{j'=j+1}^{p} \langle \mathbf{v}_j, \mathbf{v}_{j'} \rangle x_{j} x_{j'}\]
+

Where \(\mathbf{v}_j\) and \(\mathbf{v}_{j'}\) are \(j\) and \(j'\) latent vectors, respectively.

+

For more efficiency, this model automatically one-hot encodes string features, treating them as categorical variables.

+

Parameters

+
    +
  • +

    n_factors

    +

    Default10

    +

    Dimensionality of the factorization or number of latent factors.

    +
  • +
  • +

    weight_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the feature weights. Note that the intercept is handled separately.

    +
  • +
  • +

    latent_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the latent factors.

    +
  • +
  • +

    loss

    +

    Typeoptim.losses.RegressionLoss | None

    +

    DefaultNone

    +

    The loss function to optimize for.

    +
  • +
  • +

    sample_normalization

    +

    DefaultFalse

    +

    Whether to divide each element of x by x's L2-norm.

    +
  • +
  • +

    l1_weight

    +

    Default0.0

    +

    Amount of L1 regularization used to push weights towards 0.

    +
  • +
  • +

    l2_weight

    +

    Default0.0

    +

    Amount of L2 regularization used to push weights towards 0.

    +
  • +
  • +

    l1_latent

    +

    Default0.0

    +

    Amount of L1 regularization used to push latent weights towards 0.

    +
  • +
  • +

    l2_latent

    +

    Default0.0

    +

    Amount of L2 regularization used to push latent weights towards 0.

    +
  • +
  • +

    intercept

    +

    Default0.0

    +

    Initial intercept value.

    +
  • +
  • +

    intercept_lr

    +

    Typeoptim.base.Scheduler | float

    +

    Default0.01

    +

    Learning rate scheduler used for updating the intercept. An instance of optim.schedulers.Constant is used if a float is passed. No intercept will be used if this is set to 0.

    +
  • +
  • +

    weight_initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

Weights initialization scheme. Defaults to optim.initializers.Zeros().

    +
  • +
  • +

    latent_initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

Latent factors initialization scheme. Defaults to optim.initializers.Normal(mu=.0, sigma=.1, random_state=self.random_state).

    +
  • +
  • +

    clip_gradient

    +

    Default1000000000000.0

    +

    Clips the absolute value of each gradient value.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Randomization seed used for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    weights

    +

    The current weights assigned to the features.

    +
  • +
  • +

    latents

    +

    The current latent weights assigned to the features.

    +
  • +
+

Examples

+

from river import facto
+
+dataset = (
+    ({'user': 'Alice', 'item': 'Superman'}, 8),
+    ({'user': 'Alice', 'item': 'Terminator'}, 9),
+    ({'user': 'Alice', 'item': 'Star Wars'}, 8),
+    ({'user': 'Alice', 'item': 'Notting Hill'}, 2),
+    ({'user': 'Alice', 'item': 'Harry Potter '}, 5),
+    ({'user': 'Bob', 'item': 'Superman'}, 8),
+    ({'user': 'Bob', 'item': 'Terminator'}, 9),
+    ({'user': 'Bob', 'item': 'Star Wars'}, 8),
+    ({'user': 'Bob', 'item': 'Notting Hill'}, 2)
+)
+
+model = facto.FMRegressor(
+    n_factors=10,
+    intercept=5,
+    seed=42,
+)
+
+for x, y in dataset:
+    _ = model.learn_one(x, y)
+
+model.predict_one({'Bob': 1, 'Harry Potter': 1})
+
+
5.236504
+

+

report = model.debug_one({'Bob': 1, 'Harry Potter': 1})
+
+print(report)
+
+
Name                 Value      Weight     Contribution
+         Intercept    1.00000    5.23426        5.23426
+Bob - Harry Potter    1.00000    0.00224        0.00224
+      Harry Potter    1.00000    0.00000        0.00000
+               Bob    1.00000    0.00000        0.00000
+

+

Methods

+
+debug_one +

Debugs the output of the FM regressor.

+

Parameters

+
    +
  • x'dict'
  • +
  • decimals'int' — defaults to 5
  • +
+

Returns

+

str: A table which explains the output.

+
+

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.RegTarget'
  • +
  • sample_weight — defaults to 1.0
  • +
+

Returns

+

Regressor: self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

The prediction.

+
+

\ No newline at end of file
diff --git a/0.19.0/api/facto/FwFMClassifier/index.html b/0.19.0/api/facto/FwFMClassifier/index.html
new file mode 100644
index 0000000000..3ea7753978
--- /dev/null
+++ b/0.19.0/api/facto/FwFMClassifier/index.html
@@ -0,0 +1,3717 @@

FwFMClassifier

+

Field-weighted Factorization Machine for binary classification.

+

The model equation is defined as:

+
\[\hat{y}(x) = w_{0} + \sum_{j=1}^{p} w_{j} x_{j} + \sum_{j=1}^{p} \sum_{j'=j+1}^{p} r_{f_j, f_{j'}} \langle \mathbf{v}_j, \mathbf{v}_{j'} \rangle x_{j} x_{j'}\]
+

Where \(f_j\) and \(f_{j'}\) are \(j\) and \(j'\) fields, respectively, and \(\mathbf{v}_j\) and \(\mathbf{v}_{j'}\) are \(j\) and \(j'\) latent vectors, respectively.

+
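Compared to a vanilla FM, each pairwise term is additionally scaled by a learned strength for the corresponding pair of fields. A toy sketch with made-up values:

r = {('user', 'item'): 0.8}  # learned interaction strength of the (user, item) field pair
fm_pairwise_term = 0.02      # <v_j, v_j'> x_j x_j', as in a vanilla FM

interaction = r['user', 'item'] * fm_pairwise_term  # 0.016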

For more efficiency, this model automatically one-hot encodes string features, treating them as categorical variables. Field names are inferred from feature names by taking everything before the first underscore: feature_name.split('_')[0].

+

Parameters

+
    +
  • +

    n_factors

    +

    Default10

    +

    Dimensionality of the factorization or number of latent factors.

    +
  • +
  • +

    weight_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the feature weights. Note that the intercept is handled separately.

    +
  • +
  • +

    latent_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the latent factors.

    +
  • +
  • +

    int_weight_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the field pairs interaction weights.

    +
  • +
  • +

    loss

    +

    Typeoptim.losses.BinaryLoss | None

    +

    DefaultNone

    +

    The loss function to optimize for.

    +
  • +
  • +

    sample_normalization

    +

    DefaultFalse

    +

    Whether to divide each element of x by x's L2-norm.

    +
  • +
  • +

    l1_weight

    +

    Default0.0

    +

    Amount of L1 regularization used to push weights towards 0.

    +
  • +
  • +

    l2_weight

    +

    Default0.0

    +

    Amount of L2 regularization used to push weights towards 0.

    +
  • +
  • +

    l1_latent

    +

    Default0.0

    +

    Amount of L1 regularization used to push latent weights towards 0.

    +
  • +
  • +

    l2_latent

    +

    Default0.0

    +

    Amount of L2 regularization used to push latent weights towards 0.

    +
  • +
  • +

    intercept

    +

    Default0.0

    +

    Initial intercept value.

    +
  • +
  • +

    intercept_lr

    +

    Typeoptim.base.Scheduler | float

    +

    Default0.01

    +

    Learning rate scheduler used for updating the intercept. An instance of optim.schedulers.Constant is used if a float is passed. No intercept will be used if this is set to 0.

    +
  • +
  • +

    weight_initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

Weights initialization scheme. Defaults to optim.initializers.Zeros().

    +
  • +
  • +

    latent_initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

Latent factors initialization scheme. Defaults to optim.initializers.Normal(mu=.0, sigma=.1, random_state=self.random_state).

    +
  • +
  • +

    clip_gradient

    +

    Default1000000000000.0

    +

    Clips the absolute value of each gradient value.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Randomization seed used for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    weights

    +

    The current weights assigned to the features.

    +
  • +
  • +

    latents

    +

    The current latent weights assigned to the features.

    +
  • +
  • +

    interaction_weights

    +

    The current interaction strengths of field pairs.

    +
  • +
+

Examples

+

from river import facto
+
+dataset = (
+    ({'user': 'Alice', 'item': 'Superman'}, True),
+    ({'user': 'Alice', 'item': 'Terminator'}, True),
+    ({'user': 'Alice', 'item': 'Star Wars'}, True),
+    ({'user': 'Alice', 'item': 'Notting Hill'}, False),
+    ({'user': 'Alice', 'item': 'Harry Potter '}, True),
+    ({'user': 'Bob', 'item': 'Superman'}, True),
+    ({'user': 'Bob', 'item': 'Terminator'}, True),
+    ({'user': 'Bob', 'item': 'Star Wars'}, True),
+    ({'user': 'Bob', 'item': 'Notting Hill'}, False)
+)
+
+model = facto.FwFMClassifier(
+    n_factors=10,
+    seed=42,
+)
+
+for x, y in dataset:
+    model = model.learn_one(x, y)
+
+model.predict_one({'Bob': 1, 'Harry Potter': 1})
+
+
True
+

+

Methods

+
+debug_one +

Debugs the output of the FM regressor.

+

Parameters

+
    +
  • x'dict'
  • +
  • decimals'int' — defaults to 5
  • +
+

Returns

+

str: A table which explains the output.

+
+

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
  • sample_weight — defaults to 1.0
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

\ No newline at end of file
diff --git a/0.19.0/api/facto/FwFMRegressor/index.html b/0.19.0/api/facto/FwFMRegressor/index.html
new file mode 100644
index 0000000000..46cc70aae5
--- /dev/null
+++ b/0.19.0/api/facto/FwFMRegressor/index.html
@@ -0,0 +1,3716 @@

FwFMRegressor

+

Field-weighted Factorization Machine for regression.

+

The model equation is defined as:

+
\[\hat{y}(x) = w_{0} + \sum_{j=1}^{p} w_{j} x_{j} + \sum_{j=1}^{p} \sum_{j'=j+1}^{p} r_{f_j, f_{j'}} \langle \mathbf{v}_j, \mathbf{v}_{j'} \rangle x_{j} x_{j'}\]
+

Where \(f_j\) and \(f_{j'}\) are \(j\) and \(j'\) fields, respectively, and \(\mathbf{v}_j\) and \(\mathbf{v}_{j'}\) are \(j\) and \(j'\) latent vectors, respectively.

+

For more efficiency, this model automatically one-hot encodes string features, treating them as categorical variables. Field names are inferred from feature names by taking everything before the first underscore: feature_name.split('_')[0].

+

Parameters

+
    +
  • +

    n_factors

    +

    Default10

    +

    Dimensionality of the factorization or number of latent factors.

    +
  • +
  • +

    weight_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the feature weights. Note that the intercept is handled separately.

    +
  • +
  • +

    latent_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the latent factors.

    +
  • +
  • +

    int_weight_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the field pairs interaction weights.

    +
  • +
  • +

    loss

    +

    Typeoptim.losses.RegressionLoss | None

    +

    DefaultNone

    +

    The loss function to optimize for.

    +
  • +
  • +

    sample_normalization

    +

    DefaultFalse

    +

    Whether to divide each element of x by x's L2-norm.

    +
  • +
  • +

    l1_weight

    +

    Default0.0

    +

    Amount of L1 regularization used to push weights towards 0.

    +
  • +
  • +

    l2_weight

    +

    Default0.0

    +

    Amount of L2 regularization used to push weights towards 0.

    +
  • +
  • +

    l1_latent

    +

    Default0.0

    +

    Amount of L1 regularization used to push latent weights towards 0.

    +
  • +
  • +

    l2_latent

    +

    Default0.0

    +

    Amount of L2 regularization used to push latent weights towards 0.

    +
  • +
  • +

    intercept

    +

    Default0.0

    +

    Initial intercept value.

    +
  • +
  • +

    intercept_lr

    +

    Typeoptim.base.Scheduler | float

    +

    Default0.01

    +

    Learning rate scheduler used for updating the intercept. An instance of optim.schedulers.Constant is used if a float is passed. No intercept will be used if this is set to 0.

    +
  • +
  • +

    weight_initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

Weights initialization scheme. Defaults to optim.initializers.Zeros().

    +
  • +
  • +

    latent_initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

Latent factors initialization scheme. Defaults to optim.initializers.Normal(mu=.0, sigma=.1, random_state=self.random_state).

    +
  • +
  • +

    clip_gradient

    +

    Default1000000000000.0

    +

    Clips the absolute value of each gradient value.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Randomization seed used for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    weights

    +

    The current weights assigned to the features.

    +
  • +
  • +

    latents

    +

    The current latent weights assigned to the features.

    +
  • +
  • +

    interaction_weights

    +

    The current interaction strengths of field pairs.

    +
  • +
+

Examples

+

from river import facto
+
+dataset = (
+    ({'user': 'Alice', 'item': 'Superman'}, 8),
+    ({'user': 'Alice', 'item': 'Terminator'}, 9),
+    ({'user': 'Alice', 'item': 'Star Wars'}, 8),
+    ({'user': 'Alice', 'item': 'Notting Hill'}, 2),
+    ({'user': 'Alice', 'item': 'Harry Potter '}, 5),
+    ({'user': 'Bob', 'item': 'Superman'}, 8),
+    ({'user': 'Bob', 'item': 'Terminator'}, 9),
+    ({'user': 'Bob', 'item': 'Star Wars'}, 8),
+    ({'user': 'Bob', 'item': 'Notting Hill'}, 2)
+)
+
+model = facto.FwFMRegressor(
+    n_factors=10,
+    intercept=5,
+    seed=42,
+)
+
+for x, y in dataset:
+    model = model.learn_one(x, y)
+
+model.predict_one({'Bob': 1, 'Harry Potter': 1})
+
+
5.236501
+

+

report = model.debug_one({'Bob': 1, 'Harry Potter': 1})
+
+print(report)
+
+
Name                                    Value      Weight     Contribution
+                            Intercept    1.00000    5.23426        5.23426
+Bob(Harry Potter) - Harry Potter(Bob)    1.00000    0.00224        0.00224
+                         Harry Potter    1.00000    0.00000        0.00000
+                                  Bob    1.00000    0.00000        0.00000
+

+

Methods

+
+debug_one +

Debugs the output of the FM regressor.

+

Parameters

+
    +
  • x'dict'
  • +
  • decimals'int' — defaults to 5
  • +
+

Returns

+

str: A table which explains the output.

+
+

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.RegTarget'
  • +
  • sample_weight — defaults to 1.0
  • +
+

Returns

+

Regressor: self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

The prediction.

+
+

\ No newline at end of file
diff --git a/0.19.0/api/facto/HOFMClassifier/index.html b/0.19.0/api/facto/HOFMClassifier/index.html
new file mode 100644
index 0000000000..081b610162
--- /dev/null
+++ b/0.19.0/api/facto/HOFMClassifier/index.html
@@ -0,0 +1,3713 @@

HOFMClassifier

+

Higher-Order Factorization Machine for binary classification.

+

The model equation is defined as:

+
\[\hat{y}(x) = w_{0} + \sum_{j=1}^{p} w_{j} x_{j} + \sum_{l=2}^{d} \sum_{j_1=1}^{p} \cdots \sum_{j_l=j_{l-1}+1}^{p} \left(\prod_{j'=1}^{l} x_{j_{j'}} \right) \left(\sum_{f=1}^{k_l} \prod_{j'=1}^{l} v_{j_{j'}, f}^{(l)} \right)\]
+
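With degree=3, for instance, the equation also sums over feature triplets: each triplet contributes the product of the three feature values times a factor-wise product of their order-3 latent vectors. A toy computation with made-up values and two latent factors:

v_a = [0.1, 0.2]
v_b = [0.3, -0.1]
v_c = [0.5, 0.4]
x_a = x_b = x_c = 1.0

term = sum(fa * fb * fc for fa, fb, fc in zip(v_a, v_b, v_c)) * x_a * x_b * x_c
# 0.1 * 0.3 * 0.5 + 0.2 * (-0.1) * 0.4 = 0.007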

For more efficiency, this model automatically one-hot encodes string features, treating them as categorical variables.

+

Parameters

+
    +
  • +

    degree

    +

    Default3

    +

    Polynomial degree or model order.

    +
  • +
  • +

    n_factors

    +

    Default10

    +

    Dimensionality of the factorization or number of latent factors.

    +
  • +
  • +

    weight_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the feature weights. Note that the intercept is handled separately.

    +
  • +
  • +

    latent_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the latent factors.

    +
  • +
  • +

    loss

    +

    Typeoptim.losses.BinaryLoss | None

    +

    DefaultNone

    +

    The loss function to optimize for.

    +
  • +
  • +

    sample_normalization

    +

    DefaultFalse

    +

    Whether to divide each element of x by x's L2-norm.

    +
  • +
  • +

    l1_weight

    +

    Default0.0

    +

    Amount of L1 regularization used to push weights towards 0.

    +
  • +
  • +

    l2_weight

    +

    Default0.0

    +

    Amount of L2 regularization used to push weights towards 0.

    +
  • +
  • +

    l1_latent

    +

    Default0.0

    +

    Amount of L1 regularization used to push latent weights towards 0.

    +
  • +
  • +

    l2_latent

    +

    Default0.0

    +

    Amount of L2 regularization used to push latent weights towards 0.

    +
  • +
  • +

    intercept

    +

    Default0.0

    +

    Initial intercept value.

    +
  • +
  • +

    intercept_lr

    +

    Typeoptim.base.Scheduler | float

    +

    Default0.01

    +

    Learning rate scheduler used for updating the intercept. An instance of optim.schedulers.Constant is used if a float is passed. No intercept will be used if this is set to 0.

    +
  • +
  • +

    weight_initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

Weights initialization scheme. Defaults to optim.initializers.Zeros().

    +
  • +
  • +

    latent_initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

Latent factors initialization scheme. Defaults to optim.initializers.Normal(mu=.0, sigma=.1, random_state=self.random_state).

    +
  • +
  • +

    clip_gradient

    +

    Default1000000000000.0

    +

    Clips the absolute value of each gradient value.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Randomization seed used for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    weights

    +

    The current weights assigned to the features.

    +
  • +
  • +

    latents

    +

    The current latent weights assigned to the features.

    +
  • +
+

Examples

+

from river import facto
+
+dataset = (
+    ({'user': 'Alice', 'item': 'Superman', 'time': .12}, True),
+    ({'user': 'Alice', 'item': 'Terminator', 'time': .13}, True),
+    ({'user': 'Alice', 'item': 'Star Wars', 'time': .14}, True),
+    ({'user': 'Alice', 'item': 'Notting Hill', 'time': .15}, False),
+    ({'user': 'Alice', 'item': 'Harry Potter ', 'time': .16}, True),
+    ({'user': 'Bob', 'item': 'Superman', 'time': .13}, True),
+    ({'user': 'Bob', 'item': 'Terminator', 'time': .12}, True),
+    ({'user': 'Bob', 'item': 'Star Wars', 'time': .16}, True),
+    ({'user': 'Bob', 'item': 'Notting Hill', 'time': .10}, False)
+)
+
+model = facto.HOFMClassifier(
+    degree=3,
+    n_factors=10,
+    intercept=.5,
+    seed=42,
+)
+
+for x, y in dataset:
+    _ = model.learn_one(x, y)
+
+model.predict_one({'user': 'Bob', 'item': 'Harry Potter', 'time': .14})
+
+
True
+

+

Methods

+
+debug_one +

Debugs the output of the FM regressor.

+

Parameters

+
    +
  • x'dict'
  • +
  • decimals'int' — defaults to 5
  • +
+

Returns

+

str: A table which explains the output.

+
+

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
  • sample_weight — defaults to 1.0
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

\ No newline at end of file
diff --git a/0.19.0/api/facto/HOFMRegressor/index.html b/0.19.0/api/facto/HOFMRegressor/index.html
new file mode 100644
index 0000000000..d04c17655e
--- /dev/null
+++ b/0.19.0/api/facto/HOFMRegressor/index.html
@@ -0,0 +1,3715 @@

HOFMRegressor

+

Higher-Order Factorization Machine for regression.

+

The model equation is defined as:

+
\[\hat{y}(x) = w_{0} + \sum_{j=1}^{p} w_{j} x_{j} + \sum_{l=2}^{d} \sum_{j_1=1}^{p} \cdots \sum_{j_l=j_{l-1}+1}^{p} \left(\prod_{j'=1}^{l} x_{j_{j'}} \right) \left(\sum_{f=1}^{k_l} \prod_{j'=1}^{l} v_{j_{j'}, f}^{(l)} \right)\]
+

For more efficiency, this model automatically one-hot encodes string features, treating them as categorical variables.

+

Parameters

+
    +
  • +

    degree

    +

    Default3

    +

    Polynomial degree or model order.

    +
  • +
  • +

    n_factors

    +

    Default10

    +

    Dimensionality of the factorization or number of latent factors.

    +
  • +
  • +

    weight_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

The sequential optimizer used for updating the feature weights. Note that the intercept is handled separately.

    +
  • +
  • +

    latent_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the latent factors.

    +
  • +
  • +

    loss

    +

    Typeoptim.losses.RegressionLoss | None

    +

    DefaultNone

    +

    The loss function to optimize for.

    +
  • +
  • +

    sample_normalization

    +

    DefaultFalse

    +

    Whether to divide each element of x by x's L2-norm.

    +
  • +
  • +

    l1_weight

    +

    Default0.0

    +

    Amount of L1 regularization used to push weights towards 0.

    +
  • +
  • +

    l2_weight

    +

    Default0.0

    +

    Amount of L2 regularization used to push weights towards 0.

    +
  • +
  • +

    l1_latent

    +

    Default0.0

    +

    Amount of L1 regularization used to push latent weights towards 0.

    +
  • +
  • +

    l2_latent

    +

    Default0.0

    +

    Amount of L2 regularization used to push latent weights towards 0.

    +
  • +
  • +

    intercept

    +

    Default0.0

    +

    Initial intercept value.

    +
  • +
  • +

    intercept_lr

    +

    Typeoptim.base.Scheduler | float

    +

    Default0.01

    +

    Learning rate scheduler used for updating the intercept. An instance of optim.schedulers.Constant is used if a float is passed. No intercept will be used if this is set to 0.

    +
  • +
  • +

    weight_initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

Weights initialization scheme. Defaults to optim.initializers.Zeros().

    +
  • +
  • +

    latent_initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

Latent factors initialization scheme. Defaults to optim.initializers.Normal(mu=.0, sigma=.1, random_state=self.random_state).

    +
  • +
  • +

    clip_gradient

    +

    Default1000000000000.0

    +

    Clips the absolute value of each gradient value.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Randomization seed used for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    weights

    +

    The current weights assigned to the features.

    +
  • +
  • +

    latents

    +

    The current latent weights assigned to the features.

    +
  • +
+

Examples

+

from river import facto
+
+dataset = (
+    ({'user': 'Alice', 'item': 'Superman', 'time': .12}, 8),
+    ({'user': 'Alice', 'item': 'Terminator', 'time': .13}, 9),
+    ({'user': 'Alice', 'item': 'Star Wars', 'time': .14}, 8),
+    ({'user': 'Alice', 'item': 'Notting Hill', 'time': .15}, 2),
+    ({'user': 'Alice', 'item': 'Harry Potter ', 'time': .16}, 5),
+    ({'user': 'Bob', 'item': 'Superman', 'time': .13}, 8),
+    ({'user': 'Bob', 'item': 'Terminator', 'time': .12}, 9),
+    ({'user': 'Bob', 'item': 'Star Wars', 'time': .16}, 8),
+    ({'user': 'Bob', 'item': 'Notting Hill', 'time': .10}, 2)
+)
+
+model = facto.HOFMRegressor(
+    degree=3,
+    n_factors=10,
+    intercept=5,
+    seed=42,
+)
+
+for x, y in dataset:
+    _ = model.learn_one(x, y)
+
+model.predict_one({'user': 'Bob', 'item': 'Harry Potter', 'time': .14})
+
+
5.311745
+

+

report = model.debug_one({'user': 'Bob', 'item': 'Harry Potter', 'time': .14})
+
+print(report)
+
+
Name                                  Value      Weight     Contribution
+                          Intercept    1.00000    5.23495        5.23495
+                           user_Bob    1.00000    0.11436        0.11436
+                               time    0.14000    0.03185        0.00446
+                    user_Bob - time    0.14000    0.00884        0.00124
+user_Bob - item_Harry Potter - time    0.14000    0.00117        0.00016
+                  item_Harry Potter    1.00000    0.00000        0.00000
+           item_Harry Potter - time    0.14000   -0.00695       -0.00097
+       user_Bob - item_Harry Potter    1.00000   -0.04246       -0.04246
+
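As a quick sanity check, the contributions in the debug table should add up, up to rounding, to the prediction returned by predict_one above:

contributions = [
    5.23495, 0.11436, 0.00446, 0.00124,
    0.00016, 0.00000, -0.00097, -0.04246,
]
round(sum(contributions), 5)  # 5.31174, i.e. the 5.311745 prediction up to rounding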

+

Methods

+
+debug_one +

Debugs the output of the FM regressor.

+

Parameters

+
    +
  • x'dict'
  • +
  • decimals'int' — defaults to 5
  • +
+

Returns

+

str: A table which explains the output.

+
+

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.RegTarget'
  • +
  • sample_weight — defaults to 1.0
  • +
+

Returns

+

Regressor: self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

The prediction.

+
+

diff --git a/0.19.0/api/feature-extraction/Agg/index.html b/0.19.0/api/feature-extraction/Agg/index.html

Agg

+

Computes a streaming aggregate.

+

This transformer allows computing an aggregate statistic, very much like the groupby method from pandas, but on a streaming dataset. This makes use of the streaming statistics from the stats module.

+

When learn_one is called, the running statistic how of the group defined by by is updated with the value of on. Meanwhile, the output of transform_one is a single-element dictionary, where the key is the name of the aggregate and the value is the current value of the statistic for the relevant group. The key is automatically inferred from the parameters.

+

Note that you can use a compose.TransformerUnion to extract many aggregate statistics in a concise manner.

+

Parameters

+
    +
  • +

    on

    +

    Typestr

    +

    The feature on which to compute the aggregate statistic.

    +
  • +
  • +

    by

    +

    Typestr | list[str] | None

    +

    The feature by which to group the data. All the data is included in the aggregate if this is None.

    +
  • +
  • +

    how

    +

    Typestats.base.Univariate | utils.Rolling | utils.TimeRolling

    +

    The statistic to compute.

    +
  • +
+

Attributes

+
    +
  • +

    state

    +

    Return the current values for each group as a series.

    +
  • +
+

Examples

+

Consider the following dataset:

+
X = [
+    {'country': 'France', 'place': 'Taco Bell', 'revenue': 42},
+    {'country': 'Sweden', 'place': 'Burger King', 'revenue': 16},
+    {'country': 'France', 'place': 'Burger King', 'revenue': 24},
+    {'country': 'Sweden', 'place': 'Taco Bell', 'revenue': 58},
+    {'country': 'Sweden', 'place': 'Burger King', 'revenue': 20},
+    {'country': 'France', 'place': 'Taco Bell', 'revenue': 50},
+    {'country': 'France', 'place': 'Burger King', 'revenue': 10},
+    {'country': 'Sweden', 'place': 'Taco Bell', 'revenue': 80}
+]
+
+

As an example, we can calculate the average (how) revenue (on) for each place (by):

+

from river import feature_extraction as fx
+from river import stats
+
+agg = fx.Agg(
+    on='revenue',
+    by='place',
+    how=stats.Mean()
+)
+
+for x in X:
+    agg = agg.learn_one(x)
+    print(agg.transform_one(x))
+
+
{'revenue_mean_by_place': 42.0}
+{'revenue_mean_by_place': 16.0}
+{'revenue_mean_by_place': 20.0}
+{'revenue_mean_by_place': 50.0}
+{'revenue_mean_by_place': 20.0}
+{'revenue_mean_by_place': 50.0}
+{'revenue_mean_by_place': 17.5}
+{'revenue_mean_by_place': 57.5}
+

+

You can compute an aggregate over multiple keys by passing a list to the by argument. For instance, we can compute the maximum (how) revenue (on) per place as well as per country (by):

+

agg = fx.Agg(
+    on='revenue',
+    by=['place', 'country'],
+    how=stats.Max()
+)
+
+for x in X:
+    agg = agg.learn_one(x)
+    print(agg.transform_one(x))
+
+
{'revenue_max_by_place_and_country': 42}
+{'revenue_max_by_place_and_country': 16}
+{'revenue_max_by_place_and_country': 24}
+{'revenue_max_by_place_and_country': 58}
+{'revenue_max_by_place_and_country': 20}
+{'revenue_max_by_place_and_country': 50}
+{'revenue_max_by_place_and_country': 24}
+{'revenue_max_by_place_and_country': 80}
+

+

You can use a compose.TransformerUnion in order to calculate multiple aggregates in one +go. The latter can be constructed by using the + operator:

+

agg = (
+    fx.Agg(on='revenue', by='place', how=stats.Mean()) +
+    fx.Agg(on='revenue', by=['place', 'country'], how=stats.Max())
+)
+
+import pprint
+for x in X:
+    agg = agg.learn_one(x)
+    pprint.pprint(agg.transform_one(x))
+
+
{'revenue_max_by_place_and_country': 42, 'revenue_mean_by_place': 42.0}
+{'revenue_max_by_place_and_country': 16, 'revenue_mean_by_place': 16.0}
+{'revenue_max_by_place_and_country': 24, 'revenue_mean_by_place': 20.0}
+{'revenue_max_by_place_and_country': 58, 'revenue_mean_by_place': 50.0}
+{'revenue_max_by_place_and_country': 20, 'revenue_mean_by_place': 20.0}
+{'revenue_max_by_place_and_country': 50, 'revenue_mean_by_place': 50.0}
+{'revenue_max_by_place_and_country': 24, 'revenue_mean_by_place': 17.5}
+{'revenue_max_by_place_and_country': 80, 'revenue_mean_by_place': 57.5}
+

+

The state property returns a pandas.Series, which can be useful for visualizing the +current state.

+

agg[0].state
+
+
Taco Bell      57.5
+Burger King    17.5
+Name: revenue_mean_by_place, dtype: float64
+

+

agg[1].state
+
+
place        country
+Taco Bell    France     50
+Burger King  Sweden     20
+             France     24
+Taco Bell    Sweden     80
+Name: revenue_max_by_place_and_country, dtype: int64
+

+

This transformer can also be used in conjunction with utils.TimeRolling. The latter requires a t argument, which is a timestamp that indicates when the current row was observed. For instance, we can calculate the average (how) value (on) for each group (by) over the last 7 days (t):

+

import datetime as dt
+import random
+import string
+from river import utils
+
+agg = fx.Agg(
+    on="value",
+    by="group",
+    how=utils.TimeRolling(stats.Mean(), dt.timedelta(days=7))
+)
+
+for day in range(366):
+    g = random.choice(string.ascii_lowercase)
+    x = {
+        "group": g,
+        "value": string.ascii_lowercase.index(g) + random.random(),
+    }
+    t = dt.datetime(2023, 1, 1) + dt.timedelta(days=day)
+    agg = agg.learn_one(x, t=t)
+
+len(agg.state)
+
+
26
+

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do need to do something during learn_one can, however, override this method.

+

Parameters

+
    +
  • x'dict'
  • +
  • t — defaults to None
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/feature-extraction/BagOfWords/index.html b/0.19.0/api/feature-extraction/BagOfWords/index.html

BagOfWords

+

Counts tokens in sentences.

+

This transformer can be used to count tokens in a given piece of text. It takes care of normalizing the text before tokenizing it. In mini-batch settings, this transformer allows converting a pandas Series of text into a sparse dataframe.

+

Note that the parameters are identical to those of feature_extraction.TFIDF.

+

Parameters

+
    +
  • +

    on

    +

    Typestr | None

    +

    DefaultNone

    +

    The name of the feature that contains the text to vectorize. If None, then each learn_one and transform_one will assume that each x that is provided is a str, and not a dict.

    +
  • +
  • +

    strip_accents

    +

    DefaultTrue

    +

    Whether or not to strip accent characters.

    +
  • +
  • +

    lowercase

    +

    DefaultTrue

    +

    Whether or not to convert all characters to lowercase.

    +
  • +
  • +

    preprocessor

    +

    Typetyping.Callable | None

    +

    DefaultNone

    +

    An optional preprocessing function which overrides the strip_accents and lowercase steps, while preserving the tokenizing and n-grams generation steps.

    +
  • +
  • +

    stop_words

    +

    Typeset[str] | None

    +

    DefaultNone

    +

    An optional set of tokens to remove.

    +
  • +
  • +

    tokenizer_pattern

    +

    Default(?u)\b\w[\w\-]+\b

    +

    The tokenization pattern which is used when no tokenizer function is passed. A single capture group may optionally be specified.

    +
  • +
  • +

    tokenizer

    +

    Typetyping.Callable | None

    +

    DefaultNone

    +

    A function used to convert preprocessed text into a dict of tokens. By default, a regex formula that works well in most cases is used.

    +
  • +
  • +

    ngram_range

    +

    Default(1, 1)

    +

    The lower and upper boundary of the range n-grams to be extracted. All values of n such that min_n <= n <= max_n will be used. For example an ngram_range of (1, 1) means only unigrams, (1, 2) means unigrams and bigrams, and (2, 2) means only bigrams.

    +
  • +
+

Examples

+

By default, BagOfWords will take as input a sentence, preprocess it, tokenize the +preprocessed text, and then return a collections.Counter containing the number of +occurrences of each token.

+

from river import feature_extraction as fx
+
+corpus = [
+    'This is the first document.',
+    'This document is the second document.',
+    'And this is the third one.',
+    'Is this the first document?',
+]
+
+bow = fx.BagOfWords()
+
+for sentence in corpus:
+    print(bow.transform_one(sentence))
+
+
{'this': 1, 'is': 1, 'the': 1, 'first': 1, 'document': 1}
+{'this': 1, 'document': 2, 'is': 1, 'the': 1, 'second': 1}
+{'and': 1, 'this': 1, 'is': 1, 'the': 1, 'third': 1, 'one': 1}
+{'is': 1, 'this': 1, 'the': 1, 'first': 1, 'document': 1}
+

+

Note that learn_one does not have to be called because BagOfWords is stateless. You can +call it but it won't do anything.

+

In the above example, a string is passed to transform_one. You can also indicate which +field to access if the string is stored in a dictionary:

+

bow = fx.BagOfWords(on='sentence')
+
+for sentence in corpus:
+    x = {'sentence': sentence}
+    print(bow.transform_one(x))
+
+
{'this': 1, 'is': 1, 'the': 1, 'first': 1, 'document': 1}
+{'this': 1, 'document': 2, 'is': 1, 'the': 1, 'second': 1}
+{'and': 1, 'this': 1, 'is': 1, 'the': 1, 'third': 1, 'one': 1}
+{'is': 1, 'this': 1, 'the': 1, 'first': 1, 'document': 1}
+

+

The ngram_range parameter can be used to extract n-grams (including unigrams):

+

ngrammer = fx.BagOfWords(ngram_range=(1, 2))
+
+ngrams = ngrammer.transform_one('I love the smell of napalm in the morning')
+for ngram, count in ngrams.items():
+    print(ngram, count)
+
+
love 1
+the 2
+smell 1
+of 1
+napalm 1
+in 1
+morning 1
+('love', 'the') 1
+('the', 'smell') 1
+('smell', 'of') 1
+('of', 'napalm') 1
+('napalm', 'in') 1
+('in', 'the') 1
+('the', 'morning') 1
+
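The stop_words parameter can be used to drop unwanted tokens before counting. A small sketch, using an arbitrary stop-word set (note that single-character tokens such as 'I' are already dropped by the default tokenizer_pattern):

bow = fx.BagOfWords(stop_words={'the', 'of', 'in'})
bow.transform_one('I love the smell of napalm in the morning')

This should leave only the content words, i.e. something like {'love': 1, 'smell': 1, 'napalm': 1, 'morning': 1}.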

+

BagOfWords allows building a term-frequency pandas sparse dataframe with the transform_many method.

+

import pandas as pd
+X = pd.Series(['Hello world', 'Hello River'], index = ['river', 'rocks'])
+bow = fx.BagOfWords()
+bow.transform_many(X=X)
+
+
       hello  world  river
+river      1      1      0
+rocks      1      0      1
+

+

Methods

+
+learn_many +
+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do need to do something during learn_one can, however, override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+process_text +
+
+transform_many +

Transform a pandas Series of strings into a term-frequency pandas sparse dataframe.

+

Parameters

+
    +
  • X'pd.Series'
  • +
+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/feature-extraction/PolynomialExtender/index.html b/0.19.0/api/feature-extraction/PolynomialExtender/index.html

PolynomialExtender

+

Polynomial feature extender.

+

Generate features consisting of all polynomial combinations of the features with degree less than or equal to the specified degree.

+

Be aware that the number of output features scales polynomially in the number of input features and exponentially in the degree. High degrees can cause overfitting.
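To get a feel for this growth, the number of generated terms (with the default interaction_only=False) can be estimated with the standard count of monomials of degree at most \(d\) in \(n\) variables, \(\binom{n+d}{d}\), minus one when the bias feature is excluded. A small helper based on that assumption:

from math import comb

def n_poly_features(n_features, degree, include_bias=False):
    # Monomials of degree <= degree in n_features variables: C(n + d, d),
    # minus 1 when the constant (bias) feature is excluded.
    total = comb(n_features + degree, degree)
    return total if include_bias else total - 1

n_poly_features(2, 2, include_bias=True)  # 6, matching the first example below
n_poly_features(10, 3)  # 285 features generated from only 10 inputs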

+

Parameters

+
    +
  • +

    degree

    +

    Default2

    +

    The maximum degree of the polynomial features.

    +
  • +
  • +

    interaction_only

    +

    DefaultFalse

    +

    If True then only combinations that include an element at most once will be computed.

    +
  • +
  • +

    include_bias

    +

    DefaultFalse

    +

    Whether or not to include a dummy feature which is always equal to 1.

    +
  • +
  • +

    bias_name

    +

    Defaultbias

    +

    Name to give to the bias feature.

    +
  • +
+

Examples

+

from river import feature_extraction as fx
+
+X = [
+    {'x': 0, 'y': 1},
+    {'x': 2, 'y': 3},
+    {'x': 4, 'y': 5}
+]
+
+poly = fx.PolynomialExtender(degree=2, include_bias=True)
+for x in X:
+    print(poly.transform_one(x))
+
+
{'x': 0, 'y': 1, 'x*x': 0, 'x*y': 0, 'y*y': 1, 'bias': 1}
+{'x': 2, 'y': 3, 'x*x': 4, 'x*y': 6, 'y*y': 9, 'bias': 1}
+{'x': 4, 'y': 5, 'x*x': 16, 'x*y': 20, 'y*y': 25, 'bias': 1}
+

+

X = [
+    {'x': 0, 'y': 1, 'z': 2},
+    {'x': 2, 'y': 3, 'z': 2},
+    {'x': 4, 'y': 5, 'z': 2}
+]
+
+poly = fx.PolynomialExtender(degree=3, interaction_only=True)
+for x in X:
+    print(poly.transform_one(x))
+
+
{'x': 0, 'y': 1, 'z': 2, 'x*y': 0, 'x*z': 0, 'y*z': 2, 'x*y*z': 0}
+{'x': 2, 'y': 3, 'z': 2, 'x*y': 6, 'x*z': 4, 'y*z': 6, 'x*y*z': 12}
+{'x': 4, 'y': 5, 'z': 2, 'x*y': 20, 'x*z': 8, 'y*z': 10, 'x*y*z': 40}
+

+

Polynomial features are typically used for a linear model to capture interactions between features. This may be done by setting up a pipeline, like so:

+

from river import datasets
+from river import evaluate
+from river import linear_model as lm
+from river import metrics
+from river import preprocessing as pp
+
+dataset = datasets.Phishing()
+
+model = (
+    fx.PolynomialExtender() |
+    pp.StandardScaler() |
+    lm.LogisticRegression()
+)
+
+metric = metrics.Accuracy()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
Accuracy: 88.88%
+

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do need to do something during learn_one can, however, override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/feature-extraction/RBFSampler/index.html b/0.19.0/api/feature-extraction/RBFSampler/index.html

RBFSampler

+

Extracts random features which approximate an RBF kernel.

+

This is a powerful way to give non-linear capacity to linear classifiers. This method is also called "random Fourier features" in the literature.
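The idea, sketched below with plain numpy rather than River's internals, is to draw random projection weights from a Gaussian whose scale depends on gamma, so that the dot product of two transformed samples approximates the RBF kernel \(\exp(-\gamma \lVert x - y \rVert^2)\):

import numpy as np

# Illustrative sketch of random Fourier features, not River's exact implementation.
rng = np.random.default_rng(42)
gamma, n_components, n_features = 1.0, 100, 2

W = rng.normal(scale=np.sqrt(2 * gamma), size=(n_components, n_features))
b = rng.uniform(0, 2 * np.pi, size=n_components)

def rff(x):
    return np.sqrt(2 / n_components) * np.cos(W @ x + b)

x, y = np.array([0.1, 0.2]), np.array([0.3, 0.0])
print(rff(x) @ rff(y))  # approximation of the kernel value
print(np.exp(-gamma * np.sum((x - y) ** 2)))  # exact kernel value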

+

Parameters

+
    +
  • +

    gamma

    +

    Default1.0

    +

    RBF kernel parameter in exp(-gamma * x^2).

    +
  • +
  • +

    n_components

    +

    Default100

    +

    Number of samples per original feature. Equals the dimensionality of the computed feature space.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number seed.

    +
  • +
+

Examples

+

from river import feature_extraction as fx
+from river import linear_model as lm
+from river import optim
+from river import stream
+
+X = [[0, 0], [1, 1], [1, 0], [0, 1]]
+Y = [0, 0, 1, 1]
+
+model = lm.LogisticRegression(optimizer=optim.SGD(.1))
+
+for x, y in stream.iter_array(X, Y):
+    model = model.learn_one(x, y)
+    y_pred = model.predict_one(x)
+    print(y, int(y_pred))
+
+
0 0
+0 0
+1 0
+1 1
+

+

model = (
+    fx.RBFSampler(seed=3) |
+    lm.LogisticRegression(optimizer=optim.SGD(.1))
+)
+
+for x, y in stream.iter_array(X, Y):
+    model = model.learn_one(x, y)
+    y_pred = model.predict_one(x)
+    print(y, int(y_pred))
+
+
0 0
+0 0
+1 1
+1 1
+

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do need to do something during learn_one can, however, override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • y — defaults to None
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/feature-extraction/TFIDF/index.html b/0.19.0/api/feature-extraction/TFIDF/index.html

TFIDF

+

Computes TF-IDF values from sentences.

+

The TF-IDF formula is the same one as scikit-learn. The only difference is the fact that the document frequencies are determined online, whereas in a batch setting they can be determined by performing an initial pass through the data.

+

Note that the parameters are identical to those of feature_extraction.BagOfWords.

+

Parameters

+
    +
  • +

    normalize

    +

    DefaultTrue

    +

    Whether or not to normalize the TF-IDF values by their L2 norm.

    +
  • +
  • +

    on

    +

    Typestr | None

    +

    DefaultNone

    +

    The name of the feature that contains the text to vectorize. If None, then the input is treated as a document instead of a set of features.

    +
  • +
  • +

    strip_accents

    +

    DefaultTrue

    +

    Whether or not to strip accent characters.

    +
  • +
  • +

    lowercase

    +

    DefaultTrue

    +

    Whether or not to convert all characters to lowercase.

    +
  • +
  • +

    preprocessor

    +

    Typetyping.Callable | None

    +

    DefaultNone

    +

    An optional preprocessing function which overrides the strip_accents and lowercase steps, while preserving the tokenizing and n-grams generation steps.

    +
  • +
  • +

    tokenizer

    +

    Typetyping.Callable | None

    +

    DefaultNone

    +

    A function used to convert preprocessed text into a dict of tokens. By default, a regex formula that works well in most cases is used.

    +
  • +
  • +

    ngram_range

    +

    Default(1, 1)

    +

    The lower and upper boundary of the range n-grams to be extracted. All values of n such that min_n <= n <= max_n will be used. For example an ngram_range of (1, 1) means only unigrams, (1, 2) means unigrams and bigrams, and (2, 2) means only bigrams. Only works if tokenizer is not set to False.

    +
  • +
+

Attributes

+
    +
  • +

    dfs (collections.defaultdict)

    +

    Document counts.

    +
  • +
  • +

    n (int)

    +

    Number of scanned documents.

    +
  • +
+

Examples

+

from river import feature_extraction
+
+tfidf = feature_extraction.TFIDF()
+
+corpus = [
+    'This is the first document.',
+    'This document is the second document.',
+    'And this is the third one.',
+    'Is this the first document?',
+]
+
+for sentence in corpus:
+    tfidf = tfidf.learn_one(sentence)
+    print(tfidf.transform_one(sentence))
+
+
{'this': 0.447, 'is': 0.447, 'the': 0.447, 'first': 0.447, 'document': 0.447}
+{'this': 0.333, 'document': 0.667, 'is': 0.333, 'the': 0.333, 'second': 0.469}
+{'and': 0.497, 'this': 0.293, 'is': 0.293, 'the': 0.293, 'third': 0.497, 'one': 0.497}
+{'is': 0.384, 'this': 0.384, 'the': 0.384, 'first': 0.580, 'document': 0.469}
+
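The 0.447 values above can be reproduced by hand. Assuming scikit-learn's smoothed IDF, \(idf(t) = \ln\frac{1 + n}{1 + df(t)} + 1\), every token of the first document has the same weight, and the L2 normalization then divides by \(\sqrt{5}\):

import math

n, df, tf = 1, 1, 1  # one document seen so far, each token appears once in it
idf = math.log((1 + n) / (1 + df)) + 1  # = 1
weight = tf * idf
weight / math.sqrt(5 * weight ** 2)  # 0.4472..., i.e. the 0.447 shown above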

+

In the above example, a string is passed to transform_one. You can also indicate which +field to access if the string is stored in a dictionary:

+

tfidf = feature_extraction.TFIDF(on='sentence')
+
+for sentence in corpus:
+    x = {'sentence': sentence}
+    tfidf = tfidf.learn_one(x)
+    print(tfidf.transform_one(x))
+
+
{'this': 0.447, 'is': 0.447, 'the': 0.447, 'first': 0.447, 'document': 0.447}
+{'this': 0.333, 'document': 0.667, 'is': 0.333, 'the': 0.333, 'second': 0.469}
+{'and': 0.497, 'this': 0.293, 'is': 0.293, 'the': 0.293, 'third': 0.497, 'one': 0.497}
+{'is': 0.384, 'this': 0.384, 'the': 0.384, 'first': 0.580, 'document': 0.469}
+

+

Methods

+
+learn_many +
+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do need to do something during learn_one can, however, override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+process_text +
+
+transform_many +

Transform a pandas Series of strings into a term-frequency pandas sparse dataframe.

+

Parameters

+
    +
  • X'pd.Series'
  • +
+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/feature-extraction/TargetAgg/index.html b/0.19.0/api/feature-extraction/TargetAgg/index.html

TargetAgg

+

Computes a streaming aggregate of the target values.

+

This transformer is identical to feature_extraction.Agg; the only difference is that it operates on the target rather than on a feature. At each step, the running statistic how of the group given by by is updated with the target value. It is therefore a supervised transformer.

+

Parameters

+
    +
  • +

    by

    +

    Typestr | list[str] | None

    +

    The feature by which to group the target values. All the data is included in the aggregate if this is None.

    +
  • +
  • +

    how

    +

    Typestats.base.Univariate | utils.Rolling | utils.TimeRolling

    +

    The statistic to compute.

    +
  • +
  • +

    target_name

    +

    Defaulty

    +

    The target name which is used in the result.

    +
  • +
+

Attributes

+
    +
  • +

    state

    +

    Return the current values for each group as a series.

    +
  • +
  • +

    target_name

    +
  • +
+

Examples

+

Consider the following dataset, where the second element of each tuple is the target:

+
dataset = [
+    ({'country': 'France', 'place': 'Taco Bell'}, 42),
+    ({'country': 'Sweden', 'place': 'Burger King'}, 16),
+    ({'country': 'France', 'place': 'Burger King'}, 24),
+    ({'country': 'Sweden', 'place': 'Taco Bell'}, 58),
+    ({'country': 'Sweden', 'place': 'Burger King'}, 20),
+    ({'country': 'France', 'place': 'Taco Bell'}, 50),
+    ({'country': 'France', 'place': 'Burger King'}, 10),
+    ({'country': 'Sweden', 'place': 'Taco Bell'}, 80)
+]
+
+

As an example, let's perform a target encoding of the place feature. Instead of simply updating a running average, we use a stats.BayesianMean, which allows us to incorporate some prior knowledge. This makes subsequent models less prone to overfitting. Indeed, it dampens the effect of having seen too few samples within a group.

+

from river import feature_extraction
+from river import stats
+
+agg = feature_extraction.TargetAgg(
+    by='place',
+    how=stats.BayesianMean(
+        prior=3,
+        prior_weight=1
+    )
+)
+
+for x, y in dataset:
+    print(agg.transform_one(x))
+    agg = agg.learn_one(x, y)
+
+
{'y_bayes_mean_by_place': 3.0}
+{'y_bayes_mean_by_place': 3.0}
+{'y_bayes_mean_by_place': 9.5}
+{'y_bayes_mean_by_place': 22.5}
+{'y_bayes_mean_by_place': 14.333}
+{'y_bayes_mean_by_place': 34.333}
+{'y_bayes_mean_by_place': 15.75}
+{'y_bayes_mean_by_place': 38.25}
+
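These numbers can be checked by hand, assuming the usual smoothed-mean formula (prior_weight * prior + sum of targets) / (prior_weight + n):

prior, prior_weight = 3, 1

# Third output: Burger King has seen a single target value of 16 at that point.
(prior_weight * prior + 16) / (prior_weight + 1)  # 9.5

# Fourth output: Taco Bell has seen a single target value of 42.
(prior_weight * prior + 42) / (prior_weight + 1)  # 22.5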

+

Just like with feature_extraction.Agg, we can specify multiple features on which to +group the data:

+

agg = feature_extraction.TargetAgg(
+    by=['place', 'country'],
+    how=stats.BayesianMean(
+        prior=3,
+        prior_weight=1
+    )
+)
+
+for x, y in dataset:
+    print(agg.transform_one(x))
+    agg = agg.learn_one(x, y)
+
+
{'y_bayes_mean_by_place_and_country': 3.0}
+{'y_bayes_mean_by_place_and_country': 3.0}
+{'y_bayes_mean_by_place_and_country': 3.0}
+{'y_bayes_mean_by_place_and_country': 3.0}
+{'y_bayes_mean_by_place_and_country': 9.5}
+{'y_bayes_mean_by_place_and_country': 22.5}
+{'y_bayes_mean_by_place_and_country': 13.5}
+{'y_bayes_mean_by_place_and_country': 30.5}
+

+

agg.state
+
+
place        country
+Taco Bell    France     31.666667
+Burger King  Sweden     13.000000
+             France     12.333333
+Taco Bell    Sweden     47.000000
+Name: y_bayes_mean_by_place_and_country, dtype: float64
+

+

This transformer can also be used in conjunction with utils.TimeRolling. The latter requires a t argument, which is a timestamp that indicates when the current row was observed. For instance, we can calculate the average (how) target value for each group (by) over the last 7 days (t):

+
import datetime as dt
+import random
+import string
+from river import utils
+
+agg = feature_extraction.TargetAgg(
+    by="group",
+    how=utils.TimeRolling(stats.Mean(), dt.timedelta(days=7))
+)
+
+for day in range(366):
+    g = random.choice(string.ascii_lowercase)
+    x = {"group": g}
+    y = string.ascii_lowercase.index(g) + random.random()
+    t = dt.datetime(2023, 1, 1) + dt.timedelta(days=day)
+    agg = agg.learn_one(x, y, t=t)
+
+

Methods

+
+learn_one +

Update with a set of features x and a target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.Target'
  • +
  • t — defaults to None
  • +
+

Returns

+

SupervisedTransformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/feature-selection/PoissonInclusion/index.html b/0.19.0/api/feature-selection/PoissonInclusion/index.html

PoissonInclusion

+

Randomly selects features with an inclusion trial.

+

When a new feature is encountered, it is selected with probability p. The number of times a feature needs to be seen before it is added to the model follows a geometric distribution with expected value 1 / p. This feature selection method is meant to be used when you have a very large number of sparse features.
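The 1 / p expectation is easy to verify empirically. A quick simulation sketch, independent of River:

import random

p = 0.1
rng = random.Random(42)

# Number of sightings needed before a feature is first included, repeated many times.
trials = [
    next(i for i in range(1, 10_000) if rng.random() < p)
    for _ in range(10_000)
]
print(sum(trials) / len(trials))  # close to 1 / p = 10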

+

Parameters

+
    +
  • +

    p

    +

    Typefloat

    +

    Probability of including a feature the first time it is encountered.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed value used for reproducibility.

    +
  • +
+

Examples

+

from river import datasets
+from river import feature_selection
+from river import stream
+
+selector = feature_selection.PoissonInclusion(p=0.1, seed=42)
+
+dataset = iter(datasets.TrumpApproval())
+
+feature_names = next(dataset)[0].keys()
+n = 0
+
+while True:
+    x, y = next(dataset)
+    xt = selector.transform_one(x)
+    if xt.keys() == feature_names:
+        break
+    n += 1
+
+n
+
+
12
+

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do need to do something during learn_one can, however, override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/feature-selection/SelectKBest/index.html b/0.19.0/api/feature-selection/SelectKBest/index.html

SelectKBest

+

Removes all but the \(k\) highest scoring features.

+

Parameters

+
    +
  • +

    similarity

    +

    Typestats.base.Bivariate

    +
  • +
  • +

    k

    +

    Default10

    +

    The number of features to keep.

    +
  • +
+

Attributes

+
    +
  • +

    similarities (dict)

    +

    The similarity instances used for each feature.

    +
  • +
  • +

    leaderboard (dict)

    +

    The actual similarity measures.

    +
  • +
+

Examples

+

from pprint import pprint
+from river import feature_selection
+from river import stats
+from river import stream
+from sklearn import datasets
+
+X, y = datasets.make_regression(
+    n_samples=100,
+    n_features=10,
+    n_informative=2,
+    random_state=42
+)
+
+selector = feature_selection.SelectKBest(
+    similarity=stats.PearsonCorr(),
+    k=2
+)
+
+for xi, yi, in stream.iter_array(X, y):
+    selector = selector.learn_one(xi, yi)
+
+pprint(selector.leaderboard)
+
+
Counter({9: 0.7898,
+        7: 0.5444,
+        8: 0.1062,
+        2: 0.0638,
+        4: 0.0538,
+        5: 0.0271,
+        1: -0.0312,
+        6: -0.0657,
+        3: -0.1501,
+        0: -0.1895})
+
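As a sanity check, done here with numpy rather than River, the streaming Pearson correlation of the top feature should be close to the batch estimate:

import numpy as np

round(float(np.corrcoef(X[:, 9], y)[0, 1]), 4)  # roughly 0.7898, as in the leaderboard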

+

selector.transform_one(xi)
+
+
{7: -1.2795, 9: -1.8408}
+

+

Methods

+
+learn_one +

Update with a set of features x and a target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.Target'
  • +
+

Returns

+

SupervisedTransformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/feature-selection/VarianceThreshold/index.html b/0.19.0/api/feature-selection/VarianceThreshold/index.html

VarianceThreshold

+

Removes low-variance features.

+

Parameters

+
    +
  • +

    threshold

    +

    Default0

    +

    Only features with a variance above the threshold will be kept.

    +
  • +
  • +

    min_samples

    +

    Default2

    +

    The minimum number of samples required to perform selection.

    +
  • +
+

Attributes

+
    +
  • +

    variances (dict)

    +

    The variance of each feature.

    +
  • +
+

Examples

+

from river import feature_selection
+from river import stream
+
+X = [
+    [0, 2, 0, 3],
+    [0, 1, 4, 3],
+    [0, 1, 1, 3]
+]
+
+selector = feature_selection.VarianceThreshold()
+
+for x, _ in stream.iter_array(X):
+    print(selector.learn_one(x).transform_one(x))
+
+
{0: 0, 1: 2, 2: 0, 3: 3}
+{1: 1, 2: 4}
+{1: 1, 2: 1}
+
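The second output can be checked by hand: after two samples, features 0 and 3 are constant and are filtered out, while features 1 and 2 vary. Using the sample variance (the exact variance estimator used internally doesn't change the outcome here, since the threshold is 0):

import statistics

statistics.variance([0, 0])  # 0, so feature 0 is dropped
statistics.variance([2, 1])  # 0.5, so feature 1 is kept
statistics.variance([0, 4])  # 8, so feature 2 is kept
statistics.variance([3, 3])  # 0, so feature 3 is dropped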

+

Methods

+
+check_feature +
+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do need to do something during learn_one can, however, override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/forest/AMFClassifier/index.html b/0.19.0/api/forest/AMFClassifier/index.html

AMFClassifier

+

Aggregated Mondrian Forest classifier for online learning.

+

This implementation is truly online 1, in the sense that a single pass is performed, and that predictions can be produced anytime.

+

Each node in a tree predicts according to the distribution of the labels it contains. This distribution is regularized using a "Jeffreys" prior with parameter dirichlet. For each class with count labels in the node and n_samples samples in it, the prediction of a node is given by

+

\(\frac{count + dirichlet}{n_{samples} + dirichlet \times n_{classes}}\).
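For example, with dirichlet = 0.5, two classes, and a node holding 3 samples of which 2 belong to the positive class, the node's prediction for the positive class is:

count, dirichlet, n_samples, n_classes = 2, 0.5, 3, 2
(count + dirichlet) / (n_samples + dirichlet * n_classes)  # 0.625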

+

The prediction for a sample is computed as the aggregated predictions of all the subtrees along the path leading to the leaf node containing the sample. The aggregation weights are exponential weights with learning rate step and log-loss when use_aggregation is True.

+

This computation is performed exactly thanks to a context tree weighting algorithm. More details can be found in the paper cited in the references below.

+

The final predictions are the average class probabilities predicted by each of the n_estimators trees in the forest.

+

Parameters

+
    +
  • +

    n_estimators

    +

    Typeint

    +

    Default10

    +

    The number of trees in the forest.

    +
  • +
  • +

    step

    +

    Typefloat

    +

    Default1.0

    +

    Step-size for the aggregation weights. Default is 1 for classification with the log-loss, which is usually the best choice.

    +
  • +
  • +

    use_aggregation

    +

    Typebool

    +

    DefaultTrue

    +

    Controls if aggregation is used in the trees. It is highly recommended to leave it as True.

    +
  • +
  • +

    dirichlet

    +

    Typefloat

    +

    Default0.5

    +

    Regularization level of the class frequencies used for predictions in each node. A rule of thumb is to set this to 1 / n_classes, where n_classes is the expected number of classes which might appear. Default is dirichlet = 0.5, which works well for binary classification problems.

    +
  • +
  • +

    split_pure

    +

    Typebool

    +

    DefaultFalse

    +

    Controls whether nodes that contain only samples of the same class should be split ("pure" nodes). Default is False, meaning pure nodes are not split, but True can sometimes be better.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • models
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import forest
+from river import metrics
+
+dataset = datasets.Bananas().take(500)
+
+model = forest.AMFClassifier(
+    n_estimators=10,
+    use_aggregation=True,
+    dirichlet=0.5,
+    seed=1
+)
+
+metric = metrics.Accuracy()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
Accuracy: 85.37%
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

A dictionary that associates a probability which each label.

+
+

+

Notes

+

Only the log-loss is supported for the computation of the aggregation weights for now, namely the log-loss for multi-class classification.

+
+
+
    +
  1. +

    Mourtada, J., Gaïffas, S., & Scornet, E. (2021). AMF: Aggregated Mondrian forests for online +learning. Journal of the Royal Statistical Society Series B: Statistical Methodology, 83(3), 505-533. 

    +
  2. +
+
diff --git a/0.19.0/api/forest/AMFRegressor/index.html b/0.19.0/api/forest/AMFRegressor/index.html

AMFRegressor

+

Aggregated Mondrian Forest regressor for online learning.

+

This algorithm is truly online, in the sense that a single pass is performed, and that predictions can be produced anytime.

+

Each node in a tree predicts according to the average of the labels it contains. The prediction for a sample is computed as the aggregated predictions of all the subtrees along the path leading to the leaf node containing the sample. The aggregation weights are exponential weights with learning rate step using a squared loss when use_aggregation is True.
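As a rough illustration of the weighting scheme (the actual algorithm aggregates over all subtrees exactly via context tree weighting, as described below, and the numbers here are made up), subtrees with a lower cumulative squared loss receive exponentially larger weights:

import math

step = 1.0
losses = [0.40, 0.10, 0.25]  # hypothetical cumulative squared losses per subtree
preds = [4.2, 5.0, 4.7]  # hypothetical subtree predictions

weights = [math.exp(-step * loss) for loss in losses]
sum(w * p for w, p in zip(weights, preds)) / sum(weights)  # weighted average, ~4.67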

+

This computation is performed exactly thanks to a context tree weighting algorithm. More details can be found in the original paper 1.

+

The final predictions are the average of the predictions of each of the n_estimators trees in the forest.

+

Parameters

+
    +
  • +

    n_estimators

    +

    Typeint

    +

    Default10

    +

    The number of trees in the forest.

    +
  • +
  • +

    step

    +

    Typefloat

    +

    Default1.0

    +

    Step-size for the aggregation weights.

    +
  • +
  • +

    use_aggregation

    +

    Typebool

    +

    DefaultTrue

    +

    Controls if aggregation is used in the trees. It is highly recommended to leave it as True.

    +
  • +
  • +

    seed

    +

    Typeint

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • models
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import forest
+from river import metrics
+
+dataset = datasets.TrumpApproval()
+model = forest.AMFRegressor(seed=42)
+metric = metrics.MAE()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MAE: 0.268533
+

+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

The prediction.

+
+

+
+
+
    +
  1. +

    Mourtada, J., Gaïffas, S., & Scornet, E. (2021). AMF: Aggregated Mondrian forests for online +learning. Journal of the Royal Statistical Society Series B: Statistical Methodology, 83(3), 505-533. 

    +
  2. +
+
diff --git a/0.19.0/api/forest/ARFClassifier/index.html b/0.19.0/api/forest/ARFClassifier/index.html

ARFClassifier

+

Adaptive Random Forest classifier.

+

The 3 most important aspects of Adaptive Random Forest 1 are:

+
    +
  1. +

    inducing diversity through re-sampling

    +
  2. +
  3. +

    inducing diversity through randomly selecting subsets of features for node splits

    +
  4. +
  5. +

    drift detectors per base tree, which cause selective resets in response to drifts

    +
  6. +
+

It also allows training background trees, which start training if a warning is detected and replace the active tree if the warning escalates to a drift.

+

Parameters

+
    +
  • +

    n_models

    +

    Typeint

    +

    Default10

    +

    Number of trees in the ensemble.

    +
  • +
  • +

    max_features

    +

    Typebool | str | int

    +

    Defaultsqrt

    +

    Max number of attributes for each node split.
    - If int, then consider max_features at each split.
    - If float, then max_features is a percentage and int(max_features * n_features) features are considered per split.
    - If "sqrt", then max_features=sqrt(n_features).
    - If "log2", then max_features=log2(n_features).
    - If None, then max_features=n_features.

    +
  • +
  • +

    lambda_value

    +

    Typeint

    +

    Default6

    +

    The lambda value for bagging (lambda=6 corresponds to Leveraging Bagging).

    +
  • +
  • +

    metric

    +

    Typemetrics.base.MultiClassMetric | None

    +

    DefaultNone

    +

    Metric used to track trees' performance within the ensemble. Defaults to metrics.Accuracy().

    +
  • +
  • +

    disable_weighted_vote

    +

    DefaultFalse

    +

    If True, disables the weighted vote prediction.

    +
  • +
  • +

    drift_detector

    +

    Typebase.DriftDetector | None

    +

    DefaultNone

    +

    Drift Detection method. Set to None to disable Drift detection. Defaults to drift.ADWIN(delta=0.001).

    +
  • +
  • +

    warning_detector

    +

    Typebase.DriftDetector | None

    +

    DefaultNone

    +

    Warning Detection method. Set to None to disable warning detection. Defaults to drift.ADWIN(delta=0.01).

    +
  • +
  • +

    grace_period

    +

    Typeint

    +

    Default50

    +

    [Tree parameter] Number of instances a leaf should observe between split attempts.

    +
  • +
  • +

    max_depth

    +

    Typeint | None

    +

    DefaultNone

    +

    [Tree parameter] The maximum depth a tree can reach. If None, the tree will grow indefinitely.

    +
  • +
  • +

    split_criterion

    +

    Typestr

    +

    Defaultinfo_gain

    +

    [Tree parameter] Split criterion to use.
    - 'gini' - Gini
    - 'info_gain' - Information Gain
    - 'hellinger' - Hellinger Distance

    +
  • +
  • +

    delta

    +

    Typefloat

    +

    Default0.01

    +

    [Tree parameter] Allowed error in the split decision; a value closer to 0 takes longer to decide.

    +
  • +
  • +

    tau

    +

    Typefloat

    +

    Default0.05

    +

    [Tree parameter] Threshold below which a split will be forced to break ties.

    +
  • +
  • +

    leaf_prediction

    +

    Typestr

    +

    Defaultnba

    +

    [Tree parameter] Prediction mechanism used at leafs.
    - 'mc' - Majority Class
    - 'nb' - Naive Bayes
    - 'nba' - Naive Bayes Adaptive

    +
  • +
  • +

    nb_threshold

    +

    Typeint

    +

    Default0

    +

    [Tree parameter] Number of instances a leaf should observe before allowing Naive Bayes.

    +
  • +
  • +

    nominal_attributes

    +

    Typelist | None

    +

    DefaultNone

    +

    [Tree parameter] List of Nominal attributes. If empty, then assume that all attributes are numerical.

    +
  • +
  • +

    splitter

    +

    TypeSplitter | None

    +

    DefaultNone

    +

    [Tree parameter] The Splitter or Attribute Observer (AO) used to monitor the class statistics of numeric features and perform splits. Splitters are available in the tree.splitter module. Different splitters are available for classification and regression tasks. Classification and regression splitters can be distinguished by their property is_target_class. This is an advanced option. Special care must be taken when choosing different splitters. By default, tree.splitter.GaussianSplitter is used if splitter is None.

    +
  • +
  • +

    binary_split

    +

    Typebool

    +

    DefaultFalse

    +

    [Tree parameter] If True, only allow binary splits.

    +
  • +
  • +

    min_branch_fraction

    +

    Typefloat

    +

    Default0.01

    +

    [Tree parameter] The minimum percentage of observed data required for branches resulting from split candidates. To validate a split candidate, at least two resulting branches must have a percentage of samples greater than min_branch_fraction. This criterion prevents unnecessary splits when the majority of instances are concentrated in a single branch.

    +
  • +
  • +

    max_share_to_split

    +

    Typefloat

    +

    Default0.99

    +

    [Tree parameter] Only perform a split in a leaf if the proportion of elements in the majority class is smaller than this parameter value. This parameter avoids performing splits when most of the data belongs to a single class.

    +
  • +
  • +

    max_size

    +

    Typefloat

    +

    Default100.0

    +

    [Tree parameter] Maximum memory (MB) consumed by the tree.

    +
  • +
  • +

    memory_estimate_period

    +

    Typeint

    +

    Default2000000

    +

    [Tree parameter] Number of instances between memory consumption checks.

    +
  • +
  • +

    stop_mem_management

    +

    Typebool

    +

    DefaultFalse

    +

    [Tree parameter] If True, stop growing as soon as memory limit is hit.

    +
  • +
  • +

    remove_poor_attrs

    +

    Typebool

    +

    DefaultFalse

    +

    [Tree parameter] If True, disable poor attributes to reduce memory usage.

    +
  • +
  • +

    merit_preprune

    +

    Typebool

    +

    DefaultTrue

    +

    [Tree parameter] If True, enable merit-based tree pre-pruning.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • models
  • +
+

Examples

+

from river import evaluate
+from river import forest
+from river import metrics
+from river.datasets import synth
+
+dataset = synth.ConceptDriftStream(
+    seed=42,
+    position=500,
+    width=40
+).take(1000)
+
+model = forest.ARFClassifier(seed=8, leaf_prediction="mc")
+
+metric = metrics.Accuracy()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
Accuracy: 71.07%
+
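The lambda_value parameter drives the online bagging step: conceptually, each incoming sample is presented to each tree k times, with k drawn from a Poisson(lambda_value) distribution. An illustrative draw, not River's internal code:

import numpy as np

rng = np.random.default_rng(8)
rng.poisson(lam=6, size=10)  # per-tree repeat counts for a single incoming sample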

+

Methods

+
+learn_one +
+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict[base.typing.ClfTarget, float]: A dictionary that associates a probability which each label.

+
+

+
+
+
    +
  1. +

    Heitor Murilo Gomes, Albert Bifet, Jesse Read, Jean Paul Barddal, + Fabricio Enembreck, Bernhard Pfharinger, Geoff Holmes, Talel Abdessalem. + Adaptive random forests for evolving data stream classification. + In Machine Learning, DOI: 10.1007/s10994-017-5642-8, Springer, 2017. 

    +
  2. +
+
diff --git a/0.19.0/api/forest/ARFRegressor/index.html b/0.19.0/api/forest/ARFRegressor/index.html

ARFRegressor

+

Adaptive Random Forest regressor.

+

The 3 most important aspects of Adaptive Random Forest 1 are:

+
    +
  1. +

    inducing diversity through re-sampling

    +
  2. +
  3. +

    inducing diversity through randomly selecting subsets of features for node splits

    +
  4. +
  5. +

    drift detectors per base tree, which cause selective resets in response to drifts

    +
  6. +
+

Notice that this implementation is slightly different from the original algorithm proposed in 2. The HoeffdingTreeRegressor is used as base learner, instead of FIMT-DD. It also adds a new strategy to monitor the predictions and check for concept drifts. The deviations of the predictions to the target are monitored and normalized in the [0, 1] range to fulfill ADWIN's requirements. We assume that the data subjected to the normalization follows a normal distribution, and thus, lies within the interval of the mean \(\pm3\sigma\).
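A minimal sketch of that normalization idea, not River's exact code: the absolute error is mapped through the mean \(\pm3\sigma\) interval and clipped into [0, 1] before being fed to the drift detector.

def normalize_error(abs_error, mean, sigma):
    # Squash the error into [0, 1] using the mean +/- 3 * sigma interval,
    # assuming the errors are roughly normally distributed.
    lo, hi = mean - 3 * sigma, mean + 3 * sigma
    if hi == lo:
        return 0.0
    return min(1.0, max(0.0, (abs_error - lo) / (hi - lo)))

normalize_error(0.8, mean=0.5, sigma=0.2)  # 0.75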

+

Parameters

+
    +
  • +

    n_models

    +

    Typeint

    +

    Default10

    +

    Number of trees in the ensemble.

    +
  • +
  • +

    max_features

    +

    Defaultsqrt

    +

    Max number of attributes for each node split.
    - If int, then consider max_features at each split.
    - If float, then max_features is a percentage and int(max_features * n_features) features are considered per split.
    - If "sqrt", then max_features=sqrt(n_features).
    - If "log2", then max_features=log2(n_features).
    - If None, then max_features=n_features.

    +
  • +
  • +

    aggregation_method

    +

    Typestr

    +

    Defaultmedian

    +

    The method to use to aggregate predictions in the ensemble.
    - 'mean'
    - 'median' - If selected, the weighted vote is disabled.

    +
  • +
  • +

    lambda_value

    +

    Typeint

    +

    Default6

    +

    The lambda value for bagging (lambda=6 corresponds to Leveraging Bagging).

    +
  • +
  • +

    metric

    +

    Typemetrics.base.RegressionMetric | None

    +

    DefaultNone

    +

    Metric used to track trees' performance within the ensemble. Depending on the configuration, this metric is also used to weight predictions from the members of the ensemble. Defaults to metrics.MSE().

    +
  • +
  • +

    disable_weighted_vote

    +

    DefaultTrue

    +

    If True, disables the weighted vote prediction, i.e. does not assign weights to individual trees' predictions and uses the arithmetic mean instead. Otherwise, the metric value is used to weight predictions.

    +
  • +
  • +

    drift_detector

    +

    Typebase.DriftDetector | None

    +

    DefaultNone

    +

    Drift Detection method. Set to None to disable Drift detection. Defaults to drift.ADWIN(0.001).

    +
  • +
  • +

    warning_detector

    +

    Typebase.DriftDetector | None

    +

    DefaultNone

    +

    Warning Detection method. Set to None to disable warning detection. Defaults to drift.ADWIN(0.01).

    +
  • +
  • +

    grace_period

    +

    Typeint

    +

    Default50

    +

    [Tree parameter] Number of instances a leaf should observe between split attempts.

    +
  • +
  • +

    max_depth

    +

    Typeint | None

    +

    DefaultNone

    +

    [Tree parameter] The maximum depth a tree can reach. If None, the tree will grow indefinitely.

    +
  • +
  • +

    delta

    +

    Typefloat

    +

    Default0.01

    +

    [Tree parameter] Allowed error in the split decision; a value closer to 0 takes longer to decide.

    +
  • +
  • +

    tau

    +

    Typefloat

    +

    Default0.05

    +

    [Tree parameter] Threshold below which a split will be forced to break ties.

    +
  • +
  • +

    leaf_prediction

    +

    Typestr

    +

    Defaultadaptive

    +

    [Tree parameter] Prediction mechanism used at leaves.
    - 'mean' - Target mean
    - 'model' - Uses the model defined in leaf_model
    - 'adaptive' - Chooses between 'mean' and 'model' dynamically

    +
  • +
  • +

    leaf_model

    +

    Typebase.Regressor | None

    +

    DefaultNone

    +

    [Tree parameter] The regression model used to provide responses if leaf_prediction='model'. If not provided, an instance of linear_model.LinearRegression with the default hyperparameters is used.

    +
  • +
  • +

    model_selector_decay

    +

    Typefloat

    +

    Default0.95

    +

    [Tree parameter] The exponential decay factor applied to the learning models' squared errors, which are monitored if leaf_prediction='adaptive'. Must be between 0 and 1. The closer to 1, the more importance past observations carry. Conversely, as the value approaches 0, the most recently observed errors have more influence on the final decision.

    +
  • +
  • +

    nominal_attributes

    +

    Typelist | None

    +

    DefaultNone

    +

    [Tree parameter] List of nominal attributes. If empty, all attributes are assumed to be numerical.

    +
  • +
  • +

    splitter

    +

    TypeSplitter | None

    +

    DefaultNone

    +

    [Tree parameter] The Splitter or Attribute Observer (AO) used to monitor the class statistics of numeric features and perform splits. Splitters are available in the tree.splitter module. Different splitters are available for classification and regression tasks. Classification and regression splitters can be distinguished by their property is_target_class. This is an advanced option. Special care must be taken when choosing different splitters. By default, tree.splitter.EBSTSplitter is used if splitter is None.

    +
  • +
  • +

    min_samples_split

    +

    Typeint

    +

    Default5

    +

    [Tree parameter] The minimum number of samples every branch resulting from a split candidate must have to be considered valid.

    +
  • +
  • +

    binary_split

    +

    Typebool

    +

    DefaultFalse

    +

    [Tree parameter] If True, only allow binary splits.

    +
  • +
  • +

    max_size

    +

    Typefloat

    +

    Default500.0

    +

    [Tree parameter] Maximum memory (MB) consumed by the tree.

    +
  • +
  • +

    memory_estimate_period

    +

    Typeint

    +

    Default2000000

    +

    [Tree parameter] Number of instances between memory consumption checks.

    +
  • +
  • +

    stop_mem_management

    +

    Typebool

    +

    DefaultFalse

    +

    [Tree parameter] If True, stop growing as soon as memory limit is hit.

    +
  • +
  • +

    remove_poor_attrs

    +

    Typebool

    +

    DefaultFalse

    +

    [Tree parameter] If True, disable poor attributes to reduce memory usage.

    +
  • +
  • +

    merit_preprune

    +

    Typebool

    +

    DefaultTrue

    +

    [Tree parameter] If True, enable merit-based tree pre-pruning.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    models

    +
  • +
  • +

    valid_aggregation_method

    +

    Valid aggregation_method values.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import forest
+from river import metrics
+from river import preprocessing
+
+dataset = datasets.TrumpApproval()
+
+model = (
+    preprocessing.StandardScaler() |
+    forest.ARFRegressor(seed=42)
+)
+
+metric = metrics.MAE()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MAE: 0.800378
+

+

Methods

+
+learn_one +
+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

base.typing.RegTarget: The prediction.

+
+

+
+
+
  1. Gomes, H.M., Bifet, A., Read, J., Barddal, J.P., Enembreck, F., Pfahringer, B., Holmes, G. and Abdessalem, T., 2017. Adaptive random forests for evolving data stream classification. Machine Learning, 106(9-10), pp.1469-1495.

  2. Gomes, H.M., Barddal, J.P., Boiko, L.E., Bifet, A., 2018. Adaptive random forests for data stream regression. ESANN 2018.
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/forest/OXTRegressor/index.html b/0.19.0/api/forest/OXTRegressor/index.html new file mode 100644 index 0000000000..5e4292f666 --- /dev/null +++ b/0.19.0/api/forest/OXTRegressor/index.html @@ -0,0 +1,3714 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + OXTRegressor - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

OXTRegressor

+

Online Extra Trees regressor.

+

The online Extra Trees1 ensemble takes randomization some steps further than Adaptive Random Forests (ARF). A subspace of the feature space is considered at each split attempt, as in ARF, and online bagging or subbagging can also (optionally) be used. In addition, Extra Trees randomizes the split candidates evaluated by each leaf node (only a single split point is tested per numerical feature, which brings significant speedups to the ensemble), and may also randomize the maximum depth of the forest members, as well as the size of the feature subspace processed by each of its trees' leaves.

+

On the other hand, OXT suffers from a cold-start problem. As the splits are random, the predictive performance in small samples is usually worse than using a deterministic split approach, such as the one used by ARF.
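
To illustrate the randomized split selection described above (a hypothetical helper, not the actual implementation), a single candidate split point per numerical feature can be drawn from the range observed in a small buffer, whose length is what the split_buffer_size parameter controls:

import random

rng = random.Random(42)

def random_split_point(buffer):
    # `buffer` holds the first few observed values of a feature; its length
    # corresponds to the split_buffer_size parameter.
    lo, hi = min(buffer), max(buffer)
    return rng.uniform(lo, hi)

random_split_point([4.2, 1.5, 3.3, 2.8, 0.9])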

+

Parameters

+
    +
  • +

    n_models

    +

    Typeint

    +

    Default10

    +

    The number of trees in the ensemble.

    +
  • +
  • +

    max_features

    +

    Typebool | str | int

    +

    Defaultsqrt

    +

    Max number of attributes for each node split.
    - If int, then consider max_features at each split.
    - If float, then max_features is a percentage and int(max_features * n_features) features are considered per split.
    - If "sqrt", then max_features=sqrt(n_features).
    - If "log2", then max_features=log2(n_features).
    - If "random", then max_features will assume a different random number in the interval [2, n_features] for each tree leaf.
    - If None, then max_features=n_features.

    +
  • +
  • +

    resampling_strategy

    +

    Typestr | None

    +

    Defaultsubbagging

    +

    The chosen instance resampling strategy:
    - If None, no resampling will be done and the trees will process all instances.
    - If 'bagging', online bagging will be performed (sampling with replacement).
    - If 'subbagging', online subbagging will be performed (sampling without replacement).

    +
  • +
  • +

    resampling_rate

    +

    Typeint | float

    +

    Default0.5

    +

    Only valid if resampling_strategy is not None. Controls the parameters of the resampling strategy.
    - If resampling_strategy='bagging', must be an integer greater than or equal to 1 that parameterizes the Poisson distribution used to simulate bagging in online learning settings. It acts as the lambda parameter of Oza Bagging and Leveraging Bagging.
    - If resampling_strategy='subbagging', must be a float in the interval \((0, 1]\) that controls the chance of each instance being used by a tree for learning.

    +
  • +
  • +

    detection_mode

    +

    Typestr

    +

    Defaultall

    +

    The concept drift detection mode in which the forest operates. Valid values are:
    - "all": creates both warning and concept drift detectors. If a warning is detected, an alternate tree starts being trained in the background. If the warning trigger escalates to a concept drift, the affected tree is replaced by the alternate tree.
    - "drop": only the concept drift detectors are created. If a drift is detected, the affected tree is dropped and replaced by a new tree.
    - "off": disables the concept drift adaptation capabilities. The forest will act as if the processed stream is stationary.

    +
  • +
  • +

    warning_detector

    +

    Typebase.DriftDetector | None

    +

    DefaultNone

    +

    The detector that will be used to trigger concept drift warnings. Defaults to drift.ADWIN(0.01).

    +
  • +
  • +

    drift_detector

    +

    Typebase.DriftDetector | None

    +

    DefaultNone

    +

    The detector used to detect concept drifts. Defaults to drift.ADWIN(0.001).

    +
  • +
  • +

    max_depth

    +

    Typeint | None

    +

    DefaultNone

    +

    The maximum depth the ensemble members might reach. If None, the trees will grow indefinitely.

    +
  • +
  • +

    randomize_tree_depth

    +

    Typebool

    +

    DefaultFalse

    +

    Whether or not to randomize the maximum depth of each tree in the ensemble. If max_depth is provided, it acts as an upper bound when generating the maximum depth for each tree.

    +
  • +
  • +

    track_metric

    +

    Typemetrics.base.RegressionMetric | None

    +

    DefaultNone

    +

    The performance metric used to weight predictions. Defaults to metrics.MAE().

    +
  • +
  • +

    disable_weighted_vote

    +

    Typebool

    +

    DefaultTrue

    +

    Defines whether or not to use predictions weighted by each tree's prediction performance.

    +
  • +
  • +

    split_buffer_size

    +

    Typeint

    +

    Default5

    +

    Defines the size of the buffer used by the tree splitters when determining the feature range and a random split point in this interval.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed to support reproducibility.

    +
  • +
  • +

    grace_period

    +

    Typeint

    +

    Default50

    +

    [Tree parameter] Number of instances a leaf should observe between split attempts.

    +
  • +
  • +

    delta

    +

    Typefloat

    +

    Default0.01

    +

    [Tree parameter] Allowed error in split decision, a value closer to 0 takes longer to decide.

    +
  • +
  • +

    tau

    +

    Typefloat

    +

    Default0.05

    +

    [Tree parameter] Threshold below which a split will be forced to break ties.

    +
  • +
  • +

    leaf_prediction

    +

    Typestr

    +

    Defaultadaptive

    +

    [Tree parameter] Prediction mechanism used at leaves.
    - 'mean' - Target mean
    - 'model' - Uses the model defined in leaf_model
    - 'adaptive' - Chooses between 'mean' and 'model' dynamically

    +
  • +
  • +

    leaf_model

    +

    Typebase.Regressor | None

    +

    DefaultNone

    +

    [Tree parameter] The regression model used to provide responses if leaf_prediction='model'. If not provided, an instance of linear_model.LinearRegression with the default hyperparameters is used.

    +
  • +
  • +

    model_selector_decay

    +

    Typefloat

    +

    Default0.95

    +

    [Tree parameter] The exponential decay factor applied to the learning models' squared errors, which are monitored if leaf_prediction='adaptive'. Must be between 0 and 1. The closer to 1, the more importance past observations carry. Conversely, as the value approaches 0, the most recently observed errors have more influence on the final decision.

    +
  • +
  • +

    nominal_attributes

    +

    Typelist | None

    +

    DefaultNone

    +

    [Tree parameter] List of nominal attributes. If empty, all attributes are assumed to be numerical.

    +
  • +
  • +

    min_samples_split

    +

    Typeint

    +

    Default5

    +

    [Tree parameter] The minimum number of samples every branch resulting from a split candidate must have to be considered valid.

    +
  • +
  • +

    binary_split

    +

    Typebool

    +

    DefaultFalse

    +

    [Tree parameter] If True, only allow binary splits.

    +
  • +
  • +

    max_size

    +

    Typeint

    +

    Default500

    +

    [Tree parameter] Maximum memory (MB) consumed by the tree.

    +
  • +
  • +

    memory_estimate_period

    +

    Typeint

    +

    Default2000000

    +

    [Tree parameter] Number of instances between memory consumption checks.

    +
  • +
  • +

    stop_mem_management

    +

    Typebool

    +

    DefaultFalse

    +

    [Tree parameter] If True, stop growing as soon as memory limit is hit.

    +
  • +
  • +

    remove_poor_attrs

    +

    Typebool

    +

    DefaultFalse

    +

    [Tree parameter] If True, disable poor attributes to reduce memory usage.

    +
  • +
  • +

    merit_preprune

    +

    Typebool

    +

    DefaultTrue

    +

    [Tree parameter] If True, enable merit-based tree pre-pruning.

    +
  • +
+

Attributes

+
    +
  • +

    instances_per_tree

    +

    The number of instances processed by each one of the current forest members. Each time a concept drift is detected, the count corresponding to the affected tree is reset.

    +
  • +
  • +

    models

    +
  • +
  • +

    n_drifts

    +

    The number of concept drifts detected per ensemble member.

    +
  • +
  • +

    n_tree_swaps

    +

    The number of performed alternate tree swaps. Not applicable if the warning detectors are disabled.

    +
  • +
  • +

    n_warnings

    +

    The number of warnings detected per ensemble member.

    +
  • +
  • +

    total_instances

    +

    The total number of instances processed by the ensemble.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import metrics
+from river import forest
+
+dataset = datasets.synth.Friedman(seed=42).take(5000)
+
+model = forest.OXTRegressor(n_models=3, seed=42)
+
+metric = metrics.RMSE()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
RMSE: 3.127311
+

+

Methods

+
+learn_one +
+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

base.typing.RegTarget: The prediction.

+
+

+

Notes

+

As the Online Extra Trees change the way in which Hoeffding Trees perform split attempts +and monitor numerical input features, some of the parameters of the vanilla Hoeffding Tree +algorithms are not available.

+
+
+
  1. Mastelini, S. M., Nakano, F. K., Vens, C., & de Leon Ferreira, A. C. P. (2022). Online Extra Trees Regressor. IEEE Transactions on Neural Networks and Learning Systems.
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/imblearn/ChebyshevOverSampler/index.html b/0.19.0/api/imblearn/ChebyshevOverSampler/index.html new file mode 100644 index 0000000000..e705eea30d --- /dev/null +++ b/0.19.0/api/imblearn/ChebyshevOverSampler/index.html @@ -0,0 +1,3565 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ChebyshevOverSampler - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

ChebyshevOverSampler

+

Over-sampling for imbalanced regression using Chebyshev's inequality.

+

Chebyshev's inequality can be used to define the probability of target observations being frequent values (w.r.t. the distribution mean).

+

Let \(Y\) be a random variable with finite expected value \(\overline{y}\) and non-zero variance \(\sigma^2\). For any real number \(t > 0\), Chebyshev's inequality states that, for a wide class of unimodal probability distributions: \(Pr(|y-\overline{y}| \ge t\sigma) \le \dfrac{1}{t^2}\).

+

Taking \(t=\dfrac{|y-\overline{y}|}{\sigma}\), and assuming \(t > 1\), Chebyshev's inequality for an observation \(y\) becomes: \(P(|y - \overline{y}|=t) = \dfrac{\sigma^2}{|y-\overline{y}|^2}\).

+

Alternatively, one can use \(t\) directly to estimate a frequency weight \(\kappa = \lceil t\rceil\) and define an over-sampling strategy for extreme and rare target values1. Each incoming instance is used \(\kappa\) times to update the underlying regressor. Frequent target values contribute only once to the underlying regressor, whereas rare cases are used multiple times for training.
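
Here is a minimal sketch of that strategy, assuming running statistics of the target (this is not the exact implementation):

import math
from river import linear_model, stats

y_mean, y_var = stats.Mean(), stats.Var()
regressor = linear_model.LinearRegression()

def learn_one(x, y):
    y_mean.update(y)
    y_var.update(y)
    sigma = y_var.get() ** 0.5
    t = abs(y - y_mean.get()) / sigma if sigma > 0 else 0.0
    kappa = math.ceil(t) if t > 1 else 1  # frequent values contribute once
    for _ in range(kappa):
        regressor.learn_one(x, y)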

+

Parameters

+
    +
  • +

    regressor

    +

    Typebase.Regressor

    +

    The regression model that will receive the biased sample.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import imblearn
+from river import metrics
+from river import preprocessing
+from river import rules
+
+model = (
+    preprocessing.StandardScaler() |
+    imblearn.ChebyshevOverSampler(
+        regressor=rules.AMRules(
+            n_min=50, delta=0.01
+        )
+    )
+)
+
+evaluate.progressive_val_score(
+    datasets.TrumpApproval(),
+    model,
+    metrics.MAE(),
+    print_every=500
+)
+
+
[500] MAE: 1.673902
+[1,000] MAE: 1.743046
+[1,001] MAE: 1.741335
+MAE: 1.741335
+

+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • kwargs
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

The prediction.

+
+

+
+
+
  1. Aminian, Ehsan, Rita P. Ribeiro, and João Gama. "Chebyshev approaches for imbalanced data streams regression models." Data Mining and Knowledge Discovery 35.6 (2021): 2389-2466.
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/imblearn/ChebyshevUnderSampler/index.html b/0.19.0/api/imblearn/ChebyshevUnderSampler/index.html new file mode 100644 index 0000000000..85c9ee9b9b --- /dev/null +++ b/0.19.0/api/imblearn/ChebyshevUnderSampler/index.html @@ -0,0 +1,3577 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ChebyshevUnderSampler - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

ChebyshevUnderSampler

+

Under-sampling for imbalanced regression using Chebyshev's inequality.

+

Chebyshev's inequality can be used to define the probability of target observations being frequent values (w.r.t. the distribution mean).

+

Let \(Y\) be a random variable with finite expected value \(\overline{y}\) and non-zero variance \(\sigma^2\). For any real number \(t > 0\), Chebyshev's inequality states that, for a wide class of unimodal probability distributions: \(Pr(|y-\overline{y}| \ge t\sigma) \le \dfrac{1}{t^2}\).

+

Taking \(t=\dfrac{|y-\overline{y}|}{\sigma}\), and assuming \(t > 1\), Chebyshev's inequality for an observation \(y\) becomes: \(P(|y - \overline{y}|=t) = \dfrac{\sigma^2}{|y-\overline{y}|^2}\). The reciprocal of this probability is used for under-sampling1 the most frequent cases. Extreme valued or rare cases have higher probabilities of selection, whereas the most frequent cases are likely to be discarded. Still, frequent cases have a small chance of being selected (controlled via the sp parameter) in case few rare instances have been observed.
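
The following sketch is a simplified version of that selection scheme (the exact implementation differs in its details):

import random
from river import linear_model, stats

rng = random.Random(42)
y_mean, y_var = stats.Mean(), stats.Var()
regressor = linear_model.LinearRegression()
sp = 0.15  # second chance probability

def learn_one(x, y):
    y_mean.update(y)
    y_var.update(y)
    sigma = y_var.get() ** 0.5
    t = abs(y - y_mean.get()) / sigma if sigma > 0 else 0.0
    # Rare cases (t > 1) are always kept; frequent ones are kept with a
    # probability given by the reciprocal of the Chebyshev bound, floored by sp.
    keep = 1.0 if t > 1 else max(t * t, sp)
    if rng.random() < keep:
        regressor.learn_one(x, y)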

+

Parameters

+
    +
  • +

    regressor

    +

    Typebase.Regressor

    +

    The regression model that will receive the biased sample.

    +
  • +
  • +

    sp

    +

    Typefloat

    +

    Default0.15

    +

    Second chance probability. Even if an example is not initially selected for training, it still has a small chance of being selected in case the number of rare case observed so far is small.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed to support reproducibility.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import imblearn
+from river import metrics
+from river import preprocessing
+from river import rules
+
+model = (
+    preprocessing.StandardScaler() |
+    imblearn.ChebyshevUnderSampler(
+        regressor=rules.AMRules(
+            n_min=50, delta=0.01,
+        ),
+        seed=42
+    )
+)
+
+evaluate.progressive_val_score(
+    datasets.TrumpApproval(),
+    model,
+    metrics.MAE(),
+    print_every=500
+)
+
+
[500] MAE: 1.787162
+[1,000] MAE: 1.515711
+[1,001] MAE: 1.515236
+MAE: 1.515236
+

+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • kwargs
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

The prediction.

+
+

+
+
+
  1. Aminian, Ehsan, Rita P. Ribeiro, and João Gama. "Chebyshev approaches for imbalanced data streams regression models." Data Mining and Knowledge Discovery 35.6 (2021): 2389-2466.
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/imblearn/HardSamplingClassifier/index.html b/0.19.0/api/imblearn/HardSamplingClassifier/index.html new file mode 100644 index 0000000000..3bbe224182 --- /dev/null +++ b/0.19.0/api/imblearn/HardSamplingClassifier/index.html @@ -0,0 +1,3588 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + HardSamplingClassifier - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

HardSamplingClassifier

+

Hard sampling classifier.

+

This wrapper enables a model to retrain on past samples whose output was hard to predict. This works by storing the hardest samples in a buffer of a fixed size. When a new sample arrives, the wrapped model is either trained on one of the buffered samples with a probability p or on the new sample with a probability (1 - p).

+

The hardness of an observation is evaluated with a loss function that compares the sample's ground truth with the wrapped model's prediction. If the buffer is not full, then the sample is added to the buffer. If the buffer is full and the new sample has a bigger loss than the lowest loss in the buffer, then the sample takes its place.
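
Here is a minimal sketch of this buffering logic (the exact implementation differs; `model` and `loss_fn` are placeholders):

import heapq
import itertools
import random

rng = random.Random(42)
counter = itertools.count()  # tie-breaker so feature dicts are never compared
size, p = 40, 0.1
buffer = []  # entries are (loss, tie_breaker, x, y)

def step(model, loss_fn, x, y):
    loss = loss_fn(y, model.predict_one(x))
    if len(buffer) < size:
        heapq.heappush(buffer, (loss, next(counter), x, y))
    elif loss > buffer[0][0]:  # harder than the easiest buffered sample
        heapq.heapreplace(buffer, (loss, next(counter), x, y))
    if buffer and rng.random() < p:
        _, _, bx, by = rng.choice(buffer)
        model.learn_one(bx, by)
    else:
        model.learn_one(x, y)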

+

Parameters

+
    +
  • +

    classifier

    +

    Typebase.Classifier

    +
  • +
  • +

    size

    +

    Typeint

    +

    Size of the buffer.

    +
  • +
  • +

    p

    +

    Typefloat

    +

    Probability of updating the model with a sample from the buffer instead of a new incoming sample.

    +
  • +
  • +

    loss

    +

    Typeoptim.losses.BinaryLoss | optim.losses.MultiClassLoss | None

    +

    DefaultNone

    +

    Criterion used to evaluate the hardness of a sample.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed.

    +
  • +
+

Attributes

+
    +
  • classifier
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import imblearn
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+model = (
+    preprocessing.StandardScaler() |
+    imblearn.HardSamplingClassifier(
+        classifier=linear_model.LogisticRegression(),
+        p=0.1,
+        size=40,
+        seed=42,
+    )
+)
+
+evaluate.progressive_val_score(
+    dataset=datasets.Phishing(),
+    model=model,
+    metric=metrics.ROCAUC(),
+    print_every=500,
+)
+
+
[500] ROCAUC: 92.78%
+[1,000] ROCAUC: 94.76%
+[1,250] ROCAUC: 95.06%
+ROCAUC: 95.06%
+

+

Methods

+
+learn_one +
+
+predict_one +
+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/imblearn/HardSamplingRegressor/index.html b/0.19.0/api/imblearn/HardSamplingRegressor/index.html new file mode 100644 index 0000000000..492b253f8c --- /dev/null +++ b/0.19.0/api/imblearn/HardSamplingRegressor/index.html @@ -0,0 +1,3576 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + HardSamplingRegressor - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

HardSamplingRegressor

+

Hard sampling regressor.

+

This wrapper enables a model to retrain on past samples whose output was hard to predict. This works by storing the hardest samples in a buffer of a fixed size. When a new sample arrives, the wrapped model is either trained on one of the buffered samples with a probability p or on the new sample with a probability (1 - p).

+

The hardness of an observation is evaluated with a loss function that compares the sample's ground truth with the wrapped model's prediction. If the buffer is not full, then the sample is added to the buffer. If the buffer is full and the new sample has a bigger loss than the lowest loss in the buffer, then the sample takes its place.

+

Parameters

+
    +
  • +

    regressor

    +

    Typebase.Regressor

    +
  • +
  • +

    size

    +

    Typeint

    +

    Size of the buffer.

    +
  • +
  • +

    p

    +

    Typefloat

    +

    Probability of updating the model with a sample from the buffer instead of a new incoming sample.

    +
  • +
  • +

    loss

    +

    Typeoptim.losses.RegressionLoss | None

    +

    DefaultNone

    +

    Criterion used to evaluate the hardness of a sample.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed.

    +
  • +
+

Attributes

+
    +
  • regressor
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import imblearn
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+model = (
+    preprocessing.StandardScaler() |
+    imblearn.HardSamplingRegressor(
+        regressor=linear_model.LinearRegression(),
+        p=.2,
+        size=30,
+        seed=42,
+    )
+)
+
+evaluate.progressive_val_score(
+    datasets.TrumpApproval(),
+    model,
+    metrics.MAE(),
+    print_every=500
+)
+
+
[500] MAE: 2.274021
+[1,000] MAE: 1.392399
+[1,001] MAE: 1.391246
+MAE: 1.391246
+

+

Methods

+
+learn_one +
+
+predict_one +
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/imblearn/RandomOverSampler/index.html b/0.19.0/api/imblearn/RandomOverSampler/index.html new file mode 100644 index 0000000000..f21e74dbb0 --- /dev/null +++ b/0.19.0/api/imblearn/RandomOverSampler/index.html @@ -0,0 +1,3572 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RandomOverSampler - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

RandomOverSampler

+

Random over-sampling.

+

This is a wrapper for classifiers. It will train the provided classifier by over-sampling the stream of given observations so that the class distribution seen by the classifier follows a given desired distribution. The implementation is a discrete version of reverse rejection sampling.
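
One simple way to mimic this behaviour (a sketch, not the exact implementation) is to replicate each sample at a Poisson rate given by the ratio between the desired and the observed class frequencies:

import collections
import math
import random

rng = random.Random(42)
desired = {False: 0.4, True: 0.6}  # example desired distribution
counts = collections.Counter()

def poisson(lam):
    # Knuth's algorithm for drawing from a Poisson distribution
    L, k, p = math.exp(-lam), 0, 1.0
    while p > L:
        k += 1
        p *= rng.random()
    return k - 1

def learn_one(model, x, y):
    counts[y] += 1
    actual = counts[y] / sum(counts.values())
    rate = desired[y] / actual  # > 1 for under-represented classes
    for _ in range(poisson(rate)):
        model.learn_one(x, y)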

+

See Working with imbalanced data for example usage.

+

Parameters

+
    +
  • +

    classifier

    +

    Typebase.Classifier

    +
  • +
  • +

    desired_dist

    +

    Typedict

    +

    The desired class distribution. The keys are the classes whilst the values are the desired class percentages. The values must sum up to 1.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import imblearn
+from river import linear_model
+from river import metrics
+from river import preprocessing
+
+model = imblearn.RandomOverSampler(
+    (
+        preprocessing.StandardScaler() |
+        linear_model.LogisticRegression()
+    ),
+    desired_dist={False: 0.4, True: 0.6},
+    seed=42
+)
+
+dataset = datasets.CreditCard().take(3000)
+
+metric = metrics.LogLoss()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
LogLoss: 0.0457...
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
  • kwargs
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/imblearn/RandomSampler/index.html b/0.19.0/api/imblearn/RandomSampler/index.html new file mode 100644 index 0000000000..5ccd17a6b9 --- /dev/null +++ b/0.19.0/api/imblearn/RandomSampler/index.html @@ -0,0 +1,3578 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RandomSampler - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

RandomSampler

+

Random sampling by mixing under-sampling and over-sampling.

+

This is a wrapper for classifiers. It will train the provided classifier by both under-sampling and over-sampling the stream of given observations so that the class distribution seen by the classifier follows a given desired distribution.

+

See Working with imbalanced data for example usage.

+

Parameters

+
    +
  • +

    classifier

    +

    Typebase.Classifier

    +
  • +
  • +

    desired_dist

    +

    Typedict

    +

    The desired class distribution. The keys are the classes whilst the values are the desired class percentages. The values must sum up to 1. If set to None, then the observations will be sampled uniformly at random, which is strictly equivalent to using ensemble.BaggingClassifier.

    +
  • +
  • +

    sampling_rate

    +

    Default1.0

    +

    The desired ratio of data to sample.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import imblearn
+from river import linear_model
+from river import metrics
+from river import preprocessing
+
+model = imblearn.RandomSampler(
+    (
+        preprocessing.StandardScaler() |
+        linear_model.LogisticRegression()
+    ),
+    desired_dist={False: 0.4, True: 0.6},
+    sampling_rate=0.8,
+    seed=42
+)
+
+dataset = datasets.CreditCard().take(3000)
+
+metric = metrics.LogLoss()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
LogLoss: 0.09...
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
  • kwargs
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/imblearn/RandomUnderSampler/index.html b/0.19.0/api/imblearn/RandomUnderSampler/index.html new file mode 100644 index 0000000000..26fae8fd60 --- /dev/null +++ b/0.19.0/api/imblearn/RandomUnderSampler/index.html @@ -0,0 +1,3583 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RandomUnderSampler - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

RandomUnderSampler

+

Random under-sampling.

+

This is a wrapper for classifiers. It will train the provided classifier by under-sampling the stream of given observations so that the class distribution seen by the classifier follows a given desired distribution. The implementation is a discrete version of rejection sampling.
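
A minimal sketch of the rejection sampling idea (not the exact implementation):

import collections
import random

rng = random.Random(42)
desired = {False: 0.4, True: 0.6}  # example desired distribution
counts = collections.Counter()

def learn_one(model, x, y):
    counts[y] += 1
    total = sum(counts.values())
    ratio = {c: desired[c] * total / n for c, n in counts.items()}
    # Scale acceptance so the most under-represented class is always kept
    accept = ratio[y] / max(ratio.values())
    if rng.random() < accept:
        model.learn_one(x, y)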

+

See Working with imbalanced data for example usage.

+

Parameters

+
    +
  • +

    classifier

    +

    Typebase.Classifier

    +
  • +
  • +

    desired_dist

    +

    Typedict

    +

    The desired class distribution. The keys are the classes whilst the values are the desired class percentages. The values must sum up to 1.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import imblearn
+from river import linear_model
+from river import metrics
+from river import preprocessing
+
+model = imblearn.RandomUnderSampler(
+    (
+        preprocessing.StandardScaler() |
+        linear_model.LogisticRegression()
+    ),
+    desired_dist={False: 0.4, True: 0.6},
+    seed=42
+)
+
+dataset = datasets.CreditCard().take(3000)
+
+metric = metrics.LogLoss()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
LogLoss: 0.0336...
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
  • kwargs
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

A dictionary that associates a probability which each label.

+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/linear-model/ALMAClassifier/index.html b/0.19.0/api/linear-model/ALMAClassifier/index.html new file mode 100644 index 0000000000..15aaedd6bc --- /dev/null +++ b/0.19.0/api/linear-model/ALMAClassifier/index.html @@ -0,0 +1,3657 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ALMAClassifier - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

ALMAClassifier

+

Approximate Large Margin Algorithm (ALMA).

+

Parameters

+
    +
  • +

    p

    +

    Default2

    +
  • +
  • +

    alpha

    +

    Default0.9

    +
  • +
  • +

    B

    +

    Default1.1111111111111112

    +
  • +
  • +

    C

    +

    Default1.4142135623730951

    +
  • +
+

Attributes

+
    +
  • +

    w (collections.defaultdict)

    +

    The current weights.

    +
  • +
  • +

    k (int)

    +

    The number of instances seen during training.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import preprocessing
+
+dataset = datasets.Phishing()
+
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.ALMAClassifier()
+)
+
+metric = metrics.Accuracy()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
Accuracy: 82.56%
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict[base.typing.ClfTarget, float]: A dictionary that associates a probability with each label.

+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/linear-model/BayesianLinearRegression/index.html b/0.19.0/api/linear-model/BayesianLinearRegression/index.html new file mode 100644 index 0000000000..d5abf5632f --- /dev/null +++ b/0.19.0/api/linear-model/BayesianLinearRegression/index.html @@ -0,0 +1,3684 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BayesianLinearRegression - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

BayesianLinearRegression

+

Bayesian linear regression.

+

An advantage of Bayesian linear regression over standard linear regression is that features do not have to be scaled beforehand. Another attractive property is that this flavor of linear regression is somewhat insensitive to its hyperparameters. Finally, this model can output a predictive distribution rather than just a point estimate.

+

The downside is that the learning step runs in O(n^2) time, whereas the learning step of standard linear regression takes O(n) time.
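
The O(n^2) cost comes from maintaining the posterior precision matrix. A minimal numpy sketch of the textbook update (not River's code) makes this explicit:

import numpy as np

class BayesLinReg:
    def __init__(self, n_features, alpha=1.0, beta=1.0):
        self.beta = beta                      # noise precision
        self.A = alpha * np.eye(n_features)   # posterior precision matrix
        self.b = np.zeros(n_features)

    def learn_one(self, x, y):
        self.A += self.beta * np.outer(x, x)  # rank-one update: the O(n^2) step
        self.b += self.beta * y * x

    def predict_one(self, x):
        w = np.linalg.solve(self.A, self.b)   # posterior mean weights
        var = 1 / self.beta + x @ np.linalg.solve(self.A, x)
        return x @ w, var                     # point estimate and predictive variance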

+

Parameters

+
    +
  • +

    alpha

    +

    Default1

    +

    Prior parameter.

    +
  • +
  • +

    beta

    +

    Default1

    +

    Noise parameter.

    +
  • +
  • +

    smoothing

    +

    Typefloat

    +

    DefaultNone

    +

    Smoothing allows the model to gradually "forget" the past, and focus on the more recent data. It thus enables the model to deal with concept drift. Due to the current implementation, activating smoothing may slow down the model.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+
+dataset = datasets.TrumpApproval()
+model = linear_model.BayesianLinearRegression()
+metric = metrics.MAE()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MAE: 0.586...
+

+

x, _ = next(iter(dataset))
+model.predict_one(x)
+
+
43.852...
+

+

model.predict_one(x, with_dist=True)
+
+
𝒩(μ=43.85..., σ=1.00...)
+

+

The smoothing parameter can be set to make the model robust to drift. The parameter is +expected to be between 0 and 1. To exemplify, let's generate some simulation data with an +abrupt concept drift right in the middle.

+
import itertools
+import random
+
+def random_data(coefs, n, seed=42):
+    rng = random.Random(seed)
+    for _ in range(n):
+        x = {i: rng.random() for i, c in enumerate(coefs)}
+        y = sum(c * xi for c, xi in zip(coefs, x.values()))
+        yield x, y
+
+

Here's how the model performs without any smoothing:

+

model = linear_model.BayesianLinearRegression()
+dataset = itertools.chain(
+    random_data([0.1, 3], 100),
+    random_data([10, -2], 100)
+)
+metric = metrics.MAE()
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MAE: 1.284...
+

+

And here's how it performs with some smoothing:

+

model = linear_model.BayesianLinearRegression(smoothing=0.8)
+dataset = itertools.chain(
+    random_data([0.1, 3], 100),
+    random_data([10, -2], 100)
+)
+metric = metrics.MAE()
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MAE: 0.159...
+

+

Smoothing allows the model to gradually "forget" the past, and focus on the more recent data.

+

Note how this works better than standard linear regression, even when using an aggressive +learning rate.

+

from river import optim
+model = linear_model.LinearRegression(optimizer=optim.SGD(0.5))
+dataset = itertools.chain(
+    random_data([0.1, 3], 100),
+    random_data([10, -2], 100)
+)
+metric = metrics.MAE()
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MAE: 0.242...
+

+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.RegTarget'
  • +
+

Returns

+

Regressor: self

+
+

+
+predict_many +
+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • with_dist — defaults to False
  • +
+

Returns

+

base.typing.RegTarget: The prediction.

+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/linear-model/LinearRegression/index.html b/0.19.0/api/linear-model/LinearRegression/index.html new file mode 100644 index 0000000000..4bb95b4d99 --- /dev/null +++ b/0.19.0/api/linear-model/LinearRegression/index.html @@ -0,0 +1,3740 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + LinearRegression - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

LinearRegression

+

Linear regression.

+

This estimator supports learning with mini-batches. On top of the single instance methods, it provides the following methods: learn_many and predict_many. Each method takes as input a pandas.DataFrame where each column represents a feature.
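
For instance, here is a small sketch of the mini-batch API on toy data:

import pandas as pd
from river import linear_model

X = pd.DataFrame({'x1': [0.1, 0.2, 0.3], 'x2': [1.0, 0.5, 0.2]})
y = pd.Series([1.1, 1.4, 1.6])

model = linear_model.LinearRegression()
model = model.learn_many(X, y)       # one optimizer step over the batch
predictions = model.predict_many(X)  # returns a pandas Series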

+

It is generally a good idea to scale the data beforehand in order for the optimizer to converge. You can do this online with a preprocessing.StandardScaler.

+

Parameters

+
    +
  • +

    optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the weights. Note that the intercept updates are handled separately.

    +
  • +
  • +

    loss

    +

    Typeoptim.losses.RegressionLoss | None

    +

    DefaultNone

    +

    The loss function to optimize for.

    +
  • +
  • +

    l2

    +

    Default0.0

    +

    Amount of L2 regularization used to push weights towards 0. For now, only one type of penalty can be used. The joint use of L1 and L2 is not explicitly supported.

    +
  • +
  • +

    l1

    +

    Default0.0

    +

    Amount of L1 regularization used to push weights towards 0. For now, only one type of penalty can be used. The joint use of L1 and L2 is not explicitly supported.

    +
  • +
  • +

    intercept_init

    +

    Default0.0

    +

    Initial intercept value.

    +
  • +
  • +

    intercept_lr

    +

    Typeoptim.base.Scheduler | float

    +

    Default0.01

    +

    Learning rate scheduler used for updating the intercept. A optim.schedulers.Constant is used if a float is provided. The intercept is not updated when this is set to 0.

    +
  • +
  • +

    clip_gradient

    +

    Default1000000000000.0

    +

    Clips the absolute value of each gradient value.

    +
  • +
  • +

    initializer

    +

    Typeoptim.base.Initializer | None

    +

    DefaultNone

    +

    Weights initialization scheme.

    +
  • +
+

Attributes

+
    +
  • +

    weights (dict)

    +

    The current weights.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import preprocessing
+
+dataset = datasets.TrumpApproval()
+
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LinearRegression(intercept_lr=.1)
+)
+metric = metrics.MAE()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MAE: 0.558735
+

+

model['LinearRegression'].intercept
+
+
35.617670
+

+

You can call the debug_one method to break down a prediction. This works even if the +linear regression is part of a pipeline.

+

x, y = next(iter(dataset))
+report = model.debug_one(x)
+print(report)
+
+
0. Input
+--------
+gallup: 43.84321 (float)
+ipsos: 46.19925 (float)
+morning_consult: 48.31875 (float)
+ordinal_date: 736389 (int)
+rasmussen: 44.10469 (float)
+you_gov: 43.63691 (float)
+<BLANKLINE>
+1. StandardScaler
+-----------------
+gallup: 1.18810 (float)
+ipsos: 2.10348 (float)
+morning_consult: 2.73545 (float)
+ordinal_date: -1.73032 (float)
+rasmussen: 1.26872 (float)
+you_gov: 1.48391 (float)
+<BLANKLINE>
+2. LinearRegression
+-------------------
+Name              Value      Weight      Contribution
+      Intercept    1.00000    35.61767       35.61767
+          ipsos    2.10348     0.62689        1.31866
+morning_consult    2.73545     0.24180        0.66144
+         gallup    1.18810     0.43568        0.51764
+      rasmussen    1.26872     0.28118        0.35674
+        you_gov    1.48391     0.03123        0.04634
+   ordinal_date   -1.73032     3.45162       -5.97242
+<BLANKLINE>
+Prediction: 32.54607
+

+

Methods

+
+debug_one +

Debugs the output of the linear regression.

+

Parameters

+
    +
  • x'dict'
  • +
  • decimals'int' — defaults to 5
  • +
+

Returns

+

str: A table which explains the output.

+
+

+
+learn_many +

Update the model with a mini-batch of features X and real-valued targets y.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
  • y'pd.Series'
  • +
  • w'float | pd.Series' — defaults to 1
  • +
+

Returns

+

MiniBatchRegressor: self

+
+

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.RegTarget'
  • +
  • w — defaults to 1.0
  • +
+

Returns

+

Regressor: self

+
+

+
+predict_many +

Predict the outcome for each given sample.

+

Parameters

+
    +
  • X
  • +
+

Returns

+

The predicted outcomes.

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

The prediction.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/linear-model/LogisticRegression/index.html b/0.19.0/api/linear-model/LogisticRegression/index.html new file mode 100644 index 0000000000..0b2daebf75 --- /dev/null +++ b/0.19.0/api/linear-model/LogisticRegression/index.html @@ -0,0 +1,3712 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + LogisticRegression - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

LogisticRegression

+

Logistic regression.

+

This estimator supports learning with mini-batches. On top of the single instance methods, it provides the following methods: learn_many, predict_many, predict_proba_many. Each method takes as input a pandas.DataFrame where each column represents a feature.

+

It is generally a good idea to scale the data beforehand in order for the optimizer to converge. You can do this online with a preprocessing.StandardScaler.

+

Parameters

+
    +
  • +

    optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the weights. Note that the intercept is handled separately.

    +
  • +
  • +

    loss

    +

    Typeoptim.losses.BinaryLoss | None

    +

    DefaultNone

    +

    The loss function to optimize for. Defaults to optim.losses.Log.

    +
  • +
  • +

    l2

    +

    Default0.0

    +

    Amount of L2 regularization used to push weights towards 0. For now, only one type of penalty can be used. The joint use of L1 and L2 is not explicitly supported.

    +
  • +
  • +

    l1

    +

    Default0.0

    +

    Amount of L1 regularization used to push weights towards 0. For now, only one type of penalty can be used. The joint use of L1 and L2 is not explicitly supported.

    +
  • +
  • +

    intercept_init

    +

    Default0.0

    +

    Initial intercept value.

    +
  • +
  • +

    intercept_lr

    +

    Typefloat | optim.base.Scheduler

    +

    Default0.01

    +

    Learning rate scheduler used for updating the intercept. A optim.schedulers.Constant is used if a float is provided. The intercept is not updated when this is set to 0.

    +
  • +
  • +

    clip_gradient

    +

    Default1000000000000.0

    +

    Clips the absolute value of each gradient value.

    +
  • +
  • +

    initializer

    +

    Typeoptim.base.Initializer | None

    +

    DefaultNone

    +

    Weights initialization scheme.

    +
  • +
+

Attributes

+
    +
  • +

    weights

    +

    The current weights.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.Phishing()
+
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LogisticRegression(optimizer=optim.SGD(.1))
+)
+
+metric = metrics.Accuracy()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
Accuracy: 88.96%
+

+

Methods

+
+learn_many +

Update the model with a mini-batch of features X and boolean targets y.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
  • y'pd.Series'
  • +
  • w'float | pd.Series' — defaults to 1
  • +
+

Returns

+

MiniBatchClassifier: self

+
+

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
  • w — defaults to 1.0
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_many +

Predict the outcome for each given sample.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.Series: The predicted labels.

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_many +

Predict the outcome probabilities for each given sample.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.DataFrame: A dataframe with probabilities of True and False for each sample.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/linear-model/PAClassifier/index.html b/0.19.0/api/linear-model/PAClassifier/index.html new file mode 100644 index 0000000000..5a110397c8 --- /dev/null +++ b/0.19.0/api/linear-model/PAClassifier/index.html @@ -0,0 +1,3652 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + PAClassifier - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

PAClassifier

+

Passive-aggressive learning for classification.

+

Parameters

+
    +
  • +

    C

    +

    Default1.0

    +
  • +
  • +

    mode

    +

    Default1

    +
  • +
  • +

    learn_intercept

    +

    DefaultTrue

    +
  • +
+

Examples

+

The following example is taken from this blog post.

+

from river import linear_model
+from river import metrics
+from river import stream
+import numpy as np
+from sklearn import datasets
+from sklearn import model_selection
+
+np.random.seed(1000)
+X, y = datasets.make_classification(
+    n_samples=5000,
+    n_features=4,
+    n_informative=2,
+    n_redundant=0,
+    n_repeated=0,
+    n_classes=2,
+    n_clusters_per_class=2
+)
+
+X_train, X_test, y_train, y_test = model_selection.train_test_split(
+    X,
+    y,
+    test_size=0.35,
+    random_state=1000
+)
+
+model = linear_model.PAClassifier(
+    C=0.01,
+    mode=1
+)
+
+for xi, yi in stream.iter_array(X_train, y_train):
+    y_pred = model.learn_one(xi, yi)
+
+metric = metrics.Accuracy() + metrics.LogLoss()
+
+for xi, yi in stream.iter_array(X_test, y_test):
+    metric = metric.update(yi, model.predict_proba_one(xi))
+
+print(metric)
+
+
Accuracy: 88.46%, LogLoss: 0.325727
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/linear-model/PARegressor/index.html b/0.19.0/api/linear-model/PARegressor/index.html new file mode 100644 index 0000000000..59a0d900c1 --- /dev/null +++ b/0.19.0/api/linear-model/PARegressor/index.html @@ -0,0 +1,3628 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + PARegressor - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

PARegressor

+

Passive-aggressive learning for regression.

+

Parameters

+
    +
  • +

    C

    +

    Default1.0

    +
  • +
  • +

    mode

    +

    Default1

    +
  • +
  • +

    eps

    +

    Default0.1

    +
  • +
  • +

    learn_intercept

    +

    DefaultTrue

    +
  • +
+

Examples

+

The following example is taken from this blog post.

+

from river import linear_model
+from river import metrics
+from river import stream
+import numpy as np
+from sklearn import datasets
+
+np.random.seed(1000)
+X, y = datasets.make_regression(n_samples=500, n_features=4)
+
+model = linear_model.PARegressor(
+    C=0.01,
+    mode=2,
+    eps=0.1,
+    learn_intercept=False
+)
+metric = metrics.MAE() + metrics.MSE()
+
+for xi, yi in stream.iter_array(X, y):
+    y_pred = model.predict_one(xi)
+    model = model.learn_one(xi, yi)
+    metric = metric.update(yi, y_pred)
+
+print(metric)
+
+
MAE: 9.809402, MSE: 472.393532
+

+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

The prediction.

+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/linear-model/Perceptron/index.html b/0.19.0/api/linear-model/Perceptron/index.html new file mode 100644 index 0000000000..3751089df6 --- /dev/null +++ b/0.19.0/api/linear-model/Perceptron/index.html @@ -0,0 +1,3679 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Perceptron - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Perceptron

+

Perceptron classifier.

+

In this implementation, the Perceptron is viewed as a special case of the logistic regression. The loss function that is used is the Hinge loss with a threshold set to 0, whilst the learning rate of the stochastic gradient descent procedure is set to 1 for both the weights and the intercept.

+
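
As a hedged sketch of the equivalence described above: the same behaviour can, in principle, be assembled from linear_model.LogisticRegression using river's optim module. The construction below is an illustration under that reading, not necessarily the exact internal code.

+

from river import linear_model
+from river import optim
+
+# A Perceptron expressed as a special case of logistic regression:
+# Hinge loss with a threshold of 0, and SGD with a learning rate of 1
+# for both the weights and the intercept.
+perceptron_like = linear_model.LogisticRegression(
+    optimizer=optim.SGD(1),
+    loss=optim.losses.Hinge(threshold=0),
+    intercept_lr=1,
+)
+

+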

Parameters

+
    +
  • +

    l2

    +

    Default0.0

    +

    Amount of L2 regularization used to push weights towards 0.

    +
  • +
  • +

    clip_gradient

    +

    Default1000000000000.0

    +

    Clips the absolute value of each gradient value.

    +
  • +
  • +

    initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

    Weights initialization scheme.

    +
  • +
+

Attributes

+
    +
  • +

    weights

    +

    The current weights.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model as lm
+from river import metrics
+from river import preprocessing as pp
+
+dataset = datasets.Phishing()
+
+model = pp.StandardScaler() | lm.Perceptron()
+
+metric = metrics.Accuracy()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
Accuracy: 85.84%
+

+

Methods

+
+learn_many +

Update the model with a mini-batch of features X and boolean targets y.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
  • y'pd.Series'
  • +
  • w'float | pd.Series' — defaults to 1
  • +
+

Returns

+

MiniBatchClassifier: self

+
+

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
  • w — defaults to 1.0
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_many +

Predict the outcome for each given sample.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.Series: The predicted labels.

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_many +

Predict the outcome probabilities for each given sample.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.DataFrame: A dataframe with probabilities of True and False for each sample.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/linear-model/SoftmaxRegression/index.html b/0.19.0/api/linear-model/SoftmaxRegression/index.html new file mode 100644 index 0000000000..87a0b44b8e --- /dev/null +++ b/0.19.0/api/linear-model/SoftmaxRegression/index.html @@ -0,0 +1,3654 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SoftmaxRegression - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

SoftmaxRegression

+

Softmax regression is a generalization of logistic regression to multiple classes.

+

Softmax regression is also known as "multinomial logistic regression". There is a set of weights for each class, hence the weights attribute is a nested collections.defaultdict. The main advantage of using this instead of a one-vs-all logistic regression is that the probabilities will be calibrated. Moreover, softmax regression is more robust to outliers.

+
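
A minimal sketch of the per-class weights described above. The feature name and class labels are arbitrary illustrations; the point is that one weight vector is kept per observed class and that the predicted probabilities are softmax-normalised.

+

from river import linear_model
+
+model = linear_model.SoftmaxRegression()
+model = model.learn_one({'x': 1.0}, 'a')
+model = model.learn_one({'x': -1.0}, 'b')
+
+# One weight vector is kept per class, and the predicted
+# probabilities are normalised so that they sum to 1.
+probas = model.predict_proba_one({'x': 0.5})
+print(round(sum(probas.values()), 6))  # 1.0
+

+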

Parameters

+
    +
  • +

    optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used to tune the weights.

    +
  • +
  • +

    loss

    +

    Typeoptim.losses.MultiClassLoss | None

    +

    DefaultNone

    +

    The loss function to optimize for.

    +
  • +
  • +

    l2

    +

    Default0

    +

    Amount of L2 regularization used to push weights towards 0.

    +
  • +
+

Attributes

+
    +
  • weights (collections.defaultdict)
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.ImageSegments()
+
+model = preprocessing.StandardScaler()
+model |= linear_model.SoftmaxRegression()
+
+metric = metrics.MacroF1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MacroF1: 81.88%
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict[base.typing.ClfTarget, float]: A dictionary that associates a probability with each label.

+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/linear-model/base/GLM/index.html b/0.19.0/api/linear-model/base/GLM/index.html new file mode 100644 index 0000000000..20fdf0e370 --- /dev/null +++ b/0.19.0/api/linear-model/base/GLM/index.html @@ -0,0 +1,3624 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GLM - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

GLM

+

Generalized Linear Model.

+

This serves as a base class for linear and logistic regression.

+

Parameters

+
    +
  • +

    optimizer

    +

    The sequential optimizer used for updating the weights. Note that the intercept updates are handled separately.

    +
  • +
  • +

    loss

    +

    The loss function to optimize for.

    +
  • +
  • +

    l2

    +

    Amount of L2 regularization used to push weights towards 0. For now, only one type of penalty can be used. The joint use of L1 and L2 is not explicitly supported.

    +
  • +
  • +

    l1

    +

    Amount of L1 regularization used to push weights towards 0. For now, only one type of penalty can be used. The joint use of L1 and L2 is not explicitly supported.

    +
  • +
  • +

    intercept_init

    +

    Initial intercept value.

    +
  • +
  • +

    intercept_lr

    +

Learning rate scheduler used for updating the intercept. An optim.schedulers.Constant is used if a float is provided. The intercept is not updated when this is set to 0. See the sketch after this list.

    +
  • +
  • +

    clip_gradient

    +

    Clips the absolute value of each gradient value.

    +
  • +
  • +

    initializer

    +

    Weights initialization scheme.

    +
  • +
+
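
A brief sketch of the intercept_lr behaviour described in the list above. GLM itself is a base class, so linear_model.LogisticRegression stands in for it here; the scheduler usage follows river's public optim.schedulers API, but treat this as an illustration.

+

from river import linear_model
+from river import optim
+
+# Passing 0 freezes the intercept entirely.
+frozen = linear_model.LogisticRegression(intercept_lr=0)
+
+# Passing a float is equivalent to a constant scheduler, which can
+# also be supplied explicitly.
+scheduled = linear_model.LogisticRegression(
+    intercept_lr=optim.schedulers.Constant(0.01)
+)
+

+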

Attributes

+
    +
  • weights
  • +
+

Methods

+
+learn_many +
+
+learn_one +
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/metrics/Accuracy/index.html b/0.19.0/api/metrics/Accuracy/index.html new file mode 100644 index 0000000000..e22ba50ad2 --- /dev/null +++ b/0.19.0/api/metrics/Accuracy/index.html @@ -0,0 +1,4531 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Accuracy - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Accuracy

+

Accuracy score, which is the percentage of exact matches.

+

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [True, False, True, True, True]
+y_pred = [True, True, False, True, True]
+
+metric = metrics.Accuracy()
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+
+metric
+
+
Accuracy: 60.00%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/metrics/AdjustedMutualInfo/index.html b/0.19.0/api/metrics/AdjustedMutualInfo/index.html new file mode 100644 index 0000000000..23258f2f3f --- /dev/null +++ b/0.19.0/api/metrics/AdjustedMutualInfo/index.html @@ -0,0 +1,4557 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AdjustedMutualInfo - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

AdjustedMutualInfo

+

Adjusted Mutual Information between two clusterings.

+

Adjusted Mutual Information (AMI) is an adjustment of the Mutual Information score that accounts for chance. It corrects the effect of agreement solely due to chance between clusterings, similar to the way the Adjusted Rand Index corrects the Rand Index. It is closely related to variation of information. The adjusted measure, however, is no longer metrical.

+

For two clusterings \(U\) and \(V\), the Adjusted Mutual Information is calculated as:

+
\[ AMI(U, V) = \frac{MI(U, V) - E(MI(U, V))}{avg(H(U), H(V)) - E(MI(U, V))} \]
+

This metric is independent of the permutation of the class or cluster label values; furthermore, it is also symmetric. This can be useful to measure the agreement of two label assignments strategies on the same dataset, regardless of the ground truth.

+

However, due to the complexity of the expected mutual information score, computing this metric is generally an order of magnitude slower than most other metrics.

+

Parameters

+
    +
  • +

    cm

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
  • +

    average_method

    +

    Defaultarithmetic

    +

This parameter defines how to compute the normalizer in the denominator. Possible options include min, max, arithmetic and geometric; see the sketch after this list.

    +
  • +
+
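
As a sketch of what the average_method options compute, here is the normalizer written as a plain function of the two entropies H(U) and H(V). The function name is hypothetical and only illustrates the four options named above.

+

def normalizer(h_u, h_v, method="arithmetic"):
+    # Hypothetical helper illustrating the four average_method options.
+    if method == "min":
+        return min(h_u, h_v)
+    if method == "max":
+        return max(h_u, h_v)
+    if method == "arithmetic":
+        return (h_u + h_v) / 2
+    if method == "geometric":
+        return (h_u * h_v) ** 0.5
+    raise ValueError(f"unknown average_method: {method}")
+

+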

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [1, 1, 2, 2, 3, 3]
+y_pred = [1, 1, 1, 2, 2, 2]
+
+metric = metrics.AdjustedMutualInfo()
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp).get())
+
+
1.0
+1.0
+0.0
+0.0
+0.105891
+0.298792
+

+

metric
+
+
AdjustedMutualInfo: 0.298792
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

+
+
+
    +
  1. +

    Wikipedia contributors. (2021, March 17). Mutual information. In Wikipedia, The Free Encyclopedia, from https://en.wikipedia.org/w/index.php?title=Mutual_information&oldid=1012714929

    +
  2. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/metrics/AdjustedRand/index.html b/0.19.0/api/metrics/AdjustedRand/index.html new file mode 100644 index 0000000000..dca4673f1b --- /dev/null +++ b/0.19.0/api/metrics/AdjustedRand/index.html @@ -0,0 +1,4557 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AdjustedRand - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

AdjustedRand

+

Adjusted Rand Index.

+

The Adjusted Rand Index is the corrected-for-chance version of the Rand Index 1 2. Such a correction for chance establishes a baseline by using the expected similarity of all pair-wise comparisons between clusterings specified by a random model.

+

Traditionally, the Rand Index was corrected using the Permutation Model for Clustering. However, the premises of the permutation model are frequently violated; in many clustering scenarios, either the number of clusters or the size distribution of those clusters vary drastically. Variations of the adjusted Rand Index account for different models of random clusterings.

+

Though the Rand Index may only yield a value between 0 and 1, the Adjusted Rand index can yield negative values if the index is less than the expected index.

+

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 0, 0, 1, 1, 1]
+y_pred = [0, 0, 1, 1, 2, 2]
+
+metric = metrics.AdjustedRand()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp).get())
+
+
1.0
+1.0
+0.0
+0.0
+0.09090909090909091
+0.24242424242424243
+

+

metric
+
+
AdjustedRand: 0.242424
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

+
+
+
    +
  1. +

    Wikipedia contributors. (2021, January 13). Rand index. In Wikipedia, The Free Encyclopedia, from https://en.wikipedia.org/w/index.php?title=Rand_index&oldid=1000098911

    +
  2. +
  3. +

    W. M. Rand (1971). "Objective criteria for the evaluation of clustering methods". Journal of the American Statistical Association. American Statistical Association. 66 (336): 846–850. arXiv:1704.01036. doi:10.2307/2284239. JSTOR 2284239.

    +
  4. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/metrics/BalancedAccuracy/index.html b/0.19.0/api/metrics/BalancedAccuracy/index.html new file mode 100644 index 0000000000..2408b77ac7 --- /dev/null +++ b/0.19.0/api/metrics/BalancedAccuracy/index.html @@ -0,0 +1,4541 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BalancedAccuracy - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

BalancedAccuracy

+

Balanced accuracy.

+

Balanced accuracy is the average of recall obtained on each class. It is used to deal with imbalanced datasets in binary and multi-class classification problems.

+
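
A hand check of the first example below: recall on the True class is 3/4 and recall on the False class is 1/2, and balanced accuracy averages them.

+

# Recall per class for the first example below: 3/4 for True, 1/2 for False.
+print((3/4 + 1/2) / 2)  # 0.625, i.e. 62.50%
+

+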

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+y_true = [True, False, True, True, False, True]
+y_pred = [True, False, True, True, True, False]
+
+metric = metrics.BalancedAccuracy()
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+
+metric
+
+
BalancedAccuracy: 62.50%
+

+

y_true = [0, 1, 0, 0, 1, 0]
+y_pred = [0, 1, 0, 0, 0, 1]
+metric = metrics.BalancedAccuracy()
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+
+metric
+
+
BalancedAccuracy: 62.50%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/metrics/ClassificationReport/index.html b/0.19.0/api/metrics/ClassificationReport/index.html new file mode 100644 index 0000000000..c9d31d0b8d --- /dev/null +++ b/0.19.0/api/metrics/ClassificationReport/index.html @@ -0,0 +1,4548 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ClassificationReport - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

ClassificationReport

+

A report for monitoring a classifier.

+

This class maintains a set of metrics and updates each of them every time update is called. You can print this class at any time during a model's lifetime to get a tabular visualization of various metrics.

+

You can wrap a metrics.ClassificationReport with utils.Rolling in order to obtain a classification report over a window of observations. You can also wrap it with utils.TimeRolling to obtain a report over a period of time.

+
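
A short sketch of the two rolling variants mentioned above. The window_size and period values are arbitrary illustrations; the wrapper signatures follow river's utils module as documented, but treat this as a sketch.

+

import datetime as dt
+from river import metrics, utils
+
+# Classification report over the last 100 observations.
+rolling = utils.Rolling(metrics.ClassificationReport(), window_size=100)
+
+# Classification report over the last day of observations.
+by_time = utils.TimeRolling(metrics.ClassificationReport(), period=dt.timedelta(days=1))
+

+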

Parameters

+
    +
  • +

    decimals

    +

    Default2

    +

    The number of decimals to display in each cell.

    +
  • +
  • +

    cm

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = ['pear', 'apple', 'banana', 'banana', 'banana']
+y_pred = ['apple', 'pear', 'banana', 'banana', 'apple']
+
+report = metrics.ClassificationReport()
+
+for yt, yp in zip(y_true, y_pred):
+    report = report.update(yt, yp)
+
+print(report)
+
+
               Precision   Recall   F1       Support
+<BLANKLINE>
+   apple       0.00%    0.00%    0.00%         1
+  banana     100.00%   66.67%   80.00%         3
+    pear       0.00%    0.00%    0.00%         1
+<BLANKLINE>
+   Macro      33.33%   22.22%   26.67%
+   Micro      40.00%   40.00%   40.00%
+Weighted      60.00%   40.00%   48.00%
+<BLANKLINE>
+                 40.00% accuracy
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/metrics/CohenKappa/index.html b/0.19.0/api/metrics/CohenKappa/index.html new file mode 100644 index 0000000000..7587523597 --- /dev/null +++ b/0.19.0/api/metrics/CohenKappa/index.html @@ -0,0 +1,4543 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CohenKappa - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

CohenKappa

+

Cohen's Kappa score.

+

Cohen's Kappa expresses the level of agreement between two annotators on a classification problem. It is defined as

+
\[ \kappa = (p_o - p_e) / (1 - p_e) \]
+

where \(p_o\) is the empirical probability of agreement on the label assigned to any sample (prequential accuracy), and \(p_e\) is the expected agreement when both annotators assign labels randomly.

+

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird']
+y_pred = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat']
+
+metric = metrics.CohenKappa()
+
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+
+metric
+
+
CohenKappa: 42.86%
+

+
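
As a sanity check, here is a hand computation of the kappa value above from the definitions of \(p_o\) and \(p_e\) given earlier.

+

from collections import Counter
+
+y_true = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird']
+y_pred = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat']
+
+n = len(y_true)
+# Empirical agreement: 4 out of 6 predictions are correct.
+p_o = sum(yt == yp for yt, yp in zip(y_true, y_pred)) / n
+# Expected agreement under random label assignment.
+true_counts, pred_counts = Counter(y_true), Counter(y_pred)
+p_e = sum(true_counts[c] * pred_counts[c] for c in true_counts) / n**2
+print(round((p_o - p_e) / (1 - p_e), 4))  # 0.4286, i.e. 42.86%
+

+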

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

+
+
+
    +
  1. +

    J. Cohen (1960). "A coefficient of agreement for nominal scales". Educational and Psychological Measurement 20(1):37-46. doi:10.1177/001316446002000104. 

    +
  2. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/metrics/Completeness/index.html b/0.19.0/api/metrics/Completeness/index.html new file mode 100644 index 0000000000..df70e21f65 --- /dev/null +++ b/0.19.0/api/metrics/Completeness/index.html @@ -0,0 +1,4553 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Completeness - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Completeness

+

Completeness Score.

+

Completeness 1 is symmetrical to homogeneity. In order to satisfy the completeness criteria, a clustering must assign all of those datapoints that are members of a single class to a single cluster. To evaluate completeness, we examine the distribution of cluster assignments within each class. In a perfectly complete clustering solution, each of these distributions will be completely skewed to a single cluster.

+

We can evaluate this degree of skew by calculating the conditional entropy of the proposed cluster distribution given the class of the component data points. However, in the worst case scenario, each class is represented by every cluster with a distribution equal to the distribution of cluster sizes. Therefore, symmetrically to the calculation above, we define completeness as:

+
\[ c = \begin{cases} 1 & \text{if } H(K) = 0, \\ 1 - \frac{H(K|C)}{H(K)} & \text{otherwise.} \end{cases} \]
+

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [1, 1, 2, 2, 3, 3]
+y_pred = [1, 1, 1, 2, 2, 2]
+
+metric = metrics.Completeness()
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp).get())
+
+
1.0
+1.0
+1.0
+0.3836885465963443
+0.5880325916843805
+0.6666666666666667
+

+

metric
+
+
Completeness: 66.67%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

+
+
+
    +
  1. +

    Andrew Rosenberg and Julia Hirschberg (2007). V-Measure: A conditional entropy-based external cluster evaluation measure. Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning, pp. 410 - 420, Prague, June 2007.

    +
  2. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/metrics/ConfusionMatrix/index.html b/0.19.0/api/metrics/ConfusionMatrix/index.html new file mode 100644 index 0000000000..222cd51e07 --- /dev/null +++ b/0.19.0/api/metrics/ConfusionMatrix/index.html @@ -0,0 +1,4539 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ConfusionMatrix - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

ConfusionMatrix

+

Confusion Matrix for binary and multi-class classification.

+

Parameters

+
    +
  • +

    classes

    +

    DefaultNone

    +

The initial set of classes. This is optional and serves only for display purposes.

    +
  • +
+

Attributes

+
    +
  • +

    classes

    +
  • +
  • +

    total_false_negatives

    +
  • +
  • +

    total_false_positives

    +
  • +
  • +

    total_true_negatives

    +
  • +
  • +

    total_true_positives

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird']
+y_pred = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat']
+
+cm = metrics.ConfusionMatrix()
+
+for yt, yp in zip(y_true, y_pred):
+    cm = cm.update(yt, yp)
+
+cm
+
+
       ant  bird   cat
+ ant     2     0     0
+bird     0     0     1
+ cat     1     0     2
+

+

cm['bird']['cat']
+
+
1.0
+

+

Methods

+
+false_negatives +
+
+false_positives +
+
+revert +
+
+support +
+
+true_negatives +
+
+true_positives +
+
+update +
+

Notes

+

This confusion matrix is a 2D matrix of shape (n_classes, n_classes), corresponding to a single-target (binary and multi-class) classification task.

+

Each row represents true (actual) class-labels, while each column corresponds to the predicted class-labels. For example, an entry in position [1, 2] means that the true class-label is 1, and the predicted class-label is 2 (incorrect prediction).

+

This structure is used to keep updated statistics about a single-output classifier's performance and to compute multiple evaluation metrics.

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/metrics/CrossEntropy/index.html b/0.19.0/api/metrics/CrossEntropy/index.html new file mode 100644 index 0000000000..dfc515f686 --- /dev/null +++ b/0.19.0/api/metrics/CrossEntropy/index.html @@ -0,0 +1,4520 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CrossEntropy - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

CrossEntropy

+

Multiclass generalization of the logarithmic loss.

+
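
The running value is the average of per-sample cross-entropies. Assuming the standard definition, here is a hand check of the first update in the example below, where the true class 0 is given probability 0.29450637.

+

import math
+
+# Cross-entropy of a single sample is -log of the probability
+# assigned to the true class.
+print(round(-math.log(0.29450637), 6))  # 1.222454
+

+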

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 1, 2, 2]
+y_pred = [
+    {0: 0.29450637, 1: 0.34216758, 2: 0.36332605},
+    {0: 0.21290077, 1: 0.32728332, 2: 0.45981591},
+    {0: 0.42860913, 1: 0.33380113, 2: 0.23758974},
+    {0: 0.44941979, 1: 0.32962558, 2: 0.22095463}
+]
+
+metric = metrics.CrossEntropy()
+
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+    print(metric.get())
+
+
1.222454
+1.169691
+1.258864
+1.321597
+

+

metric
+
+
CrossEntropy: 1.321598
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+
    +
  • model
  • +
+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/metrics/F1/index.html b/0.19.0/api/metrics/F1/index.html new file mode 100644 index 0000000000..0589a15d59 --- /dev/null +++ b/0.19.0/api/metrics/F1/index.html @@ -0,0 +1,4536 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + F1 - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

F1

+

Binary F1 score.

+

Parameters

+
    +
  • +

    cm

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
  • +

    pos_val

    +

    DefaultTrue

    +

    Value to treat as "positive".

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [False, False, False, True, True, True]
+y_pred = [False, False, True, True, False, False]
+
+metric = metrics.F1()
+
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+
+metric
+
+
F1: 40.00%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/metrics/FBeta/index.html b/0.19.0/api/metrics/FBeta/index.html new file mode 100644 index 0000000000..7057e18d99 --- /dev/null +++ b/0.19.0/api/metrics/FBeta/index.html @@ -0,0 +1,4535 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FBeta - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

FBeta

+

Binary F-Beta score.

+

The FBeta score is a weighted harmonic mean between precision and recall. The higher the beta value, the more weight is given to recall. When beta equals 1, precision and recall are weighted equally, which results in the F1 score (see metrics.F1).

+
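
As a sketch, the weighted harmonic mean can be written out directly. Plugging in the precision of 1/2 and the recall of 1/3 implied by the example further below reproduces its output.

+

def fbeta(precision, recall, beta):
+    # Weighted harmonic mean of precision and recall.
+    b2 = beta ** 2
+    return (1 + b2) * precision * recall / (b2 * precision + recall)
+
+# For the example below: precision = 1/2, recall = 1/3, beta = 2.
+print(round(fbeta(1/2, 1/3, beta=2), 4))  # 0.3571, i.e. 35.71%
+

+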

Parameters

+
    +
  • +

    beta

    +

    Typefloat

    +

    Weight of precision in the harmonic mean.

    +
  • +
  • +

    cm

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
  • +

    pos_val

    +

    DefaultTrue

    +

    Value to treat as "positive".

    +
  • +
+

Attributes

+ +

Examples

+

from river import metrics
+
+y_true = [False, False, False, True, True, True]
+y_pred = [False, False, True, True, False, False]
+
+metric = metrics.FBeta(beta=2)
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+
+metric
+
+
FBeta: 35.71%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/metrics/FowlkesMallows/index.html b/0.19.0/api/metrics/FowlkesMallows/index.html new file mode 100644 index 0000000000..732a6974cb --- /dev/null +++ b/0.19.0/api/metrics/FowlkesMallows/index.html @@ -0,0 +1,4562 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FowlkesMallows - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

FowlkesMallows

+

Fowlkes-Mallows Index.

+

The Fowlkes-Mallows Index 1 2 is an external evaluation method that is used to determine the similarity between two clusterings, and also a metric to measure confusion matrices. The measure of similarity could be either between two hierarchical clusterings or a clustering and a benchmark classification. A higher value for the Fowlkes-Mallows index indicates a greater similarity between the clusters and the benchmark classifications.

+

The Fowlkes-Mallows Index, for two cluster algorithms, is defined as:

+
\[ FM = \sqrt{PPV \times TPR} = \sqrt{\frac{TP}{TP+FP} \times \frac{TP}{TP+FN}} \]
+

where

+
    +
  • +

    TP, FP, FN are respectively the number of true positives, false positives and false negatives;

    +
  • +
  • +

TPR is the True Positive Rate (or Sensitivity/Recall), and PPV is the Positive Predictive Value (or Precision).

    +
  • +
+
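
A hand check of the final value in the example further below, counting over pairs of points: TP pairs are grouped together in both labelings, FP pairs only in the prediction, and FN pairs only in the ground truth. For that data, TP=2, FP=1 and FN=4.

+

import math
+
+tp, fp, fn = 2, 1, 4  # pair counts for the example below
+ppv = tp / (tp + fp)  # precision over pairs
+tpr = tp / (tp + fn)  # recall over pairs
+print(round(math.sqrt(ppv * tpr), 4))  # 0.4714, i.e. 47.14%
+

+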

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 0, 0, 1, 1, 1]
+y_pred = [0, 0, 1, 1, 2, 2]
+
+metric = metrics.FowlkesMallows()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
FowlkesMallows: 0.00%
+FowlkesMallows: 100.00%
+FowlkesMallows: 57.74%
+FowlkesMallows: 40.82%
+FowlkesMallows: 35.36%
+FowlkesMallows: 47.14%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

+
+
+
    +
  1. +

    Wikipedia contributors. (2020, December 22). Fowlkes–Mallows index. In Wikipedia, The Free Encyclopedia, from https://en.wikipedia.org/w/index.php?title=Fowlkes%E2%80%93Mallows_index&oldid=995714222

    +
  2. +
  3. +

    E. B. Fowlkes and C. L. Mallows (1983). “A method for comparing two hierarchical clusterings”. Journal of the American Statistical Association

    +
  4. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/metrics/GeometricMean/index.html b/0.19.0/api/metrics/GeometricMean/index.html new file mode 100644 index 0000000000..1a39b88f32 --- /dev/null +++ b/0.19.0/api/metrics/GeometricMean/index.html @@ -0,0 +1,4543 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GeometricMean - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

GeometricMean

+

Geometric mean score.

+

The geometric mean is a good indicator of a classifier's performance in the presence of class imbalance because it is independent of the distribution of examples between classes. This implementation computes the geometric mean of class-wise sensitivity (recall).

+
\[ gm = \sqrt[n]{s_1\cdot s_2\cdot s_3\cdot \ldots\cdot s_n} \]
+

where \(s_i\) is the sensitivity (recall) of class \(i\) and \(n\) is the number of classes.

+
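
A hand check of the example further below: the class-wise recalls are 1.0 for ant, 2/3 for cat and 1/2 for bird.

+

import math
+
+recalls = [1.0, 2/3, 1/2]  # per-class recalls for the example below
+print(round(math.prod(recalls) ** (1 / len(recalls)), 4))  # 0.6934, i.e. 69.34%
+

+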

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird', 'bird']
+y_pred = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat', 'bird']
+
+metric = metrics.GeometricMean()
+
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+
+metric
+
+
GeometricMean: 69.34%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

+
+
+
    +
  1. +

    Barandela, R. et al. “Strategies for learning in class imbalance problems”, Pattern Recognition, 36(3), (2003), pp 849-851. 

    +
  2. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/metrics/Homogeneity/index.html b/0.19.0/api/metrics/Homogeneity/index.html new file mode 100644 index 0000000000..ff8f89e74c --- /dev/null +++ b/0.19.0/api/metrics/Homogeneity/index.html @@ -0,0 +1,4555 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Homogeneity - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Homogeneity

+

Homogeneity Score.

+

Homogeneity metric 1 of a cluster labeling given a ground truth.

+

In order to satisfy the homogeneity criteria, a clustering must assign only those data points that are members of a single class to a single cluster. That is, the class distribution within each cluster should be skewed to a single class, i.e. have zero entropy. We determine how close a given clustering is to this ideal by examining the conditional entropy of the class distribution given the proposed clustering.

+

However, in an imperfect situation, the size of this value is dependent on the size of the dataset and the distribution of class sizes. Therefore, instead of taking the raw conditional entropy, we normalize by the maximum reduction in entropy the clustering information could provide.

+

As such, we define homogeneity as:

+
\[ h = \begin{cases} 1 & \text{if } H(C) = 0, \\ 1 - \frac{H(C|K)}{H(C)} & \text{otherwise.} \end{cases} \]
+

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [1, 1, 2, 2, 3, 3]
+y_pred = [1, 1, 1, 2, 2, 2]
+
+metric = metrics.Homogeneity()
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp).get())
+
+
1.0
+1.0
+0.0
+0.311278
+0.37515
+0.42062
+

+

metric
+
+
Homogeneity: 42.06%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

+
+
+
    +
  1. +

    Andrew Rosenberg and Julia Hirschberg (2007). V-Measure: A conditional entropy-based external cluster evaluation measure. Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning, pp. 410 - 420, Prague, June 2007.

    +
  2. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/metrics/Jaccard/index.html b/0.19.0/api/metrics/Jaccard/index.html new file mode 100644 index 0000000000..30fdb1a8c0 --- /dev/null +++ b/0.19.0/api/metrics/Jaccard/index.html @@ -0,0 +1,4544 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Jaccard - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Jaccard

+

Jaccard score.

+
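
The page does not spell out the formula, so as a hedged note: the binary Jaccard score can be read as TP / (TP + FP + FN), which reproduces the values in the example further below. After the last update of that example, TP=2, FP=1 and FN=0.

+

tp, fp, fn = 2, 1, 0  # counts after the last update in the example below
+print(round(tp / (tp + fp + fn), 4))  # 0.6667, i.e. 66.67%
+

+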

Parameters

+
    +
  • +

    cm

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
  • +

    pos_val

    +

    DefaultTrue

    +

    Value to treat as "positive".

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [False, True, True]
+y_pred = [True, True, True]
+
+metric = metrics.Jaccard()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
Jaccard: 0.00%
+Jaccard: 50.00%
+Jaccard: 66.67%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

+
+
+
    +
  1. +

    Jaccard index 

    +
  2. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/metrics/LogLoss/index.html b/0.19.0/api/metrics/LogLoss/index.html new file mode 100644 index 0000000000..b9e3cf1ab4 --- /dev/null +++ b/0.19.0/api/metrics/LogLoss/index.html @@ -0,0 +1,4514 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + LogLoss - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

LogLoss

+

Binary logarithmic loss.

+
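
Assuming the standard definition, the per-sample loss is the negative log of the probability assigned to the true outcome, and the running metric averages these. A check of the first value in the example further below:

+

import math
+
+def log_loss(y_true, p):
+    # Negative log-likelihood of a single Bernoulli observation.
+    return -math.log(p) if y_true else -math.log(1 - p)
+
+print(round(log_loss(True, 0.9), 5))  # 0.10536, the first value below
+

+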

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [True, False, False, True]
+y_pred = [0.9,  0.1,   0.2,   0.65]
+
+metric = metrics.LogLoss()
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+    print(metric.get())
+
+
0.105360
+0.105360
+0.144621
+0.216161
+

+

metric
+
+
LogLoss: 0.216162
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+
    +
  • model
  • +
+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/metrics/MAE/index.html b/0.19.0/api/metrics/MAE/index.html new file mode 100644 index 0000000000..e6b1c53c52 --- /dev/null +++ b/0.19.0/api/metrics/MAE/index.html @@ -0,0 +1,4510 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MAE - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

MAE

+

Mean absolute error.

+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [3, -0.5, 2, 7]
+y_pred = [2.5, 0.0, 2, 8]
+
+metric = metrics.MAE()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp).get())
+
+
0.5
+0.5
+0.333
+0.5
+

+

metric
+
+
MAE: 0.5
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'numbers.Number'
  • +
  • y_pred'numbers.Number'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'numbers.Number'
  • +
  • y_pred'numbers.Number'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+
    +
  • model
  • +
+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/metrics/MAPE/index.html b/0.19.0/api/metrics/MAPE/index.html new file mode 100644 index 0000000000..23282c04c3 --- /dev/null +++ b/0.19.0/api/metrics/MAPE/index.html @@ -0,0 +1,4504 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MAPE - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

MAPE

+

Mean absolute percentage error.

+
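
A hand check of the example further below: the mean of |y - ŷ| / |y| over the four samples, expressed as a percentage.

+

y_true = [3, -0.5, 2, 7]
+y_pred = [2.5, 0.0, 2, 8]
+
+ape = [abs(yt - yp) / abs(yt) for yt, yp in zip(y_true, y_pred)]
+print(round(100 * sum(ape) / len(ape), 6))  # 32.738095
+

+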

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [3, -0.5, 2, 7]
+y_pred = [2.5, 0.0, 2, 8]
+
+metric = metrics.MAPE()
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+
+metric
+
+
MAPE: 32.738095
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'numbers.Number'
  • +
  • y_pred'numbers.Number'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'numbers.Number'
  • +
  • y_pred'numbers.Number'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+
    +
  • model
  • +
+
+

diff --git a/0.19.0/api/metrics/MCC/index.html b/0.19.0/api/metrics/MCC/index.html
new file mode 100644
index 0000000000..7c1ac4d9a6
--- /dev/null
+++ b/0.19.0/api/metrics/MCC/index.html
@@ -0,0 +1,4544 @@

MCC

+

Matthews correlation coefficient.
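
The score is computed from the confusion matrix. For binary labels, with TP, TN, FP and FN denoting true/false positives and negatives, the usual definition is:

\[ MCC = \frac{TP \times TN - FP \times FN}{\sqrt{(TP + FP)(TP + FN)(TN + FP)(TN + FN)}} \]

In the example below the final counts are TP=2, FN=1, FP=1 and TN=0, giving \(-1 / \sqrt{9} = -0.333333\).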

+

Parameters

+
    +
  • +

    cm

    +

    DefaultNone

    +

This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time. A minimal sharing sketch follows this parameter list.

    +
  • +
  • +

    pos_val

    +

    DefaultTrue

    +

    Value to treat as "positive".

    +
  • +
+
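
Since the score is derived entirely from the confusion matrix, several metrics can share one matrix via the cm parameter. A minimal sketch; note that each sample should be fed through only one of the sharing metrics, otherwise the common matrix gets updated twice:

from river import metrics

cm = metrics.ConfusionMatrix()

mcc = metrics.MCC(cm=cm)
f1 = metrics.F1(cm=cm)

for yt, yp in zip([True, True, True, False], [True, False, True, True]):
    mcc.update(yt, yp)  # also updates the shared confusion matrix

f1.get()  # F1 is computed from the very same counts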

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [True, True, True, False]
+y_pred = [True, False, True, True]
+
+mcc = metrics.MCC()
+
+for yt, yp in zip(y_true, y_pred):
+    mcc = mcc.update(yt, yp)
+
+mcc
+
+
MCC: -0.333333
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

+
+
+
    1. Wikipedia article
diff --git a/0.19.0/api/metrics/MSE/index.html b/0.19.0/api/metrics/MSE/index.html
new file mode 100644
index 0000000000..25b572147f
--- /dev/null
+++ b/0.19.0/api/metrics/MSE/index.html
@@ -0,0 +1,4506 @@

MSE

+

Mean squared error.

+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [3, -0.5, 2, 7]
+y_pred = [2.5, 0.0, 2, 8]
+
+metric = metrics.MSE()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp).get())
+
+
0.25
+0.25
+0.1666
+0.375
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'numbers.Number'
  • +
  • y_pred'numbers.Number'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'numbers.Number'
  • +
  • y_pred'numbers.Number'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+
    +
  • model
  • +
+
+

diff --git a/0.19.0/api/metrics/MacroF1/index.html b/0.19.0/api/metrics/MacroF1/index.html
new file mode 100644
index 0000000000..9c668e148e
--- /dev/null
+++ b/0.19.0/api/metrics/MacroF1/index.html
@@ -0,0 +1,4534 @@

MacroF1

+

Macro-average F1 score.

+

This works by computing the F1 score per class and then averaging the scores.
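
For instance, in the example further down the final per-class F1 scores are 2/3 (class 0), 0 (class 1) and 4/5 (class 2), and their average is (2/3 + 0 + 4/5) / 3 = 48.89%.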

+

Parameters

+
    +
  • +

    cm

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 1, 2, 2, 2]
+y_pred = [0, 0, 2, 2, 1]
+
+metric = metrics.MacroF1()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
MacroF1: 100.00%
+MacroF1: 33.33%
+MacroF1: 55.56%
+MacroF1: 55.56%
+MacroF1: 48.89%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

diff --git a/0.19.0/api/metrics/MacroFBeta/index.html b/0.19.0/api/metrics/MacroFBeta/index.html
new file mode 100644
index 0000000000..7eb0142c69
--- /dev/null
+++ b/0.19.0/api/metrics/MacroFBeta/index.html
@@ -0,0 +1,4538 @@

MacroFBeta

+

Macro-average F-Beta score.

+

This works by computing the F-Beta score per class and then averaging the scores.
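
Each per-class score follows the usual F-Beta definition, in which \(\beta\) trades recall off against precision:

\[ F_\beta = (1 + \beta^2) \cdot \frac{\textrm{precision} \cdot \textrm{recall}}{\beta^2 \cdot \textrm{precision} + \textrm{recall}} \]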

+

Parameters

+
    +
  • +

    beta

    +

Weight of precision in the harmonic mean.

    +
  • +
  • +

    cm

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 1, 2, 2, 2]
+y_pred = [0, 0, 2, 2, 1]
+
+metric = metrics.MacroFBeta(beta=.8)
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
MacroFBeta: 100.00%
+MacroFBeta: 31.06%
+MacroFBeta: 54.04%
+MacroFBeta: 54.04%
+MacroFBeta: 48.60%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

diff --git a/0.19.0/api/metrics/MacroJaccard/index.html b/0.19.0/api/metrics/MacroJaccard/index.html
new file mode 100644
index 0000000000..03da023005
--- /dev/null
+++ b/0.19.0/api/metrics/MacroJaccard/index.html
@@ -0,0 +1,4534 @@

MacroJaccard

+

Macro-average Jaccard score.

+

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 1, 2, 2, 2]
+y_pred = [0, 0, 2, 2, 1]
+
+metric = metrics.MacroJaccard()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
MacroJaccard: 100.00%
+MacroJaccard: 25.00%
+MacroJaccard: 50.00%
+MacroJaccard: 50.00%
+MacroJaccard: 38.89%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

diff --git a/0.19.0/api/metrics/MacroPrecision/index.html b/0.19.0/api/metrics/MacroPrecision/index.html
new file mode 100644
index 0000000000..d10575349b
--- /dev/null
+++ b/0.19.0/api/metrics/MacroPrecision/index.html
@@ -0,0 +1,4534 @@

MacroPrecision

+

Macro-average precision score.

+

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 1, 2, 2, 2]
+y_pred = [0, 0, 2, 2, 1]
+
+metric = metrics.MacroPrecision()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
MacroPrecision: 100.00%
+MacroPrecision: 25.00%
+MacroPrecision: 50.00%
+MacroPrecision: 50.00%
+MacroPrecision: 50.00%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

diff --git a/0.19.0/api/metrics/MacroRecall/index.html b/0.19.0/api/metrics/MacroRecall/index.html
new file mode 100644
index 0000000000..cd038bf173
--- /dev/null
+++ b/0.19.0/api/metrics/MacroRecall/index.html
@@ -0,0 +1,4534 @@

MacroRecall

+

Macro-average recall score.

+

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 1, 2, 2, 2]
+y_pred = [0, 0, 2, 2, 1]
+
+metric = metrics.MacroRecall()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
MacroRecall: 100.00%
+MacroRecall: 50.00%
+MacroRecall: 66.67%
+MacroRecall: 66.67%
+MacroRecall: 55.56%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

diff --git a/0.19.0/api/metrics/MicroF1/index.html b/0.19.0/api/metrics/MicroF1/index.html
new file mode 100644
index 0000000000..5db38aa777
--- /dev/null
+++ b/0.19.0/api/metrics/MicroF1/index.html
@@ -0,0 +1,4539 @@

MicroF1

+

Micro-average F1 score.

+

This computes a global F1 score by pooling all the predictions and true labels across classes.
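
In single-label classification this pooling makes the micro-average F1 coincide with the accuracy. A minimal sketch with the same data as the example below:

import math
from river import metrics

micro_f1, acc = metrics.MicroF1(), metrics.Accuracy()

for yt, yp in zip([0, 1, 2, 2, 0], [0, 1, 1, 2, 1]):
    micro_f1.update(yt, yp)
    acc.update(yt, yp)

assert math.isclose(micro_f1.get(), acc.get())  # both 0.6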

+

Parameters

+
    +
  • +

    cm

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 1, 2, 2, 0]
+y_pred = [0, 1, 1, 2, 1]
+
+metric = metrics.MicroF1()
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+
+metric
+
+
MicroF1: 60.00%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

diff --git a/0.19.0/api/metrics/MicroFBeta/index.html b/0.19.0/api/metrics/MicroFBeta/index.html
new file mode 100644
index 0000000000..6bb98bb542
--- /dev/null
+++ b/0.19.0/api/metrics/MicroFBeta/index.html
@@ -0,0 +1,4537 @@

MicroFBeta

+

Micro-average F-Beta score.

+

This computes a global F-Beta score by pooling all the predictions and true labels across classes.

+

Parameters

+
    +
  • +

    beta

    +

    Typefloat

    +

    Weight of precision in the harmonic mean.

    +
  • +
  • +

    cm

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 1, 2, 2, 0]
+y_pred = [0, 1, 1, 2, 1]
+
+metric = metrics.MicroFBeta(beta=2)
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+
+metric
+
+
MicroFBeta: 60.00%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

    1. Why are precision, recall and F1 score equal when using micro averaging in a multi-class problem?

diff --git a/0.19.0/api/metrics/MicroJaccard/index.html b/0.19.0/api/metrics/MicroJaccard/index.html
new file mode 100644
index 0000000000..6c2080f92d
--- /dev/null
+++ b/0.19.0/api/metrics/MicroJaccard/index.html
@@ -0,0 +1,4534 @@

MicroJaccard

+

Micro-average Jaccard score.

+

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 1, 2, 2, 2]
+y_pred = [0, 0, 2, 2, 1]
+
+metric = metrics.MicroJaccard()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
MicroJaccard: 100.00%
+MicroJaccard: 33.33%
+MicroJaccard: 50.00%
+MicroJaccard: 60.00%
+MicroJaccard: 42.86%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

diff --git a/0.19.0/api/metrics/MicroPrecision/index.html b/0.19.0/api/metrics/MicroPrecision/index.html
new file mode 100644
index 0000000000..76b34939c0
--- /dev/null
+++ b/0.19.0/api/metrics/MicroPrecision/index.html
@@ -0,0 +1,4543 @@

MicroPrecision

+

Micro-average precision score.

+

The micro-average precision score is exactly equivalent to the micro-average recall as well as the micro-average F1 score.
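
A minimal sketch of this equivalence, reusing the data from the example below:

import math
from river import metrics

precision = metrics.MicroPrecision()
recall = metrics.MicroRecall()
f1 = metrics.MicroF1()

for yt, yp in zip([0, 1, 2, 2, 2], [0, 0, 2, 2, 1]):
    precision.update(yt, yp)
    recall.update(yt, yp)
    f1.update(yt, yp)

assert math.isclose(precision.get(), recall.get())  # both 0.6
assert math.isclose(precision.get(), f1.get())      # and so is micro F1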

+

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 1, 2, 2, 2]
+y_pred = [0, 0, 2, 2, 1]
+
+metric = metrics.MicroPrecision()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
MicroPrecision: 100.00%
+MicroPrecision: 50.00%
+MicroPrecision: 66.67%
+MicroPrecision: 75.00%
+MicroPrecision: 60.00%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

diff --git a/0.19.0/api/metrics/MicroRecall/index.html b/0.19.0/api/metrics/MicroRecall/index.html
new file mode 100644
index 0000000000..0e4f44689f
--- /dev/null
+++ b/0.19.0/api/metrics/MicroRecall/index.html
@@ -0,0 +1,4543 @@

MicroRecall

+

Micro-average recall score.

+

The micro-average recall is exactly equivalent to the micro-average precision as well as the micro-average F1 score.

+

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 1, 2, 2, 2]
+y_pred = [0, 0, 2, 2, 1]
+
+metric = metrics.MicroRecall()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
MicroRecall: 100.00%
+MicroRecall: 50.00%
+MicroRecall: 66.67%
+MicroRecall: 75.00%
+MicroRecall: 60.00%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

diff --git a/0.19.0/api/metrics/MultiFBeta/index.html b/0.19.0/api/metrics/MultiFBeta/index.html
new file mode 100644
index 0000000000..7faf0d54d5
--- /dev/null
+++ b/0.19.0/api/metrics/MultiFBeta/index.html
@@ -0,0 +1,4545 @@

MultiFBeta

+

Multi-class F-Beta score with different betas per class.

+

The multiclass F-Beta score is the arithmetic average of the binary F-Beta scores of each class. The mean can be weighted by providing class weights.
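
In other words, with a per-class weight \(w_c\) and a per-class score \(F_{\beta_c}\) for each class \(c\), the reported value is the weighted mean:

\[ MultiFBeta = \frac{\sum_{c} w_c \, F_{\beta_c}}{\sum_{c} w_c} \]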

+

Parameters

+
    +
  • +

    betas

    +

    Weight of precision in the harmonic mean of each class.

    +
  • +
  • +

    weights

    +

    Class weights. If not provided then uniform weights will be used.

    +
  • +
  • +

    cm

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 1, 2, 2, 2]
+y_pred = [0, 0, 2, 2, 1]
+
+metric = metrics.MultiFBeta(
+    betas={0: 0.25, 1: 1, 2: 4},
+    weights={0: 1, 1: 1, 2: 2}
+)
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
MultiFBeta: 100.00%
+MultiFBeta: 25.76%
+MultiFBeta: 62.88%
+MultiFBeta: 62.88%
+MultiFBeta: 46.88%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

diff --git a/0.19.0/api/metrics/MutualInfo/index.html b/0.19.0/api/metrics/MutualInfo/index.html
new file mode 100644
index 0000000000..f98f1a82ed
--- /dev/null
+++ b/0.19.0/api/metrics/MutualInfo/index.html
@@ -0,0 +1,4555 @@

MutualInfo

+

Mutual Information between two clusterings.

+

The Mutual Information 1 is a measure of the similarity between two labels of the same data. Where \(|U_i|\) is the number of samples in cluster \(U_i\) and \(|V_j|\) is the number of samples in cluster \(V_j\), the Mutual Information between clusterings \(U\) and \(V\) can be calculated as:

+
\[ MI(U,V) = \sum_{i=1}^{|U|} \sum_{j=1}^{|V|} \frac{|U_i \cap V_j|}{N} \log \frac{N |U_i \cap V_j|}{|U_i| |V_j|} \]
+

This metric is independent of the absolute values of the labels: a permutation of the class or cluster label values won't change the score.

+

This metric is furthermore symmetric: switching y_true and y_pred will return the same score value. This can be useful to measure the agreement of two independent label assignment strategies on the same dataset when the real ground truth is not known.
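
A minimal sketch of this symmetry, reusing the data from the example below:

import math
from river import metrics

forward, backward = metrics.MutualInfo(), metrics.MutualInfo()

for yt, yp in zip([1, 1, 2, 2, 3, 3], [1, 1, 1, 2, 2, 2]):
    forward.update(yt, yp)   # (y_true, y_pred)
    backward.update(yp, yt)  # arguments swapped

assert math.isclose(forward.get(), backward.get())  # same score either way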

+

The Mutual Information can be equivalently expressed as:

+
\[ MI(U,V) = H(U) - H(U | V) = H(V) - H(V | U) \]
+

where \(H(U)\) and \(H(V)\) are the marginal entropies, \(H(U | V)\) and \(H(V | U)\) are the conditional entropies.

+

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [1, 1, 2, 2, 3, 3]
+y_pred = [1, 1, 1, 2, 2, 2]
+
+metric = metrics.MutualInfo()
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp).get())
+
+
0.0
+0.0
+0.0
+0.215761
+0.395752
+0.462098
+

+

metric
+
+
MutualInfo: 0.462098
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

+
+
+
    1. Wikipedia contributors. (2021, March 17). Mutual information. In Wikipedia, The Free Encyclopedia, from https://en.wikipedia.org/w/index.php?title=Mutual_information&oldid=1012714929
diff --git a/0.19.0/api/metrics/NormalizedMutualInfo/index.html b/0.19.0/api/metrics/NormalizedMutualInfo/index.html
new file mode 100644
index 0000000000..d4363aea08
--- /dev/null
+++ b/0.19.0/api/metrics/NormalizedMutualInfo/index.html
@@ -0,0 +1,4555 @@

NormalizedMutualInfo

+

Normalized Mutual Information between two clusterings.

+

Normalized Mutual Information (NMI) is a normalized version of the Mutual Information (MI) score that scales the result between 0 (no mutual information) and 1 (perfect mutual information). The mutual information is normalized by a generalized mean of the entropies of the true and predicted labels, as defined by the average_method parameter.
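
With the default arithmetic mean, this amounts to:

\[ NMI(U, V) = \frac{MI(U, V)}{\frac{1}{2} \left( H(U) + H(V) \right)} \]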

+

We note that this measure is not adjusted for chance (i.e. not corrected for agreement that occurs solely by chance); as a result, the Adjusted Mutual Info Score will mostly be preferred. However, this metric is still symmetric, which means that switching true and predicted labels will not alter the score value. This fact can be useful when the metric is used to measure the agreement between two independent label solutions on the same dataset, when the ground truth remains unknown.

+

Another advantage of this metric is that, as it is based on entropy-related measures, it is independent of permutations of the class/cluster labels.

+

Parameters

+
    +
  • +

    cm

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
  • +

    average_method

    +

    Defaultarithmetic

    +

    This parameter defines how to compute the normalizer in the denominator. Possible options include min, max, arithmetic and geometric.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [1, 1, 2, 2, 3, 3]
+y_pred = [1, 1, 1, 2, 2, 2]
+
+metric = metrics.NormalizedMutualInfo()
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp).get())
+
+
1.0
+1.0
+0.0
+0.343711
+0.458065
+0.515803
+

+

metric
+
+
NormalizedMutualInfo: 0.515804
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

+
+
+
    1. Wikipedia contributors. (2021, March 17). Mutual information. In Wikipedia, The Free Encyclopedia, from https://en.wikipedia.org/w/index.php?title=Mutual_information&oldid=1012714929
diff --git a/0.19.0/api/metrics/Precision/index.html b/0.19.0/api/metrics/Precision/index.html
new file mode 100644
index 0000000000..200f872e11
--- /dev/null
+++ b/0.19.0/api/metrics/Precision/index.html
@@ -0,0 +1,4538 @@

Precision

+

Binary precision score.

+

Parameters

+
    +
  • +

    cm

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
  • +

    pos_val

    +

    DefaultTrue

    +

    Value to treat as "positive".

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [True, False, True, True, True]
+y_pred = [True, True, False, True, True]
+
+metric = metrics.Precision()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
Precision: 100.00%
+Precision: 50.00%
+Precision: 50.00%
+Precision: 66.67%
+Precision: 75.00%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

diff --git a/0.19.0/api/metrics/R2/index.html b/0.19.0/api/metrics/R2/index.html
new file mode 100644
index 0000000000..e008283649
--- /dev/null
+++ b/0.19.0/api/metrics/R2/index.html
@@ -0,0 +1,4517 @@

R2

+

Coefficient of determination (\(R^2\)) score.

+

The coefficient of determination, denoted \(R^2\) or \(r^2\), is the proportion of the variance in the dependent variable that is predictable from the independent variable(s). 1

+

The best possible score is 1.0, and it can be negative (because the model can be arbitrarily worse). A constant model that always predicts the expected value of \(y\), disregarding the input features, would get an \(R^2\) score of 0.0.
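
For instance, anti-correlated predictions drive the score below zero. A minimal sketch:

from river import metrics

metric = metrics.R2()

for yt, yp in zip([1, 2, 3, 4], [4, 3, 2, 1]):
    metric.update(yt, yp)

metric.get()  # negative: worse than always predicting the mean of y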

+

\(R^2\) is not defined when fewer than two samples have been observed. This implementation returns 0.0 in this case.

+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [3, -0.5, 2, 7]
+y_pred = [2.5, 0.0, 2, 8]
+
+metric = metrics.R2()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp).get())
+
+
0.0
+0.9183
+0.9230
+0.9486
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'numbers.Number'
  • +
  • y_pred'numbers.Number'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'numbers.Number'
  • +
  • y_pred'numbers.Number'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

diff --git a/0.19.0/api/metrics/RMSE/index.html b/0.19.0/api/metrics/RMSE/index.html
new file mode 100644
index 0000000000..b6b05757e5
--- /dev/null
+++ b/0.19.0/api/metrics/RMSE/index.html
@@ -0,0 +1,4509 @@

RMSE

+

Root mean squared error.
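
The running value is the square root of the mean of the squared errors:

\[ RMSE = \sqrt{\frac{1}{n} \sum_{t=1}^{n} (y_t - \hat{y}_t)^2} \]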

+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [3, -0.5, 2, 7]
+y_pred = [2.5, 0.0, 2, 8]
+
+metric = metrics.RMSE()
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp).get())
+
+
0.5
+0.5
+0.408248
+0.612372
+

+

metric
+
+
RMSE: 0.612372
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'numbers.Number'
  • +
  • y_pred'numbers.Number'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'numbers.Number'
  • +
  • y_pred'numbers.Number'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+
    +
  • model
  • +
+
+

diff --git a/0.19.0/api/metrics/RMSLE/index.html b/0.19.0/api/metrics/RMSLE/index.html
new file mode 100644
index 0000000000..e45270c004
--- /dev/null
+++ b/0.19.0/api/metrics/RMSLE/index.html
@@ -0,0 +1,4504 @@

RMSLE

+

Root mean squared logarithmic error.
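
The running value is the RMSE computed on \(\ln(1 + y)\) rather than on \(y\) itself:

\[ RMSLE = \sqrt{\frac{1}{n} \sum_{t=1}^{n} \left( \ln(1 + y_t) - \ln(1 + \hat{y}_t) \right)^2} \]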

+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [3, -0.5, 2, 7]
+y_pred = [2.5, 0.0, 2, 8]
+
+metric = metrics.RMSLE()
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+
+metric
+
+
RMSLE: 0.357826
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'numbers.Number'
  • +
  • y_pred'numbers.Number'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'numbers.Number'
  • +
  • y_pred'numbers.Number'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+
    +
  • model
  • +
+
+

diff --git a/0.19.0/api/metrics/ROCAUC/index.html b/0.19.0/api/metrics/ROCAUC/index.html
new file mode 100644
index 0000000000..1e83d53b32
--- /dev/null
+++ b/0.19.0/api/metrics/ROCAUC/index.html
@@ -0,0 +1,4548 @@

ROCAUC

+

Receiver Operating Characteristic Area Under the Curve.

+

This metric is an approximation of the true ROC AUC. Computing the true ROC AUC would require storing all the predictions and ground truths, which isn't desirable. The approximation error is not significant as long as the predicted probabilities are well calibrated. In any case, this metric can still be used to reliably compare models with one another.

+

Parameters

+
    +
  • +

    n_thresholds

    +

    Default10

    +

    The number of thresholds used for discretizing the ROC curve. A higher value will lead to more accurate results, but will also cost more time and memory.

    +
  • +
  • +

    pos_val

    +

    DefaultTrue

    +

    Value to treat as "positive".

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [ 0,  0,   1,  1]
+y_pred = [.1, .4, .35, .8]
+
+metric = metrics.ROCAUC()
+
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+
+metric
+
+
ROCAUC: 87.50%
+

+

The true ROC AUC is in fact 0.75. We can improve the accuracy by increasing the number of thresholds, at the cost of more computation time and memory usage.

+

metric = metrics.ROCAUC(n_thresholds=20)
+
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+
+metric
+
+
ROCAUC: 75.00%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

diff --git a/0.19.0/api/metrics/Rand/index.html b/0.19.0/api/metrics/Rand/index.html
new file mode 100644
index 0000000000..b291b41580
--- /dev/null
+++ b/0.19.0/api/metrics/Rand/index.html
@@ -0,0 +1,4564 @@

Rand

+

Rand Index.

+

The Rand Index 1 2 is a measure of the similarity between two data clusterings. Given a set of elements S and two partitions of S to compare, X and Y, define the following:

+
    +
  • +

    a, the number of pairs of elements in S that are in the same subset in X and in the same subset in Y

    +
  • +
  • +

b, the number of pairs of elements in S that are in different subsets in X and in different subsets in Y

    +
  • +
  • +

    c, the number of pairs of elements in S that are in the same subset in X and in different subsets in Y

    +
  • +
  • +

d, the number of pairs of elements in S that are in different subsets in X and in the same subset in Y

    +
  • +
+

The Rand index, R, is

+
\[ R = \frac{a+b}{a+b+c+d} = \frac{a+b}{\frac{n(n-1)}{2}}. \]
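
In the example below, a = 2 and b = 8 out of 15 pairs in total, so R = 10/15 = 0.666667.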
+

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 0, 0, 1, 1, 1]
+y_pred = [0, 0, 1, 1, 2, 2]
+
+metric = metrics.Rand()
+
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+
+metric
+
+
Rand: 0.666667
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

+
+
+
    1. Wikipedia contributors. (2021, January 13). Rand index. In Wikipedia, The Free Encyclopedia, from https://en.wikipedia.org/w/index.php?title=Rand_index&oldid=1000098911
    2. W. M. Rand (1971). "Objective criteria for the evaluation of clustering methods". Journal of the American Statistical Association. American Statistical Association. 66 (336): 846–850. arXiv:1704.01036. doi:10.2307/2284239. JSTOR 2284239.
diff --git a/0.19.0/api/metrics/Recall/index.html b/0.19.0/api/metrics/Recall/index.html
new file mode 100644
index 0000000000..2ae4bfabd1
--- /dev/null
+++ b/0.19.0/api/metrics/Recall/index.html
@@ -0,0 +1,4538 @@

Recall

+

Binary recall score.

+

Parameters

+
    +
  • +

    cm

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
  • +

    pos_val

    +

    DefaultTrue

    +

    Value to treat as "positive".

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [True, False, True, True, True]
+y_pred = [True, True, False, True, True]
+
+metric = metrics.Recall()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
Recall: 100.00%
+Recall: 100.00%
+Recall: 50.00%
+Recall: 66.67%
+Recall: 75.00%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

diff --git a/0.19.0/api/metrics/RollingROCAUC/index.html b/0.19.0/api/metrics/RollingROCAUC/index.html
new file mode 100644
index 0000000000..27a6e2845d
--- /dev/null
+++ b/0.19.0/api/metrics/RollingROCAUC/index.html
@@ -0,0 +1,4536 @@

RollingROCAUC

+

Rolling version of the Receiver Operating Characteristic Area Under the Curve (ROC AUC).

+

The RollingROCAUC calculates the metric using the instances in its window of size S. It keeps a queue of the instances; when a new instance arrives and the queue already holds S instances, the oldest one is removed. The metric maintains a tree of ordered instances so that the AUC can be calculated efficiently. It was implemented based on the algorithm presented in Brzezinski and Stefanowski, 2017.

+

The difference between this metric and the standard ROCAUC is that the latter calculates an approximation of the real metric considering all data from the beginning of the stream, while the RollingROCAUC calculates the exact value considering only the last S instances. This approach may be beneficial if it's necessary to evaluate the model's performance over time, since calculating the metric using the entire stream may hide the current performance of the classifier.

+
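To make the windowed semantics concrete, here is a brute-force cross-check that recomputes the exact AUC over the last S instances with scikit-learn. This is an illustrative sketch only; the class documented here achieves the same result incrementally with an ordered tree.

from collections import deque
from sklearn.metrics import roc_auc_score

S = 4
window = deque(maxlen=S)  # (label, score) pairs; old items fall off the left

for yt, yp in zip([0, 1, 0, 1, 0], [.3, .5, .5, .7, .1]):
    window.append((yt, yp))
    y_true, y_score = zip(*window)
    if len(set(y_true)) == 2:  # AUC is only defined when both classes are present
        print(round(roc_auc_score(y_true, y_score), 2))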

Parameters

+
    +
  • +

    window_size

    +

    Default1000

    +

    The max length of the window.

    +
  • +
  • +

    pos_val

    +

    DefaultTrue

    +

    Value to treat as "positive".

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [ 0,  1,  0,  1,  0,  1,  0,  0,   1,  1]
+y_pred = [.3, .5, .5, .7, .1, .3, .1, .4, .35, .8]
+
+metric = metrics.RollingROCAUC(window_size=4)
+
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+
+metric
+
+
RollingROCAUC: 75.00%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/SMAPE/index.html b/0.19.0/api/metrics/SMAPE/index.html
new file mode 100644
index 0000000000..0e10e61935
--- /dev/null
+++ b/0.19.0/api/metrics/SMAPE/index.html
@@ -0,0 +1,4504 @@

SMAPE

+

Symmetric mean absolute percentage error.

+
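Concretely, SMAPE = (100 / n) * sum(2 * |y_pred - y_true| / (|y_true| + |y_pred|)). Here is a plain-Python sanity check on the data from the example below; it is a sketch, and the convention that a 0/0 term contributes 0 is an assumption, although it is consistent with the printed result.

y_true = [0, 0.07533, 0.07533, 0.07533, 0.07533, 0.07533, 0.07533, 0.0672, 0.0672]
y_pred = [0, 0.102, 0.107, 0.047, 0.1, 0.032, 0.047, 0.108, 0.089]

terms = [
    2 * abs(yp - yt) / (abs(yt) + abs(yp)) if yt or yp else 0.0
    for yt, yp in zip(y_true, y_pred)
]
print(100 * sum(terms) / len(terms))  # ~37.8694, matching the example below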

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 0.07533, 0.07533, 0.07533, 0.07533, 0.07533, 0.07533, 0.0672, 0.0672]
+y_pred = [0, 0.102, 0.107, 0.047, 0.1, 0.032, 0.047, 0.108, 0.089]
+
+metric = metrics.SMAPE()
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+
+metric
+
+
SMAPE: 37.869392
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'numbers.Number'
  • +
  • y_pred'numbers.Number'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'numbers.Number'
  • +
  • y_pred'numbers.Number'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+
    +
  • model
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/Silhouette/index.html b/0.19.0/api/metrics/Silhouette/index.html
new file mode 100644
index 0000000000..a86163bca5
--- /dev/null
+++ b/0.19.0/api/metrics/Silhouette/index.html
@@ -0,0 +1,4523 @@

Silhouette

+

Silhouette coefficient 1, roughly speaking, is the ratio between cohesion and the average distance from the points to their second-closest centroid. It rewards clusterings where points are very close to their assigned centroids and far from any other centroids, that is, clusterings with good cohesion and good separation. 2

+

The definition of the Silhouette coefficient for online clustering evaluation differs from the batch one. It does not store all the points nor compute pairwise distances between them, since doing so would be too expensive for an incremental metric.

+
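As a rough batch-style illustration of the centroid-based idea (not river's incremental implementation), one common simplified form scores a single point by comparing its distance to the assigned centroid with its distance to the second-closest one:

import math

def silhouette_point(x, centers):
    # distances from the point to every centroid, sorted ascending
    dists = sorted(math.dist(x, c) for c in centers)
    a, b = dists[0], dists[1]  # assigned centroid vs second-closest centroid
    return (b - a) / max(a, b)

centers = [(1.0, 2.0), (4.0, 2.0), (-2.0, 2.0)]
print(silhouette_point((1.0, 0.0), centers))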

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicates if a high value is better than a low one or not.

    +
  • +
+

Examples

+

from river import cluster
+from river import stream
+from river import metrics
+
+X = [
+    [1, 2],
+    [1, 4],
+    [1, 0],
+    [4, 2],
+    [4, 4],
+    [4, 0],
+    [-2, 2],
+    [-2, 4],
+    [-2, 0]
+]
+
+k_means = cluster.KMeans(n_clusters=3, halflife=0.4, sigma=3, seed=0)
+metric = metrics.Silhouette()
+
+for x, _ in stream.iter_array(X):
+    k_means = k_means.learn_one(x)
+    y_pred = k_means.predict_one(x)
+    metric = metric.update(x, y_pred, k_means.centers)
+
+metric
+
+
Silhouette: 0.568058
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • x
  • +
  • y_pred
  • +
  • centers
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • x
  • +
  • y_pred
  • +
  • centers
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

  1. Rousseeuw, P. (1987). Silhouettes: a graphical aid to the interpretation and validation of cluster analysis. Journal of Computational and Applied Mathematics, 20, 53-65. DOI: 10.1016/0377-0427(87)90125-7
  2. Bifet, A. et al. (2018). "Machine Learning for Data Streams". DOI: 10.7551/mitpress/10654.001.0001.
\ No newline at end of file
diff --git a/0.19.0/api/metrics/VBeta/index.html b/0.19.0/api/metrics/VBeta/index.html
new file mode 100644
index 0000000000..97d326d8af
--- /dev/null
+++ b/0.19.0/api/metrics/VBeta/index.html
@@ -0,0 +1,4569 @@

VBeta

+

VBeta.

+

VBeta (or V-Measure) 1 is an external entropy-based cluster evaluation measure. It provides an elegant solution to many problems that affect previously defined cluster evaluation measures, including:

+
    +
  • +

    Dependence on the clustering algorithm or the dataset,

    +
  • +
  • +

    The "problem of matching", where the clustering of only a portion of data points are evaluated, and

    +
  • +
  • +

    Accurate evaluation and combination of two desirable aspects of clustering, homogeneity and completeness.

    +
  • +
+

Based upon the calculations of homogeneity and completeness, a clustering solution's V-measure is calculated by computing the weighted harmonic mean of homogeneity and completeness,

+
\[ V_{\beta} = \frac{(1 + \beta) \times h \times c}{\beta \times h + c}. \]
+
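Here h and c are the homogeneity and completeness, each defined from conditional entropies; with beta=1 the formula reduces to their harmonic mean. As a batch cross-check (a sketch, assuming scikit-learn is available), the final value of the example below can be reproduced with:

from sklearn.metrics import homogeneity_completeness_v_measure

y_true = [1, 1, 2, 2, 3, 3]
y_pred = [1, 1, 1, 2, 2, 2]

h, c, v = homogeneity_completeness_v_measure(y_true, y_pred)
print(round(h, 4), round(c, 4), round(v, 4))  # ~0.4207, ~0.6667, ~0.5158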

Parameters

+
    +
  • +

    beta

    +

    Typefloat

    +

    Default1.0

    +

    Weight of Homogeneity in the harmonic mean.

    +
  • +
  • +

    cm

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [1, 1, 2, 2, 3, 3]
+y_pred = [1, 1, 1, 2, 2, 2]
+
+metric = metrics.VBeta(beta=1.0)
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp).get())
+
+
1.0
+1.0
+0.0
+0.3437110184854507
+0.4580652856440158
+0.5158037429793888
+

+

metric
+
+
VBeta: 51.58%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

  1. Andrew Rosenberg and Julia Hirschberg (2007). V-Measure: A conditional entropy-based external cluster evaluation measure. Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning, pp. 410-420, Prague, June 2007.
\ No newline at end of file
diff --git a/0.19.0/api/metrics/WeightedF1/index.html b/0.19.0/api/metrics/WeightedF1/index.html
new file mode 100644
index 0000000000..d72b683ad6
--- /dev/null
+++ b/0.19.0/api/metrics/WeightedF1/index.html
@@ -0,0 +1,4534 @@

WeightedF1

+

Weighted-average F1 score.

+

This works by computing the F1 score per class, and then performing a global weighted average using the support of each class; the sketch below spells this out.

+
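To spell out the computation on the data used in the example below, here is a plain-Python sketch of the definition (not river's code):

from collections import Counter

y_true = [0, 1, 2, 2, 2]
y_pred = [0, 0, 2, 2, 1]

support = Counter(y_true)

def f1(label):
    tp = sum(yt == yp == label for yt, yp in zip(y_true, y_pred))
    p_den = sum(yp == label for yp in y_pred)  # predicted positives
    r_den = support[label]                     # actual positives
    p = tp / p_den if p_den else 0.0
    r = tp / r_den if r_den else 0.0
    return 2 * p * r / (p + r) if p + r else 0.0

weighted = sum(support[l] * f1(l) for l in support) / len(y_true)
print(round(weighted, 4))  # ~0.6133, matching the final value below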

Parameters

+
    +
  • +

    cm

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 1, 2, 2, 2]
+y_pred = [0, 0, 2, 2, 1]
+
+metric = metrics.WeightedF1()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
WeightedF1: 100.00%
+WeightedF1: 33.33%
+WeightedF1: 55.56%
+WeightedF1: 66.67%
+WeightedF1: 61.33%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/WeightedFBeta/index.html b/0.19.0/api/metrics/WeightedFBeta/index.html
new file mode 100644
index 0000000000..797e6dce06
--- /dev/null
+++ b/0.19.0/api/metrics/WeightedFBeta/index.html
@@ -0,0 +1,4538 @@

WeightedFBeta

+

Weighted-average F-Beta score.

+

This works by computing the F-Beta score per class, and then performing a global weighted average according to the support of each class.

+

Parameters

+
    +
  • +

    beta

    +

    Weight of precision in the harmonic mean.

    +
  • +
  • +

    cm

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 1, 2, 2, 2]
+y_pred = [0, 0, 2, 2, 1]
+
+metric = metrics.WeightedFBeta(beta=0.8)
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
WeightedFBeta: 100.00%
+WeightedFBeta: 31.06%
+WeightedFBeta: 54.04%
+WeightedFBeta: 65.53%
+WeightedFBeta: 62.63%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/WeightedJaccard/index.html b/0.19.0/api/metrics/WeightedJaccard/index.html
new file mode 100644
index 0000000000..bb7c27fc58
--- /dev/null
+++ b/0.19.0/api/metrics/WeightedJaccard/index.html
@@ -0,0 +1,4534 @@

WeightedJaccard

+

Weighted average Jaccard score.

+

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 1, 2, 2, 2]
+y_pred = [0, 0, 2, 2, 1]
+
+metric = metrics.WeightedJaccard()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
WeightedJaccard: 100.00%
+WeightedJaccard: 25.00%
+WeightedJaccard: 50.00%
+WeightedJaccard: 62.50%
+WeightedJaccard: 50.00%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/WeightedPrecision/index.html b/0.19.0/api/metrics/WeightedPrecision/index.html
new file mode 100644
index 0000000000..6dfad7b623
--- /dev/null
+++ b/0.19.0/api/metrics/WeightedPrecision/index.html
@@ -0,0 +1,4535 @@

WeightedPrecision

+

Weighted-average precision score.

+

This uses the support of each label to compute an average score, whereas metrics.MacroPrecision ignores the support.

+

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 1, 2, 2, 2]
+y_pred = [0, 0, 2, 2, 1]
+
+metric = metrics.WeightedPrecision()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
WeightedPrecision: 100.00%
+WeightedPrecision: 25.00%
+WeightedPrecision: 50.00%
+WeightedPrecision: 62.50%
+WeightedPrecision: 70.00%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/WeightedRecall/index.html b/0.19.0/api/metrics/WeightedRecall/index.html
new file mode 100644
index 0000000000..7a7a589661
--- /dev/null
+++ b/0.19.0/api/metrics/WeightedRecall/index.html
@@ -0,0 +1,4535 @@

WeightedRecall

+

Weighted-average recall score.

+

This uses the support of each label to compute an average score, whereas MacroRecall ignores the support.

+

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [0, 1, 2, 2, 2]
+y_pred = [0, 0, 2, 2, 1]
+
+metric = metrics.WeightedRecall()
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
WeightedRecall: 100.00%
+WeightedRecall: 50.00%
+WeightedRecall: 66.67%
+WeightedRecall: 75.00%
+WeightedRecall: 60.00%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/base/BinaryMetric/index.html b/0.19.0/api/metrics/base/BinaryMetric/index.html
new file mode 100644
index 0000000000..cb7a5c30fb
--- /dev/null
+++ b/0.19.0/api/metrics/base/BinaryMetric/index.html
@@ -0,0 +1,4655 @@

BinaryMetric

+

Mother class for all binary classification metrics.

+

Parameters

+
    +
  • +

    cm

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
  • +

    pos_val

    +

    DefaultTrue

    +

    Value to treat as "positive".

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'bool'
  • +
  • y_pred'bool | float | dict[bool, float]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/base/ClassificationMetric/index.html b/0.19.0/api/metrics/base/ClassificationMetric/index.html
new file mode 100644
index 0000000000..f3276ad4fd
--- /dev/null
+++ b/0.19.0/api/metrics/base/ClassificationMetric/index.html
@@ -0,0 +1,4651 @@

ClassificationMetric

+

Mother class for all classification metrics.

+

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/base/Metric/index.html b/0.19.0/api/metrics/base/Metric/index.html
new file mode 100644
index 0000000000..2df9d58626
--- /dev/null
+++ b/0.19.0/api/metrics/base/Metric/index.html
@@ -0,0 +1,4622 @@

Metric

+

Mother class for all metrics.

+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/base/Metrics/index.html b/0.19.0/api/metrics/base/Metrics/index.html
new file mode 100644
index 0000000000..eac03f66db
--- /dev/null
+++ b/0.19.0/api/metrics/base/Metrics/index.html
@@ -0,0 +1,4635 @@

Metrics

+

A container class for handling multiple metrics at once.

+
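A minimal usage sketch. It is hedged: it assumes metrics can be combined into a Metrics container with the + operator, as elsewhere in river's documentation.

from river import metrics

combo = metrics.Accuracy() + metrics.Recall()  # assumed to build a Metrics container

y_true = [True, False, True]
y_pred = [True, True, True]

for yt, yp in zip(y_true, y_pred):
    combo = combo.update(yt, yp)

print(combo)  # both metrics, joined by str_sep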

Parameters

+
    +
  • +

    metrics

    +
  • +
  • +

    str_sep

    +

    Default,

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Methods

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/base/MultiClassMetric/index.html b/0.19.0/api/metrics/base/MultiClassMetric/index.html
new file mode 100644
index 0000000000..410a1e5e1d
--- /dev/null
+++ b/0.19.0/api/metrics/base/MultiClassMetric/index.html
@@ -0,0 +1,4651 @@

MultiClassMetric

+

Mother class for all multi-class classification metrics.

+

Parameters

+
    +
  • +

    cm

    +

    Typeconfusion.ConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +

    Indicates if labels are required, rather than probabilities.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/base/RegressionMetric/index.html b/0.19.0/api/metrics/base/RegressionMetric/index.html
new file mode 100644
index 0000000000..fe9c2f2ee8
--- /dev/null
+++ b/0.19.0/api/metrics/base/RegressionMetric/index.html
@@ -0,0 +1,4622 @@

RegressionMetric

+

Mother class for all regression metrics.

+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'numbers.Number'
  • +
  • y_pred'numbers.Number'
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'numbers.Number'
  • +
  • y_pred'numbers.Number'
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/base/WrapperMetric/index.html b/0.19.0/api/metrics/base/WrapperMetric/index.html
new file mode 100644
index 0000000000..811afb3c93
--- /dev/null
+++ b/0.19.0/api/metrics/base/WrapperMetric/index.html
@@ -0,0 +1,4628 @@

WrapperMetric

+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    metric

    +

    Gives access to the wrapped metric.

    +
  • +
  • +

    requires_labels

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/multioutput/ExactMatch/index.html b/0.19.0/api/metrics/multioutput/ExactMatch/index.html
new file mode 100644
index 0000000000..d4afbd4fcf
--- /dev/null
+++ b/0.19.0/api/metrics/multioutput/ExactMatch/index.html
@@ -0,0 +1,4687 @@

ExactMatch

+

Exact match score.

+

This is the most strict multi-label metric, defined as the number of samples that have all their labels correctly classified, divided by the total number of samples.

+
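Equivalently, in plain Python over the data from the example below, a sample only counts when the whole label dict matches:

y_true = [
    {0: False, 1: True, 2: True},
    {0: True, 1: True, 2: False},
    {0: True, 1: True, 2: False},
]
y_pred = [
    {0: True, 1: True, 2: True},
    {0: True, 1: False, 2: False},
    {0: True, 1: True, 2: False},
]

# only the third sample matches on every label
exact = sum(yt == yp for yt, yp in zip(y_true, y_pred)) / len(y_true)
print(exact)  # 0.3333..., i.e. 33.33%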

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [
+    {0: False, 1: True, 2: True},
+    {0: True, 1: True, 2: False},
+    {0: True, 1: True, 2: False},
+]
+
+y_pred = [
+    {0: True, 1: True, 2: True},
+    {0: True, 1: False, 2: False},
+    {0: True, 1: True, 2: False},
+]
+
+metric = metrics.multioutput.ExactMatch()
+for yt, yp in zip(y_true, y_pred):
+    metric = metric.update(yt, yp)
+
+metric
+
+
ExactMatch: 33.33%
+

+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'dict[str | int, base.typing.ClfTarget]'
  • +
  • y_pred'dict[str | int, base.typing.ClfTarget] | dict[str | int, dict[base.typing.ClfTarget, float]]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'dict[str | int, base.typing.ClfTarget]'
  • +
  • y_pred'dict[str | int, base.typing.ClfTarget] | dict[str | int, dict[base.typing.ClfTarget, float]]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+
    +
  • model
  • +
+
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/multioutput/MacroAverage/index.html b/0.19.0/api/metrics/multioutput/MacroAverage/index.html
new file mode 100644
index 0000000000..9cd3f1c4d4
--- /dev/null
+++ b/0.19.0/api/metrics/multioutput/MacroAverage/index.html
@@ -0,0 +1,4675 @@

MacroAverage

+

Macro-average wrapper.

+

A copy of the provided metric is made for each output. The arithmetic average of all the metrics is returned.

+
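A usage sketch, assuming the same dict-of-outputs interface as the other multioutput metrics on these pages:

from river import metrics

macro_acc = metrics.multioutput.MacroAverage(metrics.Accuracy())

y_true = [{0: False, 1: True}, {0: True, 1: True}]
y_pred = [{0: True, 1: True}, {0: True, 1: False}]

for yt, yp in zip(y_true, y_pred):
    macro_acc = macro_acc.update(yt, yp)

# mean of the per-output accuracies: (0.5 + 0.5) / 2
print(macro_acc.get())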

Parameters

+
    +
  • +

    metric

    +

    A classification or a regression metric.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    metric

    +

    Gives access to the wrapped metric.

    +
  • +
  • +

    requires_labels

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/multioutput/MicroAverage/index.html b/0.19.0/api/metrics/multioutput/MicroAverage/index.html
new file mode 100644
index 0000000000..321616ec22
--- /dev/null
+++ b/0.19.0/api/metrics/multioutput/MicroAverage/index.html
@@ -0,0 +1,4675 @@

MicroAverage

+

Micro-average wrapper.

+

The provided metric is updated with the value of each output, so every output contributes to a single, global metric.

+
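By contrast with the macro variant, every output update flows into one underlying metric. A usage sketch, under the same assumed dict-of-outputs interface:

from river import metrics

micro_acc = metrics.multioutput.MicroAverage(metrics.Accuracy())

y_true = [{0: False, 1: True}, {0: True, 1: True}]
y_pred = [{0: True, 1: True}, {0: True, 1: False}]

for yt, yp in zip(y_true, y_pred):
    micro_acc = micro_acc.update(yt, yp)

print(micro_acc.get())  # 2 correct labels out of 4 in total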

Parameters

+
    +
  • +

    metric

    +

    A classification or a regression metric.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    metric

    +

    Gives access to the wrapped metric.

    +
  • +
  • +

    requires_labels

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/multioutput/MultiLabelConfusionMatrix/index.html b/0.19.0/api/metrics/multioutput/MultiLabelConfusionMatrix/index.html
new file mode 100644
index 0000000000..7d7f4fa453
--- /dev/null
+++ b/0.19.0/api/metrics/multioutput/MultiLabelConfusionMatrix/index.html
@@ -0,0 +1,4632 @@

MultiLabelConfusionMatrix

+

Multi-label confusion matrix.

+

Under the hood, this stores one metrics.ConfusionMatrix for each output.

+

Examples

+

from river import metrics
+
+cm = metrics.multioutput.MultiLabelConfusionMatrix()
+
+y_true = [
+    {0: False, 1: True, 2: True},
+    {0: True, 1: True, 2: False}
+]
+
+y_pred = [
+    {0: True, 1: True, 2: True},
+    {0: True, 1: False, 2: False}
+]
+
+for yt, yp in zip(y_true, y_pred):
+    cm = cm.update(yt, yp)
+
+cm
+
+
0
+            False   True
+    False       0      1
+     True       0      1
+<BLANKLINE>
+1
+            False   True
+    False       0      0
+     True       1      1
+<BLANKLINE>
+2
+            False   True
+    False       1      0
+     True       0      1
+

+

Methods

+
+revert +
+
+update +
\ No newline at end of file
diff --git a/0.19.0/api/metrics/multioutput/PerOutput/index.html b/0.19.0/api/metrics/multioutput/PerOutput/index.html
new file mode 100644
index 0000000000..bc55c33387
--- /dev/null
+++ b/0.19.0/api/metrics/multioutput/PerOutput/index.html
@@ -0,0 +1,4675 @@

PerOutput

+

Per-output wrapper.

+

A copy of the metric is maintained for each output.

+
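A usage sketch. It is hedged: the update pattern mirrors the other multioutput wrappers on these pages, and how the per-output values are exposed may vary by version.

from river import metrics

per_output = metrics.multioutput.PerOutput(metrics.Accuracy())

y_true = [{0: False, 1: True}, {0: True, 1: True}]
y_pred = [{0: True, 1: True}, {0: True, 1: False}]

for yt, yp in zip(y_true, y_pred):
    per_output = per_output.update(yt, yp)

print(per_output)  # one Accuracy instance maintained per output key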

Parameters

+
    +
  • +

    metric

    +

    A classification or a regression metric.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    metric

    +

    Gives access to the wrapped metric.

    +
  • +
  • +

    requires_labels

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/multioutput/SampleAverage/index.html b/0.19.0/api/metrics/multioutput/SampleAverage/index.html
new file mode 100644
index 0000000000..9edad8e1a1
--- /dev/null
+++ b/0.19.0/api/metrics/multioutput/SampleAverage/index.html
@@ -0,0 +1,4709 @@

SampleAverage

+

Sample-average wrapper.

+

The provided metric is evaluated on each sample. The arithmetic average over all the samples is returned. This is equivalent to using average='samples' in scikit-learn; see the cross-check after the example below.

+

Parameters

+
    +
  • +

    metric

    +

    A classification or a regression metric.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    metric

    +

    Gives access to the wrapped metric.

    +
  • +
  • +

    requires_labels

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Examples

+

from river import metrics
+
+y_true = [
+    {0: False, 1: True, 2: True},
+    {0: True, 1: True, 2: False}
+]
+y_pred = [
+    {0: True, 1: True, 2: True},
+    {0: True, 1: False, 2: False}
+]
+
+sample_jaccard = metrics.multioutput.SampleAverage(metrics.Jaccard())
+
+for yt, yp in zip(y_true, y_pred):
+    sample_jaccard = sample_jaccard.update(yt, yp)
+sample_jaccard
+
+
SampleAverage(Jaccard): 58.33%
+

+
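As a batch cross-check of the value above (a sketch; it assumes scikit-learn and the binary-indicator representation of the same data):

from sklearn.metrics import jaccard_score

y_true = [[0, 1, 1], [1, 1, 0]]
y_pred = [[1, 1, 1], [1, 0, 0]]

# per-sample Jaccard scores are 2/3 and 1/2, whose mean is 0.5833...
print(jaccard_score(y_true, y_pred, average='samples'))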

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/multioutput/base/MultiOutputClassificationMetric/index.html b/0.19.0/api/metrics/multioutput/base/MultiOutputClassificationMetric/index.html
new file mode 100644
index 0000000000..f5a548c6cf
--- /dev/null
+++ b/0.19.0/api/metrics/multioutput/base/MultiOutputClassificationMetric/index.html
@@ -0,0 +1,4720 @@

MultiOutputClassificationMetric

+

Mother class for all multi-output classification metrics.

+

Parameters

+
    +
  • +

    cm

    +

    TypeMultiLabelConfusionMatrix | None

    +

    DefaultNone

    +

    This parameter allows sharing the same confusion matrix between multiple metrics. Sharing a confusion matrix reduces the amount of storage and computation time.

    +
  • +
+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    requires_labels

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'dict[str | int, base.typing.ClfTarget]'
  • +
  • y_pred'dict[str | int, base.typing.ClfTarget] | dict[str | int, dict[base.typing.ClfTarget, float]]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'dict[str | int, base.typing.ClfTarget]'
  • +
  • y_pred'dict[str | int, base.typing.ClfTarget] | dict[str | int, dict[base.typing.ClfTarget, float]]'
  • +
  • sample_weight — defaults to 1.0
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/metrics/multioutput/base/MultiOutputRegressionMetric/index.html b/0.19.0/api/metrics/multioutput/base/MultiOutputRegressionMetric/index.html
new file mode 100644
index 0000000000..0f19c7b5a9
--- /dev/null
+++ b/0.19.0/api/metrics/multioutput/base/MultiOutputRegressionMetric/index.html
@@ -0,0 +1,4692 @@

MultiOutputRegressionMetric

+

Mother class for all multi-output regression metrics.

+

Attributes

+
    +
  • +

    bigger_is_better

    +

    Indicate if a high value is better than a low one or not.

    +
  • +
  • +

    works_with_weights

    +

    Indicate whether the model takes into consideration the effect of sample weights

    +
  • +
+

Methods

+
+get +

Return the current value of the metric.

+
+

+
+is_better_than +

Indicate if the current metric is better than another one.

+

Parameters

+
    +
  • other
  • +
+
+

+
+revert +

Revert the metric.

+

Parameters

+
    +
  • y_true'dict[str | int, float | int]'
  • +
  • y_pred'dict[str | int, float | int]'
  • +
+
+

+
+update +

Update the metric.

+

Parameters

+
    +
  • y_true'dict[str | int, float | int]'
  • +
  • y_pred'dict[str | int, float | int]'
  • +
+
+

+
+works_with +

Indicates whether or not a metric can work with a given model.

+

Parameters

+ +
+

\ No newline at end of file
diff --git a/0.19.0/api/misc/SDFT/index.html b/0.19.0/api/misc/SDFT/index.html
new file mode 100644
index 0000000000..59d4be56ff
--- /dev/null
+++ b/0.19.0/api/misc/SDFT/index.html
@@ -0,0 +1,3445 @@

SDFT

+

Sliding Discrete Fourier Transform (SDFT).

+

Initially, the coefficients are all equal to 0, up until enough values have been seen. A call to numpy.fft.fft is triggered once window_size values have been seen. Subsequent values will update the coefficients online. This is much faster than recomputing an FFT from scratch for every new value.

+
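The recurrence behind a sliding DFT (with numpy's forward FFT convention) is X'_k = (X_k + x_new - x_old) * exp(2j * pi * k / N): each coefficient absorbs the incoming value, drops the outgoing one, and is rotated by one bin. A standalone sketch of that rule, not river's internal code:

import numpy as np

def sdft_step(coeffs, x_new, x_old, N):
    # slide the window by one sample: add the new value, drop the old one,
    # then rotate every coefficient by one bin
    k = np.arange(N)
    return (coeffs + x_new - x_old) * np.exp(2j * np.pi * k / N)

N = 5
X = np.arange(1.0, 11.0)
coeffs = np.fft.fft(X[:N])  # bootstrap with one full FFT
for i in range(N, len(X)):
    coeffs = sdft_step(coeffs, X[i], X[i - N], N)
    assert np.allclose(coeffs, np.fft.fft(X[i - N + 1:i + 1]))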

Parameters

+
    +
  • +

    window_size

    +

    The size of the window.

    +
  • +
+

Attributes

+
    +
  • window_size
  • +
+

Examples

+
import numpy as np
+from river import misc
+
+X = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+
+window_size = 5
+sdft = misc.SDFT(window_size)
+
+for i, x in enumerate(X):
+    sdft = sdft.update(x)
+
+    if i + 1 >= window_size:
+        assert np.allclose(sdft.coefficients, np.fft.fft(X[i+1 - window_size:i+1]))
+
+

Methods

+
+update +
\ No newline at end of file
diff --git a/0.19.0/api/misc/Skyline/index.html b/0.19.0/api/misc/Skyline/index.html
new file mode 100644
index 0000000000..d9144c7124
--- /dev/null
+++ b/0.19.0/api/misc/Skyline/index.html
@@ -0,0 +1,3524 @@

Skyline

+

A skyline is a set of points, none of which is dominated by any other point.

+

This implementation uses a block nested loop. If several identical observations qualify, they are all part of the skyline.

+
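The dominance test at the heart of the block nested loop can be sketched as follows; this is an illustration, not river's exact implementation:

def dominates(a, b, minimize, maximize):
    # a dominates b if it is at least as good on every criterion
    # and strictly better on at least one
    no_worse = all(a[f] <= b[f] for f in minimize) and \
               all(a[f] >= b[f] for f in maximize)
    better = any(a[f] < b[f] for f in minimize) or \
             any(a[f] > b[f] for f in maximize)
    return no_worse and better

def update_skyline(skyline, p, minimize=(), maximize=()):
    # block nested loop: ignore p if dominated, else evict what p dominates
    if any(dominates(q, p, minimize, maximize) for q in skyline):
        return skyline
    return [q for q in skyline if not dominates(p, q, minimize, maximize)] + [p]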

Parameters

+
    +
  • +

    minimize

    +

    Typelist | None

    +

    DefaultNone

    +

    A list of features for which the values need to be minimized. Can be omitted as long as maximize is specified.

    +
  • +
  • +

    maximize

    +

    Typelist | None

    +

    DefaultNone

    +

    A list of features for which the values need to be maximized. Can be omitted as long as minimize is specified.

    +
  • +
+

Examples

+

Here is an example taken from this blog post.

+

import random
+from river import misc
+
+city_prices = {
+    'Bordeaux': 4045,
+    'Lyon': 4547,
+    'Toulouse': 3278
+}
+
+def random_house():
+    city = random.choice(['Bordeaux', 'Lyon', 'Toulouse'])
+    size = round(random.gauss(200, 50))
+    price = round(random.uniform(0.8, 1.2) * city_prices[city] * size)
+    return {'city': city, 'size': size, 'price': price}
+
+skyline = misc.Skyline(minimize=['price'], maximize=['size'])
+
+random.seed(42)
+
+for _ in range(100):
+    house = random_house()
+    skyline = skyline.update(house)
+
+print(len(skyline))
+
+
13
+

+

print(skyline[0])
+
+
{'city': 'Toulouse', 'size': 280, 'price': 763202}
+

+

Here is another example using the kart data from Mario Kart: Double Dash!!.

+

import collections
+from river import misc
+
+Kart = collections.namedtuple(
+     'Kart',
+     'name speed off_road acceleration weight turbo'
+)
+
+karts = [
+    Kart('Red Fire', 5, 4, 4, 5, 2),
+    Kart('Green Fire', 7, 3, 3, 4, 2),
+    Kart('Heart Coach', 4, 6, 6, 5, 2),
+    Kart('Bloom Coach', 6, 4, 5, 3, 2),
+    Kart('Turbo Yoshi', 4, 5, 6, 6, 2),
+    Kart('Turbo Birdo', 6, 4, 4, 7, 2),
+    Kart('Goo-Goo Buggy', 1, 9, 9, 2, 3),
+    Kart('Rattle Buggy', 2, 9, 8, 2, 3),
+    Kart('Toad Kart', 3, 9, 7, 2, 3),
+    Kart('Toadette Kart', 1, 9, 9, 2, 3),
+    Kart('Koopa Dasher', 2, 8, 8, 3, 3),
+    Kart('Para-Wing', 1, 8, 9, 3, 3),
+    Kart('DK Jumbo', 8, 2, 2, 8, 1),
+    Kart('Barrel Train', 8, 7, 3, 5, 3),
+    Kart('Koopa King', 9, 1, 1, 9, 1),
+    Kart('Bullet Blaster', 8, 1, 4, 1, 3),
+    Kart('Wario Car', 7, 3, 3, 7, 1),
+    Kart('Waluigi Racer', 5, 9, 5, 6, 2),
+    Kart('Piranha Pipes', 8, 7, 2, 9, 1),
+    Kart('Boo Pipes', 2, 9, 8, 9, 1),
+    Kart('Parade Kart', 7, 3, 4, 7, 3)
+]
+
+skyline = misc.Skyline(
+    maximize=['speed', 'off_road', 'acceleration', 'turbo'],
+    minimize=['weight']
+)
+
+for kart in karts:
+    skyline = skyline.update(kart._asdict())
+
+best_cart_names = [kart['name'] for kart in skyline]
+for name in best_cart_names:
+    print(f'- {name}')
+
+
- Green Fire
+- Heart Coach
+- Bloom Coach
+- Goo-Goo Buggy
+- Rattle Buggy
+- Toad Kart
+- Toadette Kart
+- Barrel Train
+- Koopa King
+- Bullet Blaster
+- Waluigi Racer
+- Parade Kart
+

+

for name in sorted(set(kart.name for kart in karts) - set(best_cart_names)):
+    print(f'- {name}')
+
+
- Boo Pipes
+- DK Jumbo
+- Koopa Dasher
+- Para-Wing
+- Piranha Pipes
+- Red Fire
+- Turbo Birdo
+- Turbo Yoshi
+- Wario Car
+

+

Methods


BanditClassifier

+

Bandit-based model selection for classification.

+

Each model is associated with an arm. At each learn_one call, the policy decides which arm/model to pull. The reward is the performance of the model on the provided sample. The predict_one and predict_proba_one methods use the current best model.
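Schematically, a single learning step can be pictured as the sketch below. The names (models, metrics, policy) are placeholders for the estimator's internals, and the policy API is only assumed here, so treat this as pseudocode rather than the actual implementation.

def bandit_learn_one(x, y, models, metrics, policy):
    arm = policy.pull(range(len(models)))   # the policy picks one arm/model
    y_pred = models[arm].predict_one(x)
    metrics[arm].update(y, y_pred)          # reward = performance on this sample
    policy = policy.update(arm, metrics[arm].get())
    models[arm].learn_one(x, y)             # only the pulled model is trained
    return policy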

+

Parameters

+ +

Attributes

+
    +
  • +

    best_model

    +
  • +
  • +

    models

    +
  • +
+

Examples

+

from river import bandit
+from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import model_selection
+from river import optim
+from river import preprocessing
+
+models = [
+    linear_model.LogisticRegression(optimizer=optim.SGD(lr=lr))
+    for lr in [0.0001, 0.001, 1e-05, 0.01]
+]
+
+dataset = datasets.Phishing()
+model = (
+    preprocessing.StandardScaler() |
+    model_selection.BanditClassifier(
+        models,
+        metric=metrics.Accuracy(),
+        policy=bandit.EpsilonGreedy(
+            epsilon=0.1,
+            decay=0.001,
+            burn_in=20,
+            seed=42
+        )
+    )
+)
+metric = metrics.Accuracy()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
Accuracy: 88.96%
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+


BanditRegressor

+

Bandit-based model selection for regression.

+

Each model is associated with an arm. At each learn_one call, the policy decides which arm/model to pull. The reward is the performance of the model on the provided sample. The predict_one method uses the current best model.

+

Parameters

+ +

Attributes

+
    +
  • +

    best_model

    +
  • +
  • +

    models

    +
  • +
+

Examples

+

from river import bandit
+from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import model_selection
+from river import optim
+from river import preprocessing
+
+models = [
+    linear_model.LinearRegression(optimizer=optim.SGD(lr=lr))
+    for lr in [0.0001, 0.001, 1e-05, 0.01]
+]
+
+dataset = datasets.TrumpApproval()
+model = (
+    preprocessing.StandardScaler() |
+    model_selection.BanditRegressor(
+        models,
+        metric=metrics.MAE(),
+        policy=bandit.EpsilonGreedy(
+            epsilon=0.1,
+            decay=0.001,
+            burn_in=100,
+            seed=42
+        )
+    )
+)
+metric = metrics.MAE()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MAE: 3.134089
+

+

Here's another example using the UCB policy. The latter is more sensitive to the target scale, +and usually works better when the target is rescaled.

+

models = [
+    linear_model.LinearRegression(optimizer=optim.SGD(lr=lr))
+    for lr in [0.0001, 0.001, 1e-05, 0.01]
+]
+
+model = (
+    preprocessing.StandardScaler() |
+    preprocessing.TargetStandardScaler(
+        model_selection.BanditRegressor(
+            models,
+            metric=metrics.MAE(),
+            policy=bandit.UCB(
+                delta=1,
+                burn_in=100
+            )
+        )
+    )
+)
+metric = metrics.MAE()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MAE: 0.875333
+

+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

The prediction.

+
+


GreedyRegressor

+

Greedy selection regressor.

+

This selection method simply updates each model at each time step. The current best model is used to make predictions. It's greedy in the sense that updating every single model can be costly. Bandit-like algorithms, on the other hand, are more temperate in that they only update a subset of the models at each step.
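Put as a sketch (assuming one metric per model, and a metric such as MAE where lower is better):

def greedy_learn_one(x, y, models, metrics):
    # Every model is scored and updated at every time step.
    for model, metric in zip(models, metrics):
        metric.update(y, model.predict_one(x))
        model.learn_one(x, y)

def greedy_predict_one(x, models, metrics):
    # The model with the best (here: lowest) metric makes the prediction.
    best_model, _ = min(zip(models, metrics), key=lambda mm: mm[1].get())
    return best_model.predict_one(x)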

+

Parameters

+
    +
  • +

    models

    +

    Typelist[base.Regressor]

    +

    The models to select from.

    +
  • +
  • +

    metric

    +

    Typemetrics.base.RegressionMetric | None

    +

    DefaultNone

    +

    The metric that is used to measure the performance of each model.

    +
  • +
+

Attributes

+
    +
  • +

    best_model

    +

    The current best model.

    +
  • +
  • +

    models

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import model_selection
+from river import optim
+from river import preprocessing
+
+models = [
+    linear_model.LinearRegression(optimizer=optim.SGD(lr=lr))
+    for lr in [1e-5, 1e-4, 1e-3, 1e-2]
+]
+
+dataset = datasets.TrumpApproval()
+metric = metrics.MAE()
+model = (
+    preprocessing.StandardScaler() |
+    model_selection.GreedyRegressor(models, metric)
+)
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MAE: 1.319678
+

+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.RegTarget'
  • +
+

Returns

+

Regressor: self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

The prediction.

+
+


SuccessiveHalvingClassifier

+

Successive halving algorithm for classification.

+

Successive halving is a method for performing model selection without having to train each model on the whole dataset. At certain points in time (called "rungs"), the worst performing models are discarded and the best ones keep competing with each other. The rung values are designed so that at most budget model updates will be performed in total.

+

If you have k combinations of hyperparameters and your dataset contains n observations, then the maximal budget you can allocate is:

+
\[\frac{2kn}{\eta}\]
+

It is recommended that you check this beforehand. This bound can't be checked by the function because the size of the dataset is not known. In fact it is potentially infinite, in which case the algorithm will terminate once all the budget has been spent.

+

If you have a budget of B, and your dataset contains n observations, then the number of hyperparameter combinations that will spend all the budget and go through all the data is:

+
\[\left\lceil\left\lfloor\frac{B}{2n}\right\rfloor \times \eta \right\rceil\]
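As a quick sanity check of both bounds, take the numbers from the example below: k = 10 models, a budget of B = 2000, eta = 2, and n = 1250 observations, which I believe is the size of the Phishing dataset used here.

import math

k, n, B, eta = 10, 1250, 2000, 2
max_budget = 2 * k * n / eta                          # 12500.0, so B = 2000 fits
full_runs = math.ceil(math.floor(B / (2 * n)) * eta)
# floor(2000 / 2500) = 0: with this budget, no model is guaranteed to
# see the whole dataset; the run simply stops once the budget is spent.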
+

Parameters

+
    +
  • +

    models

    +

    The models to compare.

    +
  • +
  • +

    metric

    +

    Typemetrics.base.Metric

    +

    Metric used for comparing models with.

    +
  • +
  • +

    budget

    +

    Typeint

    +

    Total number of model updates you wish to allocate.

    +
  • +
  • +

    eta

    +

    Default2

    +

Rate of elimination. At every rung, math.ceil(k / eta) models are kept, where k is the number of models that have reached the rung. A higher eta value will focus on fewer models but will allocate more iterations to the best models.

    +
  • +
  • +

    verbose

    +

    DefaultFalse

    +

    Whether to display progress or not.

    +
  • +
  • +

    print_kwargs

    +

    Extra keyword arguments are passed to the print function. For instance, this allows providing a file argument, which indicates where to output progress.

    +
  • +
+

Attributes

+
    +
  • +

    best_model

    +

    The current best model.

    +
  • +
  • +

    models

    +
  • +
+

Examples

+

As an example, let's use successive halving to tune the optimizer of a logistic regression. +We'll first define the model.

+
from river import linear_model
+from river import preprocessing
+
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LogisticRegression()
+)
+
+

Let's now define a grid of parameters which we would like to compare. We'll try +different optimizers with various learning rates.

+
from river import utils
+from river import optim
+
+models = utils.expand_param_grid(model, {
+    'LogisticRegression': {
+        'optimizer': [
+            (optim.SGD, {'lr': [.1, .01, .005]}),
+            (optim.Adam, {'beta_1': [.01, .001], 'lr': [.1, .01, .001]}),
+            (optim.Adam, {'beta_1': [.1], 'lr': [.001]}),
+        ]
+    }
+})
+
+

We can check how many models we've created.

+

len(models)
+
+
10
+

+

We can now pass these models to a SuccessiveHalvingClassifier. We also need to pick a +metric to compare the models, and a budget which indicates how many iterations to run +before picking the best model and discarding the rest.

+
from river import metrics
+from river import model_selection
+
+sh = model_selection.SuccessiveHalvingClassifier(
+    models,
+    metric=metrics.Accuracy(),
+    budget=2000,
+    eta=2,
+    verbose=True
+)
+
+

A SuccessiveHalvingClassifier is also a classifier with a learn_one and a +predict_proba_one method. We can therefore evaluate it like any other classifier with +evaluate.progressive_val_score.

+

from river import datasets
+from river import evaluate
+from river import metrics
+
+evaluate.progressive_val_score(
+    dataset=datasets.Phishing(),
+    model=sh,
+    metric=metrics.ROCAUC()
+)
+
+
[1] 5 removed       5 left  50 iterations   budget used: 500        budget left: 1500       best Accuracy: 80.00%
+[2] 2 removed       3 left  100 iterations  budget used: 1000       budget left: 1000       best Accuracy: 84.00%
+[3] 1 removed       2 left  166 iterations  budget used: 1498       budget left: 502        best Accuracy: 86.14%
+[4] 1 removed       1 left  250 iterations  budget used: 1998       budget left: 2  best Accuracy: 84.80%
+ROCAUC: 95.22%
+

+

We can now view the best model.

+

sh.best_model
+
+
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  LogisticRegression (
+    optimizer=Adam (
+      lr=Constant (
+        learning_rate=0.01
+      )
+      beta_1=0.01
+      beta_2=0.999
+      eps=1e-08
+    )
+    loss=Log (
+      weight_pos=1.
+      weight_neg=1.
+    )
+    l2=0.
+    l1=0.
+    intercept_init=0.
+    intercept_lr=Constant (
+      learning_rate=0.01
+    )
+    clip_gradient=1e+12
+    initializer=Zeros ()
+  )
+)
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+


SuccessiveHalvingRegressor

+

Successive halving algorithm for regression.

+

Successive halving is a method for performing model selection without having to train each model on the whole dataset. At certain points in time (called "rungs"), the worst performing models are discarded and the best ones keep competing with each other. The rung values are designed so that at most budget model updates will be performed in total.

+

If you have k combinations of hyperparameters and your dataset contains n observations, then the maximal budget you can allocate is:

+
\[\frac{2kn}{\eta}\]
+

It is recommended that you check this beforehand. This bound can't be checked by the function because the size of the dataset is not known. In fact it is potentially infinite, in which case the algorithm will terminate once all the budget has been spent.

+

If you have a budget of B, and your dataset contains n observations, then the number of hyperparameter combinations that will spend all the budget and go through all the data is:

+
\[\left\lceil\left\lfloor\frac{B}{2n}\right\rfloor \times \eta \right\rceil\]
+

Parameters

+
    +
  • +

    models

    +

    The models to compare.

    +
  • +
  • +

    metric

    +

    Typemetrics.base.Metric

    +

    Metric used for comparing models with.

    +
  • +
  • +

    budget

    +

    Typeint

    +

    Total number of model updates you wish to allocate.

    +
  • +
  • +

    eta

    +

    Default2

    +

Rate of elimination. At every rung, math.ceil(k / eta) models are kept, where k is the number of models that have reached the rung. A higher eta value will focus on fewer models but will allocate more iterations to the best models.

    +
  • +
  • +

    verbose

    +

    DefaultFalse

    +

    Whether to display progress or not.

    +
  • +
  • +

    print_kwargs

    +

    Extra keyword arguments are passed to the print function. For instance, this allows providing a file argument, which indicates where to output progress.

    +
  • +
+

Attributes

+
    +
  • +

    best_model

    +

    The current best model.

    +
  • +
  • +

    models

    +
  • +
+

Examples

+

As an example, let's use successive halving to tune the optimizer of a linear regression. +We'll first define the model.

+
from river import linear_model
+from river import preprocessing
+
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LinearRegression(intercept_lr=.1)
+)
+
+

Let's now define a grid of parameters which we would like to compare. We'll try +different optimizers with various learning rates.

+
from river import optim
+from river import utils
+
+models = utils.expand_param_grid(model, {
+    'LinearRegression': {
+        'optimizer': [
+            (optim.SGD, {'lr': [.1, .01, .005]}),
+            (optim.Adam, {'beta_1': [.01, .001], 'lr': [.1, .01, .001]}),
+            (optim.Adam, {'beta_1': [.1], 'lr': [.001]}),
+        ]
+    }
+})
+
+

We can check how many models we've created.

+

len(models)
+
+
10
+

+

We can now pass these models to a SuccessiveHalvingRegressor. We also need to pick a +metric to compare the models, and a budget which indicates how many iterations to run +before picking the best model and discarding the rest.

+
from river import metrics
+from river import model_selection
+
+sh = model_selection.SuccessiveHalvingRegressor(
+    models,
+    metric=metrics.MAE(),
+    budget=2000,
+    eta=2,
+    verbose=True
+)
+
+

A SuccessiveHalvingRegressor is also a regressor with a learn_one and a predict_one method. We can therefore evaluate it like any other regressor with evaluate.progressive_val_score.

+

from river import datasets
+from river import evaluate
+from river import metrics
+
+evaluate.progressive_val_score(
+    dataset=datasets.TrumpApproval(),
+    model=sh,
+    metric=metrics.MAE()
+)
+
+
[1] 5 removed       5 left  50 iterations   budget used: 500        budget left: 1500       best MAE: 4.419643
+[2] 2 removed       3 left  100 iterations  budget used: 1000       budget left: 1000       best MAE: 2.392266
+[3] 1 removed       2 left  166 iterations  budget used: 1498       budget left: 502        best MAE: 1.541383
+[4] 1 removed       1 left  250 iterations  budget used: 1998       budget left: 2  best MAE: 1.112122
+MAE: 0.490688
+

+

We can now view the best model.

+

sh.best_model
+
+
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  LinearRegression (
+    optimizer=Adam (
+      lr=Constant (
+        learning_rate=0.1
+      )
+      beta_1=0.01
+      beta_2=0.999
+      eps=1e-08
+    )
+    loss=Squared ()
+    l2=0.
+    l1=0.
+    intercept_init=0.
+    intercept_lr=Constant (
+      learning_rate=0.1
+    )
+    clip_gradient=1e+12
+    initializer=Zeros ()
+  )
+)
+

+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.RegTarget'
  • +
+

Returns

+

Regressor: self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

The prediction.

+
+


ModelSelectionClassifier

+

A model selector for classification.

+

Parameters

+ +

Attributes

+
    +
  • +

    best_model

    +

    The current best model.

    +
  • +
  • +

    models

    +
  • +
+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+


ModelSelectionRegressor

+

A model selector for regression.

+

Parameters

+ +

Attributes

+
    +
  • +

    best_model

    +

    The current best model.

    +
  • +
  • +

    models

    +
  • +
+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.RegTarget'
  • +
+

Returns

+

Regressor: self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

The prediction.

+
+


OneVsOneClassifier

+

One-vs-One (OvO) multiclass strategy.

+

This strategy consists in fitting one binary classifier for each pair of classes. Because we are in a streaming context, the number of classes isn't known from the start, hence new classifiers are instantiated on the fly.

+

The number of classifiers is k * (k - 1) / 2, where k is the number of classes. However, each call to learn_one only requires training k - 1 models. Indeed, only the models that pertain to the given label have to be trained. Meanwhile, making a prediction requires going through each and every model.
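With the 7 classes of the ImageSegments dataset used below, this amounts to:

k = 7                              # number of classes
n_classifiers = k * (k - 1) // 2   # 21 pairwise classifiers in total
n_trained_per_call = k - 1         # 6 of them are updated per learn_one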

+

Parameters

+
    +
  • +

    classifier

    +

    A binary classifier, although a multi-class classifier will work too.

    +
  • +
+

Attributes

+
    +
  • +

    classifiers (dict)

    +

    A mapping between pairs of classes and classifiers. The keys are tuples which contain a pair of classes. Each pair is sorted in lexicographical order.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import multiclass
+from river import preprocessing
+
+dataset = datasets.ImageSegments()
+
+scaler = preprocessing.StandardScaler()
+ovo = multiclass.OneVsOneClassifier(linear_model.LogisticRegression())
+model = scaler | ovo
+
+metric = metrics.MacroF1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MacroF1: 80.76%
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • kwargs
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict[base.typing.ClfTarget, float]: A dictionary that associates a probability with each label.

+
+


OneVsRestClassifier

+

One-vs-the-rest (OvR) multiclass strategy.

+

This strategy consists in fitting one binary classifier per class. Because we are in a streaming context, the number of classes isn't known from the start. Hence, new classifiers are instantiated on the fly. Likewise, the predicted probabilities will only include the classes seen up to a given point in time.
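As a rough sketch (placeholder names, not necessarily the actual implementation), prediction collects each binary classifier's probability for the positive case and renormalizes over the classes seen so far:

def ovr_predict_proba_one(x, classifiers):
    # One binary classifier per class seen so far.
    scores = {
        label: clf.predict_proba_one(x).get(True, 0.0)
        for label, clf in classifiers.items()
    }
    total = sum(scores.values())
    return {label: s / total for label, s in scores.items()} if total else scores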

+

Note that this classifier supports mini-batches as well as single instances.

+

The computational complexity for both learning and predicting grows linearly with the number of classes. If you have a very large number of classes, then you might want to consider using a multiclass.OutputCodeClassifier instead.

+

Parameters

+
    +
  • +

    classifier

    +

    Typebase.Classifier

    +

    A binary classifier, although a multi-class classifier will work too.

    +
  • +
+

Attributes

+
    +
  • +

    classifiers (dict)

    +

    A mapping between classes and classifiers.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import multiclass
+from river import preprocessing
+
+dataset = datasets.ImageSegments()
+
+scaler = preprocessing.StandardScaler()
+ovr = multiclass.OneVsRestClassifier(linear_model.LogisticRegression())
+model = scaler | ovr
+
+metric = metrics.MacroF1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MacroF1: 77.46%
+

+

This estimator also supports mini-batching.

+
import pandas as pd
+
+for X in pd.read_csv(dataset.path, chunksize=64):
+    y = X.pop('category')
+    y_pred = model.predict_many(X)
+    model = model.learn_many(X, y)
+
+

Methods

+
+learn_many +
+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • kwargs
  • +
+

Returns

+

self

+
+

+
+predict_many +
+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_many +
+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+


OutputCodeClassifier

+

Output-code multiclass strategy.

+

This is also referred to as "error-correcting output codes".

+

This class makes it possible to learn a multi-class classification problem with a binary classifier. Each class is converted to a code of 0s and 1s. The length of the code is called the code size. A copy of the classifier is made for each position in the code. The codes associated with the classes are stored in a code book.

+

When a new sample arrives, the label's code is retrieved from the code book. Then, each classifier is trained on the relevant part of the code, which is either a 0 or a 1.

+

For predicting, each classifier outputs a probability. These are then compared to each code in the code book, and the label which is the "closest" is chosen as the most likely class. Closeness is determined in terms of Manhattan distance.
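Here is a toy illustration of the decoding step, with a hypothetical code book of size 4 and made-up classifier outputs:

code_book = {
    'cat':  [0, 1, 1, 0],
    'dog':  [1, 0, 1, 1],
    'bird': [0, 0, 1, 0],
}
probas = [0.2, 0.8, 0.9, 0.1]   # one output per binary classifier

def manhattan(code):
    return sum(abs(p - bit) for p, bit in zip(probas, code))

prediction = min(code_book, key=manhattan)   # 'cat', at distance 0.6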

+

One specificity of online learning is that we don't know how many classes there are initially. Therefore, a random procedure generates random codes on the fly whenever a previously unseen label appears.

+

Parameters

+
    +
  • +

    classifier

    +

    Typebase.Classifier

    +

    A binary classifier, although a multi-class classifier will work too.

    +
  • +
  • +

    code_size

    +

    Typeint

    +

    The code size, which dictates how many copies of the provided classifiers to train. Must be strictly positive.

    +
  • +
  • +

    coding_method

    +

    Typestr

    +

    Defaultrandom

    +

    The method used to generate the codes. Can be either 'exact' or 'random'. The 'exact' method generates all possible codes of a given size in memory, and streams them in a random order. The 'random' method generates random codes of a given size on the fly. The 'exact' method necessarily generates different codes for each class, but requires more memory. The 'random' method can generate duplicate codes for different classes, but requires less memory.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    A random seed number that can be set for reproducibility.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import multiclass
+from river import preprocessing
+
+dataset = datasets.ImageSegments()
+
+scaler = preprocessing.StandardScaler()
+ooc = multiclass.OutputCodeClassifier(
+    classifier=linear_model.LogisticRegression(),
+    code_size=10,
+    coding_method='random',
+    seed=1
+)
+model = scaler | ooc
+
+metric = metrics.MacroF1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MacroF1: 79.58%
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • kwargs
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict[base.typing.ClfTarget, float]: A dictionary that associates a probability with each label.

+
+


ClassifierChain

+

A multi-output model that arranges classifiers into a chain.

+

This will create one model per output. The prediction of the first output will be used as a feature in the second model. The prediction for the second output will be used as a feature for the third model, etc. This "chain model" is therefore capable of capturing dependencies between outputs.
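Conceptually, prediction walks the chain and feeds each prediction back in as a feature, along these lines (a sketch with placeholder names):

def chain_predict_one(x, chain, order):
    x = dict(x)   # don't mutate the caller's features
    y_pred = {}
    for label in order:
        y_pred[label] = chain[label].predict_one(x)
        x[label] = y_pred[label]   # downstream models see this prediction
    return y_pred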

+

Parameters

+
    +
  • +

    model

    +

    Typebase.Classifier

    +

    A classifier model used for each label.

    +
  • +
  • +

    order

    +

    Typelist | None

    +

    DefaultNone

    +

A list specifying the order in which to chain the targets. If None, the order will be inferred from the order of the keys in the target.

    +
  • +
+

Examples

+

from river import feature_selection
+from river import linear_model
+from river import metrics
+from river import multioutput
+from river import preprocessing
+from river import stream
+from sklearn import datasets
+
+dataset = stream.iter_sklearn_dataset(
+    dataset=datasets.fetch_openml('yeast', version=4, parser='auto', as_frame=False),
+    shuffle=True,
+    seed=42
+)
+
+model = feature_selection.VarianceThreshold(threshold=0.01)
+model |= preprocessing.StandardScaler()
+model |= multioutput.ClassifierChain(
+    model=linear_model.LogisticRegression(),
+    order=list(range(14))
+)
+
+metric = metrics.multioutput.MicroAverage(metrics.Jaccard())
+
+for x, y in dataset:
+    # Convert y values to booleans
+    y = {i: yi == 'TRUE' for i, yi in y.items()}
+    y_pred = model.predict_one(x)
+    metric = metric.update(y, y_pred)
+    model = model.learn_one(x, y)
+
+metric
+
+
MicroAverage(Jaccard): 41.81%
+

+

Methods

+
+learn_one +

Update the model with a set of features x and the labels y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • kwargs
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the labels of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

dict[FeatureName, bool]: The predicted labels.

+
+

+
+predict_proba_one +

Predict the probability of each label appearing, given a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+


MonteCarloClassifierChain

+

Monte Carlo Sampling Classifier Chains.

+

Probabilistic Classifier Chains using Monte Carlo sampling, as described in 1.

+

m samples are taken from the posterior distribution. Therefore we need a probabilistic interpretation of the output, and thus, this is a particular variety of ProbabilisticClassifierChain.

+

Parameters

+
    +
  • +

    model

    +

    Typebase.Classifier

    +
  • +
  • +

    m

    +

    Typeint

    +

    Default10

    +

    Number of samples to take from the posterior distribution.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number generator seed for reproducibility.

    +
  • +
+

Examples

+

from river import feature_selection
+from river import linear_model
+from river import metrics
+from river import multioutput
+from river import preprocessing
+from river.datasets import synth
+
+dataset = synth.Logical(seed=42, n_tiles=100)
+
+model = multioutput.MonteCarloClassifierChain(
+    model=linear_model.LogisticRegression(),
+    m=10,
+    seed=42
+)
+
+metric = metrics.multioutput.MicroAverage(metrics.Jaccard())
+
+for x, y in dataset:
+   y_pred = model.predict_one(x)
+   y_pred = {k: y_pred.get(k, 0) for k in y}
+   metric = metric.update(y, y_pred)
+   model = model.learn_one(x, y)
+
+metric
+
+
MicroAverage(Jaccard): 51.79%
+

+

Methods

+
+learn_one +

Update the model with a set of features x and the labels y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • kwargs
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the labels of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

dict[FeatureName, bool]: The predicted labels.

+
+

+
+predict_proba_one +

Predict the probability of each label appearing, given a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

+
+
+
    +
  1. +

Read, J., Martino, L., & Luengo, D. (2014). Efficient Monte Carlo methods for multi-dimensional learning with classifier chains. Pattern Recognition, 47(3), 1535-1546.

    +
  2. +
+

MultiClassEncoder

+

Convert a multi-label task into multiclass.

+

Assigns a class to each unique combination of labels, and proceeds with training the supplied multi-class classifier.

+

The transformation is done by converting the label set, which could be seen as a binary number, into an integer representing a class. At prediction time, the predicted integer is converted back to a binary number which is the predicted label set.
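For instance, reading the label set as a binary number over a fixed label order (an illustration of the idea, not necessarily the exact encoding used internally):

labels = ['a', 'b', 'c']
y = {'a': True, 'b': False, 'c': True}

code = sum(y[label] << i for i, label in enumerate(labels))         # 0b101 = 5
decoded = {label: bool(code >> i & 1) for i, label in enumerate(labels)}
assert decoded == y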

+

Parameters

+ +

Examples

+

from river import forest
+from river import metrics
+from river import multioutput
+from river.datasets import synth
+
+dataset = synth.Logical(seed=42, n_tiles=100)
+
+model = multioutput.MultiClassEncoder(
+    model=forest.ARFClassifier(seed=7)
+)
+
+metric = metrics.multioutput.MicroAverage(metrics.Jaccard())
+
+for x, y in dataset:
+   y_pred = model.predict_one(x)
+   y_pred = {k: y_pred.get(k, 0) for k in y}
+   metric = metric.update(y, y_pred)
+   model = model.learn_one(x, y)
+
+metric
+
+
MicroAverage(Jaccard): 95.10%
+

+

Methods

+
+learn_one +

Update the model with a set of features x and the labels y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'dict[FeatureName, bool]'
  • +
+

Returns

+

MultiLabelClassifier: self

+
+

+
+predict_one +

Predict the labels of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

dict[FeatureName, bool]: The predicted labels.

+
+

+
+predict_proba_one +

Predict the probability of each label appearing, given a dictionary of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

dict[FeatureName, dict[bool, float]]: A dictionary that associates a probability with each label.

+
+


ProbabilisticClassifierChain

+

Probabilistic Classifier Chains.

+

The Probabilistic Classifier Chains (PCC) 1 is a Bayes-optimal method based on the Classifier Chains (CC).

+

Consider the concept of chaining classifiers as searching a path in a binary tree whose leaf nodes are associated with a label \(y \in Y\). While CC searches only a single path in the aforementioned binary tree, PCC looks at each of the \(2^l\) paths, where \(l\) is the number of labels. This limits the applicability of the method to data sets with a small to moderate number of labels. The authors recommend no more than about 15 labels for real-world applications.
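The cost is easy to picture: every possible label combination is scored at prediction time.

import itertools

l = 4   # number of labels
paths = list(itertools.product([False, True], repeat=l))
len(paths)   # 2**l = 16 combinations to evaluate per prediction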

+

Parameters

+ +

Examples

+

from river import linear_model
+from river import metrics
+from river import multioutput
+from river.datasets import synth
+
+dataset = synth.Logical(seed=42, n_tiles=100)
+
+model = multioutput.ProbabilisticClassifierChain(
+    model=linear_model.LogisticRegression()
+)
+
+metric = metrics.multioutput.MicroAverage(metrics.Jaccard())
+
+for x, y in dataset:
+   y_pred = model.predict_one(x)
+   y_pred = {k: y_pred.get(k, 0) for k in y}
+   metric = metric.update(y, y_pred)
+   model = model.learn_one(x, y)
+
+metric
+
+
MicroAverage(Jaccard): 51.84%
+

+

Methods

+
+learn_one +

Update the model with a set of features x and the labels y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • kwargs
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the labels of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

dict[FeatureName, bool]: The predicted labels.

+
+

+
+predict_proba_one +

Predict the probability of each label appearing, given a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

+
+
+
    +
  1. +

Cheng, W., Hüllermeier, E., & Dembczynski, K. J. (2010). Bayes optimal multilabel classification via probabilistic classifier chains. In Proceedings of the 27th international conference on machine learning (ICML-10) (pp. 279-286).

    +
  2. +
+

RegressorChain

+

A multi-output model that arranges regressors into a chain.

+

This will create one model per output. The prediction of the first output will be used as a feature in the second output. The prediction for the second output will be used as a feature for the third, etc. This "chain model" is therefore capable of capturing dependencies between outputs.

+

Parameters

+
    +
  • +

    model

    +

    Typebase.Regressor

    +

    The regression model used to make predictions for each target.

    +
  • +
  • +

    order

    +

    Typelist | None

    +

    DefaultNone

    +

A list specifying the order in which to chain the targets. If None, the order will be inferred from the order of the keys in the target.

    +
  • +
+

Examples

+

from river import evaluate
+from river import linear_model
+from river import metrics
+from river import multioutput
+from river import preprocessing
+from river import stream
+
+from sklearn import datasets
+
+dataset = stream.iter_sklearn_dataset(
+    dataset=datasets.load_linnerud(),
+    shuffle=True,
+    seed=42
+)
+
+model = multioutput.RegressorChain(
+    model=(
+        preprocessing.StandardScaler() |
+        linear_model.LinearRegression(intercept_lr=0.3)
+    ),
+    order=[0, 1, 2]
+)
+
+metric = metrics.multioutput.MicroAverage(metrics.MAE())
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MicroAverage(MAE): 12.733525
+

+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • kwargs
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the outputs of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

The predictions.

+
+


BernoulliNB

+

Bernoulli Naive Bayes.

+

The Bernoulli Naive Bayes model learns from co-occurrences between features (such as word counts) and discrete classes. The input vector must contain positive values, such as counts or TF-IDF values.
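Unlike the multinomial variant, each feature is binarized before counting, using the true_threshold parameter described below. A sketch of the idea:

true_threshold = 0.0
x = {'Chinese': 2, 'Beijing': 1, 'Tokyo': 0}
x_bin = {f: v > true_threshold for f, v in x.items()}
# {'Chinese': True, 'Beijing': True, 'Tokyo': False}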

+

Parameters

+
    +
  • +

    alpha

    +

    Default1.0

    +

    Additive (Laplace/Lidstone) smoothing parameter (use 0 for no smoothing).

    +
  • +
  • +

    true_threshold

    +

    Default0.0

    +

    Threshold for binarizing (mapping to booleans) features.

    +
  • +
+

Attributes

+
    +
  • +

    class_counts (collections.Counter)

    +

    Number of times each class has been seen.

    +
  • +
  • +

    feature_counts (collections.defaultdict)

    +

    Total frequencies per feature and class.

    +
  • +
+

Examples

+

import pandas as pd
+from river import compose
+from river import feature_extraction
+from river import naive_bayes
+
+docs = [
+    ("Chinese Beijing Chinese", "yes"),
+    ("Chinese Chinese Shanghai", "yes"),
+    ("Chinese Macao", "yes"),
+    ("Tokyo Japan Chinese", "no")
+]
+
+model = compose.Pipeline(
+    ("tokenize", feature_extraction.BagOfWords(lowercase=False)),
+    ("nb", naive_bayes.BernoulliNB(alpha=1))
+)
+
+for sentence, label in docs:
+    model = model.learn_one(sentence, label)
+
+model["nb"].p_class("yes")
+
+
0.75
+
+
model["nb"].p_class("no")
+
+
0.25
+

+

model.predict_proba_one("test")
+
+
{'yes': 0.8831539823829913, 'no': 0.11684601761700895}
+

+

model.predict_one("test")
+
+
'yes'
+

+

You can train the model and make predictions in mini-batch mode using the class methods +learn_many and predict_many.

+

df_docs = pd.DataFrame(docs, columns = ["docs", "y"])
+
+X = pd.Series([
+   "Chinese Beijing Chinese",
+   "Chinese Chinese Shanghai",
+   "Chinese Macao",
+   "Tokyo Japan Chinese"
+])
+
+y = pd.Series(["yes", "yes", "yes", "no"])
+
+model = compose.Pipeline(
+    ("tokenize", feature_extraction.BagOfWords(lowercase=False)),
+    ("nb", naive_bayes.BernoulliNB(alpha=1))
+)
+
+model = model.learn_many(X, y)
+
+unseen = pd.Series(["Taiwanese Taipei", "Chinese Shanghai"])
+
+model.predict_proba_many(unseen)
+
+
         no       yes
+0  0.116846  0.883154
+1  0.047269  0.952731
+

+

model.predict_many(unseen)
+
+
0    yes
+1    yes
+dtype: object
+

+

Methods

+
+joint_log_likelihood +

Computes the joint log likelihood of input features.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

float: Mapping between classes and joint log likelihood.

+
+

+
+joint_log_likelihood_many +

Computes the joint log likelihood of input features.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.DataFrame: Input samples joint log likelihood.

+
+

+
+learn_many +

Learn from a batch of count vectors.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
  • y'pd.Series'
  • +
+

Returns

+

MiniBatchClassifier: self

+
+

+
+learn_one +

Updates the model with a single observation.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
+

Returns

+

Classifier: self

+
+

+
+p_class +
+
+p_class_many +
+
+p_feature_given_class +
+
+predict_many +

Predict the outcome for each given sample.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.Series: The predicted labels.

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_many +

Return probabilities using the log-likelihoods in a mini-batch setting.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+
+

+
+predict_proba_one +

Return probabilities using the log-likelihoods.

+

Parameters

+
    +
  • x'dict'
  • +
+
+

+
+
+
    +
  1. +

    The Bernoulli model 

    +
  2. +
+

ComplementNB

+

Complement Naive Bayes classifier for multinomial models.

+

The Complement Naive Bayes model learns from co-occurrences between features (such as word counts) and discrete classes. ComplementNB is suitable for imbalanced datasets. The input vector must contain positive values, such as counts or TF-IDF values.

+

Parameters

+
    +
  • +

    alpha

    +

    Default1.0

    +

    Additive (Laplace/Lidstone) smoothing parameter (use 0 for no smoothing).

    +
  • +
+

Attributes

+
    +
  • +

    class_dist (proba.Multinomial)

    +

    Class prior probability distribution.

    +
  • +
  • +

    feature_counts (collections.defaultdict)

    +

    Total frequencies per feature and class.

    +
  • +
  • +

    class_totals (collections.Counter)

    +

    Total frequencies per class.

    +
  • +
+

Examples

+

import pandas as pd
+from river import compose
+from river import feature_extraction
+from river import naive_bayes
+
+docs = [
+    ("Chinese Beijing Chinese", "yes"),
+    ("Chinese Chinese Shanghai", "yes"),
+    ("Chinese Macao", "maybe"),
+    ("Tokyo Japan Chinese", "no")
+]
+
+model = compose.Pipeline(
+    ("tokenize", feature_extraction.BagOfWords(lowercase=False)),
+    ("nb", naive_bayes.ComplementNB(alpha=1))
+)
+
+for sentence, label in docs:
+    model = model.learn_one(sentence, label)
+
+model["nb"].p_class("yes")
+
+
0.5
+

+

model["nb"].p_class("no")
+
+
0.25
+

+

model["nb"].p_class("maybe")
+
+
0.25
+

+

model.predict_proba_one("test")
+
+
{'yes': 0.275, 'maybe': 0.375, 'no': 0.35}
+

+

model.predict_one("test")
+
+
'maybe'
+

+

You can train the model and make predictions in mini-batch mode using the class methods +learn_many and predict_many.

+

df_docs = pd.DataFrame(docs, columns = ["docs", "y"])
+
+X = pd.Series([
+   "Chinese Beijing Chinese",
+   "Chinese Chinese Shanghai",
+   "Chinese Macao",
+   "Tokyo Japan Chinese"
+])
+
+y = pd.Series(["yes", "yes", "maybe", "no"])
+
+model = compose.Pipeline(
+    ("tokenize", feature_extraction.BagOfWords(lowercase=False)),
+    ("nb", naive_bayes.ComplementNB(alpha=1))
+)
+
+model = model.learn_many(X, y)
+
+unseen = pd.Series(["Taiwanese Taipei", "Chinese Shanghai"])
+
+model.predict_proba_many(unseen)
+
+
      maybe        no       yes
+0  0.415129  0.361624  0.223247
+1  0.248619  0.216575  0.534807
+

+

model.predict_many(unseen)
+
+
0    maybe
+1      yes
+dtype: object
+

+

Methods

+
+joint_log_likelihood +

Computes the joint log likelihood of input features.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

float: Mapping between classes and joint log likelihood.

+
+

+
+joint_log_likelihood_many +

Computes the joint log likelihood of input features.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.DataFrame: Input samples joint log likelihood.

+
+

+
+learn_many +

Learn from a batch of count vectors.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
  • y'pd.Series'
  • +
+

Returns

+

MiniBatchClassifier: self

+
+

+
+learn_one +

Updates the model with a single observation.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
+

Returns

+

Classifier: self

+
+

+
+p_class +
+
+p_class_many +
+
+predict_many +

Predict the outcome for each given sample.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.Series: The predicted labels.

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_many +

Return probabilities using the log-likelihoods in a mini-batch setting.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+
+

+
+predict_proba_one +

Return probabilities using the log-likelihoods.

+

Parameters

+
    +
  • x'dict'
  • +
+
+


GaussianNB

+

Gaussian Naive Bayes.

+

A Gaussian distribution \(G_{cf}\) is maintained for each class \(c\) and each feature \(f\). Each Gaussian is updated using the amount associated with each feature; the details can be found in proba.Gaussian. The joint log-likelihood is then obtained by summing the log probabilities of each feature associated with each class.

+

Examples

+

from river import naive_bayes
+from river import stream
+import numpy as np
+
+X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
+Y = np.array([1, 1, 1, 2, 2, 2])
+
+model = naive_bayes.GaussianNB()
+
+for x, y in stream.iter_array(X, Y):
+    _ = model.learn_one(x, y)
+
+model.predict_one({0: -0.8, 1: -1})
+
+
1
+

+

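To make the joint log-likelihood concrete, here is a minimal sketch (not River's internal code) that recomputes the prediction by hand. The per-class means and variances below were derived from the toy data above; in River they are maintained incrementally by proba.Gaussian.

import math

def gaussian_logpdf(x, mu, var):
    # log of the normal density N(x; mu, var)
    return -0.5 * math.log(2 * math.pi * var) - (x - mu) ** 2 / (2 * var)

# population means/variances per class and feature, computed from the toy data
stats = {
    1: {"prior": 0.5, 0: (-2.0, 2 / 3), 1: (-4 / 3, 2 / 9)},
    2: {"prior": 0.5, 0: (2.0, 2 / 3), 1: (4 / 3, 2 / 9)},
}

x = {0: -0.8, 1: -1}
jll = {
    c: math.log(s["prior"]) + sum(gaussian_logpdf(x[f], *s[f]) for f in x)
    for c, s in stats.items()
}
max(jll, key=jll.get)  # 1, matching model.predict_one above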
Methods

+
+joint_log_likelihood +
+
+joint_log_likelihood_many +
+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
+

Returns

+

Classifier: self

+
+

+
+p_class +
+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Return probabilities using the log-likelihoods.

+

Parameters

+
    +
  • x'dict'
  • +
+
+

MultinomialNB

+

Naive Bayes classifier for multinomial models.

+

The multinomial Naive Bayes model learns from occurrence counts of features, such as word counts, associated with discrete classes. The input vector must contain positive values, such as counts or TF-IDF values.

+

Parameters

+
    +
  • +

    alpha

    +

    Default1.0

    +

    Additive (Laplace/Lidstone) smoothing parameter (use 0 for no smoothing).

    +
  • +
+

Attributes

+
    +
  • +

    class_dist (proba.Multinomial)

    +

    Class prior probability distribution.

    +
  • +
  • +

    feature_counts (collections.defaultdict)

    +

    Total frequencies per feature and class.

    +
  • +
  • +

    class_totals (collections.Counter)

    +

    Total frequencies per class.

    +
  • +
+

Examples

+

import pandas as pd
+from river import compose
+from river import feature_extraction
+from river import naive_bayes
+
+docs = [
+    ("Chinese Beijing Chinese", "yes"),
+    ("Chinese Chinese Shanghai", "yes"),
+    ("Chinese Macao", "maybe"),
+    ("Tokyo Japan Chinese", "no")
+]
+
+model = compose.Pipeline(
+    ("tokenize", feature_extraction.BagOfWords(lowercase=False)),
+    ("nb", naive_bayes.MultinomialNB(alpha=1))
+)
+
+for sentence, label in docs:
+    model = model.learn_one(sentence, label)
+
+model["nb"].p_class("yes")
+
+
0.5
+

+

model["nb"].p_class("no")
+
+
0.25
+

+

model["nb"].p_class("maybe")
+
+
0.25
+

+

model.predict_proba_one("test")
+
+
{'yes': 0.413, 'maybe': 0.310, 'no': 0.275}
+

+

model.predict_one("test")
+
+
'yes'
+

+

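Under the hood, the class-conditional likelihood of each token uses Laplace/Lidstone smoothing. A minimal sketch of that formula (illustrative only, not River's internals), using counts taken from the "yes" and "no" documents above:

def p_feature_given_class(counts, feature, cls, alpha=1.0):
    # (count of `feature` in `cls` + alpha) /
    # (total token count in `cls` + alpha * vocabulary size)
    vocab = {f for c in counts.values() for f in c}
    num = counts[cls].get(feature, 0) + alpha
    den = sum(counts[cls].values()) + alpha * len(vocab)
    return num / den

counts = {
    "yes": {"Chinese": 4, "Beijing": 1, "Shanghai": 1},
    "no": {"Tokyo": 1, "Japan": 1, "Chinese": 1},
}
p_feature_given_class(counts, "Chinese", "yes")  # (4 + 1) / (6 + 5) = 0.4545...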
You can train the model and make predictions in mini-batch mode using the learn_many and predict_many methods.

+

df_docs = pd.DataFrame(docs, columns = ["docs", "y"])
+
+X = pd.Series([
+   "Chinese Beijing Chinese",
+   "Chinese Chinese Shanghai",
+   "Chinese Macao",
+   "Tokyo Japan Chinese"
+])
+
+y = pd.Series(["yes", "yes", "maybe", "no"])
+
+model = compose.Pipeline(
+    ("tokenize", feature_extraction.BagOfWords(lowercase=False)),
+    ("nb", naive_bayes.MultinomialNB(alpha=1))
+)
+
+model = model.learn_many(X, y)
+
+unseen = pd.Series(["Taiwanese Taipei", "Chinese Shanghai"])
+
+model.predict_proba_many(unseen)
+
+
      maybe        no       yes
+0  0.373272  0.294931  0.331797
+1  0.160396  0.126733  0.712871
+

+

model.predict_many(unseen)
+
+
0    maybe
+1      yes
+dtype: object
+

+

Methods

+
+joint_log_likelihood +

Computes the joint log likelihood of input features.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: Mapping between classes and joint log likelihood.

+
+

+
+joint_log_likelihood_many +

Computes the joint log likelihood of input features.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.DataFrame: Input samples joint log likelihood.

+
+

+
+learn_many +

Learn from a batch of count vectors.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
  • y'pd.Series'
  • +
+

Returns

+

MiniBatchClassifier: self

+
+

+
+learn_one +

Updates the model with a single observation.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
+

Returns

+

Classifier: self

+
+

+
+p_class +
+
+p_class_many +
+
+p_feature_given_class +
+
+predict_many +

Predict the outcome for each given sample.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.Series: The predicted labels.

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_many +

Return probabilities using the log-likelihoods in a mini-batch setting.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+
+

+
+predict_proba_one +

Return probabilities using the log-likelihoods.

+

Parameters

+
    +
  • x'dict'
  • +
+
+

KNNClassifier

+

K-Nearest Neighbors (KNN) for classification.

+

Samples are stored using a first-in, first-out strategy. The strategy to perform search queries in the data buffer is defined by the engine parameter.

+

Parameters

+
    +
  • +

    n_neighbors

    +

    Typeint

    +

    Default5

    +

    The number of nearest neighbors to search for.

    +
  • +
  • +

    engine

    +

    TypeBaseNN | None

    +

    DefaultNone

    +

The search engine used to store the instances and perform search queries. Depending on the chosen engine, the search will be exact or approximate. Please consult the documentation of each available search engine for more details on its usage. By default, the SWINN search engine is used for approximate search queries.

    +
  • +
  • +

    weighted

    +

    Typebool

    +

    DefaultTrue

    +

Weight the contribution of each neighbor by its inverse distance.

    +
  • +
  • +

    cleanup_every

    +

    Typeint

    +

    Default0

    +

    This determines at which rate old classes are cleaned up. Classes that have been seen in the past but that are not present in the current window are dropped. Classes are never dropped when this is set to 0.

    +
  • +
  • +

    softmax

    +

    Typebool

    +

    DefaultFalse

    +

Whether or not to use softmax normalization to normalize the neighbors' contributions. Votes are divided by the total number of votes if this is False.

    +
  • +
+

Examples

+
import functools
+from river import datasets
+from river import evaluate
+from river import metrics
+from river import neighbors
+from river import preprocessing
+from river import utils
+
+dataset = datasets.Phishing()
+
+

To select a custom distance metric which takes one or several parameters, you can wrap your chosen distance using functools.partial:

+

l1_dist = functools.partial(utils.math.minkowski_distance, p=1)
+
+model = (
+    preprocessing.StandardScaler() |
+    neighbors.KNNClassifier(
+        engine=neighbors.SWINN(
+            dist_func=l1_dist,
+            seed=42
+        )
+    )
+)
+
+evaluate.progressive_val_score(dataset, model, metrics.Accuracy())
+
+
Accuracy: 89.67%
+

+

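The weighted parameter roughly amounts to inverse-distance-weighted voting. A minimal sketch of that idea (illustrative, not River's implementation):

from collections import defaultdict

def weighted_vote(neighbors):
    # neighbors: list of (label, distance) pairs returned by a search engine
    votes = defaultdict(float)
    for label, dist in neighbors:
        votes[label] += 1.0 / (dist + 1e-9)  # small constant guards against zero distance
    total = sum(votes.values())
    return {label: v / total for label, v in votes.items()}

weighted_vote([("phishing", 0.2), ("legit", 0.5), ("phishing", 1.0)])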
Methods

+
+clean_up_classes +

Clean up classes added to the window.

+

Classes that are added (and removed) from the window may no longer be valid. This method cleans up the window and ensures that only known classes are kept; "None" is not considered a class. It is called every cleanup_every steps, or can be called manually.

+
+

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

dict[base.typing.ClfTarget, float]: A dictionary that associates a probability which each label.

+
+

+

Notes

+

Note that since the window is moving and we keep track of all classes that are added at some point, a class might be returned in a result (with a value of 0) if it is no longer in the window. You can call model.clean_up_classes(), or set cleanup_every to a non-zero value.

KNNRegressor

+

K-Nearest Neighbors regressor.

+

Samples are stored using a first-in, first-out strategy. The strategy to perform search queries in the data buffer is defined by the engine parameter. Predictions are obtained by aggregating the values of the closest n_neighbors stored samples with respect to a query sample.

+

Parameters

+
    +
  • +

    n_neighbors

    +

    Typeint

    +

    Default5

    +

    The number of nearest neighbors to search for.

    +
  • +
  • +

    engine

    +

    TypeBaseNN | None

    +

    DefaultNone

    +

The search engine used to store the instances and perform search queries. Depending on the chosen engine, the search will be exact or approximate. Please consult the documentation of each available search engine for more details on its usage. By default, the SWINN search engine is used for approximate search queries.

    +
  • +
  • +

    aggregation_method

    +

    Typestr

    +

    Defaultmean

    +

The method used to aggregate the target values of the neighbors. Valid options are 'mean', 'median', and 'weighted_mean'.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import metrics
+from river import neighbors
+from river import preprocessing
+
+dataset = datasets.TrumpApproval()
+
+model = neighbors.KNNRegressor()
+evaluate.progressive_val_score(dataset, model, metrics.RMSE())
+
+
RMSE: 1.427743
+

+

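The three aggregation_method options boil down to simple aggregations of the neighbors' targets. A minimal sketch (illustrative, not River's implementation):

import statistics

def aggregate(neighbors, method="mean"):
    # neighbors: list of (target, distance) pairs
    ys = [y for y, _ in neighbors]
    if method == "mean":
        return statistics.mean(ys)
    if method == "median":
        return statistics.median(ys)
    if method == "weighted_mean":  # inverse-distance weighting
        ws = [1.0 / (d + 1e-9) for _, d in neighbors]
        return sum(w * y for (y, _), w in zip(neighbors, ws)) / sum(ws)

aggregate([(40.1, 0.2), (41.5, 0.5), (39.8, 1.0)], method="weighted_mean")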
Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.RegTarget'
  • +
+

Returns

+

Regressor: self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.RegTarget: The prediction.

+
+

LazySearch

+

Exact nearest neighbors using a lazy search strategy.

+

Parameters

+
    +
  • +

    window_size

    +

    Typeint

    +

    Default50

    +

Size of the sliding window used to search for neighbors.

    +
  • +
  • +

    min_distance_keep

    +

    Typefloat

    +

    Default0.0

    +

    The minimum distance (similarity) to consider adding a point to the window. E.g., a value of 0.0 will add even exact duplicates.

    +
  • +
  • +

    dist_func

    +

    TypeDistanceFunc | FunctionWrapper | None

    +

    DefaultNone

    +

A distance function which accepts two input items to compare. If not set, the Minkowski distance with p=2 is used.

    +
  • +
+

Methods

+
+append +

Add a point to the window, optionally with extra metadata.

+

Parameters

+
    +
  • item'typing.Any'
  • +
  • extra'typing.Any | None' — defaults to None
  • +
  • kwargs
  • +
+
+

+
+search +

Find the n_neighbors closest points to item, along with their distances.

+

Parameters

+
    +
  • item'typing.Any'
  • +
  • n_neighbors'int'
  • +
  • kwargs
  • +
+
+

+
+update +

Update the window with a new point; the point is only added if it is farther than the minimum distance from existing points.

+

If the minimum distance is 0, the distance calculation can be skipped. The item (and extra metadata) will not be added to the window if it is too close to an existing point.

+

Parameters

+
    +
  • item'typing.Any'
  • +
  • n_neighbors'int' — defaults to 1
  • +
  • extra'typing.Any | None' — defaults to None
  • +
+

Returns

+

A boolean (true/false) to indicate if the point was added.

+
+

+

Notes

+

Updates are by default stored by the FIFO (first in, first out) method, which means that when the size limit is reached, old samples are dumped to give room for new samples. This is circular, meaning that older points are dumped first. This also gives the implementation a temporal aspect, because older samples are replaced with newer ones.

+

The parameter min_distance_keep controls the addition of new items to the window: items that are far enough away (> min_distance_keep) are added. Thus a value of 0 indicates that we add all points, and increasing it from 0 makes it less likely that a new item will be kept.

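Putting the two notes together, the window logic is roughly the following sketch (illustrative, not the actual implementation):

from collections import deque

def maybe_add(window: deque, item, dist_func, min_distance_keep=0.0):
    # window should be a deque(maxlen=window_size): appending to a full
    # deque automatically evicts the oldest point (FIFO behaviour)
    if min_distance_keep > 0 and any(
        dist_func(item, other) <= min_distance_keep for other in window
    ):
        return False  # too close to an existing point, not added
    window.append(item)
    return True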
SWINN

+

Sliding WIndow-based Nearest Neighbor (SWINN) search using Graphs.

+

Extends the NNDescent algorithm [1] to handle vertex addition and removal in a FIFO data ingestion policy. SWINN builds and keeps a directed graph where edges connect the nearest neighbors. Any distance metric can be used to build the graph. By using a directed graph, the user must set the desired number of neighbors. More neighbors imply more accurate search queries at the cost of increased running time and memory usage. Note that although the number of directed neighbors is limited by the user, there is no direct control on the number of reverse neighbors, i.e., the number of vertices that have an edge to a given vertex.

+

The basic idea of SWINN and NNDescent is that "the neighbor of my neighbors might as well be my neighbor". Hence, the connections are constantly revisited to improve the graph structure. The algorithm for creating and maintaining the search graph can be described in general lines as follows:

+
    +
  • +

    Start with a random neighborhood graph;

    +
  • +
  • +

    For each node in the search graph: refine the current neighborhood by checking if there are better neighborhood options among the neighbors of the current neighbors;

    +
  • +
  • +

    If the total number of neighborhood changes is smaller than a given stopping criterion, then stop.

    +
  • +
+

SWINN adds strategies to remove vertices from the search graph and to prune redundant edges. SWINN is more efficient when the selected maxlen is greater than 500. For small data windows, the lazy/exhaustive search, i.e., neighbors.LazySearch, might be a better idea.

+
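The refinement step above can be sketched as follows; this is a deliberately simplified, single-pass illustration of the NNDescent idea, not SWINN's actual code:

def refine(graph, dist):
    # graph: dict mapping each node to a list of its current neighbors
    changes = 0
    for node, knn in graph.items():
        # candidates are the neighbors of the current neighbors
        candidates = {c for n in knn for c in graph[n] if c != node and c not in knn}
        for cand in candidates:
            worst = max(knn, key=lambda n: dist(node, n))
            if dist(node, cand) < dist(node, worst):
                knn[knn.index(worst)] = cand  # keep the better neighbor
                changes += 1
    return changes  # compare against the `delta`-based stopping criterion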

Parameters

+
    +
  • +

    graph_k

    +

    Typeint

    +

    Default20

    +

    The maximum number of direct nearest neighbors each node has.

    +
  • +
  • +

    dist_func

    +

    TypeDistanceFunc | FunctionWrapper | None

    +

    DefaultNone

    +

The distance function used to compare two items. If not set, the Minkowski distance with p=2 is used.

    +
  • +
  • +

    maxlen

    +

    Typeint

    +

    Default1000

    +

    The maximum size of the data buffer.

    +
  • +
  • +

    warm_up

    +

    Typeint

    +

    Default500

    +

    How many data instances to observe before starting the search graph.

    +
  • +
  • +

    max_candidates

    +

    Typeint

    +

    DefaultNone

    +

The maximum number of vertices to consider when performing local neighborhood joins. If not set, SWINN will use min(50, max(50, self.graph_k)).

    +
  • +
  • +

    delta

    +

    Typefloat

    +

    Default0.0001

    +

    Early stop parameter for the neighborhood refinement procedure. NNDescent will stop running if the maximum number of iterations is reached or the number of edge changes after an iteration is smaller than or equal to delta * graph_k * n_nodes. In the last expression, n_nodes refers to the number of graph nodes involved in the (local) neighborhood refinement.

    +
  • +
  • +

    prune_prob

    +

    Typefloat

    +

    Default0.0

    +

    The probability of removing redundant edges. Must be between 0 and 1. If set to zero, no edge will be pruned. When set to one, every potentially redundant edge will be dropped.

    +
  • +
  • +

    n_iters

    +

    Typeint

    +

    Default10

    +

    The maximum number of NNDescent iterations to perform to refine the search index.

    +
  • +
  • +

    seed

    +

    Typeint

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
+

Methods

+
+append +

Add a new item to the search index.

+

Data is stored using the FIFO strategy. Both the data buffer and the search graph are updated. The addition of a new item will trigger the removal of the oldest item if the maximum size has been reached. All edges of the removed node are also dropped, and safety procedures are applied to ensure its neighbors remain accessible. The addition of a new item also triggers local neighborhood refinement procedures, to ensure the search index is effective and the node degree constraints are met.

+

Parameters

+
    +
  • item'typing.Any'
  • +
  • kwargs
  • +
+
+

+
+connectivity +

Get a list with the size of each connected component in the search graph.

+

This metric provides an overview of reachability in the search index by using Kruskal's algorithm to build a forest of connected components. We want our search index to have a single connected component, i.e., the case where we get a list containing a single number which is equal to maxlen. If that is not the case, not every node in the search graph can be reached from any given starting point. You may want to try increasing graph_k to improve connectivity. However, keep in mind the following aspects: 1) computing this metric is a costly operation (\(O(E\log V)\)), where \(E\) and \(V\) are, respectively, the number of edges and vertices in the search graph; 2) often, connectivity comes at the price of increased computational costs. Tweaking the sample_rate might help in such situations. The best possible scenario is to decrease the value of graph_k while keeping a single connected component.

+

Returns

+

list[int]: A list of the number of elements in each connected component of the graph.

+
+

+
+search +

Search the underlying nearest neighbor graph given a query item.

+

If not enough samples have been observed, i.e., the number of stored samples is smaller than warm_up, the search switches to a brute-force strategy.

+

Parameters

+
    +
  • item'typing.Any'
  • +
  • n_neighbors'int'
  • +
  • epsilon'float' — defaults to 0.1
  • +
  • kwargs
  • +
+

Returns

+

tuple[list, list]: neighbors, dists

+
+

+

Notes

+

There is an accuracy/speed trade-off between graph_k and sample_rate. To ensure a single connected component, and thus an effective search index, one can increase graph_k. The connectivity method is a helper to determine whether the search index has a single connected component. However, search accuracy might come at the cost of increased memory usage and slow processing. To alleviate that, one can rely on decreasing the sample_rate to avoid exploring all the undirected edges of a node during search queries and local graph refinements. Moreover, the edge pruning procedures also help decrease the computational costs. Note that anything that limits the number of explored neighbors or prunes edges might have a negative impact on search accuracy.

+
+
+
  1. Dong, W., Moses, C., & Li, K. (2011, March). Efficient k-nearest neighbor graph construction for generic similarity measures. In Proceedings of the 20th International Conference on World Wide Web (pp. 577-586).
MLPRegressor

+

Multi-layer Perceptron for regression.

+

This model is still a work in progress. Here are some features that still need implementing:

  • learn_one and predict_one just cast the input dict to a single-row dataframe and then call learn_many and predict_many respectively. This is very inefficient.
  • Not all of the optimizers in the optim module can be used, as they are not all vectorised.
  • Emerging and disappearing features are not supported. Each instance/batch has to have the same features.
  • The gradients haven't been numerically checked.

Parameters

+
    +
  • +

    hidden_dims

    +

    The dimensions of the hidden layers. For example, specifying (10, 20) means that there are two hidden layers with 10 and 20 neurons, respectively. Note that the number of layers the network contains is equal to the number of hidden layers plus two (to account for the input and output layers).

    +
  • +
  • +

    activations

    +

The activation functions to use at each layer, including the input and output layers. Therefore you need to specify three activations if you specify one hidden layer.

    +
  • +
  • +

    loss

    +

    Typeoptim.losses.Loss | None

    +

    DefaultNone

    +

    Loss function. Defaults to optim.losses.Squared.

    +
  • +
  • +

    optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    Optimizer. Defaults to optim.SGD with the learning rate set to 0.01.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number generation seed. Set this for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    n_layers

    +

    Return the number of layers in the network. The number of layers is equal to the number of hidden layers plus 2. The 2 accounts for the input layer and the output layer.

    +
  • +
+

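As a rough mental model of what the network computes, here is a minimal forward-pass sketch with one hidden layer, ReLU activations and an identity output (illustrative only; the dimensions and initialisation below are made up):

import numpy as np

rng = np.random.default_rng(42)

dims = [4, 5, 1]  # 4 inputs -> hidden_dims=(5,) -> 1 output
weights = [rng.normal(size=(n_in, n_out)) for n_in, n_out in zip(dims[:-1], dims[1:])]
biases = [np.zeros(n_out) for n_out in dims[1:]]

def forward(x):
    a = x
    for i, (W, b) in enumerate(zip(weights, biases)):
        z = a @ W + b
        # ReLU on hidden layers, identity on the output layer
        a = z if i == len(weights) - 1 else np.maximum(z, 0.0)
    return a

forward(rng.normal(size=(1, 4)))  # a (1, 1) array of predictions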
Examples

+

from river import datasets
+from river import evaluate
+from river import neural_net as nn
+from river import optim
+from river import preprocessing as pp
+from river import metrics
+
+model = (
+    pp.StandardScaler() |
+    nn.MLPRegressor(
+        hidden_dims=(5,),
+        activations=(
+            nn.activations.ReLU,
+            nn.activations.ReLU,
+            nn.activations.Identity
+        ),
+        optimizer=optim.SGD(1e-3),
+        seed=42
+    )
+)
+
+dataset = datasets.TrumpApproval()
+
+metric = metrics.MAE()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MAE: 1.580578
+

+

You can also use this to process mini-batches of data.

+

import pandas as pd

model = (
+    pp.StandardScaler() |
+    nn.MLPRegressor(
+        hidden_dims=(10,),
+        activations=(
+            nn.activations.ReLU,
+            nn.activations.ReLU,
+            nn.activations.ReLU
+        ),
+        optimizer=optim.SGD(1e-4),
+        seed=42
+    )
+)
+
+dataset = datasets.TrumpApproval()
+batch_size = 32
+
+for epoch in range(10):
+    for xb in pd.read_csv(dataset.path, chunksize=batch_size):
+        yb = xb.pop('five_thirty_eight')
+        y_pred = model.predict_many(xb)
+        model = model.learn_many(xb, yb)
+
+model.predict_many(xb)
+
+
      five_thirty_eight
+992           39.405231
+993           46.447481
+994           42.121865
+995           40.251148
+996           40.836378
+997           40.893153
+998           40.949927
+999           48.416504
+1000          42.077830
+

+

Methods

+
+call +

Make predictions.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+
+

+
+learn_many +

Train the network.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
  • y'pd.DataFrame'
  • +
+
+

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.RegTarget'
  • +
+

Returns

+

Regressor: self

+
+

+
+predict_many +
+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

base.typing.RegTarget: The prediction.

+
+

Identity

+

Identity activation function.

+

Methods

+
+apply +

Apply the activation function to a layer output z.

+
    +
  • z
  • +
+
+

+
+gradient +

Return the gradient with respect to a layer output z.

+
    +
  • z
  • +
+
+

ReLU

+

Rectified Linear Unit (ReLU) activation function.

+

Methods

+
+apply +

Apply the activation function to a layer output z.

+
    +
  • z
  • +
+
+

+
+gradient +

Return the gradient with respect to a layer output z.

+
    +
  • z
  • +
+
+
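In plain NumPy, apply and gradient for ReLU amount to the following (a sketch of the math, not the class itself):

import numpy as np

def relu_apply(z):
    return np.maximum(z, 0.0)

def relu_gradient(z):
    # subgradient: 1 where z > 0, 0 elsewhere
    return (z > 0).astype(float)

z = np.array([-2.0, 0.0, 3.0])
relu_apply(z)     # array([0., 0., 3.])
relu_gradient(z)  # array([0., 0., 1.])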

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/neural-net/activations/Sigmoid/index.html b/0.19.0/api/neural-net/activations/Sigmoid/index.html new file mode 100644 index 0000000000..23480fb71f --- /dev/null +++ b/0.19.0/api/neural-net/activations/Sigmoid/index.html @@ -0,0 +1,3470 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Sigmoid - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Sigmoid

+

Sigmoid activation function.

+

Methods

+
+apply +

Apply the activation function to a layer output z.

+
    +
  • z
  • +
+
+

+
+gradient +

Return the gradient with respect to a layer output z.

+
    +
  • z
  • +
+
+

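The corresponding math, as a NumPy sketch (not the class itself); note the well-known identity σ'(z) = σ(z)(1 − σ(z)):

import numpy as np

def sigmoid_apply(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_gradient(z):
    s = sigmoid_apply(z)
    return s * (1.0 - s)

sigmoid_apply(np.array([0.0]))     # array([0.5])
sigmoid_gradient(np.array([0.0]))  # array([0.25])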
AMSGrad

+

AMSGrad optimizer.

+

Parameters

+
    +
  • +

    lr

    +

    Typeint | float | optim.base.Scheduler

    +

    Default0.1

    +

    The learning rate.

    +
  • +
  • +

    beta_1

    +

    Default0.9

    +
  • +
  • +

    beta_2

    +

    Default0.999

    +
  • +
  • +

    eps

    +

    Default1e-08

    +
  • +
  • +

    correct_bias

    +

    DefaultTrue

    +
  • +
+

Attributes

+
    +
  • +

    m (collections.defaultdict)

    +
  • +
  • +

    v (collections.defaultdict)

    +
  • +
  • +

    v_hat (collections.defaultdict)

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.Phishing()
+optimizer = optim.AMSGrad()
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LogisticRegression(optimizer)
+)
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 86.60%
+

+

Methods

+
+look_ahead +

Updates a weight vector before a prediction is made.

+

The weights w are modified in-place and the updated weights are returned.

+

Parameters

+
    +
  • w'dict'
  • +
+
+

+
+step +

Updates a weight vector given a gradient.

+

Parameters

+
    +
  • w'dict | VectorLike'
  • +
  • g'dict | VectorLike'
  • +
+

Returns

+

dict | VectorLike: The updated weights.

+
+

AdaBound

+

AdaBound optimizer.

+

Parameters

+
    +
  • +

    lr

    +

    Default0.001

    +

    The learning rate.

    +
  • +
  • +

    beta_1

    +

    Default0.9

    +
  • +
  • +

    beta_2

    +

    Default0.999

    +
  • +
  • +

    eps

    +

    Default1e-08

    +
  • +
  • +

    gamma

    +

    Default0.001

    +
  • +
  • +

    final_lr

    +

    Default0.1

    +
  • +
+

Attributes

+
    +
  • +

    m (collections.defaultdict)

    +
  • +
  • +

    s (collections.defaultdict)

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.Phishing()
+optimizer = optim.AdaBound()
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LogisticRegression(optimizer)
+)
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 88.06%
+

+

Methods

+
+look_ahead +

Updates a weight vector before a prediction is made.

+

The weights w are modified in-place and the updated weights are returned.

+

Parameters

+
    +
  • w'dict'
  • +
+
+

+
+step +

Updates a weight vector given a gradient.

+

Parameters

+
    +
  • w'dict | VectorLike'
  • +
  • g'dict | VectorLike'
  • +
+

Returns

+

dict | VectorLike: The updated weights.

+
+

AdaDelta

+

AdaDelta optimizer.

+

Parameters

+
    +
  • +

    rho

    +

    Default0.95

    +
  • +
  • +

    eps

    +

    Default1e-08

    +
  • +
+

Attributes

+
    +
  • +

    g2 (collections.defaultdict)

    +
  • +
  • +

    s2 (collections.defaultdict)

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.Phishing()
+optimizer = optim.AdaDelta()
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LogisticRegression(optimizer)
+)
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 80.56%
+

+

Methods

+
+look_ahead +

Updates a weight vector before a prediction is made.

+

The weights w are modified in-place and the updated weights are returned.

+

Parameters

+
    +
  • w'dict'
  • +
+
+

+
+step +

Updates a weight vector given a gradient.

+

Parameters

+
    +
  • w'dict | VectorLike'
  • +
  • g'dict | VectorLike'
  • +
+

Returns

+

dict | VectorLike: The updated weights.

+
+

AdaGrad

+

AdaGrad optimizer.

+

Parameters

+
    +
  • +

    lr

    +

    Default0.1

    +
  • +
  • +

    eps

    +

    Default1e-08

    +
  • +
+

Attributes

+
    +
  • g2 (collections.defaultdict)
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.Phishing()
+optimizer = optim.AdaGrad()
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LogisticRegression(optimizer)
+)
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 88.01%
+

+
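The g2 attribute accumulates squared gradients, which yields the textbook AdaGrad update. A minimal sketch on dict weights (illustrative, not the class's code):

import collections

g2 = collections.defaultdict(float)

def adagrad_step(w, g, lr=0.1, eps=1e-8):
    for i, gi in g.items():
        g2[i] += gi ** 2  # accumulate squared gradients
        w[i] = w.get(i, 0.0) - lr * gi / (g2[i] ** 0.5 + eps)
    return w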

Methods

+
+look_ahead +

Updates a weight vector before a prediction is made.

+

The weights w are modified in-place and the updated weights are returned.

+

Parameters

+
    +
  • w'dict'
  • +
+
+

+
+step +

Updates a weight vector given a gradient.

+

Parameters

+
    +
  • w'dict | VectorLike'
  • +
  • g'dict | VectorLike'
  • +
+

Returns

+

dict | VectorLike: The updated weights.

+
+

AdaMax

+

AdaMax optimizer.

+

Parameters

+
    +
  • +

    lr

    +

    Default0.1

    +
  • +
  • +

    beta_1

    +

    Default0.9

    +
  • +
  • +

    beta_2

    +

    Default0.999

    +
  • +
  • +

    eps

    +

    Default1e-08

    +
  • +
+

Attributes

+
    +
  • +

    m (collections.defaultdict)

    +
  • +
  • +

    v (collections.defaultdict)

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.Phishing()
+optimizer = optim.AdaMax()
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LogisticRegression(optimizer)
+)
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 87.61%
+

+

Methods

+
+look_ahead +

Updates a weight vector before a prediction is made.

+

The weights w are modified in-place and the updated weights are returned.

+

Parameters

+
    +
  • w'dict'
  • +
+
+

+
+step +

Updates a weight vector given a gradient.

+

Parameters

+
    +
  • w'dict | VectorLike'
  • +
  • g'dict | VectorLike'
  • +
+

Returns

+

dict | VectorLike: The updated weights.

+
+

Adam

+

Adam optimizer.

+

Parameters

+
    +
  • +

    lr

    +

    Default0.1

    +
  • +
  • +

    beta_1

    +

    Default0.9

    +
  • +
  • +

    beta_2

    +

    Default0.999

    +
  • +
  • +

    eps

    +

    Default1e-08

    +
  • +
+

Attributes

+
    +
  • +

    m (collections.defaultdict)

    +
  • +
  • +

    v (collections.defaultdict)

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.Phishing()
+optimizer = optim.Adam()
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LogisticRegression(optimizer)
+)
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 86.52%
+

+
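The m and v attributes are the usual first- and second-moment estimates. A minimal sketch of the textbook Adam update with bias correction (illustrative, not the class's code):

import collections

state = {"t": 0, "m": collections.defaultdict(float), "v": collections.defaultdict(float)}

def adam_step(w, g, lr=0.1, beta_1=0.9, beta_2=0.999, eps=1e-8):
    state["t"] += 1
    for i, gi in g.items():
        state["m"][i] = beta_1 * state["m"][i] + (1 - beta_1) * gi
        state["v"][i] = beta_2 * state["v"][i] + (1 - beta_2) * gi ** 2
        m_hat = state["m"][i] / (1 - beta_1 ** state["t"])  # bias correction
        v_hat = state["v"][i] / (1 - beta_2 ** state["t"])
        w[i] = w.get(i, 0.0) - lr * m_hat / (v_hat ** 0.5 + eps)
    return w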

Methods

+
+look_ahead +

Updates a weight vector before a prediction is made.

+

The weights w are modified in-place and the updated weights are returned.

+

Parameters

+
    +
  • w'dict'
  • +
+
+

+
+step +

Updates a weight vector given a gradient.

+

Parameters

+
    +
  • w'dict | VectorLike'
  • +
  • g'dict | VectorLike'
  • +
+

Returns

+

dict | VectorLike: The updated weights.

+
+

Averager

+

Averaged stochastic gradient descent.

+

This is a wrapper that can be applied to any stochastic gradient descent optimiser. Note that this implementation differs from what may be found elsewhere. Essentially, the average of the weights is usually only used at the end of the optimisation, once all the data has been seen. However, in this implementation the optimiser returns the current averaged weights.

+

Parameters

+
    +
  • +

    optimizer

    +

    Typeoptim.base.Optimizer

    +

    An optimizer for which the produced weights will be averaged.

    +
  • +
  • +

    start

    +

    Typeint

    +

    Default0

    +

    Indicates the number of iterations to wait before starting the average. Essentially, nothing happens differently before the number of iterations reaches this value.

    +
  • +
+

Attributes

+
    +
  • learning_rate
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.Phishing()
+optimizer = optim.Averager(optim.SGD(0.01), 100)
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LogisticRegression(optimizer)
+)
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 87.97%
+

+
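The running average can be maintained incrementally. A minimal sketch of the idea (illustrative, not the wrapper's code):

def update_average(avg, w, n):
    # after n observed weight vectors, fold the new weights w into the mean
    return {i: avg.get(i, 0.0) + (w[i] - avg.get(i, 0.0)) / n for i in w}

avg = {}
for n, w in enumerate([{"a": 1.0}, {"a": 2.0}, {"a": 3.0}], start=1):
    avg = update_average(avg, w, n)
avg  # {'a': 2.0}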

Methods

+
+look_ahead +

Updates a weight vector before a prediction is made.

+

The weights w are modified in-place and the updated weights are returned.

+

Parameters

+
    +
  • w'dict'
  • +
+
+

+
+step +

Updates a weight vector given a gradient.

+

Parameters

+
    +
  • w'dict | VectorLike'
  • +
  • g'dict | VectorLike'
  • +
+

Returns

+

dict | VectorLike: The updated weights.

+
+

FTRLProximal

+

FTRL-Proximal optimizer.

+

Parameters

+
    +
  • +

    alpha

    +

    Default0.05

    +
  • +
  • +

    beta

    +

    Default1.0

    +
  • +
  • +

    l1

    +

    Default0.0

    +
  • +
  • +

    l2

    +

    Default1.0

    +
  • +
+

Attributes

+
    +
  • +

    z (collections.defaultdict)

    +
  • +
  • +

    n (collections.defaultdict)

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.Phishing()
+optimizer = optim.FTRLProximal()
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LogisticRegression(optimizer)
+)
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 87.56%
+

+

Methods

+
+look_ahead +

Updates a weight vector before a prediction is made.

+

The weights w are modified in-place and the updated weights are returned.

+

Parameters

+
    +
  • w'dict'
  • +
+
+

+
+step +

Updates a weight vector given a gradient.

+

Parameters

+
    +
  • w'dict | VectorLike'
  • +
  • g'dict | VectorLike'
  • +
+

Returns

+

dict | VectorLike: The updated weights.

+
+

Momentum

+

Momentum optimizer.

+

Parameters

+
    +
  • +

    lr

    +

    Default0.1

    +
  • +
  • +

    rho

    +

    Default0.9

    +
  • +
+

Attributes

+
    +
  • learning_rate
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.Phishing()
+optimizer = optim.Momentum()
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LogisticRegression(optimizer)
+)
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 84.09%
+

+
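A minimal sketch of the classical momentum update, with the velocity decayed by rho (illustrative, not the class's code):

import collections

velocity = collections.defaultdict(float)

def momentum_step(w, g, lr=0.1, rho=0.9):
    for i, gi in g.items():
        velocity[i] = rho * velocity[i] + lr * gi
        w[i] = w.get(i, 0.0) - velocity[i]
    return w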

Methods

+
+look_ahead +

Updates a weight vector before a prediction is made.

+

The weights w are modified in-place and the updated weights are returned.

+

Parameters

+
    +
  • w'dict'
  • +
+
+

+
+step +

Updates a weight vector given a gradient.

+

Parameters

+
    +
  • w'dict | VectorLike'
  • +
  • g'dict | VectorLike'
  • +
+

Returns

+

dict | VectorLike: The updated weights.

+
+

Nadam

+

Nadam optimizer.

+

Parameters

+
    +
  • +

    lr

    +

    Default0.1

    +
  • +
  • +

    beta_1

    +

    Default0.9

    +
  • +
  • +

    beta_2

    +

    Default0.999

    +
  • +
  • +

    eps

    +

    Default1e-08

    +
  • +
+

Attributes

+
    +
  • learning_rate
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.Phishing()
+optimizer = optim.Nadam()
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LogisticRegression(optimizer)
+)
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 86.60%
+

+

Methods

+
+look_ahead +

Updates a weight vector before a prediction is made.

+

The weights w are modified in-place and the updated weights are returned.

+

Parameters

+
    +
  • w'dict'
  • +
+
+

+
+step +

Updates a weight vector given a gradient.

+

Parameters

+
    +
  • w'dict | VectorLike'
  • +
  • g'dict | VectorLike'
  • +
+

Returns

+

dict | VectorLike: The updated weights.

+
+

NesterovMomentum

+

Nesterov Momentum optimizer.

+

Parameters

+
    +
  • +

    lr

    +

    Default0.1

    +
  • +
  • +

    rho

    +

    Default0.9

    +
  • +
+

Attributes

+
    +
  • learning_rate
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.Phishing()
+optimizer = optim.NesterovMomentum()
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LogisticRegression(optimizer)
+)
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 84.22%
+

+

Methods

+
+look_ahead +

Updates a weight vector before a prediction is made.

+

The weights w are modified in-place and the updated weights are returned.

+

Parameters

+
    +
  • w'dict'
  • +
+
+

+
+step +

Updates a weight vector given a gradient.

+

Parameters

+
    +
  • w'dict | VectorLike'
  • +
  • g'dict | VectorLike'
  • +
+

Returns

+

dict | VectorLike: The updated weights.

+
+

RMSProp

+

RMSProp optimizer.

+

Parameters

+
    +
  • +

    lr

    +

    Default0.1

    +
  • +
  • +

    rho

    +

    Default0.9

    +
  • +
  • +

    eps

    +

    Default1e-08

    +
  • +
+

Attributes

+
    +
  • learning_rate
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.Phishing()
+optimizer = optim.RMSProp()
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LogisticRegression(optimizer)
+)
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 87.24%
+

+
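A minimal sketch of the textbook RMSProp update, where an exponential moving average of squared gradients rescales the step (illustrative, not the class's code):

import collections

sq_avg = collections.defaultdict(float)

def rmsprop_step(w, g, lr=0.1, rho=0.9, eps=1e-8):
    for i, gi in g.items():
        sq_avg[i] = rho * sq_avg[i] + (1 - rho) * gi ** 2
        w[i] = w.get(i, 0.0) - lr * gi / (sq_avg[i] ** 0.5 + eps)
    return w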

Methods

+
+look_ahead +

Updates a weight vector before a prediction is made.

+

The weights w are modified in-place and the updated weights are returned.

+

Parameters

+
    +
  • w'dict'
  • +
+
+

+
+step +

Updates a weight vector given a gradient.

+

Parameters

+
    +
  • w'dict | VectorLike'
  • +
  • g'dict | VectorLike'
  • +
+

Returns

+

dict | VectorLike: The updated weights.

+
+

SGD

+

Plain stochastic gradient descent.

+

Parameters

+
    +
  • +

    lr

    +

    Default0.01

    +
  • +
+

Attributes

+
    +
  • learning_rate
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import optim
+from river import preprocessing
+
+dataset = datasets.Phishing()
+optimizer = optim.SGD(0.1)
+model = (
+    preprocessing.StandardScaler() |
+    linear_model.LogisticRegression(optimizer)
+)
+metric = metrics.F1()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
F1: 87.85%
+

+
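Plain SGD is the simplest of the bunch: each weight moves a step of size lr against its gradient. A minimal sketch on dict weights (illustrative, not the class's code):

def sgd_step(w, g, lr=0.01):
    # w <- w - lr * g, coordinate-wise
    for i, gi in g.items():
        w[i] = w.get(i, 0.0) - lr * gi
    return w

sgd_step({"x": 1.0}, {"x": 0.5}, lr=0.1)  # {'x': 0.95}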

Methods

+
+look_ahead +

Updates a weight vector before a prediction is made.

+

The weights w are modified in-place and the updated weights are returned.

+

Parameters

+
    +
  • w'dict'
  • +
+
+

+
+step +

Updates a weight vector given a gradient.

+

Parameters

+
    +
  • w'dict | VectorLike'
  • +
  • g'dict | VectorLike'
  • +
+

Returns

+

dict | VectorLike: The updated weights.

+
+

Initializer

+

An initializer is used to set initial weights in a model.

+

Methods

+
+call +

Returns a fresh set of weights.

+

Parameters

+
    +
  • shape — defaults to 1
  • +
+
+
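As a hedged sketch of this interface (the subclassing pattern below mirrors the built-in initializers, but the HalfInitializer class itself is hypothetical), a custom initializer only has to implement __call__, returning a scalar when shape is 1 and an array otherwise:

import numpy as np
from river import optim

class HalfInitializer(optim.base.Initializer):
    """Hypothetical initializer that always returns 0.5."""

    def __call__(self, shape=1):
        # Follow the convention of the built-in initializers:
        # a scalar for a single weight, a numpy array otherwise.
        if shape == 1:
            return 0.5
        return np.full(shape, 0.5)

init = HalfInitializer()
init(shape=1)  # 0.5
init(shape=3)  # array([0.5, 0.5, 0.5])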

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/base/Loss/index.html b/0.19.0/api/optim/base/Loss/index.html new file mode 100644 index 0000000000..1cc9407bac --- /dev/null +++ b/0.19.0/api/optim/base/Loss/index.html @@ -0,0 +1,3906 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Loss - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Loss

+

Base class for all loss functions.

+

Methods

+
+call +

Returns the loss.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The loss(es).

+
+

+
+gradient +

Return the gradient with respect to y_pred.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The gradient(s).

+
+

+
+mean_func +

Mean function.

+

This is the inverse of the link function. Typically, a loss function takes as input the raw output of a model. In the case of classification, the raw output would be logits. The mean function can be used to convert the raw output into a value that makes sense to the user, such as a probability.

+

Parameters

+
    +
  • y_pred
  • +
+

Returns

+

The adjusted prediction(s).

+
+
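To make the three methods concrete, here is a hedged sketch of a custom regression loss. The CubicLoss class is hypothetical and only illustrates the interface; subclassing optim.losses.RegressionLoss is an assumption based on the loss classes documented later in this reference:

from river import optim

class CubicLoss(optim.losses.RegressionLoss):
    """Hypothetical loss defined as L = |p - y| ** 3."""

    def __call__(self, y_true, y_pred):
        # Returns the loss.
        return abs(y_pred - y_true) ** 3

    def gradient(self, y_true, y_pred):
        # Gradient of the loss with respect to y_pred.
        diff = y_pred - y_true
        return 3 * diff * abs(diff)

    def mean_func(self, y_pred):
        # Identity link: raw outputs are already on the target scale.
        return y_pred

loss = CubicLoss()
loss(1, 3)           # |3 - 1| ** 3 = 8
loss.gradient(1, 3)  # 3 * 2 * 2 = 12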

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/base/Optimizer/index.html b/0.19.0/api/optim/base/Optimizer/index.html new file mode 100644 index 0000000000..035c7be777 --- /dev/null +++ b/0.19.0/api/optim/base/Optimizer/index.html @@ -0,0 +1,3935 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Optimizer - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Optimizer

+

Optimizer interface.

+

Every optimizer inherits from this base interface.

+

Parameters

+
    +
  • +

    lr

    +

    Typeint | float | Scheduler

    +
  • +
+

Attributes

+
    +
  • +

    learning_rate (float)

    +

    Returns the current learning rate value.

    +
  • +
+

Methods

+
+look_ahead +

Updates a weight vector before a prediction is made.

+

The w parameter is a dictionary of weight parameters. The weights are modified in-place and the updated weights are returned.

+

Parameters

+
    +
  • w'dict'
  • +
+
+

+
+step +

Updates a weight vector given a gradient.

+

Parameters

+
    +
  • w'dict | VectorLike'
  • +
  • g'dict | VectorLike'
  • +
+

Returns

+

dict | VectorLike: The updated weights.

+
+
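To illustrate the step method, here is a hedged sketch using the plain SGD optimizer documented above, which applies the update w ← w - lr * g. The weight and gradient values are made up for the example:

from river import optim

w = {'x1': 1.0, 'x2': -2.0}  # current weights
g = {'x1': 0.5, 'x2': 1.0}   # gradient of the loss w.r.t. the weights

optimizer = optim.SGD(lr=0.5)
w = optimizer.step(w, g)
w  # {'x1': 0.75, 'x2': -2.5}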

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/base/Scheduler/index.html b/0.19.0/api/optim/base/Scheduler/index.html new file mode 100644 index 0000000000..8339785b95 --- /dev/null +++ b/0.19.0/api/optim/base/Scheduler/index.html @@ -0,0 +1,3879 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Scheduler - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Scheduler

+

Can be used to program the learning rate schedule of an optim.base.Optimizer.

+

Methods

+
+get +

Returns the learning rate at a given iteration.

+

Parameters

+
    +
  • t'int'
  • +
+
+
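As a hedged sketch (the StepDecay class is hypothetical), a custom scheduler only needs to implement get; it can then be passed as the lr parameter of an optimizer, since optimizers accept an int, a float, or a Scheduler:

from river import optim

class StepDecay(optim.base.Scheduler):
    """Hypothetical scheduler that halves the learning rate every 100 steps."""

    def __init__(self, learning_rate, every=100):
        self.learning_rate = learning_rate
        self.every = every

    def get(self, t):
        # Returns the learning rate at iteration t.
        return self.learning_rate * 0.5 ** (t // self.every)

optimizer = optim.SGD(lr=StepDecay(0.1, every=100))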

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/initializers/Constant/index.html b/0.19.0/api/optim/initializers/Constant/index.html new file mode 100644 index 0000000000..63ddb82bcd --- /dev/null +++ b/0.19.0/api/optim/initializers/Constant/index.html @@ -0,0 +1,3907 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Constant - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Constant

+

Constant initializer which always returns the same value.

+

Parameters

+
    +
  • +

    value

    +

    Typefloat

    +
  • +
+

Examples

+

from river import optim
+
+init = optim.initializers.Constant(value=3.14)
+
+init(shape=1)
+
+
3.14
+

+

init(shape=2)
+
+
array([3.14, 3.14])
+

+

Methods

+
+call +

Returns a fresh set of weights.

+

Parameters

+
    +
  • shape — defaults to 1
  • +
+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/initializers/Normal/index.html b/0.19.0/api/optim/initializers/Normal/index.html new file mode 100644 index 0000000000..92ef048f03 --- /dev/null +++ b/0.19.0/api/optim/initializers/Normal/index.html @@ -0,0 +1,3919 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Normal - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Normal

+

Random normal initializer which simulates a normal distribution with the specified parameters.

+

Parameters

+
    +
  • +

    mu

    +

    Default0.0

    +

    The mean of the normal distribution

    +
  • +
  • +

    sigma

    +

    Default1.0

    +

    The standard deviation of the normal distribution

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number generation seed that can be set for reproducibility.

    +
  • +
+

Examples

+

from river import optim
+
+init = optim.initializers.Normal(mu=0, sigma=1, seed=42)
+
+init(shape=1)
+
+
0.496714
+

+

init(shape=2)
+
+
array([-0.1382643 ,  0.64768854])
+

+

Methods

+
+call +

Returns a fresh set of weights.

+

Parameters

+
    +
  • shape — defaults to 1
  • +
+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/initializers/Zeros/index.html b/0.19.0/api/optim/initializers/Zeros/index.html new file mode 100644 index 0000000000..062719ee0a --- /dev/null +++ b/0.19.0/api/optim/initializers/Zeros/index.html @@ -0,0 +1,3886 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Zeros - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Zeros

+

Constant initializer which always returns zeros.

+

Examples

+

from river import optim
+
+init = optim.initializers.Zeros()
+
+init(shape=1)
+
+
0.0
+

+

init(shape=2)
+
+
array([0., 0.])
+

+

Methods

+
+call +

Returns a fresh set of weights.

+

Parameters

+
    +
  • shape — defaults to 1
  • +
+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/losses/Absolute/index.html b/0.19.0/api/optim/losses/Absolute/index.html new file mode 100644 index 0000000000..75a0b8915e --- /dev/null +++ b/0.19.0/api/optim/losses/Absolute/index.html @@ -0,0 +1,4140 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Absolute - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Absolute

+

Absolute loss, also known as the mean absolute error or L1 loss.

+

Mathematically, it is defined as

+
\[L = |p_i - y_i|\]
+

Its gradient with respect to \(p_i\) is

+
\[\frac{\partial L}{\partial p_i} = \mathrm{sgn}(p_i - y_i)\]
+

Examples

+

from river import optim
+
+loss = optim.losses.Absolute()
+loss(-42, 42)
+
+
84
+
+
loss.gradient(1, 2)
+
+
1
+
+
loss.gradient(2, 1)
+
+
-1
+

+

Methods

+
+call +

Returns the loss.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The loss(es).

+
+

+
+gradient +

Return the gradient with respect to y_pred.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The gradient(s).

+
+

+
+mean_func +

Mean function.

+

This is the inverse of the link function. Typically, a loss function takes as input the raw output of a model. In the case of classification, the raw output would be logits. The mean function can be used to convert the raw output into a value that makes sense to the user, such as a probability.

+

Parameters

+
    +
  • y_pred
  • +
+

Returns

+

The adjusted prediction(s).

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/losses/BinaryFocalLoss/index.html b/0.19.0/api/optim/losses/BinaryFocalLoss/index.html new file mode 100644 index 0000000000..d539176645 --- /dev/null +++ b/0.19.0/api/optim/losses/BinaryFocalLoss/index.html @@ -0,0 +1,4133 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BinaryFocalLoss - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

BinaryFocalLoss

+

Binary focal loss.

+

This implements the "star" algorithm from the appendix of the focal loss paper.

+

Parameters

+
    +
  • +

    gamma

    +

    Default2

    +
  • +
  • +

    beta

    +

    Default1

    +
  • +
+
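Examples

The following is a hedged usage sketch rather than a verified example; loss values are not shown because they depend on the implementation details of the "star" algorithm. Like any binary loss, it can be evaluated directly or plugged into a linear classifier:

from river import linear_model
from river import optim

loss = optim.losses.BinaryFocalLoss(gamma=2, beta=1)

model = linear_model.LogisticRegression(loss=loss)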

Methods

+
+call +

Returns the loss.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The loss(es).

+
+

+
+gradient +

Return the gradient with respect to y_pred.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The gradient(s).

+
+

+
+mean_func +

Mean function.

+

This is the inverse of the link function. Typically, a loss function takes as input the raw output of a model. In the case of classification, the raw output would be logits. The mean function can be used to convert the raw output into a value that makes sense to the user, such as a probability.

+

Parameters

+
    +
  • y_pred
  • +
+

Returns

+

The adjusted prediction(s).

+
+

+1. Lin, T.Y., Goyal, P., Girshick, R., He, K. and Dollár, P., 2017. Focal loss for dense object detection. In Proceedings of the IEEE international conference on computer vision (pp. 2980-2988)

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/losses/BinaryLoss/index.html b/0.19.0/api/optim/losses/BinaryLoss/index.html new file mode 100644 index 0000000000..6e75cbe348 --- /dev/null +++ b/0.19.0/api/optim/losses/BinaryLoss/index.html @@ -0,0 +1,4106 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + BinaryLoss - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

BinaryLoss

+

A loss appropriate for binary classification tasks.

+

Methods

+
+call +

Returns the loss.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The loss(es).

+
+

+
+gradient +

Return the gradient with respect to y_pred.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The gradient(s).

+
+

+
+mean_func +

Mean function.

+

This is the inverse of the link function. Typically, a loss function takes as input the raw output of a model. In the case of classification, the raw output would be logits. The mean function can be used to convert the raw output into a value that makes sense to the user, such as a probability.

+

Parameters

+
    +
  • y_pred
  • +
+

Returns

+

The adjusted prediction(s).

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/losses/Cauchy/index.html b/0.19.0/api/optim/losses/Cauchy/index.html new file mode 100644 index 0000000000..443495ec4e --- /dev/null +++ b/0.19.0/api/optim/losses/Cauchy/index.html @@ -0,0 +1,4138 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Cauchy - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Cauchy

+

Cauchy loss function.

+

Parameters

+
    +
  • +

    C

    +

    Default80

    +
  • +
+
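Examples

As a hedged usage sketch (no loss values are shown, since they depend on the implementation), the Cauchy loss can serve as a robust alternative to the squared loss in a regression model:

from river import linear_model
from river import optim

loss = optim.losses.Cauchy(C=80)

model = linear_model.LinearRegression(loss=loss)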

Methods

+
+call +

Returns the loss.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The loss(es).

+
+

+
+gradient +

Return the gradient with respect to y_pred.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The gradient(s).

+
+

+
+mean_func +

Mean function.

+

This is the inverse of the link function. Typically, a loss function takes as input the raw output of a model. In the case of classification, the raw output would be logits. The mean function can be used to convert the raw output into a value that makes sense to the user, such as a probability.

+

Parameters

+
    +
  • y_pred
  • +
+

Returns

+

The adjusted prediction(s).

+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/losses/CrossEntropy/index.html b/0.19.0/api/optim/losses/CrossEntropy/index.html new file mode 100644 index 0000000000..2575723f0b --- /dev/null +++ b/0.19.0/api/optim/losses/CrossEntropy/index.html @@ -0,0 +1,4181 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CrossEntropy - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

CrossEntropy

+

Cross entropy loss.

+

This is a generalization of logistic loss to multiple classes.

+

Parameters

+
    +
  • +

    class_weight

    +

    Typedict[base.typing.ClfTarget, float] | None

    +

    DefaultNone

    +

    A dictionary that indicates what weight to associate with each class.

    +
  • +
+

Examples

+

from river import optim
+
+y_true = [0, 1, 2, 2]
+y_pred = [
+    {0: 0.29450637, 1: 0.34216758, 2: 0.36332605},
+    {0: 0.21290077, 1: 0.32728332, 2: 0.45981591},
+    {0: 0.42860913, 1: 0.33380113, 2: 0.23758974},
+    {0: 0.44941979, 1: 0.32962558, 2: 0.22095463}
+]
+
+loss = optim.losses.CrossEntropy()
+
+for yt, yp in zip(y_true, y_pred):
+    print(loss(yt, yp))
+
+
1.222454
+1.116929
+1.437209
+1.509797
+

+

for yt, yp in zip(y_true, y_pred):
+    print(loss.gradient(yt, yp))
+
+
{0: -0.70549363, 1: 0.34216758, 2: 0.36332605}
+{0: 0.21290077, 1: -0.67271668, 2: 0.45981591}
+{0: 0.42860913, 1: 0.33380113, 2: -0.76241026}
+{0: 0.44941979, 1: 0.32962558, 2: -0.77904537}
+

+

Methods

+
+call +

Returns the loss.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The loss(es).

+
+

+
+gradient +

Return the gradient with respect to y_pred.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The gradient(s).

+
+

+
+mean_func +

Mean function.

+

This is the inverse of the link function. Typically, a loss function takes as input the raw output of a model. In the case of classification, the raw output would be logits. The mean function can be used to convert the raw output into a value that makes sense to the user, such as a probability.

+

Parameters

+
    +
  • y_pred
  • +
+

Returns

+

The adjusted prediction(s).

+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/losses/EpsilonInsensitiveHinge/index.html b/0.19.0/api/optim/losses/EpsilonInsensitiveHinge/index.html new file mode 100644 index 0000000000..2322aa2577 --- /dev/null +++ b/0.19.0/api/optim/losses/EpsilonInsensitiveHinge/index.html @@ -0,0 +1,4127 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + EpsilonInsensitiveHinge - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

EpsilonInsensitiveHinge

+

Epsilon-insensitive hinge loss.

+

Parameters

+
    +
  • +

    eps

    +

    Default0.1

    +
  • +
+
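Examples

A hedged usage sketch follows. Assuming the usual epsilon-insensitive definition, errors smaller than eps incur no loss; exact values are not shown, as they depend on the implementation:

from river import optim

loss = optim.losses.EpsilonInsensitiveHinge(eps=0.1)

loss(1, 1.05)  # error below eps, so no loss is incurred
loss(1, 2)     # larger errors are penalized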

Methods

+
+call +

Returns the loss.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The loss(es).

+
+

+
+gradient +

Return the gradient with respect to y_pred.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The gradient(s).

+
+

+
+mean_func +

Mean function.

+

This is the inverse of the link function. Typically, a loss function takes as input the raw output of a model. In the case of classification, the raw output would be logits. The mean function can be used to convert the raw output into a value that makes sense to the user, such as a probability.

+

Parameters

+
    +
  • y_pred
  • +
+

Returns

+

The adjusted prediction(s).

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/losses/Hinge/index.html b/0.19.0/api/optim/losses/Hinge/index.html new file mode 100644 index 0000000000..0ebab3e52e --- /dev/null +++ b/0.19.0/api/optim/losses/Hinge/index.html @@ -0,0 +1,4158 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Hinge - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Hinge

+

Computes the hinge loss.

+

Mathematically, it is defined as

+
\[L = \max(0, 1 - p_i \times y_i)\]
+

Its gradient with respect to \(p_i\) is

+
\[\frac{\partial L}{\partial p_i} = \left\{ \begin{array}{ll} 0 & p_i y_i \geqslant 1 \\ -y_i & p_i y_i < 1 \end{array} \right.\]
+

Parameters

+
    +
  • +

    threshold

    +

    Default1.0

    +

Margin threshold. A value of 1 yields the loss used in SVMs, whilst 0 is equivalent to the loss used in the Perceptron algorithm.

    +
  • +
+

Examples

+

from river import optim
+
+loss = optim.losses.Hinge(threshold=1)
+loss(1, .2)
+
+
0.8
+

+

loss.gradient(1, .2)
+
+
-1
+

+

Methods

+
+call +

Returns the loss.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The loss(es).

+
+

+
+gradient +

Return the gradient with respect to y_pred.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The gradient(s).

+
+

+
+mean_func +

Mean function.

+

This is the inverse of the link function. Typically, a loss function takes as input the raw output of a model. In the case of classification, the raw output would be logits. The mean function can be used to convert the raw output into a value that makes sense to the user, such as a probability.

+

Parameters

+
    +
  • y_pred
  • +
+

Returns

+

The adjusted prediction(s).

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/losses/Huber/index.html b/0.19.0/api/optim/losses/Huber/index.html new file mode 100644 index 0000000000..25cd3264b2 --- /dev/null +++ b/0.19.0/api/optim/losses/Huber/index.html @@ -0,0 +1,4129 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Huber - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Huber

+

Huber loss.

+

Variant of the squared loss that is robust to outliers.

+

Parameters

+
    +
  • +

    epsilon

    +

    Default0.1

    +
  • +
+
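Examples

As a hedged usage sketch, the Huber loss can replace the squared loss in a regression model when the data contains outliers; the epsilon value below is arbitrary:

from river import linear_model
from river import optim

loss = optim.losses.Huber(epsilon=1.0)

model = linear_model.LinearRegression(loss=loss)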

Methods

+
+call +

Returns the loss.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The loss(es).

+
+

+
+gradient +

Return the gradient with respect to y_pred.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The gradient(s).

+
+

+
+mean_func +

Mean function.

+

This is the inverse of the link function. Typically, a loss function takes as input the raw output of a model. In the case of classification, the raw output would be logits. The mean function can be used to convert the raw output into a value that makes sense to the user, such as a probability.

+

Parameters

+
    +
  • y_pred
  • +
+

Returns

+

The adjusted prediction(s).

+
+

+1. Huber loss function - Wikipedia

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/losses/Log/index.html b/0.19.0/api/optim/losses/Log/index.html new file mode 100644 index 0000000000..c402c23eaf --- /dev/null +++ b/0.19.0/api/optim/losses/Log/index.html @@ -0,0 +1,4140 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Log - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Log

+

Logarithmic loss.

+

This loss function expects each provided y_pred to be a logit. In other words, it must be the raw output of a linear model or a neural network.

+

Parameters

+
    +
  • +

    weight_pos

    +

    Default1.0

    +
  • +
  • +

    weight_neg

    +

    Default1.0

    +
  • +
+
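Examples

The following hedged sketch shows the typical place of this loss: it is the kind of loss a logistic regression optimizes, and the class weights can be adjusted, for instance for imbalanced data. The weight value below is arbitrary:

from river import linear_model
from river import optim

loss = optim.losses.Log(weight_pos=2.0)

model = linear_model.LogisticRegression(loss=loss)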

Methods

+
+call +

Returns the loss.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The loss(es).

+
+

+
+gradient +

Return the gradient with respect to y_pred.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The gradient(s).

+
+

+
+mean_func +

Mean function.

+

This is the inverse of the link function. Typically, a loss function takes as input the raw output of a model. In the case of classification, the raw output would be logits. The mean function can be used to convert the raw output into a value that makes sense to the user, such as a probability.

+

Parameters

+
    +
  • y_pred
  • +
+

Returns

+

The adjusted prediction(s).

+
+

+
+
+
    +
  1. +

    Logit Wikipedia page 

    +
  2. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/losses/MultiClassLoss/index.html b/0.19.0/api/optim/losses/MultiClassLoss/index.html new file mode 100644 index 0000000000..b3c9f00b01 --- /dev/null +++ b/0.19.0/api/optim/losses/MultiClassLoss/index.html @@ -0,0 +1,4106 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MultiClassLoss - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

MultiClassLoss

+

A loss appropriate for multi-class classification tasks.

+

Methods

+
+call +

Returns the loss.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The loss(es).

+
+

+
+gradient +

Return the gradient with respect to y_pred.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The gradient(s).

+
+

+
+mean_func +

Mean function.

+

This is the inverse of the link function. Typically, a loss function takes as input the raw output of a model. In the case of classification, the raw output would be logits. The mean function can be used to convert the raw output into a value that makes sense to the user, such as a probability.

+

Parameters

+
    +
  • y_pred
  • +
+

Returns

+

The adjusted prediction(s).

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/losses/Poisson/index.html b/0.19.0/api/optim/losses/Poisson/index.html new file mode 100644 index 0000000000..3a37c13386 --- /dev/null +++ b/0.19.0/api/optim/losses/Poisson/index.html @@ -0,0 +1,4111 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Poisson - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Poisson

+

Poisson loss.

+

The Poisson loss is usually more suited for regression with count data than the squared loss.

+

Mathematically, it is defined as

+
\[L = \exp(p_i) - y_i \times p_i\]
+

Its gradient with respect to \(p_i\) is

+
\[\frac{\partial L}{\partial p_i} = \exp(p_i) - y_i\]
+
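Examples

The values below follow directly from the formula above, assuming the loss is called as loss(y_true, y_pred), consistent with the method signatures:

import math
from river import optim

loss = optim.losses.Poisson()

loss(1, 0)           # exp(0) - 1 * 0 = 1.0
loss.gradient(1, 0)  # exp(0) - 1 = 0.0

math.isclose(loss(2, 1), math.exp(1) - 2 * 1)  # True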

Methods

+
+call +

Returns the loss.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The loss(es).

+
+

+
+gradient +

Return the gradient with respect to y_pred.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The gradient(s).

+
+

+
+mean_func +

Mean function.

+

This is the inverse of the link function. Typically, a loss function takes as input the raw output of a model. In the case of classification, the raw output would be logits. The mean function can be used to convert the raw output into a value that makes sense to the user, such as a probability.

+

Parameters

+
    +
  • y_pred
  • +
+

Returns

+

The adjusted prediction(s).

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/losses/Quantile/index.html b/0.19.0/api/optim/losses/Quantile/index.html new file mode 100644 index 0000000000..7cea46b37c --- /dev/null +++ b/0.19.0/api/optim/losses/Quantile/index.html @@ -0,0 +1,4169 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Quantile - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Quantile

+

Quantile loss.

+

Parameters

+
    +
  • +

    alpha

    +

    Default0.5

    +

    Desired quantile to attain.

    +
  • +
+

Examples

+

from river import optim
+
+loss = optim.losses.Quantile(0.5)
+loss(1, 3)
+
+
1.0
+

+

loss.gradient(1, 3)
+
+
0.5
+

+

loss.gradient(3, 1)
+
+
-0.5
+

+

Methods

+
+call +

Returns the loss.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The loss(es).

+
+

+
+gradient +

Return the gradient with respect to y_pred.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The gradient(s).

+
+

+
+mean_func +

Mean function.

+

This is the inverse of the link function. Typically, a loss function takes as input the raw output of a model. In the case of classification, the raw output would be logits. The mean function can be used to convert the raw output into a value that makes sense to the user, such as a probability.

+

Parameters

+
    +
  • y_pred
  • +
+

Returns

+

The adjusted prediction(s).

+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/losses/RegressionLoss/index.html b/0.19.0/api/optim/losses/RegressionLoss/index.html new file mode 100644 index 0000000000..3d15ab6f59 --- /dev/null +++ b/0.19.0/api/optim/losses/RegressionLoss/index.html @@ -0,0 +1,4106 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RegressionLoss - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

RegressionLoss

+

A loss appropriate for regression tasks.

+

Methods

+
+call +

Returns the loss.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The loss(es).

+
+

+
+gradient +

Return the gradient with respect to y_pred.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The gradient(s).

+
+

+
+mean_func +

Mean function.

+

This is the inverse of the link function. Typically, a loss function takes as input the raw output of a model. In the case of classification, the raw output would be logits. The mean function can be used to convert the raw output into a value that makes sense to the user, such as a probability.

+

Parameters

+
    +
  • y_pred
  • +
+

Returns

+

The adjusted prediction(s).

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/losses/Squared/index.html b/0.19.0/api/optim/losses/Squared/index.html new file mode 100644 index 0000000000..908d473bfb --- /dev/null +++ b/0.19.0/api/optim/losses/Squared/index.html @@ -0,0 +1,4141 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Squared - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Squared

+

Squared loss, also known as the L2 loss.

+

Mathematically, it is defined as

+
\[L = (p_i - y_i) ^ 2\]
+

Its gradient with respect to \(p_i\) is

+
\[\frac{\partial L}{\partial p_i} = 2 (p_i - y_i)\]
+

One thing to note is that this convention is consistent with Vowpal Wabbit and PyTorch, but not with scikit-learn. Indeed, scikit-learn divides the loss by 2, making the 2 disappear in the gradient.

+

Examples

+

from river import optim
+
+loss = optim.losses.Squared()
+loss(-4, 5)
+
+
81
+
+
loss.gradient(-4, 5)
+
+
18
+
+
loss.gradient(5, -4)
+
+
-18
+

+

Methods

+
+call +

Returns the loss.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The loss(es).

+
+

+
+gradient +

Return the gradient with respect to y_pred.

+

Parameters

+
    +
  • y_true
  • +
  • y_pred
  • +
+

Returns

+

The gradient(s).

+
+

+
+mean_func +

Mean function.

+

This is the inverse of the link function. Typically, a loss function takes as input the raw output of a model. In the case of classification, the raw output would be logits. The mean function can be used to convert the raw output into a value that makes sense to the user, such as a probability.

+

Parameters

+
    +
  • y_pred
  • +
+

Returns

+

The adjusted prediction(s).

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/schedulers/Constant/index.html b/0.19.0/api/optim/schedulers/Constant/index.html new file mode 100644 index 0000000000..9a9dbd101f --- /dev/null +++ b/0.19.0/api/optim/schedulers/Constant/index.html @@ -0,0 +1,3880 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Constant - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Constant

+

Always uses the same learning rate.

+

Parameters

+
    +
  • +

    learning_rate

    +

    Typeint | float

    +
  • +
+
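Examples

Since this scheduler always returns the same value, get is independent of the iteration number. As a hedged sketch (passing the learning rate positionally is an assumption), it can also be supplied as the lr of an optimizer:

from river import optim

scheduler = optim.schedulers.Constant(0.01)

scheduler.get(0)     # 0.01
scheduler.get(1000)  # 0.01

optimizer = optim.SGD(lr=scheduler)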

Methods

+
+get +

Returns the learning rate at a given iteration.

+

Parameters

+
    +
  • t'int'
  • +
+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/schedulers/InverseScaling/index.html b/0.19.0/api/optim/schedulers/InverseScaling/index.html new file mode 100644 index 0000000000..7d6b1ca36d --- /dev/null +++ b/0.19.0/api/optim/schedulers/InverseScaling/index.html @@ -0,0 +1,3887 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + InverseScaling - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

InverseScaling

+

Reduces the learning rate using a power schedule.

+

Assuming an initial learning rate \(\eta\), the learning rate at step \(t\) is:

+
\[\frac{\eta}{(t + 1)^p}\]
+

where \(p\) is a user-defined parameter.

+

Parameters

+
    +
  • +

    learning_rate

    +

    Typefloat

    +
  • +
  • +

    power

    +

    Default0.5

    +
  • +
+
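Examples

The values below follow from the formula above, learning_rate / (t + 1) ** power; this is a hedged sketch, and exact float formatting may differ:

from river import optim

scheduler = optim.schedulers.InverseScaling(learning_rate=0.1, power=0.5)

scheduler.get(0)  # 0.1 / 1 ** 0.5 = 0.1
scheduler.get(3)  # 0.1 / 4 ** 0.5 = 0.05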

Methods

+
+get +

Returns the learning rate at a given iteration.

+

Parameters

+
    +
  • t'int'
  • +
+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/optim/schedulers/Optimal/index.html b/0.19.0/api/optim/schedulers/Optimal/index.html new file mode 100644 index 0000000000..66a3268f8c --- /dev/null +++ b/0.19.0/api/optim/schedulers/Optimal/index.html @@ -0,0 +1,3892 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Optimal - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+ +
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/overview/index.html b/0.19.0/api/overview/index.html new file mode 100644 index 0000000000..c894f3fe1f --- /dev/null +++ b/0.19.0/api/overview/index.html @@ -0,0 +1,5617 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Overview - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Overview

+

active

+

Online active learning.

+ +

base

+ +

anomaly

+

Anomaly detection.

+

Estimators in the anomaly module have a bespoke API. Each anomaly detector has a score_one +method instead of a predict_one method. This method returns an anomaly score. Normal observations +should have a low score, whereas anomalous observations should have a high score. The range of the +scores is relative to each estimator.

+

Anomaly detectors are usually unsupervised, in that they analyze the distribution of the features they are shown. But River also has a notion of supervised anomaly detectors. These analyze the distribution of a target variable, and optionally include the distribution of the features as well. They are useful for detecting labelling anomalies, which can be detrimental if they are learned by a model.

+ +

base

+ +

bandit

+

Multi-armed bandit (MAB) policies.

+

The bandit policies in River have a generic API. This allows them to be used in a variety of +situations. For instance, they can be used for model selection +(see model_selection.BanditRegressor).

+

Classes

+ +

Functions

+ +

base

+ +

datasets

+ +

envs

+ +

base

+

Base interfaces.

+

Every estimator in River is a class, and as such inherits from at least one base interface. +These are used to categorize, organize, and standardize the many estimators that River +contains.

+

This module contains mixin classes, which are all suffixed by Mixin. Their purpose is to provide additional functionality to an estimator; they therefore need to be used in conjunction with a non-mixin base class.

+

This module also contains utilities for type hinting and tagging estimators.

+ +

cluster

+

Unsupervised clustering.

+ +

compat

+

Compatibility tools.

+

This module contains adapters for making River estimators compatible with other libraries, and +vice-versa whenever possible. The relevant adapters will only be usable if you have installed the +necessary library. For instance, you have to install scikit-learn in order to use the +compat.convert_sklearn_to_river function.

+

Classes

+ +

Functions

+ +

compose

+

Model composition.

+

This module contains utilities for merging multiple modeling steps into a single pipeline. Although +pipelines are not the only way to process a stream of data, we highly encourage you to use them.

+

Classes

+ +

Functions

+ +

conf

+

Conformal predictions. This module contains wrappers to enable conformal predictions on any regressor or classifier.

+ +

covariance

+

Online estimation of covariance and precision matrices.

+ +

datasets

+

Datasets.

+

This module contains a collection of datasets for multiple tasks: classification, regression, etc. These are popular datasets, conveniently wrapped so that the data can easily be iterated over in a stream fashion. All datasets have a fixed size. Please refer to river.synth if you are interested in infinite synthetic data generators.

+

Regression

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameSamplesFeatures
AirlinePassengers1441
Bikes182,4708
ChickWeights5783
MovieLens100K100,00010
Restaurants252,1087
Taxis1,458,6448
TrumpApproval1,0016
WaterFlow1,2681
+

Binary classification

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameSamplesFeaturesSparse
Bananas5,3002
CreditCard284,80730
Elec245,3128
Higgs11,000,00028
HTTP567,4983
MaliciousURL2,396,1303,231,961✔️
Phishing1,2509
SMSSpam5,5741
SMTP95,1563
TREC0775,4195
+

Multi-class classification

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameSamplesFeaturesClasses
ImageSegments2,310187
Insects52,848336
Keystroke20,4003151
+

Multi-output binary classification

+ + + + + + + + + + + + + + + + + +
NameSamplesFeaturesOutputs
Music593726
+

Multi-output regression

+ + + + + + + + + + + + + + + + + +
NameSamplesFeaturesOutputs
SolarFlare1,066103
+

base

+ +

synth

+

Synthetic datasets.

+

Each synthetic dataset is a stream generator. The benefit of using a generator is that it does not store the data: each sample is generated on the fly. With a couple of exceptions, these generators produce an infinite amount of data.

+

Binary classification

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameFeatures
Agrawal9
AnomalySine2
ConceptDriftStream9
Hyperplane10
Mixed4
SEA3
Sine2
STAGGER3
+

Regression

+ + + + + + + + + + + + + + + + + + + + + + + + + +
NameFeatures
Friedman10
FriedmanDrift10
Mv10
Planes2D10
+

Multi-class classification

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameFeaturesClasses
LED710
LEDDrift710
RandomRBF102
RandomRBFDrift102
RandomTree102
Waveform213
+

Multi-output binary classification

+ + + + + + + + + + + + + + + +
NameFeaturesOutputs
Logical23
+

drift

+

Concept Drift Detection.

+

This module contains concept drift detection methods. The purpose of a drift detector is to raise an alarm if the data distribution changes. A good drift detection method maximizes true positives while keeping the number of false positives to a minimum.

+ +

binary

+

Drift detection for binary data.

+ +

datasets

+ +

dummy

+

Dummy estimators.

+

This module is here for testing purposes, as well as providing baseline performances.

+ +

ensemble

+

Ensemble learning.

+

Broadly speaking, there are two kinds of ensemble approaches. There are those that copy a single +model several times and aggregate the predictions of said copies. This includes bagging as well as +boosting. Then there are those that are composed of an arbitrary list of models, and can therefore +aggregate predictions from different kinds of models.

+ +

evaluate

+

Model evaluation.

+

This module provides utilities to evaluate an online model. The goal is to reproduce a real-world scenario with high fidelity. The core function of this module is progressive_val_score, which allows evaluating a model via progressive validation.

+

This module also exposes "tracks". A track is a predefined combination of a dataset and one or more metrics. This provides a principled manner of comparing models with each other. For instance, the RegressionTrack contains several datasets and metrics to evaluate regression models. There is also a bare Track class to implement a custom track. The benchmarks directory at the root of the River repository uses these tracks.

+

Classes

+ +

Functions

+ +

facto

+

Factorization machines.

+ +

feature_extraction

+

Feature extraction.

+

This module can be used to extract information from raw features. This includes encoding +categorical data as well as looking at interactions between existing features. This differs from +the preprocessing module, in that the latter's purpose is rather to clean the data so that it may +be processed by a particular machine learning algorithm.

+ +

feature_selection

+

Feature selection.

+ +

forest

+

This module implements forest-based classifiers and regressors.

+ +

imblearn

+

Sampling methods.

+ +

linear_model

+

Linear models.

+ +

base

+ +

metrics

+

Evaluation metrics.

+

All the metrics are updated one sample at a time. This way we can track performance of +predictive methods over time.

+

Note that all metrics have a revert method, enabling them to be wrapped in utils.Rolling. This allows computing rolling metrics:

+
from river import metrics, utils
+
+y_true = [True, False, True, True]
+y_pred = [False, False, True, True]
+
+metric = utils.Rolling(metrics.Accuracy(), window_size=3)
+
+for yt, yp in zip(y_true, y_pred):
+    print(metric.update(yt, yp))
+
+
Accuracy: 0.00%
+Accuracy: 50.00%
+Accuracy: 66.67%
+Accuracy: 100.00%
+
+ +

base

+ +

multioutput

+

Metrics for multi-output learning.

+ +

base

+ +

misc

+

Miscellaneous.

+

This module essentially regroups some implementations that have nowhere else to go.

+ +

model_selection

+

Model selection.

+

This module regroups a variety of methods that may be used for performing model selection. A model selector is provided with a list of models. These are called "experts" in the expert learning literature. The model selector's goal is to perform at least as well as the best model. Indeed, initially, the best model is not known. The performance of each model becomes more apparent as time goes by. Different strategies are possible, each one offering a different tradeoff in terms of accuracy and computational performance.

+

Model selection can be used for tuning the hyperparameters of a model. This may be done by creating +a copy of the model for each set of hyperparameters, and treating each copy as a separate model. +The utils.expand_param_grid function can be used for this purpose.

+ +

base

+ +

multiclass

+

Multi-class classification.

+ +

multioutput

+

Multi-output models.

+ +

naive_bayes

+

Naive Bayes algorithms.

+ +

neighbors

+

Neighbors-based learning.

+

Also known as lazy methods. In these methods, generalisation of the training data is delayed +until a query is received.

+ +

neural_net

+

Neural networks.

+ +

activations

+ +

optim

+

Stochastic optimization.

+ +

base

+ +

initializers

+

Weight initializers.

+ +

losses

+

Loss functions.

+

Each loss function is intended to work with single values as well as numpy vectors.

+ +

schedulers

+

Learning rate schedulers.

+ +

preprocessing

+

Feature preprocessing.

+

The purpose of this module is to modify an existing set of features so that they can be processed by a machine learning algorithm. This may be done by scaling numeric parts of the data or by one-hot encoding categorical features. The difference with the feature_extraction module is that the latter extracts new information from the data.

+ +

proba

+

Probability distributions.

+ +

base

+ +

reco

+

Recommender systems module.

+

Recommender systems (recsys for short) are a large topic. This module is far from comprehensive. It simply provides models which can contribute towards building a recommender system.

+

A typical recommender system is made up of a retrieval phase, followed by a ranking phase. The output of the retrieval phase is a shortlist of items drawn from the catalogue. The items in the shortlist are then usually ranked according to the expected preference the user will have for each item. This module focuses on the ranking phase.

+

Models which inherit from the Ranker class have a rank method. This allows sorting a set of items for a given user. Each model also has a learn_one(user, item, y, context) method which allows learning user preferences. The y parameter is a reward value, the nature of which is specific to each recommendation task. Typically the reward is a number or a boolean value. It is up to the user to determine how to translate a user session into training data.

+ +

base

+ +

rules

+

Decision rules-based algorithms.

+ +

sketch

+

Data containers and collections for sequential data.

+

This module has summary and sketch structures that operate with constrained amounts +of memory and processing time.

+ +

stats

+

Running statistics.

+ +

base

+ +

stream

+

Streaming utilities.

+

The module includes tools to iterate over data streams.

+

Classes

+ +

Functions

+ +

time_series

+

Time series forecasting.

+

Classes

+ +

Functions

+ +

base

+ +

tree

+

This module implements incremental Decision Tree (iDT) algorithms for handling classification +and regression tasks.

+

Each family of iDT will be presented in a dedicated section.

+

At any moment, iDT might face situations where an input feature previously used to make +a split decision is missing in an incoming sample. In this case, the most traversed path is +selected to pass down the instance. Moreover, in the case of nominal features, if a new category +arises and the feature is used in a decision node, a new branch is created to accommodate the new +value.

+

1. Hoeffding Trees

+

This family of iDT algorithms uses the Hoeffding Bound to determine whether the incrementally computed best split candidates would be equivalent to the ones obtained in a batch-processing fashion.

+

All the available Hoeffding Tree (HT) implementation share some common functionalities:

+
    +
  • +

    Set the maximum tree depth allowed (max_depth).

    +
  • +
  • +

    Handle Active and Inactive nodes: Active learning nodes update their own +internal state to improve predictions and monitor input features to perform split +attempts. Inactive learning nodes do not update their internal state and only keep the +predictors; they are used to save memory in the tree (max_size).

    +
  • +
  • +

    Enable/disable memory management.

    +
  • +
  • +

    Define strategies to sort leaves according to how likely they are going to be split. +This enables deactivating non-promising leaves to save memory.

    +
  • +
  • +

    Disabling ‘poor’ attributes to save memory and speed up tree construction. +A poor attribute is an input feature whose split merit is much smaller than the current +best candidate. Once a feature is disabled, the tree stops saving statistics necessary +to split such a feature.

    +
  • +
  • +

    Define properties to access leaf prediction strategies, split criteria, and other +relevant characteristics.

    +
  • +
+

2. Stochastic Gradient Trees

+

Stochastic Gradient Trees (SGT) directly optimize a loss function, rather than relying on split heuristics to guide the tree growth. F-tests are performed to decide whether a leaf should be expanded or its prediction value should be updated.

+

SGTs can deal with binary classification and single-target regression. They also support +dynamic and static feature quantizers to deal with numerical inputs.

+ +

base

+

This module defines generic branch and leaf implementations. These should be used in River by each +tree-based model. Using these classes makes the code more DRY. The only exception for not doing so +would be for performance, whereby a tree-based model uses a bespoke implementation.

+

This module defines a bunch of methods to ease the manipulation and diagnostic of trees. Its +intention is to provide utilities for walking over a tree and visualizing it.

+ +

splitter

+

This module implements the Attribute Observers (AO) (or tree splitters) that are used by the +Hoeffding Trees (HT). It also implements the feature quantizers (FQ) used by Stochastic Gradient +Trees (SGT). AOs are a core aspect of the HTs construction, and might represent one of the major +bottlenecks when building the trees. The same holds for SGTs and FQs. The correct choice and setup +of a splitter might result in significant differences in the running time and memory usage of the +incremental decision trees.

+

AOs for classification and regression trees can be differentiated by using the property is_target_class (True for splitters designed for classification tasks). An error will be raised if one tries to use a classification splitter in a regression tree and vice-versa. Lastly, AOs cannot be used in SGTs and FQs cannot be used in Hoeffding Trees. So, care must be taken when choosing the correct feature splitter.

+ +

utils

+

Shared utility classes and functions.

+

Classes

+ +

Functions

+ +

math

+

Mathematical utility functions (intended for internal purposes).

+

A lot of this is experimental and has a high probability of changing in the future.

+ +

norm

+ +

pretty

+

Helper functions for making things readable by humans.

+ +

random

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/preprocessing/AdaptiveStandardScaler/index.html b/0.19.0/api/preprocessing/AdaptiveStandardScaler/index.html new file mode 100644 index 0000000000..8664f9b327 --- /dev/null +++ b/0.19.0/api/preprocessing/AdaptiveStandardScaler/index.html @@ -0,0 +1,3781 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + AdaptiveStandardScaler - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

AdaptiveStandardScaler

+

Scales data using exponentially weighted moving average and variance.

+

Under the hood, an exponentially weighted running mean and variance are maintained for each feature. This can potentially provide better results for drifting data in comparison to preprocessing.StandardScaler. Indeed, the latter computes a global mean and variance for each feature, whereas this scaler weights data in proportion to their recency.

+

Parameters

+
    +
  • +

    fading_factor

    +

    Default0.3

    +

    This parameter is passed to stats.EWVar. It is expected to be in [0, 1]. More weight is assigned to recent samples the closer fading_factor is to 1.

    +
  • +
+

Examples

+

Consider the following series which contains a positive trend.

+

import random
+
+random.seed(42)
+X = [
+    {'x': random.uniform(4 + i, 6 + i)}
+    for i in range(8)
+]
+for x in X:
+    print(x)
+
+
{'x': 5.278}
+{'x': 5.050}
+{'x': 6.550}
+{'x': 7.446}
+{'x': 9.472}
+{'x': 10.353}
+{'x': 11.784}
+{'x': 11.173}
+

+

This scaler works well with this kind of data because it uses statistics that assign higher +weight to more recent data.

+

from river import preprocessing
+
+scaler = preprocessing.AdaptiveStandardScaler(fading_factor=.6)
+
+for x in X:
+    print(scaler.learn_one(x).transform_one(x))
+
+
{'x': 0.0}
+{'x': -0.816}
+{'x': 0.812}
+{'x': 0.695}
+{'x': 0.754}
+{'x': 0.598}
+{'x': 0.651}
+{'x': 0.124}
+

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do need to do something during learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/preprocessing/Binarizer/index.html b/0.19.0/api/preprocessing/Binarizer/index.html new file mode 100644 index 0000000000..a8795b9b30 --- /dev/null +++ b/0.19.0/api/preprocessing/Binarizer/index.html @@ -0,0 +1,3764 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Binarizer - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Binarizer

+

Binarizes the data to 0 or 1 according to a threshold.

+

Parameters

+
    +
  • +

    threshold

    +

    Default0.0

    +

    Values above this are replaced by 1 and the others by 0.

    +
  • +
  • +

    dtype

    +

    Default<class 'bool'>

    +

    The desired data type to apply.

    +
  • +
+

Examples

+

import river
+import numpy as np
+
+rng = np.random.RandomState(42)
+X = [{'x1': v, 'x2': int(v)} for v in rng.uniform(low=-4, high=4, size=6)]
+
+binarizer = river.preprocessing.Binarizer()
+for x in X:
+    print(binarizer.learn_one(x).transform_one(x))
+
+
{'x1': False, 'x2': False}
+{'x1': True, 'x2': True}
+{'x1': True, 'x2': True}
+{'x1': True, 'x2': False}
+{'x1': False, 'x2': False}
+{'x1': False, 'x2': False}
+

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that do need to do something during learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/preprocessing/FeatureHasher/index.html b/0.19.0/api/preprocessing/FeatureHasher/index.html

FeatureHasher

+

Implements the hashing trick.

+

Each pair of (name, value) features is hashed into a random integer. A modulo operator is then used to make sure the hash is in a certain range. We use the MurmurHash implementation from scikit-learn.
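As a minimal sketch of the trick, using Python's built-in hash rather than MurmurHash (so the bucket indices will differ from FeatureHasher's actual output):

from collections import Counter

def hash_trick(x, n_features=10):
    counts = Counter()
    for name, value in x.items():
        # Hash the feature name, then take the result modulo n_features
        # so that it falls into one of n_features buckets.
        counts[hash(name) % n_features] += value
    return counts

hash_trick({'dog': 1, 'cat': 2, 'elephant': 4})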

+

Parameters

+
    +
  • +

    n_features

    +

    Default1048576

    +

The number by which each hash is taken modulo, i.e. the number of output buckets.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Set the seed to produce identical results.

    +
  • +
+

Examples

+

import river
+
+hasher = river.preprocessing.FeatureHasher(n_features=10, seed=42)
+
+X = [
+    {'dog': 1, 'cat': 2, 'elephant': 4},
+    {'dog': 2, 'run': 5}
+]
+for x in X:
+    print(hasher.transform_one(x))
+
+
Counter({1: 4, 9: 2, 8: 1})
+Counter({4: 5, 8: 2})
+

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/preprocessing/GaussianRandomProjector/index.html b/0.19.0/api/preprocessing/GaussianRandomProjector/index.html

GaussianRandomProjector

+

Gaussian random projector.

+

This transformer reduces the dimensionality of inputs through Gaussian random projection.

+

The components of the random projections matrix are drawn from N(0, 1 / n_components).
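As a minimal sketch of the projection step, the weights of each feature can be drawn lazily the first time the feature is seen, which is one way to cope with a streaming feature space (the actual implementation may organize this differently):

import math
import random

class TinyGaussianProjector:

    def __init__(self, n_components=3, seed=42):
        self.n_components = n_components
        self.rng = random.Random(seed)
        self.weights = {}  # feature name -> one weight per component

    def transform_one(self, x):
        out = dict.fromkeys(range(self.n_components), 0.0)
        for name, value in x.items():
            if name not in self.weights:
                # Each weight is drawn from N(0, 1 / n_components).
                sigma = math.sqrt(1 / self.n_components)
                self.weights[name] = [
                    self.rng.gauss(0, sigma) for _ in range(self.n_components)
                ]
            for i, w in enumerate(self.weights[name]):
                out[i] += w * value
        return out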

+

Parameters

+
    +
  • +

    n_components

    +

    Default10

    +

    Number of components to project the data onto.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import preprocessing
+
+dataset = datasets.TrumpApproval()
+model = preprocessing.GaussianRandomProjector(
+    n_components=3,
+    seed=42
+)
+
+for x, y in dataset:
+    x = model.transform_one(x)
+    print(x)
+    break
+
+
{0: -61289.37139206629, 1: 141312.51039283074, 2: 279165.99370457436}
+

+

model = (
+    preprocessing.GaussianRandomProjector(
+        n_components=5,
+        seed=42
+    ) |
+    preprocessing.StandardScaler() |
+    linear_model.LinearRegression()
+)
+evaluate.progressive_val_score(dataset, model, metrics.MAE())
+
+
MAE: 0.933502
+

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/preprocessing/LDA/index.html b/0.19.0/api/preprocessing/LDA/index.html

LDA

+

Online Latent Dirichlet Allocation with Infinite Vocabulary.

+

Latent Dirichlet allocation (LDA) is a probabilistic approach for exploring topics in document collections. The key advantage of this variant is that it assumes an infinite vocabulary, meaning that the set of tokens does not have to be known in advance, as opposed to the implementation from sklearn. The results produced by this implementation are identical to those of the original implementation proposed by the method's authors.

+

This class takes as input token counts. Therefore, it requires you to tokenize beforehand. You can do so by using a feature_extraction.BagOfWords instance, as shown in the example below.

+

Parameters

+
    +
  • +

    n_components

    +

    Default10

    +

Number of topics of the latent Dirichlet allocation.

    +
  • +
  • +

    number_of_documents

    +

    Default1000000.0

    +

    Estimated number of documents.

    +
  • +
  • +

    alpha_theta

    +

    Default0.5

    +

    Hyper-parameter of the Dirichlet distribution of topics.

    +
  • +
  • +

    alpha_beta

    +

    Default100.0

    +

    Hyper-parameter of the Dirichlet process of distribution over words.

    +
  • +
  • +

    tau

    +

    Default64.0

    +

    Learning inertia to prevent premature convergence.

    +
  • +
  • +

    kappa

    +

    Default0.75

    +

The learning rate kappa controls how quickly new parameter estimates replace the old ones. kappa ∈ (0.5, 1] is required for convergence.

    +
  • +
  • +

    vocab_prune_interval

    +

    Default10

    +

Interval at which to refresh the words' topic distributions.

    +
  • +
  • +

    number_of_samples

    +

    Default10

    +

Number of iterations used to compute the documents' topic distributions.

    +
  • +
  • +

    ranking_smooth_factor

    +

    Default1e-12

    +
  • +
  • +

    burn_in_sweeps

    +

    Default5

    +

Number of iterations needed while analyzing a document before updating its topic distribution.

    +
  • +
  • +

    maximum_size_vocabulary

    +

    Default4000

    +

    Maximum size of the stored vocabulary.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number seed used for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    counter (int)

    +

    The current number of observed documents.

    +
  • +
  • +

    truncation_size_prime (int)

    +

Number of distinct words stored in the vocabulary. Updated before processing a document.

    +
  • +
  • +

    truncation_size (int)

    +

Number of distinct words stored in the vocabulary. Updated after processing a document.

    +
  • +
  • +

    word_to_index (dict)

    +

    Words as keys and indexes as values.

    +
  • +
  • +

    index_to_word (dict)

    +

    Indexes as keys and words as values.

    +
  • +
  • +

    nu_1 (dict)

    +

    Weights of the words. Component of the variational inference.

    +
  • +
  • +

    nu_2 (dict)

    +

    Weights of the words. Component of the variational inference.

    +
  • +
+

Examples

+

from river import compose
+from river import feature_extraction
+from river import preprocessing
+
+X = [
+   'weather cold',
+   'weather hot dry',
+   'weather cold rainy',
+   'weather hot',
+   'weather cold humid',
+]
+
+lda = compose.Pipeline(
+    feature_extraction.BagOfWords(),
+    preprocessing.LDA(
+        n_components=2,
+        number_of_documents=60,
+        seed=42
+    )
+)
+
+for x in X:
+    lda = lda.learn_one(x)
+    topics = lda.transform_one(x)
+    print(topics)
+
+
{0: 0.5, 1: 2.5}
+{0: 2.499..., 1: 1.5}
+{0: 0.5, 1: 3.5}
+{0: 0.5, 1: 2.5}
+{0: 1.5, 1: 2.5}
+

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+learn_transform_one +

Equivalent to lda.learn_one(x).transform_one(x), but faster.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: Component attributions for the input document.

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/preprocessing/MaxAbsScaler/index.html b/0.19.0/api/preprocessing/MaxAbsScaler/index.html

MaxAbsScaler

+

Scales the data to a [-1, 1] range based on absolute maximum.

+

Under the hood a running absolute max is maintained. This scaler is meant for data that is already centered at zero or sparse data. It does not shift/center the data, and thus does not destroy any sparsity.
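In other words, each output is x / max(|x|), where the maximum runs over every value of the feature seen so far. A minimal sketch:

abs_max = {}

def scale_one(x):
    out = {}
    for name, value in x.items():
        # Track the largest absolute value observed per feature.
        abs_max[name] = max(abs_max.get(name, 0.0), abs(value))
        out[name] = value / abs_max[name] if abs_max[name] else 0.0
    return out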

+

Attributes

+
    +
  • +

    abs_max (dict)

    +

    Mapping between features and instances of stats.AbsMax.

    +
  • +
+

Examples

+

import random
+from river import preprocessing
+
+random.seed(42)
+X = [{'x': random.uniform(8, 12)} for _ in range(5)]
+for x in X:
+    print(x)
+
+
{'x': 10.557707}
+{'x': 8.100043}
+{'x': 9.100117}
+{'x': 8.892842}
+{'x': 10.945884}
+

+

scaler = preprocessing.MaxAbsScaler()
+
+for x in X:
+    print(scaler.learn_one(x).transform_one(x))
+
+
{'x': 1.0}
+{'x': 0.767216}
+{'x': 0.861940}
+{'x': 0.842308}
+{'x': 1.0}
+

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/preprocessing/MinMaxScaler/index.html b/0.19.0/api/preprocessing/MinMaxScaler/index.html

MinMaxScaler

+

Scales the data to a fixed range from 0 to 1.

+

Under the hood a running min and a running peak to peak (max - min) are maintained.
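Each output is thus (x - min) / (max - min), computed with the running statistics. A minimal sketch:

lows, highs = {}, {}

def scale_one(x):
    out = {}
    for name, value in x.items():
        lows[name] = min(lows.get(name, value), value)
        highs[name] = max(highs.get(name, value), value)
        ptp = highs[name] - lows[name]  # running peak to peak
        out[name] = (value - lows[name]) / ptp if ptp else 0.0
    return out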

+

Attributes

+
    +
  • +

    min (dict)

    +

    Mapping between features and instances of stats.Min.

    +
  • +
  • +

    max (dict)

    +

    Mapping between features and instances of stats.Max.

    +
  • +
+

Examples

+

import random
+from river import preprocessing
+
+random.seed(42)
+X = [{'x': random.uniform(8, 12)} for _ in range(5)]
+for x in X:
+    print(x)
+
+
{'x': 10.557707}
+{'x': 8.100043}
+{'x': 9.100117}
+{'x': 8.892842}
+{'x': 10.945884}
+

+

scaler = preprocessing.MinMaxScaler()
+
+for x in X:
+    print(scaler.learn_one(x).transform_one(x))
+
+
{'x': 0.0}
+{'x': 0.0}
+{'x': 0.406920}
+{'x': 0.322582}
+{'x': 1.0}
+

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/preprocessing/Normalizer/index.html b/0.19.0/api/preprocessing/Normalizer/index.html

Normalizer

+

Scales a set of features so that it has unit norm.

+

This is particularly useful when used after a feature_extraction.TFIDF.
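Concretely, every value in a sample is divided by the \(L^p\) norm of the whole sample, with p given by the order parameter. A minimal sketch:

def normalize(x, order=2):
    # L^p norm of the whole feature vector.
    norm = sum(abs(v) ** order for v in x.values()) ** (1 / order)
    return {k: v / norm for k, v in x.items()} if norm else dict(x)

normalize({0: 4, 1: 1, 2: 2, 3: 2})  # {0: 0.8, 1: 0.2, 2: 0.4, 3: 0.4}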

+

Parameters

+
    +
  • +

    order

    +

    Default2

    +

    Order of the norm (e.g. 2 corresponds to the \(L^2\) norm).

    +
  • +
+

Examples

+

from river import preprocessing
+from river import stream
+
+scaler = preprocessing.Normalizer(order=2)
+
+X = [[4, 1, 2, 2],
+     [1, 3, 9, 3],
+     [5, 7, 5, 1]]
+
+for x, _ in stream.iter_array(X):
+    print(scaler.transform_one(x))
+
+
{0: 0.8, 1: 0.2, 2: 0.4, 3: 0.4}
+{0: 0.1, 1: 0.3, 2: 0.9, 3: 0.3}
+{0: 0.5, 1: 0.7, 2: 0.5, 3: 0.1}
+

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/preprocessing/OneHotEncoder/index.html b/0.19.0/api/preprocessing/OneHotEncoder/index.html

OneHotEncoder

+

One-hot encoding.

+

This transformer will encode every feature it is provided with. If a list or set is provided, this transformer will encode every entry in the list/set. You can apply it to a subset of features by composing it with compose.Select or compose.SelectType.

+

Parameters

+
    +
  • +

    drop_zeros

    +

    DefaultFalse

    +

Whether or not 0s should be made explicit.

    +
  • +
  • +

    drop_first

    +

    DefaultFalse

    +

    Whether to get k - 1 dummies out of k categorical levels by removing the first key. This is useful in some statistical models where perfectly collinear features cause problems.

    +
  • +
+

Examples

+

Let us first create an example dataset.

+

from pprint import pprint
+import random
+import string
+
+random.seed(42)
+alphabet = list(string.ascii_lowercase)
+X = [
+    {
+        'c1': random.choice(alphabet),
+        'c2': random.choice(alphabet),
+    }
+    for _ in range(4)
+]
+pprint(X)
+
+
[{'c1': 'u', 'c2': 'd'},
+    {'c1': 'a', 'c2': 'x'},
+    {'c1': 'i', 'c2': 'h'},
+    {'c1': 'h', 'c2': 'e'}]
+

+

We can now apply one-hot encoding. All the provided features are one-hot encoded; there is therefore no need to specify which features to encode.

+

from river import preprocessing
+
+oh = preprocessing.OneHotEncoder()
+for x in X[:2]:
+    oh = oh.learn_one(x)
+    pprint(oh.transform_one(x))
+
+
{'c1_u': 1, 'c2_d': 1}
+{'c1_a': 1, 'c1_u': 0, 'c2_d': 0, 'c2_x': 1}
+

+

The drop_zeros parameter can be set to True if you don't want previously seen features that are absent from the current sample to appear in the output as zeros. Otherwise, all the past features will be included in the output.

+

oh = preprocessing.OneHotEncoder(drop_zeros=True)
+for x in X:
+    oh = oh.learn_one(x)
+    pprint(oh.transform_one(x))
+
+
{'c1_u': 1, 'c2_d': 1}
+{'c1_a': 1, 'c2_x': 1}
+{'c1_i': 1, 'c2_h': 1}
+{'c1_h': 1, 'c2_e': 1}
+

+

You can encode only k - 1 features out of k by setting drop_first to True.

+

oh = preprocessing.OneHotEncoder(drop_first=True, drop_zeros=True)
+for x in X:
+    oh = oh.learn_one(x)
+    pprint(oh.transform_one(x))
+
+
{'c2_d': 1}
+{'c2_x': 1}
+{'c2_h': 1}
+{'c2_e': 1}
+

+

A subset of the features can be one-hot encoded by piping a compose.Select into the OneHotEncoder.

+

from river import compose
+
+pp = compose.Select('c1') | preprocessing.OneHotEncoder()
+
+for x in X:
+    pp = pp.learn_one(x)
+    pprint(pp.transform_one(x))
+
+
{'c1_u': 1}
+{'c1_a': 1, 'c1_u': 0}
+{'c1_a': 0, 'c1_i': 1, 'c1_u': 0}
+{'c1_a': 0, 'c1_h': 1, 'c1_i': 0, 'c1_u': 0}
+

+

You can preserve the c2 feature by using a union:

+

pp = compose.Select('c1') | preprocessing.OneHotEncoder()
+pp += compose.Select('c2')
+
+for x in X:
+    pp = pp.learn_one(x)
+    pprint(pp.transform_one(x))
+
+
{'c1_u': 1, 'c2': 'd'}
+{'c1_a': 1, 'c1_u': 0, 'c2': 'x'}
+{'c1_a': 0, 'c1_i': 1, 'c1_u': 0, 'c2': 'h'}
+{'c1_a': 0, 'c1_h': 1, 'c1_i': 0, 'c1_u': 0, 'c2': 'e'}
+

+

Similar to the above examples, we can also pass values as a list. This will one-hot encode all of the entries individually.

+

X = [{'c1': ['u', 'a'], 'c2': ['d']},
+    {'c1': ['a', 'b'], 'c2': ['x']},
+    {'c1': ['i'], 'c2': ['h', 'z']},
+    {'c1': ['h', 'b'], 'c2': ['e']}]
+
+oh = preprocessing.OneHotEncoder(drop_zeros=True)
+for x in X:
+    oh = oh.learn_one(x)
+    pprint(oh.transform_one(x))
+
+
{'c1_a': 1, 'c1_u': 1, 'c2_d': 1}
+{'c1_a': 1, 'c1_b': 1, 'c2_x': 1}
+{'c1_i': 1, 'c2_h': 1, 'c2_z': 1}
+{'c1_b': 1, 'c1_h': 1, 'c2_e': 1}
+

+

Processing mini-batches is also possible.

+

from pprint import pprint
+import pandas as pd
+import random
+import string
+
+random.seed(42)
+alphabet = list(string.ascii_lowercase)
+X = pd.DataFrame(
+    {
+        'c1': random.choice(alphabet),
+        'c2': random.choice(alphabet),
+    }
+    for _ in range(3)
+)
+X
+
+
  c1 c2
+0  u  d
+1  a  x
+2  i  h
+

+

oh = preprocessing.OneHotEncoder(drop_zeros=True)
+df = oh.transform_many(X)
+df.sort_index(axis="columns")
+
+
   c1_a  c1_i  c1_u  c2_d  c2_h  c2_x
+0     0     0     1     1     0     0
+1     1     0     0     0     0     1
+2     0     1     0     0     1     0
+

+

oh = preprocessing.OneHotEncoder(drop_zeros=True, drop_first=True)
+df = oh.transform_many(X)
+df.sort_index(axis="columns")
+
+
   c1_i  c1_u  c2_d  c2_h  c2_x
+0     0     1     1     0     0
+1     0     0     0     0     1
+2     1     0     0     1     0
+

+

Here's an example where the zeros are kept:

+

oh = preprocessing.OneHotEncoder(drop_zeros=False)
+X_init = pd.DataFrame([{"c1": "Oranges", "c2": "Apples"}])
+oh = oh.learn_many(X_init)
+oh = oh.learn_many(X)
+
+df = oh.transform_many(X)
+df.sort_index(axis="columns")
+
+
   c1_Oranges  c1_a  c1_i  c1_u  c2_Apples  c2_d  c2_h  c2_x
+0           0     0     0     1          0     1     0     0
+1           0     1     0     0          0     0     0     1
+2           0     0     1     0          0     0     1     0
+

+

df.dtypes.sort_index()
+
+
c1_Oranges    Sparse[uint8, 0]
+c1_a          Sparse[uint8, 0]
+c1_i          Sparse[uint8, 0]
+c1_u          Sparse[uint8, 0]
+c2_Apples     Sparse[uint8, 0]
+c2_d          Sparse[uint8, 0]
+c2_h          Sparse[uint8, 0]
+c2_x          Sparse[uint8, 0]
+dtype: object
+

+

Methods

+
+learn_many +

Update with a mini-batch of features.

+

A lot of transformers don't actually have to do anything during the learn_many step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_many can override this method.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

Transformer: self

+
+

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_many +

Transform a mini-batch of features.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.DataFrame: A new DataFrame.

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • y — defaults to None
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/preprocessing/OrdinalEncoder/index.html b/0.19.0/api/preprocessing/OrdinalEncoder/index.html

OrdinalEncoder

+

Ordinal encoder.

+

This transformer maps each feature to integers. It can be useful when a feature has string values (i.e. categorical variables).

+

Parameters

+
    +
  • +

    unknown_value

    +

    Typeint | None

    +

    Default0

    +

The value to use for unknown categories seen during transform_one. Unknown categories will be mapped to an integer once they are seen during learn_one. This value can be set to None in order to map never-before-seen categories to None.

    +
  • +
  • +

    none_value

    +

    Typeint

    +

    Default-1

    +

    The value to encode None with.

    +
  • +
+

Attributes

+
    +
  • +

    categories

    +

    A dict of dicts. The outer dict maps each feature to its inner dict. The inner dict maps each category to its code.

    +
  • +
+

Examples

+

from river import preprocessing
+
+X = [
+    {"country": "France", "place": "Taco Bell"},
+    {"country": None, "place": None},
+    {"country": "Sweden", "place": "Burger King"},
+    {"country": "France", "place": "Burger King"},
+    {"country": "Russia", "place": "Starbucks"},
+    {"country": "Russia", "place": "Starbucks"},
+    {"country": "Sweden", "place": "Taco Bell"},
+    {"country": None, "place": None},
+]
+
+encoder = preprocessing.OrdinalEncoder()
+for x in X:
+    print(encoder.transform_one(x))
+    encoder = encoder.learn_one(x)
+
+
{'country': 0, 'place': 0}
+{'country': -1, 'place': -1}
+{'country': 0, 'place': 0}
+{'country': 1, 'place': 2}
+{'country': 0, 'place': 0}
+{'country': 3, 'place': 3}
+{'country': 2, 'place': 1}
+{'country': -1, 'place': -1}
+

+

import pandas as pd
+
+xb1 = pd.DataFrame(X[0:4], index=[0, 1, 2, 3])
+xb2 = pd.DataFrame(X[4:8], index=[4, 5, 6, 7])
+
+encoder = preprocessing.OrdinalEncoder()
+encoder.transform_many(xb1)
+
+
   country  place
+0        0      0
+1       -1     -1
+2        0      0
+3        0      0
+

+

encoder = encoder.learn_many(xb1)
+encoder.transform_many(xb2)
+
+
   country  place
+4        0      0
+5        0      0
+6        2      1
+7       -1     -1
+

+

Methods

+
+learn_many +

Update with a mini-batch of features.

+

A lot of transformers don't actually have to do anything during the learn_many step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_many can override this method.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
  • y — defaults to None
  • +
+

Returns

+

Transformer: self

+
+

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_many +

Transform a mini-batch of features.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+

Returns

+

pd.DataFrame: A new DataFrame.

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/preprocessing/PredClipper/index.html b/0.19.0/api/preprocessing/PredClipper/index.html

PredClipper

+

Clips the target after predicting.
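The raw prediction of the wrapped regressor is simply clamped to the [y_min, y_max] interval (see the parameters below):

def clip(y_pred, y_min, y_max):
    # Anything below y_min becomes y_min, anything above y_max becomes y_max.
    return max(y_min, min(y_max, y_pred))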

+

Parameters

+
    +
  • +

    regressor

    +

    Typebase.Regressor

    +

    Regressor model for which to clip the predictions.

    +
  • +
  • +

    y_min

    +

    Typefloat

    +

    minimum value.

    +
  • +
  • +

    y_max

    +

    Typefloat

    +

    maximum value.

    +
  • +
+

Examples

+

from river import linear_model
+from river import preprocessing
+
+dataset = (
+    ({'a': 2, 'b': 4}, 80),
+    ({'a': 3, 'b': 5}, 100),
+    ({'a': 4, 'b': 6}, 120)
+)
+
+model = preprocessing.PredClipper(
+    regressor=linear_model.LinearRegression(),
+    y_min=0,
+    y_max=200
+)
+
+for x, y in dataset:
+    _ = model.learn_one(x, y)
+
+model.predict_one({'a': -100, 'b': -200})
+
+
0
+

+

model.predict_one({'a': 50, 'b': 60})
+
+
200
+

+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • kwargs
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
  • kwargs
  • +
+

Returns

+

The prediction.

+
+

diff --git a/0.19.0/api/preprocessing/PreviousImputer/index.html b/0.19.0/api/preprocessing/PreviousImputer/index.html

PreviousImputer

+

Imputes missing values by using the most recent value.

+

Examples

+

from river import preprocessing
+
+imputer = preprocessing.PreviousImputer()
+
+imputer = imputer.learn_one({'x': 1, 'y': 2})
+imputer.transform_one({'y': None})
+
+
{'y': 2}
+

+

imputer.transform_one({'x': None})
+
+
{'x': 1}
+

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/preprocessing/RobustScaler/index.html b/0.19.0/api/preprocessing/RobustScaler/index.html

RobustScaler

+

Scale features using statistics that are robust to outliers.

+

This Scaler removes the median and scales the data according to the interquantile range.
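Concretely, each value becomes (x - median) / (Q(q_sup) - Q(q_inf)). A minimal batch sketch of the same idea (river maintains running quantile estimates instead of recomputing them):

import statistics

def robust_scale(values, q_inf=.25, q_sup=.75):
    med = statistics.median(values)
    cuts = statistics.quantiles(values, n=100)  # 99 approximate percentile cut points
    spread = cuts[int(q_sup * 100) - 1] - cuts[int(q_inf * 100) - 1]
    return [(v - med) / spread if spread else 0.0 for v in values]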

+

Parameters

+
    +
  • +

    with_centering

    +

    DefaultTrue

    +

    Whether to centre the data before scaling.

    +
  • +
  • +

    with_scaling

    +

    DefaultTrue

    +

    Whether to scale data to IQR.

    +
  • +
  • +

    q_inf

    +

    Default0.25

    +

    Desired inferior quantile, must be between 0 and 1.

    +
  • +
  • +

    q_sup

    +

    Default0.75

    +

    Desired superior quantile, must be between 0 and 1.

    +
  • +
+

Attributes

+
    +
  • +

    median (dict)

    +

Mapping between features and instances of stats.Quantile(0.5).

    +
  • +
  • +

    iqr (dict)

    +

    Mapping between features and instances of stats.IQR.

    +
  • +
+

Examples

+

from pprint import pprint
+import random
+from river import preprocessing
+
+random.seed(42)
+X = [{'x': random.uniform(8, 12)} for _ in range(5)]
+pprint(X)
+
+
[{'x': 10.557707},
+    {'x': 8.100043},
+    {'x': 9.100117},
+    {'x': 8.892842},
+    {'x': 10.945884}]
+

+

scaler = preprocessing.RobustScaler()
+
+for x in X:
+    print(scaler.learn_one(x).transform_one(x))
+
+
    {'x': 0.0}
+    {'x': -1.0}
+    {'x': 0.0}
+    {'x': -0.12449923287875722}
+    {'x': 1.1086595155704708}
+

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/preprocessing/SparseRandomProjector/index.html b/0.19.0/api/preprocessing/SparseRandomProjector/index.html

SparseRandomProjector

+

Sparse random projector.

+

This transformer reduces the dimensionality of inputs by projecting them onto a sparse random projection matrix.

+

Ping Li et al. recommend using a minimum density of 1 / sqrt(n_features). The transformer is not aware of how many features will be seen, so the user must specify the density manually.
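Following these references, one common way to draw a single entry of a sparse projection matrix is sketched below; whether river materializes entries eagerly or lazily is an implementation detail not shown here.

import math
import random

def draw_entry(rng, density=.1, n_components=10):
    # With probability 1 - density the entry is zero, which is what
    # makes the projection matrix sparse.
    if rng.random() >= density:
        return 0.0
    magnitude = math.sqrt(1 / (density * n_components))
    return magnitude if rng.random() < .5 else -magnitude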

+

Parameters

+
    +
  • +

    n_components

    +

    Default10

    +

    Number of components to project the data onto.

    +
  • +
  • +

    density

    +

    Default0.1

    +

    Density of the random projection matrix. The density is defined as the ratio of non-zero components in the matrix. It is equal to 1 - sparsity.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import preprocessing
+
+dataset = datasets.TrumpApproval()
+model = preprocessing.SparseRandomProjector(
+    n_components=3,
+    seed=42
+)
+
+for x, y in dataset:
+    x = model.transform_one(x)
+    print(x)
+    break
+
+
{0: 92.89572746525327, 1: 1344540.5692342375, 2: 0}
+

+

model = (
+    preprocessing.SparseRandomProjector(
+        n_components=5,
+        seed=42
+    ) |
+    preprocessing.StandardScaler() |
+    linear_model.LinearRegression()
+)
+evaluate.progressive_val_score(dataset, model, metrics.MAE())
+
+
MAE: 1.292572
+

+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

+
+
+
    +
  1. +

    D. Achlioptas. 2003. Database-friendly random projections: Johnson-Lindenstrauss with binary coins. Journal of Computer and System Sciences 66 (2003) 671-687 

    +
  2. +
  3. +

    Ping Li, Trevor J. Hastie, and Kenneth W. Church. 2006. Very sparse random projections. In Proceedings of the 12th ACM SIGKDD international conference on Knowledge discovery and data mining (KDD'06). ACM, New York, NY, USA, 287-296. 

    +
  4. +
+
diff --git a/0.19.0/api/preprocessing/StandardScaler/index.html b/0.19.0/api/preprocessing/StandardScaler/index.html

StandardScaler

+

Scales the data so that it has zero mean and unit variance.

+

Under the hood, a running mean and a running variance are maintained. The scaling is slightly different than when scaling the data in batch because the exact means and variances are not known in advance. However, this doesn't have a detrimental impact on performance in the long run.
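One standard way to maintain such running statistics is Welford's algorithm, sketched below for a single feature (the actual implementation may differ in details):

count, mean, m2 = 0, 0.0, 0.0

def learn_then_scale(value):
    global count, mean, m2
    count += 1
    delta = value - mean
    mean += delta / count          # running mean
    m2 += delta * (value - mean)   # running sum of squared deviations
    std = (m2 / count) ** .5
    return (value - mean) / std if std else 0.0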

+

This transformer supports mini-batches as well as single instances. In the mini-batch case, the number of columns and the ordering of the columns are allowed to change between subsequent calls. In other words, this transformer will keep working even if you add and/or remove features every time you call learn_many and transform_many.

+

Parameters

+
    +
  • +

    with_std

    +

    DefaultTrue

    +

    Whether or not each feature should be divided by its standard deviation.

    +
  • +
+

Examples

+

import random
+from river import preprocessing
+
+random.seed(42)
+X = [{'x': random.uniform(8, 12), 'y': random.uniform(8, 12)} for _ in range(6)]
+for x in X:
+    print(x)
+
+
{'x': 10.557, 'y': 8.100}
+{'x': 9.100, 'y': 8.892}
+{'x': 10.945, 'y': 10.706}
+{'x': 11.568, 'y': 8.347}
+{'x': 9.687, 'y': 8.119}
+{'x': 8.874, 'y': 10.021}
+

+

scaler = preprocessing.StandardScaler()
+
+for x in X:
+    print(scaler.learn_one(x).transform_one(x))
+
+
{'x': 0.0, 'y': 0.0}
+{'x': -0.999, 'y': 0.999}
+{'x': 0.937, 'y': 1.350}
+{'x': 1.129, 'y': -0.651}
+{'x': -0.776, 'y': -0.729}
+{'x': -1.274, 'y': 0.992}
+

+

This transformer also supports mini-batch updates. You can call learn_many and provide a pandas.DataFrame:

+
import pandas as pd
+X = pd.DataFrame.from_dict(X)
+
+scaler = preprocessing.StandardScaler()
+scaler = scaler.learn_many(X[:3])
+scaler = scaler.learn_many(X[3:])
+
+

You can then call transform_many to scale a mini-batch of features:

+

scaler.transform_many(X)
+
+
    x         y
+0  0.444600 -0.933384
+1 -1.044259 -0.138809
+2  0.841106  1.679208
+3  1.477301 -0.685117
+4 -0.444084 -0.914195
+5 -1.274664  0.992296
+

+

Methods

+
+learn_many +

Update with a mini-batch of features.

+

Note that the update formulas for mean and variance are slightly different than in the single instance case, but they produce exactly the same result.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+
+

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_many +

Scale a mini-batch of features.

+

Parameters

+
    +
  • X'pd.DataFrame'
  • +
+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/preprocessing/StatImputer/index.html b/0.19.0/api/preprocessing/StatImputer/index.html

StatImputer

+

Replaces missing values with a statistic.

+

This transformer allows you to replace missing values with the value of a running statistic. During a call to learn_one, for each feature, a statistic is updated whenever a numeric feature is observed. When transform_one is called, each feature with a None value is replaced with the current value of the corresponding statistic.

+

Parameters

+
    +
  • +

    imputers

    +

    A list of tuples where each tuple has two elements. The first elements is a feature name and the second value is an instance of stats.base.Univariate. The second value can also be an arbitrary value, such as -1, in which case the missing values will be replaced with it.

    +
  • +
+

Examples

+
from river import preprocessing
+from river import stats
+
+

For numeric data, we can use a stats.Mean() to replace missing values with the running average of the previously seen values:

+

X = [
+    {'temperature': 1},
+    {'temperature': 8},
+    {'temperature': 3},
+    {'temperature': None},
+    {'temperature': 4}
+]
+
+imp = preprocessing.StatImputer(('temperature', stats.Mean()))
+
+for x in X:
+    imp = imp.learn_one(x)
+    print(imp.transform_one(x))
+
+
{'temperature': 1}
+{'temperature': 8}
+{'temperature': 3}
+{'temperature': 4.0}
+{'temperature': 4}
+

+

For discrete/categorical data, a common practice is to use a stats.Mode to replace missing values with the most commonly seen value:

+

X = [
+    {'weather': 'sunny'},
+    {'weather': 'rainy'},
+    {'weather': 'sunny'},
+    {'weather': None},
+    {'weather': 'rainy'},
+    {'weather': 'rainy'},
+    {'weather': None}
+]
+
+imp = preprocessing.StatImputer(('weather', stats.Mode()))
+
+for x in X:
+    imp = imp.learn_one(x)
+    print(imp.transform_one(x))
+
+
{'weather': 'sunny'}
+{'weather': 'rainy'}
+{'weather': 'sunny'}
+{'weather': 'sunny'}
+{'weather': 'rainy'}
+{'weather': 'rainy'}
+{'weather': 'rainy'}
+

+

You can also choose to replace missing values with a constant value, like so:

+

imp = preprocessing.StatImputer(('weather', 'missing'))
+
+for x in X:
+    imp = imp.learn_one(x)
+    print(imp.transform_one(x))
+
+
{'weather': 'sunny'}
+{'weather': 'rainy'}
+{'weather': 'sunny'}
+{'weather': 'missing'}
+{'weather': 'rainy'}
+{'weather': 'rainy'}
+{'weather': 'missing'}
+

+

Multiple imputers can be defined by providing a tuple for each feature which you want to impute:

+

X = [
+    {'weather': 'sunny', 'temperature': 8},
+    {'weather': 'rainy', 'temperature': 3},
+    {'weather': 'sunny', 'temperature': None},
+    {'weather': None, 'temperature': 4},
+    {'weather': 'snowy', 'temperature': -4},
+    {'weather': 'snowy', 'temperature': -3},
+    {'weather': 'snowy', 'temperature': -3},
+    {'weather': None, 'temperature': None}
+]
+
+imp = preprocessing.StatImputer(
+    ('temperature', stats.Mean()),
+    ('weather', stats.Mode())
+)
+
+for x in X:
+    imp = imp.learn_one(x)
+    print(imp.transform_one(x))
+
+
{'weather': 'sunny', 'temperature': 8}
+{'weather': 'rainy', 'temperature': 3}
+{'weather': 'sunny', 'temperature': 5.5}
+{'weather': 'sunny', 'temperature': 4}
+{'weather': 'snowy', 'temperature': -4}
+{'weather': 'snowy', 'temperature': -3}
+{'weather': 'snowy', 'temperature': -3}
+{'weather': 'snowy', 'temperature': 0.8333}
+

+

A more sophisticated way to go about imputation is to condition the statistics on a given feature. For instance, we might want to replace a missing temperature with the average temperature of a particular weather condition. As an example, consider the following dataset where the temperature is sometimes missing, but the weather condition never is:

+
X = [
+    {'weather': 'sunny', 'temperature': 8},
+    {'weather': 'rainy', 'temperature': 3},
+    {'weather': 'sunny', 'temperature': None},
+    {'weather': 'rainy', 'temperature': 4},
+    {'weather': 'sunny', 'temperature': 10},
+    {'weather': 'sunny', 'temperature': None},
+    {'weather': 'sunny', 'temperature': 12},
+    {'weather': 'rainy', 'temperature': None}
+]
+
+

Each missing temperature can be replaced with the average temperature of the corresponding weather condition, like so:

+

from river import compose
+
+imp = compose.Grouper(
+    preprocessing.StatImputer(('temperature', stats.Mean())),
+    by='weather'
+)
+
+for x in X:
+    imp = imp.learn_one(x)
+    print(imp.transform_one(x))
+
+
{'weather': 'sunny', 'temperature': 8}
+{'weather': 'rainy', 'temperature': 3}
+{'weather': 'sunny', 'temperature': 8.0}
+{'weather': 'rainy', 'temperature': 4}
+{'weather': 'sunny', 'temperature': 10}
+{'weather': 'sunny', 'temperature': 9.0}
+{'weather': 'sunny', 'temperature': 12}
+{'weather': 'rainy', 'temperature': 3.5}
+

+

Note that you can also create a Grouper with the * operator:

+
imp = preprocessing.StatImputer(('temperature', stats.Mean())) * 'weather'
+
+

Methods

+
+learn_one +

Update with a set of features x.

+

A lot of transformers don't actually have to do anything during the learn_one step because they are stateless. For this reason the default behavior of this function is to do nothing. Transformers that however do something during the learn_one can override this method.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

Transformer: self

+
+

+
+transform_one +

Transform a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict: The transformed values.

+
+

diff --git a/0.19.0/api/preprocessing/TargetMinMaxScaler/index.html b/0.19.0/api/preprocessing/TargetMinMaxScaler/index.html

TargetMinMaxScaler

+

Applies min-max scaling to the target.

+

Parameters

+
    +
  • +

    regressor

    +

    Typebase.Regressor

    +

    The regression model to wrap.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import preprocessing
+
+dataset = datasets.TrumpApproval()
+model = (
+    preprocessing.StandardScaler() |
+    preprocessing.TargetMinMaxScaler(
+        regressor=linear_model.LinearRegression(intercept_lr=0.15)
+    )
+)
+metric = metrics.MSE()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MSE: 2.018905
+

+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

The prediction.

+
+

diff --git a/0.19.0/api/preprocessing/TargetStandardScaler/index.html b/0.19.0/api/preprocessing/TargetStandardScaler/index.html

TargetStandardScaler

+

Applies standard scaling to the target.

+

Parameters

+
    +
  • +

    regressor

    +

    Typebase.Regressor

    +

    The regression model to wrap.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import preprocessing
+
+dataset = datasets.TrumpApproval()
+model = (
+    preprocessing.StandardScaler() |
+    preprocessing.TargetStandardScaler(
+        regressor=linear_model.LinearRegression(intercept_lr=0.15)
+    )
+)
+metric = metrics.MSE()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MSE: 2.005999
+

+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

The prediction.

+
+

diff --git a/0.19.0/api/proba/Beta/index.html b/0.19.0/api/proba/Beta/index.html

Beta

+

Beta distribution for binary data.

+

A Beta distribution is very similar to a Bernoulli distribution in that it counts occurrences of boolean events. The difference lies in what is being measured. A Binomial distribution models the probability of an event occurring, whereas a Beta distribution models the probability distribution itself. In other words, it's a probability distribution over probability distributions.
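Updating amounts to counting: a success increments alpha, a failure increments beta, and the density follows the usual Beta formula. A minimal sketch, using lgamma for numerical stability:

import math

alpha, beta = 81, 219  # the successes and failures used in the example below

def pdf(p):
    # Beta density: p^(alpha-1) * (1-p)^(beta-1) / B(alpha, beta)
    log_b = math.lgamma(alpha) + math.lgamma(beta) - math.lgamma(alpha + beta)
    return math.exp((alpha - 1) * math.log(p) + (beta - 1) * math.log(1 - p) - log_b)

pdf(.21)  # roughly 0.867, matching the example below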

+

Parameters

+
    +
  • +

    alpha

    +

    Typeint

    +

    Default1

    +

    Initial alpha parameter.

    +
  • +
  • +

    beta

    +

    Typeint

    +

    Default1

    +

    Initial beta parameter.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number generator seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    mode

    +

    The most likely value in the distribution.

    +
  • +
  • +

    n_samples

    +

    The number of observed samples.

    +
  • +
+

Examples

+

from river import proba
+
+successes = 81
+failures = 219
+beta = proba.Beta(successes, failures)
+
+beta(.21), beta(.35)
+
+
(0.867..., 0.165...)
+

+

for success in range(100):
+    beta = beta.update(True)
+for failure in range(200):
+    beta = beta.update(False)
+
+beta(.21), beta(.35)
+
+
(2.525...e-05, 0.841...)
+

+

beta.cdf(.35)
+
+
0.994168...
+

+

Methods

+
+call +

Probability mass/density function.

+

Parameters

+
    +
  • p'float'
  • +
+
+

+
+cdf +

Cumulative density function, i.e. P(X <= x).

+

Parameters

+
    +
  • x'float'
  • +
+
+

+
+revert +

Reverts the parameters of the distribution for a given observation.

+

Parameters

+
    +
  • x'float'
  • +
+
+

+
+sample +

Sample a random value from the distribution.

+
+

+
+update +

Updates the parameters of the distribution given a new observation.

+

Parameters

+
    +
  • x'float'
  • +
+
+

diff --git a/0.19.0/api/proba/Gaussian/index.html b/0.19.0/api/proba/Gaussian/index.html

Gaussian

+

Normal distribution with parameters mu and sigma.

+

Parameters

+
    +
  • +

    seed

    +

    DefaultNone

    +

    Random number generator seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    mode

    +

    The most likely value in the distribution.

    +
  • +
  • +

    mu

    +
  • +
  • +

    n_samples

    +

    The number of observed samples.

    +
  • +
  • +

    sigma

    +
  • +
+

Examples

+

from river import proba
+
+p = proba.Gaussian().update(6).update(7)
+
+p
+
+
𝒩(μ=6.500, σ=0.707)
+

+

p(6.5)
+
+
0.564189
+

+

p.revert(7)
+
+
𝒩(μ=6.000, σ=0.000)
+

+

Methods

+
+call +

Probability mass/density function.

+

Parameters

+
    +
  • x'typing.Any'
  • +
+
+

+
+cdf +

Cumulative density function, i.e. P(X <= x).

+

Parameters

+
    +
  • x'float'
  • +
+
+

+
+revert +

Reverts the parameters of the distribution for a given observation.

+

Parameters

+
    +
  • x'float'
  • +
  • w — defaults to 1.0
  • +
+
+

+
+sample +

Sample a random value from the distribution.

+
+

+
+update +

Updates the parameters of the distribution given a new observation.

+

Parameters

+
    +
  • x'float'
  • +
  • w — defaults to 1.0
  • +
+
+

diff --git a/0.19.0/api/proba/Multinomial/index.html b/0.19.0/api/proba/Multinomial/index.html

Multinomial

+

Multinomial distribution for categorical data.
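Under the hood this amounts to keeping a counter of observed events; the probability of an event is its count divided by the total. A minimal sketch:

from collections import Counter

counts = Counter({'green': 3, 'red': 1})

def pmf(event):
    n = sum(counts.values())
    return counts[event] / n if n else 0.0

pmf('red')  # 0.25, matching the example below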

+

Parameters

+
    +
  • +

    events

    +

    Typedict | list | None

    +

    DefaultNone

    +

    An optional list of events that already occurred.

    +
  • +
  • +

    seed

    +

    DefaultNone

    +

    Random number generator seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    mode

    +

    The most likely value in the distribution.

    +
  • +
  • +

    n_samples

    +

    The number of observed samples.

    +
  • +
+

Examples

+

from river import proba
+
+p = proba.Multinomial(['green'] * 3)
+p = p.update('red')
+
+p('red')
+
+
0.25
+

+

p = p.update('red').update('red')
+p('green')
+
+
0.5
+

+

p = p.revert('red').revert('red')
+p('red')
+
+
0.25
+

+

You can wrap this with a utils.Rolling to measure a distribution over a window:

+

from river import utils
+
+X = ['red', 'green', 'green', 'blue', 'blue']
+
+dist = utils.Rolling(
+    proba.Multinomial(),
+    window_size=3
+)
+
+for x in X:
+    dist = dist.update(x)
+    print(dist)
+    print()
+
+
P(red) = 1.000
+<BLANKLINE>
+P(red) = 0.500
+P(green) = 0.500
+<BLANKLINE>
+P(green) = 0.667
+P(red) = 0.333
+<BLANKLINE>
+P(green) = 0.667
+P(blue) = 0.333
+P(red) = 0.000
+<BLANKLINE>
+P(blue) = 0.667
+P(green) = 0.333
+P(red) = 0.000
+<BLANKLINE>
+

+

You can wrap this with a utils.TimeRolling to measure a distribution over a window of time:

+

import datetime as dt
+
+X = ['red', 'green', 'green', 'blue']
+days = [1, 2, 3, 4]
+
+dist = utils.TimeRolling(
+    proba.Multinomial(),
+    period=dt.timedelta(days=2)
+)
+
+for x, day in zip(X, days):
+    dist = dist.update(x, t=dt.datetime(2019, 1, day))
+    print(dist)
+    print()
+
+
P(red) = 1.000
+<BLANKLINE>
+P(red) = 0.500
+P(green) = 0.500
+<BLANKLINE>
+P(green) = 1.000
+P(red) = 0.000
+<BLANKLINE>
+P(green) = 0.500
+P(blue) = 0.500
+P(red) = 0.000
+<BLANKLINE>
+

+

Methods

+
+call +

Probability mass/density function.

+

Parameters

+
    +
  • x'typing.Any'
  • +
+
+

+
+revert +

Reverts the parameters of the distribution for a given observation.

+

Parameters

+
    +
  • x'typing.Hashable'
  • +
+
+

+
+sample +

Sample a random value from the distribution.

+
+

+
+update +

Updates the parameters of the distribution given a new observation.

+

Parameters

+
    +
  • x'typing.Hashable'
  • +
+
+

diff --git a/0.19.0/api/proba/MultivariateGaussian/index.html b/0.19.0/api/proba/MultivariateGaussian/index.html

MultivariateGaussian

+

Multivariate normal distribution with parameters mu and var.

+

Parameters

+
    +
  • +

    seed

    +

    DefaultNone

    +

    Random number generator seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    mode

    +

    The most likely value in the distribution.

    +
  • +
  • +

    mu

    +

    The mean value of the distribution.

    +
  • +
  • +

    n_samples

    +

    The number of observed samples.

    +
  • +
  • +

    sigma

    +

    The standard deviation of the distribution.

    +
  • +
  • +

    var

    +

    The variance of the distribution.

    +
  • +
+

Examples

+

import numpy as np
+import pandas as pd
+from river import proba
+
+np.random.seed(42)
+X = pd.DataFrame(
+    np.random.random((8, 3)),
+    columns=["red", "green", "blue"]
+)
+X
+
+
        red     green      blue
+0  0.374540  0.950714  0.731994
+1  0.598658  0.156019  0.155995
+2  0.058084  0.866176  0.601115
+3  0.708073  0.020584  0.969910
+4  0.832443  0.212339  0.181825
+5  0.183405  0.304242  0.524756
+6  0.431945  0.291229  0.611853
+7  0.139494  0.292145  0.366362
+

+

p = proba.MultivariateGaussian(seed=42)
+p.n_samples
+
+
0.0
+

+

for x in X.to_dict(orient="records"):
+    p = p.update(x)
+p.var
+
+
           blue     green       red
+blue   0.076119  0.020292 -0.010128
+green  0.020292  0.112931 -0.053268
+red   -0.010128 -0.053268  0.078961
+

+

Retrieving the current state in a nice format is simple: +

p
+
+
𝒩(
+    μ=(0.518, 0.387, 0.416),
+    σ^2=(
+        [ 0.076  0.020 -0.010]
+        [ 0.020  0.113 -0.053]
+        [-0.010 -0.053  0.079]
+    )
+)
+

+

To retrieve the number of samples and the mode:

+

p.n_samples
+
+
8.0
+
+
p.mode
+
+
{'blue': 0.5179..., 'green': 0.3866..., 'red': 0.4158...}
+

+

To retrieve the PDF and CDF:

+

p(x)
+
+
0.97967...
+
+
p.cdf(x)
+
+
0.00787...
+

+

To sample data from the distribution:

+

p.sample()
+
+
{'blue': -0.179..., 'green': -0.051..., 'red': 0.376...}
+

+

MultivariateGaussian works with utils.Rolling:

+

from river import utils
+
+p = utils.Rolling(proba.MultivariateGaussian(), window_size=5)
+for x in X.to_dict(orient="records"):
+    p = p.update(x)
+p.var
+
+
           blue     green       red
+blue   0.087062 -0.022873  0.007765
+green -0.022873  0.014279 -0.025181
+red    0.007765 -0.025181  0.095066
+

+

MultivariateGaussian works with utils.TimeRolling:

+

from datetime import datetime as dt, timedelta as td
+X.index = [dt(2023, 3, 28, 0, 0, 0) + td(seconds=x) for x in range(8)]
+p = utils.TimeRolling(proba.MultivariateGaussian(), period=td(seconds=5))
+for t, x in X.iterrows():
+    p = p.update(x.to_dict(), t=t)
+p.var
+
+
           blue     green       red
+blue   0.087062 -0.022873  0.007765
+green -0.022873  0.014279 -0.025181
+red    0.007765 -0.025181  0.095066
+

+

The variance on the diagonal is consistent with proba.Gaussian.

+

multi = proba.MultivariateGaussian()
+single = proba.Gaussian()
+for x in X.to_dict(orient='records'):
+    multi = multi.update(x)
+    single = single.update(x['blue'])
+multi.mu['blue'] == single.mu
+
+
True
+
+
multi.sigma['blue']['blue'] == single.sigma
+
+
True
+

+

Methods

+
+call +

PDF(x) method.

+

Parameters

+
    +
  • x'dict[str, float]'
  • +
+
+

+
+cdf +

Cumulative density function, i.e. P(X <= x).

+

Parameters

+
    +
  • x'dict[str, float]'
  • +
+
+

+
+revert +

Reverts the parameters of the distribution for a given observation.

+

Parameters

+
    +
  • x'dict[str, float]'
  • +
+
+

+
+sample +

Sample a random value from the distribution.

+
+

+
+update +

Updates the parameters of the distribution given a new observation.

+

Parameters

+
    +
  • x'dict[str, float]'
  • +
+
+

diff --git a/0.19.0/api/proba/base/BinaryDistribution/index.html b/0.19.0/api/proba/base/BinaryDistribution/index.html

BinaryDistribution

+

A probability distribution for discrete values.

+

Parameters

+
    +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number generator seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    mode

    +

    The most likely value in the distribution.

    +
  • +
  • +

    n_samples

    +

    The number of observed samples.

    +
  • +
+

Methods

+
+call +

Probability mass/density function.

+

Parameters

+
    +
  • x'typing.Any'
  • +
+
+

+
+revert +

Reverts the parameters of the distribution for a given observation.

+

Parameters

+
    +
  • x'bool'
  • +
+
+

+
+sample +

Sample a random value from the distribution.

+
+

+
+update +

Updates the parameters of the distribution given a new observation.

+

Parameters

+
    +
  • x'bool'
  • +
+
+
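
To make the contract above concrete, here is a toy subclass sketch (not part of River; it assumes the constructor signature and the abstract methods and attributes listed on this page, and it ignores the seed for simplicity):

import random
+
+from river import proba
+
+
+class ToyBernoulli(proba.base.BinaryDistribution):
+    """Tracks the proportion of True observations seen so far."""
+
+    def __init__(self, seed=None):
+        super().__init__(seed)  # assumed: the base constructor accepts the seed documented above
+        self.true_count = 0
+        self.n = 0
+
+    @property
+    def n_samples(self):
+        return self.n
+
+    @property
+    def mode(self):
+        return self(True) >= 0.5
+
+    def update(self, x):
+        self.n += 1
+        self.true_count += bool(x)
+        return self
+
+    def revert(self, x):
+        self.n -= 1
+        self.true_count -= bool(x)
+        return self
+
+    def __call__(self, x):
+        p_true = self.true_count / self.n if self.n else 0.0
+        return p_true if x else 1 - p_true
+
+    def sample(self):
+        return random.random() < self(True)  # unseeded draw, for brevity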

diff --git a/0.19.0/api/proba/base/ContinuousDistribution/index.html b/0.19.0/api/proba/base/ContinuousDistribution/index.html

ContinuousDistribution

+

A probability distribution for continuous values.

+

Parameters

+
    +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number generator seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    mode

    +

    The most likely value in the distribution.

    +
  • +
  • +

    n_samples

    +

    The number of observed samples.

    +
  • +
+

Methods

+
+call +

Probability mass/density function.

+

Parameters

+
    +
  • x'typing.Any'
  • +
+
+

+
+cdf +

Cumulative density function, i.e. P(X <= x).

+

Parameters

+
    +
  • x'float'
  • +
+
+

+
+revert +

Reverts the parameters of the distribution for a given observation.

+

Parameters

+
    +
  • x'float'
  • +
+
+

+
+sample +

Sample a random value from the distribution.

+
+

+
+update +

Updates the parameters of the distribution given a new observation.

+

Parameters

+
    +
  • x'float'
  • +
+
+
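
As a concrete reference, proba.Gaussian implements this interface (a minimal usage sketch; output values are omitted since they depend on the observed data):

from river import proba
+
+p = proba.Gaussian()
+for x in [1.0, 2.0, 3.0]:
+    p = p.update(x)
+
+p(2.0)      # probability density at 2.0
+p.cdf(2.0)  # P(X <= 2.0)
+p.sample()  # draw a random value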

diff --git a/0.19.0/api/proba/base/DiscreteDistribution/index.html b/0.19.0/api/proba/base/DiscreteDistribution/index.html

DiscreteDistribution

+

A probability distribution for discrete values.

+

Parameters

+
    +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number generator seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    mode

    +

    The most likely value in the distribution.

    +
  • +
  • +

    n_samples

    +

    The number of observed samples.

    +
  • +
+

Methods

+
+call +

Probability mass/density function.

+

Parameters

+
    +
  • x'typing.Any'
  • +
+
+

+
+revert +

Reverts the parameters of the distribution for a given observation.

+

Parameters

+
    +
  • x'typing.Hashable'
  • +
+
+

+
+sample +

Sample a random value from the distribution.

+
+

+
+update +

Updates the parameters of the distribution given a new observation.

+

Parameters

+
    +
  • x'typing.Hashable'
  • +
+
+
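
proba.Multinomial is a concrete implementation of this interface; note that any hashable value, including tuples, can be tracked (a brief sketch; the probability follows from the three observations):

from river import proba
+
+p = proba.Multinomial()
+for x in ['a', 'a', ('b', 'c')]:
+    p = p.update(x)
+
+p(('b', 'c'))
+
+
0.333333...
+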

diff --git a/0.19.0/api/proba/base/Distribution/index.html b/0.19.0/api/proba/base/Distribution/index.html

Distribution

+

General distribution.

+

Parameters

+
    +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number generator seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    mode

    +

    The most likely value in the distribution.

    +
  • +
  • +

    n_samples

    +

    The number of observed samples.

    +
  • +
+

Methods

+
+call +

Probability mass/density function.

+

Parameters

+
    +
  • x'typing.Any'
  • +
+
+

+
+sample +

Sample a random value from the distribution.

+
+

diff --git a/0.19.0/api/reco/Baseline/index.html b/0.19.0/api/reco/Baseline/index.html

Baseline

+

Baseline for recommender systems.

+

A first-order approximation of the bias involved in the target. The model equation is defined as:

+
\[\hat{y}(x) = \bar{y} + bu_{u} + bi_{i}\]
+

Where \(bu_{u}\) and \(bi_{i}\) are respectively the user and item biases.

+

This model expects a dict input with a user and an item entries without any type constraint on their values (i.e. can be strings or numbers). Other entries are ignored.

+

Parameters

+
    +
  • +

    optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the weights.

    +
  • +
  • +

    loss

    +

    Typeoptim.losses.Loss | None

    +

    DefaultNone

    +

    The loss function to optimize for.

    +
  • +
  • +

    l2

    +

    Default0.0

    +

    Amount of L2 regularization used to push weights towards 0.

    +
  • +
  • +

    initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

    Weights initialization scheme.

    +
  • +
  • +

    clip_gradient

    +

    Default1000000000000.0

    +

    Clips the absolute value of each gradient value.

    +
  • +
  • +

    seed

    +

    DefaultNone

    +

    Random number generation seed. Set this for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    global_mean (stats.Mean)

    +

    The target arithmetic mean.

    +
  • +
  • +

    u_biases (collections.defaultdict)

    +

    The user bias weights.

    +
  • +
  • +

    i_biases (collections.defaultdict)

    +

    The item bias weights.

    +
  • +
  • +

    u_optimizer (optim.base.Optimizer)

    +

    The sequential optimizer used for updating the user bias weights.

    +
  • +
  • +

    i_optimizer (optim.base.Optimizer)

    +

    The sequential optimizer used for updating the item bias weights.

    +
  • +
+

Examples

+

from river import optim
+from river import reco
+
+dataset = (
+    ({'user': 'Alice', 'item': 'Superman'}, 8),
+    ({'user': 'Alice', 'item': 'Terminator'}, 9),
+    ({'user': 'Alice', 'item': 'Star Wars'}, 8),
+    ({'user': 'Alice', 'item': 'Notting Hill'}, 2),
+    ({'user': 'Alice', 'item': 'Harry Potter'}, 5),
+    ({'user': 'Bob', 'item': 'Superman'}, 8),
+    ({'user': 'Bob', 'item': 'Terminator'}, 9),
+    ({'user': 'Bob', 'item': 'Star Wars'}, 8),
+    ({'user': 'Bob', 'item': 'Notting Hill'}, 2)
+)
+
+model = reco.Baseline(optimizer=optim.SGD(0.005))
+
+for x, y in dataset:
+    _ = model.learn_one(**x, y=y)
+
+model.predict_one(user='Bob', item='Harry Potter')
+
+
6.538120
+
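
The rank method can then order candidate items for a user (an illustrative call; given the ratings above, 'Superman' is expected to come out ahead of 'Notting Hill'):

model.rank(user='Bob', items={'Superman', 'Notting Hill'})
+
+
['Superman', 'Notting Hill']
+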

+

Methods

+
+learn_one +

Fits a user-item pair and a real-valued target y.

+

Parameters

+
    +
  • user'ID'
  • +
  • item'ID'
  • +
  • y'Reward'
  • +
  • x'dict | None' — defaults to None
  • +
+
+

+
+predict_one +

Predicts the target value of a set of features x.

+

Parameters

+
    +
  • user'ID'
  • +
  • item'ID'
  • +
  • x'dict | None' — defaults to None
  • +
+

Returns

+

Reward: The predicted preference from the user for the item.

+
+

+
+rank +

Rank items by decreasing order of preference for a given user.

+

Parameters

+
    +
  • user'ID'
  • +
  • items'set[ID]'
  • +
  • x'dict | None' — defaults to None
  • +
+
+

diff --git a/0.19.0/api/reco/BiasedMF/index.html b/0.19.0/api/reco/BiasedMF/index.html

BiasedMF

+

Biased Matrix Factorization for recommender systems.

+

The model equation is defined as:

+
\[\hat{y}(x) = \bar{y} + bu_{u} + bi_{i} + \langle \mathbf{v}_u, \mathbf{v}_i \rangle\]
+

Where \(bu_{u}\) and \(bi_{i}\) are respectively the user and item biases. The last term being simply the dot product between the latent vectors of the given user-item pair:

+
\[\langle \mathbf{v}_u, \mathbf{v}_i \rangle = \sum_{f=1}^{k} \mathbf{v}_{u, f} \cdot \mathbf{v}_{i, f}\]
+

where \(k\) is the number of latent factors.

+

This model expects a dict input with a user and an item entries without any type constraint on their values (i.e. can be strings or numbers). Other entries are ignored.

+

Parameters

+
    +
  • +

    n_factors

    +

    Default10

    +

    Dimensionality of the factorization or number of latent factors.

    +
  • +
  • +

    bias_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the bias weights.

    +
  • +
  • +

    latent_optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the latent weights.

    +
  • +
  • +

    loss

    +

    Typeoptim.losses.Loss | None

    +

    DefaultNone

    +

    The loss function to optimize for.

    +
  • +
  • +

    l2_bias

    +

    Default0.0

    +

    Amount of L2 regularization used to push bias weights towards 0.

    +
  • +
  • +

    l2_latent

    +

    Default0.0

    +

    Amount of L2 regularization used to push latent weights towards 0.

    +
  • +
  • +

    weight_initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

    Weights initialization scheme.

    +
  • +
  • +

    latent_initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

    Latent factors initialization scheme.

    +
  • +
  • +

    clip_gradient

    +

    Default1000000000000.0

    +

    Clips the absolute value of each gradient value.

    +
  • +
  • +

    seed

    +

    DefaultNone

    +

    Random number generation seed. Set this for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    global_mean (stats.Mean)

    +

    The target arithmetic mean.

    +
  • +
  • +

    u_biases (collections.defaultdict)

    +

    The user bias weights.

    +
  • +
  • +

    i_biases (collections.defaultdict)

    +

    The item bias weights.

    +
  • +
  • +

    u_latents (collections.defaultdict)

    +

    The user latent vectors randomly initialized.

    +
  • +
  • +

    i_latents (collections.defaultdict)

    +

    The item latent vectors randomly initialized.

    +
  • +
  • +

    u_bias_optimizer (optim.base.Optimizer)

    +

    The sequential optimizer used for updating the user bias weights.

    +
  • +
  • +

    i_bias_optimizer (optim.base.Optimizer)

    +

    The sequential optimizer used for updating the item bias weights.

    +
  • +
  • +

    u_latent_optimizer (optim.base.Optimizer)

    +

    The sequential optimizer used for updating the user latent weights.

    +
  • +
  • +

    i_latent_optimizer (optim.base.Optimizer)

    +

    The sequential optimizer used for updating the item latent weights.

    +
  • +
+

Examples

+

from river import optim
+from river import reco
+
+dataset = (
+    ({'user': 'Alice', 'item': 'Superman'}, 8),
+    ({'user': 'Alice', 'item': 'Terminator'}, 9),
+    ({'user': 'Alice', 'item': 'Star Wars'}, 8),
+    ({'user': 'Alice', 'item': 'Notting Hill'}, 2),
+    ({'user': 'Alice', 'item': 'Harry Potter'}, 5),
+    ({'user': 'Bob', 'item': 'Superman'}, 8),
+    ({'user': 'Bob', 'item': 'Terminator'}, 9),
+    ({'user': 'Bob', 'item': 'Star Wars'}, 8),
+    ({'user': 'Bob', 'item': 'Notting Hill'}, 2)
+)
+
+model = reco.BiasedMF(
+    n_factors=10,
+    bias_optimizer=optim.SGD(0.025),
+    latent_optimizer=optim.SGD(0.025),
+    latent_initializer=optim.initializers.Normal(mu=0., sigma=0.1, seed=71)
+)
+
+for x, y in dataset:
+    _ = model.learn_one(**x, y=y)
+
+model.predict_one(user='Bob', item='Harry Potter')
+
+
6.489025
+
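
The learned parameters are exposed as attributes; for instance, each seen user has a latent vector with one weight per factor (a sketch assuming the latent vectors behave like sequences of length n_factors):

len(model.u_latents['Alice'])
+
+
10
+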

+

Methods

+
+learn_one +

Fits a user-item pair and a real-valued target y.

+

Parameters

+
    +
  • user'ID'
  • +
  • item'ID'
  • +
  • y'Reward'
  • +
  • x'dict | None' — defaults to None
  • +
+
+

+
+predict_one +

Predicts the target value of a set of features x.

+

Parameters

+
    +
  • user'ID'
  • +
  • item'ID'
  • +
  • x'dict | None' — defaults to None
  • +
+

Returns

+

Reward: The predicted preference from the user for the item.

+
+

+
+rank +

Rank items by decreasing order of preference for a given user.

+

Parameters

+
    +
  • user'ID'
  • +
  • items'set[ID]'
  • +
  • x'dict | None' — defaults to None
  • +
+
+

diff --git a/0.19.0/api/reco/FunkMF/index.html b/0.19.0/api/reco/FunkMF/index.html

FunkMF

+

Funk Matrix Factorization for recommender systems.

+

The model equation is defined as:

+
\[\hat{y}(x) = \langle \mathbf{v}_u, \mathbf{v}_i \rangle = \sum_{f=1}^{k} \mathbf{v}_{u, f} \cdot \mathbf{v}_{i, f}\]
+

where \(k\) is the number of latent factors.

+

This model expects a dict input with a user and an item entries without any type constraint on their values (i.e. can be strings or numbers). Other entries are ignored.

+

Parameters

+
    +
  • +

    n_factors

    +

    Default10

    +

    Dimensionality of the factorization or number of latent factors.

    +
  • +
  • +

    optimizer

    +

    Typeoptim.base.Optimizer | None

    +

    DefaultNone

    +

    The sequential optimizer used for updating the latent factors.

    +
  • +
  • +

    loss

    +

    Typeoptim.losses.Loss | None

    +

    DefaultNone

    +

    The loss function to optimize for.

    +
  • +
  • +

    l2

    +

    Default0.0

    +

    Amount of L2 regularization used to push weights towards 0.

    +
  • +
  • +

    initializer

    +

    Typeoptim.initializers.Initializer | None

    +

    DefaultNone

    +

    Latent factors initialization scheme.

    +
  • +
  • +

    clip_gradient

    +

    Default1000000000000.0

    +

    Clips the absolute value of each gradient value.

    +
  • +
  • +

    seed

    +

    DefaultNone

    +

    Random number generation seed. Set this for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    u_latents (collections.defaultdict)

    +

    The user latent vectors randomly initialized.

    +
  • +
  • +

    i_latents (collections.defaultdict)

    +

    The item latent vectors randomly initialized.

    +
  • +
  • +

    u_optimizer (optim.base.Optimizer)

    +

    The sequential optimizer used for updating the user latent weights.

    +
  • +
  • +

    i_optimizer (optim.base.Optimizer)

    +

    The sequential optimizer used for updating the item latent weights.

    +
  • +
+

Examples

+

from river import optim
+from river import reco
+
+dataset = (
+    ({'user': 'Alice', 'item': 'Superman'}, 8),
+    ({'user': 'Alice', 'item': 'Terminator'}, 9),
+    ({'user': 'Alice', 'item': 'Star Wars'}, 8),
+    ({'user': 'Alice', 'item': 'Notting Hill'}, 2),
+    ({'user': 'Alice', 'item': 'Harry Potter'}, 5),
+    ({'user': 'Bob', 'item': 'Superman'}, 8),
+    ({'user': 'Bob', 'item': 'Terminator'}, 9),
+    ({'user': 'Bob', 'item': 'Star Wars'}, 8),
+    ({'user': 'Bob', 'item': 'Notting Hill'}, 2)
+)
+
+model = reco.FunkMF(
+    n_factors=10,
+    optimizer=optim.SGD(0.1),
+    initializer=optim.initializers.Normal(mu=0., sigma=0.1, seed=11),
+)
+
+for x, y in dataset:
+    _ = model.learn_one(**x, y=y)
+
+model.predict_one(user='Bob', item='Harry Potter')
+
+
1.866272
+
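
Since the prediction is just the dot product of the two latent vectors, it can be recomputed by hand (a sketch assuming the latents are iterable sequences of per-factor weights):

u = model.u_latents['Bob']
+v = model.i_latents['Harry Potter']
+sum(uf * vf for uf, vf in zip(u, v))  # matches predict_one up to floating-point precision
+
+
1.866272...
+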

+

Methods

+
+learn_one +

Fits a user-item pair and a real-valued target y.

+

Parameters

+
    +
  • user'ID'
  • +
  • item'ID'
  • +
  • y'Reward'
  • +
  • x'dict | None' — defaults to None
  • +
+
+

+
+predict_one +

Predicts the target value of a set of features x.

+

Parameters

+
    +
  • user'ID'
  • +
  • item'ID'
  • +
  • x'dict | None' — defaults to None
  • +
+

Returns

+

Reward: The predicted preference from the user for the item.

+
+

+
+rank +

Rank items by decreasing order of preference for a given user.

+

Parameters

+
    +
  • user'ID'
  • +
  • items'set[ID]'
  • +
  • x'dict | None' — defaults to None
  • +
+
+

diff --git a/0.19.0/api/reco/RandomNormal/index.html b/0.19.0/api/reco/RandomNormal/index.html

RandomNormal

+

Predicts random values sampled from a normal distribution.

+

The parameters of the normal distribution are fitted with running statistics. These parameters are independent of the user, the item, and the context: they are fitted globally. This recommender therefore acts as a dummy model that any serious model should easily outperform.

+

Parameters

+
    +
  • +

    seed

    +

    DefaultNone

    +

    Random number generation seed. Set this for reproducibility.

    +
  • +
+

Attributes

+ +

Examples

+

from river import reco
+
+dataset = (
+    ({'user': 'Alice', 'item': 'Superman'}, 8),
+    ({'user': 'Alice', 'item': 'Terminator'}, 9),
+    ({'user': 'Alice', 'item': 'Star Wars'}, 8),
+    ({'user': 'Alice', 'item': 'Notting Hill'}, 2),
+    ({'user': 'Alice', 'item': 'Harry Potter'}, 5),
+    ({'user': 'Bob', 'item': 'Superman'}, 8),
+    ({'user': 'Bob', 'item': 'Terminator'}, 9),
+    ({'user': 'Bob', 'item': 'Star Wars'}, 8),
+    ({'user': 'Bob', 'item': 'Notting Hill'}, 2)
+)
+
+model = reco.RandomNormal(seed=42)
+
+for x, y in dataset:
+    _ = model.learn_one(**x, y=y)
+
+model.predict_one(user='Bob', item='Harry Potter')
+
+
6.147299621751425
+

+

Methods

+
+learn_one +

Fits a user-item pair and a real-valued target y.

+

Parameters

+
    +
  • user'ID'
  • +
  • item'ID'
  • +
  • y'Reward'
  • +
  • x'dict | None' — defaults to None
  • +
+
+

+
+predict_one +

Predicts the target value of a set of features x.

+

Parameters

+
    +
  • user'ID'
  • +
  • item'ID'
  • +
  • x'dict | None' — defaults to None
  • +
+

Returns

+

Reward: The predicted preference from the user for the item.

+
+

+
+rank +

Rank items by decreasing order of preference for a given user.

+

Parameters

+
    +
  • user'ID'
  • +
  • items'set[ID]'
  • +
  • x'dict | None' — defaults to None
  • +
+
+

diff --git a/0.19.0/api/reco/base/Ranker/index.html b/0.19.0/api/reco/base/Ranker/index.html

Ranker

+

Base class for ranking models.

+

Parameters

+
    +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random number generation seed. Set this for reproducibility.

    +
  • +
+

Attributes

+
    +
  • is_contextual
  • +
+

Methods

+
+learn_one +

Fits a user-item pair and a real-valued target y.

+

Parameters

+
    +
  • user'ID'
  • +
  • item'ID'
  • +
  • y'Reward'
  • +
  • x'dict | None' — defaults to None
  • +
+
+

+
+predict_one +

Predicts the target value of a set of features x.

+

Parameters

+
    +
  • user'ID'
  • +
  • item'ID'
  • +
  • x'dict | None' — defaults to None
  • +
+

Returns

+

Reward: The predicted preference from the user for the item.

+
+

+
+rank +

Rank items by decreasing order of preference for a given user.

+

Parameters

+
    +
  • user'ID'
  • +
  • items'set[ID]'
  • +
  • x'dict | None' — defaults to None
  • +
+
+

diff --git a/0.19.0/api/rules/AMRules/index.html b/0.19.0/api/rules/AMRules/index.html

AMRules

+

Adaptive Model Rules.

+

AMRules1 is a rule-based algorithm for incremental regression tasks. AMRules relies on the Hoeffding bound to build its rule set, similarly to Hoeffding Trees. The Variance-Ratio heuristic is used to evaluate rules' splits. Moreover, this rule-based regressor has additional capacities not usually found in decision trees.

+

Firstly, each created decision rule has a built-in drift detection mechanism. Every time a drift is detected, the affected decision rule is removed. In addition, AMRules' rules also have anomaly detection capabilities. After a warm-up period, each rule tests whether or not the incoming instances are anomalies. Anomalous instances are not used for training.

+

Every time no rule is covering an incoming example, a default rule is used to learn from it. A rule covers an instance when all of the rule's literals (tests joined by the logical operation and) match the input case. The default rule is also applied for predicting examples not covered by any rules from the rule set.

+

Parameters

+
    +
  • +

    n_min

    +

    Typeint

    +

    Default200

    +

    The total weight that must be observed by a rule between expansion attempts.

    +
  • +
  • +

    delta

    +

    Typefloat

    +

    Default1e-07

    +

    The split test significance. The split confidence is given by 1 - delta.

    +
  • +
  • +

    tau

    +

    Typefloat

    +

    Default0.05

    +

    The tie-breaking threshold.

    +
  • +
  • +

    pred_type

    +

    Typestr

    +

    Defaultadaptive

    +

    The prediction strategy used by the decision rules. Can be either:
    - "mean": outputs the target mean within the partitions defined by the decision rules.
    - "model": always use instances of the model passed pred_model to make predictions.
    - "adaptive": dynamically selects between "mean" and "model" for each incoming example. The most accurate option at the moment will be used.

    +
  • +
  • +

    pred_model

    +

    Typebase.Regressor | None

    +

    DefaultNone

    +

    The regression model that will be replicated for every rule when pred_type is either "model" or "adaptive".

    +
  • +
  • +

    splitter

    +

    Typespl.Splitter | None

    +

    DefaultNone

    +

    The Splitter or Attribute Observer (AO) used to monitor the class statistics of numeric features and perform splits. Splitters are available in the tree.splitter module. Different splitters are available for classification and regression tasks. Classification and regression splitters can be distinguished by their property is_target_class. This is an advanced option. Special care must be taken when choosing different splitters. By default, tree.splitter.TEBSTSplitter is used if splitter is None.

    +
  • +
  • +

    drift_detector

    +

    Typebase.DriftDetector | None

    +

    DefaultNone

    +

    The drift detection model that is used by each rule. Care must be taken to avoid the triggering of too many false alarms or delaying too much the concept drift detection. By default, drift.ADWIN is used if drift_detector is None.

    +
  • +
  • +

    fading_factor

    +

    Typefloat

    +

    Default0.99

    +

    The exponential decaying factor applied to the learning models' absolute errors, that are monitored if pred_type='adaptive'. Must be between 0 and 1. The closer to 1, the more importance is going to be given to past observations. On the other hand, if its value approaches 0, the recent observed errors are going to have more influence on the final decision.

    +
  • +
  • +

    anomaly_threshold

    +

    Typefloat

    +

    Default-0.75

    +

    The threshold below which instances will be considered anomalies by the rules.

    +
  • +
  • +

    m_min

    +

    Typeint

    +

    Default30

    +

    The minimum total weight a rule must observe before it starts to skip anomalous instances during training.

    +
  • +
  • +

    ordered_rule_set

    +

    Typebool

    +

    DefaultTrue

    +

    If True, only the first rule that covers an instance will be used for training or prediction. If False, all the rules covering an instance will be updated during training, and the predictions for an instance will be the average prediction of all rules covering that example.

    +
  • +
  • +

    min_samples_split

    +

    Typeint

    +

    Default5

    +

    The minimum number of samples each partition of a binary split candidate must have to be considered valid.

    +
  • +
+

Attributes

+
    +
  • +

    n_drifts_detected

    +

    The number of detected concept drifts.

    +
  • +
+

Examples

+

from river import datasets
+from river import drift
+from river import evaluate
+from river import metrics
+from river import preprocessing
+from river import rules
+
+dataset = datasets.TrumpApproval()
+
+model = (
+    preprocessing.StandardScaler() |
+    rules.AMRules(
+        delta=0.01,
+        n_min=50,
+        drift_detector=drift.ADWIN()
+    )
+)
+
+metric = metrics.MAE()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MAE: 1.119553
+
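
The induced rules can be inspected per instance with debug_one (a hedged sketch: it assumes the pipeline steps are accessible by their class names, and that the input must be scaled the same way the model saw it):

x, y = next(iter(dataset))
+report = model['AMRules'].debug_one(model['StandardScaler'].transform_one(x))
+print(report)  # textual description of the rule(s) covering x and their predictions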

+

Methods

+
+anomaly_score +

Aggregated anomaly score computed using all the rules that cover the input instance.

+

Returns the mean anomaly score, the standard deviation of the score, and the proportion of rules that cover the instance (support). If the support is zero, it means that the default rule was used (no other rule covered x).

+

Parameters

+
    +
  • x
  • +
+

Returns

+

tuple[float, float, float]: mean_anomaly_score, std_anomaly_score, support

+
+

+
+debug_one +

Return an explanation of how x is predicted.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

str: A representation of the rules that cover the input and their prediction.

+
+

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.RegTarget'
  • +
  • w'int' — defaults to 1
  • +
+

Returns

+

AMRules: self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

base.typing.RegTarget: The prediction.

+
+

+

Notes

+

AMRules treats all the non-numerical inputs as nominal features. All instances of +numbers.Number will be treated as continuous, even if they represent integer categories. +When using nominal features, pred_type should be set to "mean", otherwise errors will be +thrown while trying to update the underlying rules' prediction models. Prediction strategies +other than "mean" can be used, as long as the prediction model passed to pred_model supports +nominal features.

+
+
+
    +
  1. +

    Duarte, J., Gama, J. and Bifet, A., 2016. Adaptive model rules from high-speed data +streams. ACM Transactions on Knowledge Discovery from Data (TKDD), 10(3), pp.1-22. 

    +
  2. +
+
diff --git a/0.19.0/api/sketch/Counter/index.html b/0.19.0/api/sketch/Counter/index.html

Counter

+

Counting using the Count-Min Sketch (CMS) algorithm.

+

Contrary to an exhaustive approach, e.g., using a collections.Counter, CMS uses a limited and fixed amount of memory. The CMS algorithm uses a sketch structure consisting of a matrix \(w \times d\).

+

These dimensions are obtained via:

+
    +
  • +

    \(w = \lceil \frac{e}{\epsilon} \rceil\), where \(e\) is the Euler number.

    +
  • +
  • +

    \(d = \lceil \ln\left(\frac{1}{\delta} \right) \rceil\).

    +
  • +
+

Decreasing the values of \(\epsilon\) (epsilon) and \(\delta\) (delta) increases the accuracy of the algorithm, at the cost of increased memory usage. The values of w and d control the hash tables' capacity and the number of hash collisions, respectively.

+
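
These formulas can be checked directly; with the epsilon and delta used in the example further down, they give the n_slots and n_tables values reported there (a small illustrative computation):

import math
+
+epsilon, delta = 0.005, 0.05
+w = math.ceil(math.e / epsilon)     # slots per hash table
+d = math.ceil(math.log(1 / delta))  # number of hash tables
+w, d
+
+
(544, 3)
+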

CMS works by keeping d hash tables with w slots each. Elements are mapped to a slot in each hash table. These tables store the counting estimates. This implementation assumes the turnstile case described in the paper, i.e., count values and updates can be negative.

+

The count values obtained by CMS are always overestimates. Suppose \(c_i\) and \(\hat{c}_i\) are the ground truth and estimated count values, respectively, for a given element \(i\). CMS guarantees that \(c_i \le \hat{c}_i\) and, with probability \(1 - \delta\), \(\hat{c}_i \le c_i + \epsilon||\mathbf{c}||_1\). In the expression, \(||\mathbf{c}||_1 = \sum_i |c_i|\).

+

Parameters

+
    +
  • +

    epsilon

    +

    Typefloat

    +

    Default0.1

    +

    The approximation error parameter. The error in answering a query is within a factor of epsilon with probability delta.

    +
  • +
  • +

    delta

    +

    Typefloat

    +

    Default0.05

    +

Query estimates have a probability of 1 - delta of having errors which are within a factor of epsilon. See the CMS description above for more details.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    n_slots

    +

    The number of slots in each hash table.

    +
  • +
  • +

    n_tables

    +

    The number of stored hash tables.

    +
  • +
+

Examples

+
import collections
+import math
+import random
+from river import sketch
+
+cms = sketch.Counter(epsilon=0.005, seed=0)
+
+rng = random.Random(7)
+
+counter = collections.Counter()
+
+

We can check the number of slots per hash table: +

cms.n_slots
+
+
544
+

+

And the number of hash tables: +

cms.n_tables
+
+
3
+

+

Let's compare the sketch against a brute force approach:

+
vals = []
+for _ in range(10000):
+    v = rng.randint(-1000, 1000)
+    cms = cms.update(v)
+    counter[v] += 1
+    vals.append(v)
+
+

Now, we can compare the estimates of CMS against the exhaustive counting strategy:

+

counter[7]
+
+
5
+
+
cms[7]
+
+
12
+
+
counter[532]
+
+
4
+
+
cms[532]
+
+
15
+

+

Keep in mind that CMS is an approximate sketch algorithm. Counting estimates for unseen values +might not always be reliable:

+

cms[1001]
+
+
9
+

+

We can check the number of elements stored by each approach:

+

len(counter), len(cms)
+
+
(1982, 1632)
+

+

And also retrieve the total sum of counts:

+

cms.total()
+
+
10000
+

+

We can decrease the error by allocating more memory in the CMS:

+

cms_a = sketch.Counter(epsilon=0.001, delta=0.01, seed=0)
+for v in vals:
+    cms_a = cms_a.update(v)
+
+cms_a[7]
+
+
5
+
+
cms_a[532]
+
+
4
+

+

We can also obtain estimates of the dot product between two instances of sketch.Counter. This could be useful, +for instance, to estimate the cosine distance between the data monitored in two different counter sketch instances. Suppose we +create another CMS instance (the number of slots and hash tables must match) that monitors another sample of the same data +generating process:

+
cms_b = sketch.Counter(epsilon=0.001, delta=0.01, seed=7)
+
+for _ in range(10000):
+    v = rng.randint(-1000, 1000)
+    cms_b = cms_b.update(v)
+
+

Now, we can define a cosine distance function:

+
def cosine_dist(cms_a, cms_b):
+    num = cms_a @ cms_b
+    den = math.sqrt(cms_a @ cms_a) * math.sqrt(cms_b @ cms_b)
+    return num / den
+
+

And use it to calculate the cosine distance between the elements monitored in cms_a and cms_b:

+

cosine_dist(cms_a, cms_b)
+
+
0.175363...
+

+

Methods

+
+total +

Return the total count.

+
+

+
+update +
diff --git a/0.19.0/api/sketch/HeavyHitters/index.html b/0.19.0/api/sketch/HeavyHitters/index.html

HeavyHitters

+

Find the Heavy Hitters using the Lossy Count with Forgetting factor algorithm1.

+

Keep track of the most frequent item(set)s in a data stream and apply a forgetting factor to discard previously frequent items that do not often appear anymore. This is an approximation algorithm designed to work with a limited amount of memory rather than accounting for every possible solution (thus using an unbounded memory footprint). Any hashable type can be passed as input, hence tuples or frozensets can also be monitored.

+

Considering a data stream where n elements were observed so far, the Lossy Count algorithm has the following properties:

+
    +
  • +

    All item(set)s whose true frequency exceeds support * n are output. There are no false negatives;

+
    +
  • +

    No item(set) whose true frequency is less than (support - epsilon) * n is outputted;

    +
  • +
  • +

    Estimated frequencies are less than the true frequencies by at most epsilon * n.

    +
  • +
+

Parameters

+
    +
  • +

    support

    +

    Typefloat

    +

    Default0.001

    +

    The support threshold used to determine if an item is frequent. The value of support must be in \([0, 1]\). Elements whose frequency is higher than support times the number of observations seen so far are outputted.

    +
  • +
  • +

    epsilon

    +

    Typefloat

    +

    Default0.005

    +

    Error parameter to control the accuracy-memory tradeoff. The value of epsilon must be in \((0, 1]\) and typically epsilon \(\ll\) support. The smaller the epsilon, the more accurate the estimates will be, but the count sketch will have an increased memory footprint.

    +
  • +
  • +

    fading_factor

    +

    Typefloat

    +

    Default0.999

    +

    Forgetting factor applied to the frequency estimates to reduce the impact of old items. The value of fading_factor must be in \((0, 1]\).

    +
  • +
+

Examples

+
import random
+import string
+from river import sketch
+
+rng = random.Random(42)
+hh = sketch.HeavyHitters()
+
+

We will feed the counter with printable ASCII characters:

+
for _ in range(10_000):
+    hh = hh.update(rng.choice(string.printable))
+
+

We can retrieve estimates of the n top elements and their frequencies. Let's try n=3 +

hh.most_common(3)
+
+
[(',', 122.099142...), ('[', 116.049510...), ('W', 115.013402...)]
+

+

We can also access estimates of individual elements:

+

hh['A']
+
+
99.483575...
+

+

Unobserved elements are handled just fine: +

hh[(1, 2, 3)]
+
+
0.0
+
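
As mentioned above, itemsets can be monitored by passing tuples or frozensets (an illustrative update; the resulting estimate is positive and close to 1, then decays with the fading factor):

hh = hh.update(('a', 'b'))
+hh[('a', 'b')] > 0
+
+
True
+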

+

Methods

+
+most_common +
+
+update +
+
+
+
    +
  1. +

    Veloso, B., Tabassum, S., Martins, C., Espanha, R., Azevedo, R., & Gama, J. (2020). +Interconnect bypass fraud detection: a case study. Annals of Telecommunications, 75(9), 583-596. 

    +
  2. +
+
diff --git a/0.19.0/api/sketch/Histogram/index.html b/0.19.0/api/sketch/Histogram/index.html

Histogram

+

Streaming histogram.

+

Parameters

+
    +
  • +

    max_bins

    +

    Default256

    +

    Maximal number of bins.

    +
  • +
+

Attributes

+
    +
  • +

    n

    +

    Total number of seen values.

    +
  • +
+

Examples

+

from river import sketch
+import numpy as np
+
+np.random.seed(42)
+
+values = np.hstack((
+    np.random.normal(-3, 1, 1000),
+    np.random.normal(3, 1, 1000),
+))
+
+hist = sketch.Histogram(max_bins=15)
+
+for x in values:
+    hist = hist.update(x)
+
+for bin in hist:
+    print(bin)
+
+
[-6.24127, -6.24127]: 1
+[-5.69689, -5.19881]: 8
+[-5.12390, -4.43014]: 57
+[-4.42475, -3.72574]: 158
+[-3.71984, -3.01642]: 262
+[-3.01350, -2.50668]: 206
+[-2.50329, -0.81020]: 294
+[-0.80954, 0.29677]: 19
+[0.40896, 0.82733]: 7
+[0.84661, 1.25147]: 24
+[1.26029, 2.30758]: 178
+[2.31081, 3.05701]: 284
+[3.05963, 3.69695]: 242
+[3.69822, 5.64434]: 258
+[6.13775, 6.19311]: 2
+

+
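
The histogram doubles as an empirical CDF (an illustrative query; since this sample is bimodal with half of the mass below zero, the value is close to 0.5):

hist.cdf(0)  # approximate P(X <= 0)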

Methods

+
+cdf +

Cumulative distribution function.

+

Parameters

+
    +
  • x
  • +
+
+

+
+iter_cdf +

Yields CDF values for a sorted iterable of values.

+

This is faster than calling cdf with many values.

+

Parameters

+
    +
  • X
  • +
  • verbose — defaults to False
  • +
+
+

diff --git a/0.19.0/api/sketch/Set/index.html b/0.19.0/api/sketch/Set/index.html

Set

+

Approximate tracking of observed items using Bloom filters.

+

Bloom filters enable using a limited amount of memory to check whether a given item was already observed in a stream. They can be used similarly to Python's built-in sets with the difference that items are not explicitly stored. For that reason, element removal and set difference are not currently supported.

+

Bloom filters store a bit array and map incoming items to k index positions in that array. The selected positions are set to True. Therefore, a binary code representation is created for each item. Membership works by projecting the query item and checking if every position of its binary code is True. If that is not the case, the item was not observed yet. A nice property of Bloom filters is that they do not yield false negatives: unobserved items might be signaled as observed, but observed items are never signaled as unobserved.

+

If more than one item has the same binary code, i.e., hash collisions happen, the accuracy of the Bloom filter decreases, and false positives are produced. For instance, a previously unobserved item is signaled as observed. Increasing the size of the binary array and the value of k increases the filter's accuracy, as hash collisions are avoided. Nonetheless, even using an increased number of hash functions, hash collisions will frequently happen if the array capacity is too small. The length of the bit array and the number of hash functions are inferred automatically from the supplied capacity and fp_rate.

+
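
For reference, the classical Bloom filter sizing formulas reproduce the n_bits and n_hash values reported in the example below (an illustrative computation; the ceil/round conventions are an assumption):

import math
+
+capacity, fp_rate = 100, 0.01
+n_bits = math.ceil(-capacity * math.log(fp_rate) / math.log(2) ** 2)
+n_hash = round(n_bits / capacity * math.log(2))
+n_bits, n_hash
+
+
(959, 7)
+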

Parameters

+
    +
  • +

    capacity

    +

    Typeint

    +

    Default2048

    +

    The maximum capacity of the Bloom filter, i.e., the maximum number of distinct items to store given the selected fp_rate.

    +
  • +
  • +

    fp_rate

    +

    Typefloat

    +

    Default0.01

    +

    The allowed rate of false positives. The probability of obtaining a true positive is 1 - fp_rate.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    n_bits

    +

    Return the size of the binary array used by the Bloom filter.

    +
  • +
  • +

    n_hash

    +

    Return the number of used hash functions.

    +
  • +
+

Examples

+
import random
+from river import sketch
+
+rng = random.Random(42)
+s_set = sketch.Set(capacity=100, seed=0)
+
+

We can retrieve the number of selected hash functions:

+

s_set.n_hash
+
+
7
+

+

And the size of the binary array used by the Bloom filter: +

s_set.n_bits
+
+
959
+

+

We can add new items and check for membership using the same calls used by Python's +standard sets: +

for _ in range(1000):
+    s_set.add(rng.randint(0, 200))
+
+1 in s_set
+
+
True
+

+

False positives might happen if the capacity is not large enough: +

-10 in s_set
+
+
True
+

+

Iterables can also be supplied to perform multiple updates with a single call to update: +

s_set = s_set.update([1, 2, 3, 4, 5, 6, 7])
+

+

We can also combine instances of sketch.Set using the intersection and union operations, as long as +they share the same hash functions and capacity. In other words, all their hyperparameters match. +Let's create two instances that will monitor different portions of a stream of random numbers:

+

s1 = sketch.Set(seed=8)
+s2 = sketch.Set(seed=8)
+
+for _ in range(1000):
+    s1.add(rng.randint(0, 5000))
+
+for _ in range(1000):
+    s2.add(rng.randint(0, 5000))
+
+43 in s1
+
+
True
+
+
43 in s2
+
+
False
+

+

We can get the intersection between the two instances by using:

+

s_intersection = s1 & s2
+43 in s_intersection
+
+
False
+

+

We can also obtain the set union:

+

s_union = s1 | s2
+
+43 in s_union
+
+
True
+

+

The same effect of the non-inplace dunder methods can be achieved via explicit method calls:

+

43 in s1.intersection(s2)
+
+
False
+

+

43 in s1.union(s2)
+
+
True
+

+

Methods

+
+add +
+
+intersection +

Set intersection.

+

Return a new instance that results from the set intersection between the current Set object and other. Dunder operators can be used to replace the method call, i.e., a &= b and a & b for inplace and non-inplace intersections, respectively.

+

Parameters

+
    +
  • other'Set'
  • +
+
+

+
+union +

Set union.

+

Return a new instance that results from the set union between the current Set object and other. Dunder operators can be used to replace the method call, i.e., a |= b and a | b for inplace and non-inplace unions, respectively.

+

Parameters

+
    +
  • other'Set'
  • +
+
+

+
+update +
+

Notes

+

This implementation uses an integer to represent the binary array. Bitwise operations are performed on the +integer to reflect the Bloom filter updates.

diff --git a/0.19.0/api/stats/AbsMax/index.html b/0.19.0/api/stats/AbsMax/index.html

AbsMax

+

Running absolute max.

+

Attributes

+
    +
  • +

    abs_max (float)

    +

    The current absolute max.

    +
  • +
+

Examples

+

from river import stats
+
+X = [1, -4, 3, -2, 5, -6]
+abs_max = stats.AbsMax()
+for x in X:
+    print(abs_max.update(x).get())
+
+
1
+4
+4
+4
+5
+6
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/AutoCorr/index.html b/0.19.0/api/stats/AutoCorr/index.html

AutoCorr

+

Measures the serial correlation.

+

This method computes the Pearson correlation between the current value and the value seen n steps before.

+

Parameters

+
    +
  • +

    lag

    +

    Typeint

    +
  • +
+

Attributes

+
    +
  • name
  • +
+

Examples

+

The following examples are taken from the pandas documentation.

+

from river import stats
+
+auto_corr = stats.AutoCorr(lag=1)
+for x in [0.25, 0.5, 0.2, -0.05]:
+    print(auto_corr.update(x).get())
+
+
0
+0
+-1.0
+0.103552
+

+

auto_corr = stats.AutoCorr(lag=2)
+for x in [0.25, 0.5, 0.2, -0.05]:
+    print(auto_corr.update(x).get())
+
+
0
+0
+0
+-1.0
+

+

auto_corr = stats.AutoCorr(lag=1)
+for x in [1, 0, 0, 0]:
+    print(auto_corr.update(x).get())
+
+
0
+0
+0
+0
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/BayesianMean/index.html b/0.19.0/api/stats/BayesianMean/index.html

BayesianMean

+

Estimates a mean using outside information.

+

Parameters

+
    +
  • +

    prior

    +

    Typefloat

    +
  • +
  • +

    prior_weight

    +

    Typefloat

    +
  • +
+

Attributes

+
    +
  • name
  • +
+
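
Examples

+

A minimal usage sketch (the smoothed-mean formula (prior * prior_weight + sum of observations) / (prior_weight + n) is an assumption about the implementation):

from river import stats
+
+mean = stats.BayesianMean(prior=3.0, prior_weight=1.0)
+for x in [1.0, 2.0]:
+    mean = mean.update(x)
+
+mean.get()  # (3*1 + 1 + 2) / (1 + 2) = 2.0 under the assumed formula
+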

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+revert +
+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/Count/index.html b/0.19.0/api/stats/Count/index.html

Count

+

A simple counter.

+

Attributes

+
    +
  • +

    n (int)

    +

    The current number of observations.

    +
  • +
+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number' — defaults to None
  • +
+
+

diff --git a/0.19.0/api/stats/Cov/index.html b/0.19.0/api/stats/Cov/index.html

Cov

+

Covariance.

+

Parameters

+
    +
  • +

    ddof

    +

    Default1

    +

    Delta Degrees of Freedom.

    +
  • +
+

Attributes

+
    +
  • n
  • +
+

Examples

+

from river import stats
+
+x = [-2.1,  -1,  4.3]
+y = [   3, 1.1, 0.12]
+
+cov = stats.Cov()
+
+for xi, yi in zip(x, y):
+    print(cov.update(xi, yi).get())
+
+
0.0
+-1.044999
+-4.286
+

+

This class has a revert method, and can thus be wrapped by utils.Rolling:

+

from river import utils
+
+x = [-2.1,  -1, 4.3, 1, -2.1,  -1, 4.3]
+y = [   3, 1.1, .12, 1,    3, 1.1, .12]
+
+rcov = utils.Rolling(stats.Cov(), window_size=3)
+
+for xi, yi in zip(x, y):
+    print(rcov.update(xi, yi).get())
+
+
0.0
+-1.045
+-4.286
+-1.382
+-4.589
+-1.415
+-4.286
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+revert +
+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • w — defaults to 1.0
  • +
+
+

+
+update_many +
+

Notes

+

The outcomes of the incremental and parallel updates are consistent with numpy's batch processing when \(\text{ddof} \le 1\).

+
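
As a sanity check of this claim, here is a sketch comparing the incremental covariance with numpy's batch computation, reusing the data from the first example above:

import numpy as np
from river import stats

x = [-2.1, -1, 4.3]
y = [3, 1.1, 0.12]

cov = stats.Cov(ddof=1)
for xi, yi in zip(x, y):
    cov = cov.update(xi, yi)

# Both values agree up to floating point error (about -4.286).
print(cov.get())
print(np.cov(x, y)[0, 1])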
+
+
  1. Wikipedia article on algorithms for calculating variance
  2. Schubert, E. and Gertz, M., 2018, July. Numerically stable parallel computation of (co-)variance. In Proceedings of the 30th International Conference on Scientific and Statistical Database Management (pp. 1-12).
diff --git a/0.19.0/api/stats/EWMean/index.html b/0.19.0/api/stats/EWMean/index.html

EWMean

+

Exponentially weighted mean.

+

Parameters

+
    +
  • +

    fading_factor

    +

    Default0.5

    +

    The closer fading_factor is to 1 the more the statistic will adapt to recent values.

    +
  • +
+
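
To make the role of fading_factor concrete, here is a sketch of the update rule implied by the example outputs below: the first value initialises the mean, and each subsequent value x moves it as mean = fading_factor * x + (1 - fading_factor) * mean.

f = 0.5
mean = None
for x in [1.0, 3.0, 5.0, 4.0]:
    mean = x if mean is None else f * x + (1 - f) * mean
    print(mean)  # 1.0, 2.0, 3.5, 3.75 (matches stats.EWMean(fading_factor=0.5))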

Attributes

+
    +
  • +

    mean (float)

    +

    The running exponentially weighted mean.

    +
  • +
+

Examples

+

from river import stats
+
+X = [1, 3, 5, 4, 6, 8, 7, 9, 11]
+ewm = stats.EWMean(fading_factor=0.5)
+for x in X:
+    print(ewm.update(x).get())
+
+
1.0
+2.0
+3.5
+3.75
+4.875
+6.4375
+6.71875
+7.859375
+9.4296875
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/EWVar/index.html b/0.19.0/api/stats/EWVar/index.html

EWVar

+

Exponentially weighted variance.

+

To calculate the variance, we use the fact that Var(X) = Mean(x^2) - Mean(x)^2. Internally, two exponentially weighted means are maintained: one of x and one of x^2.

+
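
As a sketch of that identity, the same values can be obtained by maintaining two stats.EWMean instances by hand; the printed values match the first outputs of the example below.

from river import stats

ewm_x = stats.EWMean(fading_factor=0.5)   # tracks Mean(x)
ewm_x2 = stats.EWMean(fading_factor=0.5)  # tracks Mean(x^2)
for x in [1, 3, 5]:
    ewm_x = ewm_x.update(x)
    ewm_x2 = ewm_x2.update(x * x)
    print(ewm_x2.get() - ewm_x.get() ** 2)  # 0.0, 1.0, 2.75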

Parameters

+
    +
  • +

    fading_factor

    +

    Default0.5

    +

    The closer fading_factor is to 1 the more the statistic will adapt to recent values.

    +
  • +
+

Attributes

+
    +
  • +

    variance (float)

    +

    The running exponentially weighted variance.

    +
  • +
+

Examples

+

from river import stats
+
+X = [1, 3, 5, 4, 6, 8, 7, 9, 11]
+ewv = stats.EWVar(fading_factor=0.5)
+for x in X:
+    print(ewv.update(x).get())
+
+
0.0
+1.0
+2.75
+1.4375
+1.984375
+3.43359375
+1.7958984375
+2.198974609375
+3.56536865234375
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/Entropy/index.html b/0.19.0/api/stats/Entropy/index.html

Entropy

+

Running entropy.

+

Parameters

+
    +
  • +

    fading_factor

    +

    Default1

    +

    Fading factor.

    +
  • +
  • +

    eps

    +

    Default1e-08

    +

    Small value that will be added to the denominator to avoid division by zero.

    +
  • +
+

Attributes

+
    +
  • +

    entropy (float)

    +

    The running entropy.

    +
  • +
  • +

    n (int)

    +

    The current number of observations.

    +
  • +
  • +

    counter (collections.Counter)

    +

Counts the number of times each value has occurred.

    +
  • +
+

Examples

+

import math
+import random
+import numpy as np
+from scipy.stats import entropy
+from river import stats
+
+def entropy_list(labels, base=None):
+  value,counts = np.unique(labels, return_counts=True)
+  return entropy(counts, base=base)
+
+SEED = 42 * 1337
+random.seed(SEED)
+
+entro = stats.Entropy(fading_factor=1)
+
+list_animal = []
+for animal, num_val in zip(['cat', 'dog', 'bird'],[301, 401, 601]):
+    list_animal += [animal for i in range(num_val)]
+random.shuffle(list_animal)
+
+for animal in list_animal:
+    _ = entro.update(animal)
+
+print(f'{entro.get():.6f}')
+
+
1.058093
+
+
print(f'{entropy_list(list_animal):.6f}')
+
+
1.058093
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/IQR/index.html b/0.19.0/api/stats/IQR/index.html

IQR

+

Computes the interquartile range.

+
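
Assuming the interquartile range is obtained as the difference between two running stats.Quantile estimators, it can be reproduced by hand as follows; this is a sketch for intuition, not the class itself.

from river import stats

q_inf, q_sup = stats.Quantile(0.25), stats.Quantile(0.75)
for x in range(1001):
    q_inf = q_inf.update(x)
    q_sup = q_sup.update(x)

# The difference between the running quantiles gives the IQR,
# matching the last output of the example below (500.0).
print(q_sup.get() - q_inf.get())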

Parameters

+
    +
  • +

    q_inf

    +

    Default0.25

    +

    Desired inferior quantile, must be between 0 and 1. Defaults to 0.25.

    +
  • +
  • +

    q_sup

    +

    Default0.75

    +

    Desired superior quantile, must be between 0 and 1. Defaults to 0.75.

    +
  • +
+

Attributes

+
    +
  • name
  • +
+

Examples

+

from river import stats
+
+iqr = stats.IQR(q_inf=0.25, q_sup=0.75)
+
+for i in range(0, 1001):
+    iqr = iqr.update(i)
+    if i % 100 == 0:
+        print(iqr.get())
+
+
0.0
+50.0
+100.0
+150.0
+200.0
+250.0
+300.0
+350.0
+400.0
+450.0
+500.0
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/Kurtosis/index.html b/0.19.0/api/stats/Kurtosis/index.html

Kurtosis

+

Running kurtosis using Welford's algorithm.

+

Parameters

+
    +
  • +

    bias

    +

    DefaultFalse

    +

    If False, then the calculations are corrected for statistical bias.

    +
  • +
+

Attributes

+
    +
  • name
  • +
+

Examples

+

from river import stats
+import scipy.stats
+import numpy as np
+
+np.random.seed(42)
+X = np.random.normal(loc=0, scale=1, size=10)
+
+kurtosis = stats.Kurtosis(bias=False)
+for x in X:
+    print(kurtosis.update(x).get())
+
+
-3.0
+-2.0
+-1.5
+1.4130027920707047
+0.15367976585756438
+0.46142633246812653
+-1.620647789230658
+-1.3540178492487054
+-1.2310268787102745
+-0.9490372374384453
+

+

for i in range(2, len(X)+1):
+    print(scipy.stats.kurtosis(X[:i], bias=False))
+
+
-2.0
+-1.4999999999999998
+1.4130027920707082
+0.15367976585756082
+0.46142633246812403
+-1.620647789230658
+-1.3540178492487063
+-1.2310268787102738
+-0.9490372374384459
+

+

kurtosis = stats.Kurtosis(bias=True)
+for x in X:
+    print(kurtosis.update(x).get())
+
+
-3.0
+-2.0
+-1.5
+-1.011599627723906
+-0.9615800585356089
+-0.6989395431537853
+-1.4252699121794408
+-1.311437071070812
+-1.246289111322894
+-1.082283689864171
+

+

for i in range(2, len(X)+1):
+    print(scipy.stats.kurtosis(X[:i], bias=True))
+
+
-2.0
+-1.4999999999999998
+-1.0115996277239057
+-0.9615800585356098
+-0.6989395431537861
+-1.425269912179441
+-1.3114370710708125
+-1.2462891113228936
+-1.0822836898641714
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/Link/index.html b/0.19.0/api/stats/Link/index.html

Link

+

A link joins two univariate statistics as a sequence.

+

This can be used to pipe the output of one statistic into the input of another. For instance, it allows calculating the mean of the variance of a variable. It can also be used to compute shifted statistics by piping statistics with an instance of stats.Shift.

+

Note that a link is not meant to be instantiated via this class definition. Instead, users can link statistics together via the | operator.

+

Parameters

+ +

Attributes

+
    +
  • name
  • +
+

Examples

+
from river import stats
+stat = stats.Shift(1) | stats.Mean()
+
+

No values have been seen, therefore get defaults to the initial value of stats.Mean, which is 0.

+

stat.get()
+
+
0.0
+

+

Let us now call update.

+
stat = stat.update(1)
+
+

The output from get will still be 0. The reason is that stats.Shift has not seen enough values, and therefore outputs its default value, which is None. The stats.Mean instance is therefore not updated.

+

stat.get()
+
+
0.0
+

+

On the next call to update, the stats.Shift instance has seen enough values, and therefore the mean can be updated. The mean is therefore equal to 1, because that's the only value from the past.

+

stat = stat.update(3)
+stat.get()
+
+
1.0
+

+

On the subsequent call to update, the mean will be updated with the value 3.

+

stat = stat.update(4)
+stat.get()
+
+
2.0
+

+

Note that composing statistics returns a new statistic with its own name.

+

stat.name
+
+
'mean_of_shift_1'
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/MAD/index.html b/0.19.0/api/stats/MAD/index.html

MAD

+

Median Absolute Deviation (MAD).

+

The median absolute deviation is the median of the absolute differences between each data point and the data's overall median. In an online setting, the median of the data is unknown beforehand. Therefore, both the median of the data and the median of the differences of the data with respect to the latter are updated online. To be precise, the median of the data is updated before the median of the differences. As a consequence, this online version of the MAD does not coincide exactly with its batch counterpart.

+

Attributes

+
    +
  • +

    median (stats.Median)

    +

    The median of the data.

    +
  • +
+

Examples

+

from river import stats
+
+X = [4, 2, 5, 3, 0, 4]
+
+mad = stats.MAD()
+for x in X:
+    print(mad.update(x).get())
+
+
0.0
+2.0
+1.0
+1.0
+1.0
+1.0
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/Max/index.html b/0.19.0/api/stats/Max/index.html

Max

+

Running max.

+

Attributes

+
    +
  • +

    max (float)

    +

    The current max.

    +
  • +
+

Examples

+

from river import stats
+
+X = [1, -4, 3, -2, 5, -6]
+_max = stats.Max()
+for x in X:
+    print(_max.update(x).get())
+
+
1
+1
+3
+3
+5
+5
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/Mean/index.html b/0.19.0/api/stats/Mean/index.html

Mean

+

Running mean.

+

Attributes

+
    +
  • +

    n (float)

    +

    The current sum of weights. If each passed weight was 1, then this is equal to the number of seen observations.

    +
  • +
+

Examples

+

from river import stats
+
+X = [-5, -3, -1, 1, 3, 5]
+mean = stats.Mean()
+for x in X:
+    print(mean.update(x).get())
+
+
-5.0
+-4.0
+-3.0
+-2.0
+-1.0
+0.0
+

+

You can calculate a rolling average by wrapping a utils.Rolling around:

+

from river import utils
+
+X = [1, 2, 3, 4, 5, 6]
+rmean = utils.Rolling(stats.Mean(), window_size=2)
+
+for x in X:
+    print(rmean.update(x).get())
+
+
1.0
+1.5
+2.5
+3.5
+4.5
+5.5
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+revert +
+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
  • w — defaults to 1.0
  • +
+
+

+
+update_many +
diff --git a/0.19.0/api/stats/Min/index.html b/0.19.0/api/stats/Min/index.html

Min

+

Running min.

+

Attributes

+
    +
  • +

    min (float)

    +

    The current min.

    +
  • +
+
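
Examples

A minimal usage sketch, mirroring the Max example above; the running minimum only changes when a smaller value arrives.

from river import stats

X = [1, -4, 3, -2, 5, -6]
_min = stats.Min()
for x in X:
    print(_min.update(x).get())  # 1, -4, -4, -4, -4, -6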

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/Mode/index.html b/0.19.0/api/stats/Mode/index.html

Mode

+

Running mode.

+

The mode is simply the most common value. An approximate mode can be computed by limiting the number of unique values that are counted.

+

Parameters

+
    +
  • +

    k

    +

    Default25

    +

    Only the first k unique values will be included. If k equals -1, the exact mode is computed.

    +
  • +
+

Attributes

+
    +
  • name
  • +
+

Examples

+

from river import stats
+
+X = ['sunny', 'cloudy', 'cloudy', 'rainy', 'rainy', 'rainy']
+mode = stats.Mode(k=2)
+for x in X:
+    print(mode.update(x).get())
+
+
sunny
+sunny
+cloudy
+cloudy
+cloudy
+cloudy
+

+

mode = stats.Mode(k=-1)
+for x in X:
+    print(mode.update(x).get())
+
+
sunny
+sunny
+cloudy
+cloudy
+cloudy
+rainy
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/NUnique/index.html b/0.19.0/api/stats/NUnique/index.html

NUnique

+

Approximate number of unique values counter.

+

This is an implementation of the HyperLogLog algorithm, adapted from hypy.

+

Parameters

+
    +
  • +

    error_rate

    +

    Default0.01

    +

    Desired error rate. Memory usage is inversely proportional to this value.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Set the seed to produce identical results.

    +
  • +
+

Attributes

+
    +
  • +

    n_bits (int)

    +
  • +
  • +

    n_buckets (int)

    +
  • +
  • +

    buckets (list)

    +
  • +
+

Examples

+

import string
+from river import stats
+
+alphabet = string.ascii_lowercase
+n_unique = stats.NUnique(error_rate=0.2, seed=42)
+
+n_unique.update('a').get()
+
+
1
+

+

n_unique.update('b').get()
+
+
2
+

+

for letter in alphabet:
+    n_unique = n_unique.update(letter)
+n_unique.get()
+
+
31
+

+

Lowering the error_rate parameter will increase the precision.

+

n_unique = stats.NUnique(error_rate=0.01, seed=42)
+for letter in alphabet:
+    n_unique = n_unique.update(letter)
+n_unique.get()
+
+
26
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/PeakToPeak/index.html b/0.19.0/api/stats/PeakToPeak/index.html

PeakToPeak

+

Running peak to peak (max - min).

+

Attributes

+
    +
  • name
  • +
+

Examples

+

from river import stats
+
+X = [1, -4, 3, -2, 2, 4]
+ptp = stats.PeakToPeak()
+for x in X:
+    print(ptp.update(x).get())
+
+
0
+5
+7
+7
+7
+8
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/PearsonCorr/index.html b/0.19.0/api/stats/PearsonCorr/index.html

PearsonCorr

+

Online Pearson correlation.

+

Parameters

+
    +
  • +

    ddof

    +

    Default1

    +

    Delta Degrees of Freedom.

    +
  • +
+

Attributes

+
    +
  • +

    var_x (stats.Var)

    +

    Running variance of x.

    +
  • +
  • +

    var_y (stats.Var)

    +

    Running variance of y.

    +
  • +
  • +

    cov_xy (stats.Cov)

    +

    Running covariance of x and y.

    +
  • +
+
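
The three attributes above are combined as r = cov(x, y) / (std(x) * std(y)). Here is a sketch reproducing the fourth output of the example below from those building blocks:

from river import stats

cov, var_x, var_y = stats.Cov(), stats.Var(), stats.Var()
for xi, yi in zip([0, 0, 0, 1], [0, 1, 2, 3]):
    cov = cov.update(xi, yi)
    var_x = var_x.update(xi)
    var_y = var_y.update(yi)

r = cov.get() / (var_x.get() ** 0.5 * var_y.get() ** 0.5)
print(r)  # approximately 0.774596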

Examples

+

from river import stats
+
+x = [0, 0, 0, 1, 1, 1, 1]
+y = [0, 1, 2, 3, 4, 5, 6]
+
+pearson = stats.PearsonCorr()
+
+for xi, yi in zip(x, y):
+    print(pearson.update(xi, yi).get())
+
+
0
+0
+0
+0.774596
+0.866025
+0.878310
+0.866025
+

+

You can also do this in a rolling fashion:

+

from river import utils
+
+x = [0, 0, 0, 1, 1, 1, 1]
+y = [0, 1, 2, 3, 4, 5, 6]
+
+pearson = utils.Rolling(stats.PearsonCorr(), window_size=4)
+
+for xi, yi in zip(x, y):
+    print(pearson.update(xi, yi).get())
+
+
0
+0
+0
+0.7745966692414834
+0.8944271909999159
+0.7745966692414832
+-4.712160915387242e-09
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+revert +
+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
+
+

diff --git a/0.19.0/api/stats/Quantile/index.html b/0.19.0/api/stats/Quantile/index.html

Quantile

+

Running quantile.

+

Uses the P² algorithm, which is also known as the "Piecewise-Parabolic quantile estimator". The code is inspired by LiveStats' implementation.

+

Parameters

+
    +
  • +

    q

    +

    Typefloat

    +

    Default0.5

    +

    Determines which quantile to compute, must be comprised between 0 and 1.

    +
  • +
+

Attributes

+
    +
  • name
  • +
+

Examples

+

from river import stats
+import numpy as np
+
+np.random.seed(42 * 1337)
+mu, sigma = 0, 1
+s = np.random.normal(mu, sigma, 500)
+
+median = stats.Quantile(0.5)
+for x in s:
+   _ = median.update(x)
+print(f'The estimated value of the 50th (median) quantile is {median.get():.4f}')
+
+
The estimated value of the 50th (median) quantile is -0.0275
+

+

print(f'The real value of the 50th (median) quantile is {np.median(s):.4f}')
+
+
The real value of the 50th (median) quantile is -0.0135
+

+

percentile_17 = stats.Quantile(0.17)
+for x in s:
+   _ = percentile_17.update(x)
+print(f'The estimated value of the 17th quantile is {percentile_17.get():.4f}')
+
+
The estimated value of the 17th quantile is -0.8652
+

+

print(f'The real value of the 17th quantile is {np.percentile(s,17):.4f}')
+
+
The real value of the 17th quantile is -0.9072
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/RollingAbsMax/index.html b/0.19.0/api/stats/RollingAbsMax/index.html

RollingAbsMax

+

Running absolute max over a window.

+

Parameters

+
    +
  • +

    window_size

    +

    Typeint

    +

    Size of the rolling window.

    +
  • +
+

Attributes

+
    +
  • +

    name

    +
  • +
  • +

    window_size

    +
  • +
+

Examples

+

from river import stats
+
+X = [1, -4, 3, -2, 2, 1]
+rolling_absmax = stats.RollingAbsMax(window_size=2)
+for x in X:
+    print(rolling_absmax.update(x).get())
+
+
1
+4
+4
+3
+2
+2
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/RollingIQR/index.html b/0.19.0/api/stats/RollingIQR/index.html

RollingIQR

+

Computes the rolling interquartile range.

+

Parameters

+
    +
  • +

    window_size

    +

    Typeint

    +

    Size of the window.

    +
  • +
  • +

    q_inf

    +

    Default0.25

    +

    Desired inferior quantile, must be between 0 and 1. Defaults to 0.25.

    +
  • +
  • +

    q_sup

    +

    Default0.75

    +

    Desired superior quantile, must be between 0 and 1. Defaults to 0.75.

    +
  • +
+

Attributes

+
    +
  • +

    name

    +
  • +
  • +

    window_size

    +
  • +
+

Examples

+

from river import stats
+rolling_iqr = stats.RollingIQR(
+    q_inf=0.25,
+    q_sup=0.75,
+    window_size=101
+)
+
+for i in range(0, 1001):
+    rolling_iqr = rolling_iqr.update(i)
+    if i % 100 == 0:
+        print(rolling_iqr.get())
+
+
0.0
+50.0
+50.0
+50.0
+50.0
+50.0
+50.0
+50.0
+50.0
+50.0
+50.0
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/RollingMax/index.html b/0.19.0/api/stats/RollingMax/index.html

RollingMax

+

Running max over a window.

+

Parameters

+
    +
  • +

    window_size

    +

    Typeint

    +

    Size of the rolling window.

    +
  • +
+

Attributes

+
    +
  • +

    name

    +
  • +
  • +

    window_size

    +
  • +
+

Examples

+

from river import stats
+
+X = [1, -4, 3, -2, 2, 1]
+rolling_max = stats.RollingMax(window_size=2)
+for x in X:
+    print(rolling_max.update(x).get())
+
+
1
+1
+3
+3
+2
+2
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/RollingMin/index.html b/0.19.0/api/stats/RollingMin/index.html

RollingMin

+

Running min over a window.

+

Parameters

+
    +
  • +

    window_size

    +

    Typeint

    +

    Size of the rolling window.

    +
  • +
+

Attributes

+
    +
  • +

    name

    +
  • +
  • +

    window_size

    +
  • +
+

Examples

+

from river import stats
+
+X = [1, -4, 3, -2, 2, 1]
+rolling_min = stats.RollingMin(2)
+for x in X:
+    print(rolling_min.update(x).get())
+
+
1
+-4
+-4
+-2
+-2
+1
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/RollingMode/index.html b/0.19.0/api/stats/RollingMode/index.html

RollingMode

+

Running mode over a window.

+

The mode is the most common value.

+

Parameters

+
    +
  • +

    window_size

    +

    Typeint

    +

    Size of the rolling window.

    +
  • +
+

Attributes

+
    +
  • +

    counts (collections.defaultdict)

    +

    Value counts.

    +
  • +
+

Examples

+

from river import stats
+
+X = ['sunny', 'sunny', 'sunny', 'rainy', 'rainy', 'rainy', 'rainy']
+rolling_mode = stats.RollingMode(window_size=2)
+for x in X:
+    print(rolling_mode.update(x).get())
+
+
sunny
+sunny
+sunny
+sunny
+rainy
+rainy
+rainy
+

+

rolling_mode = stats.RollingMode(window_size=5)
+for x in X:
+    print(rolling_mode.update(x).get())
+
+
sunny
+sunny
+sunny
+sunny
+sunny
+rainy
+rainy
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/RollingPeakToPeak/index.html b/0.19.0/api/stats/RollingPeakToPeak/index.html

RollingPeakToPeak

+

Running peak to peak (max - min) over a window.

+

Parameters

+
    +
  • +

    window_size

    +

    Typeint

    +

    Size of the rolling window.

    +
  • +
+

Attributes

+ +

Examples

+

from river import stats
+
+X = [1, -4, 3, -2, 2, 1]
+ptp = stats.RollingPeakToPeak(window_size=2)
+for x in X:
+    print(ptp.update(x).get())
+
+
0
+5
+7
+5
+4
+1
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/RollingQuantile/index.html b/0.19.0/api/stats/RollingQuantile/index.html

RollingQuantile

+

Running quantile over a window.

+

Parameters

+
    +
  • +

    q

    +

    Typefloat

    +

    Determines which quantile to compute, must be comprised between 0 and 1.

    +
  • +
  • +

    window_size

    +

    Typeint

    +

    Size of the window.

    +
  • +
+

Attributes

+
    +
  • +

    name

    +
  • +
  • +

    window_size

    +
  • +
+

Examples

+

from river import stats
+
+rolling_quantile = stats.RollingQuantile(
+    q=.5,
+    window_size=101,
+)
+
+for i in range(1001):
+    rolling_quantile = rolling_quantile.update(i)
+    if i % 100 == 0:
+        print(rolling_quantile.get())
+
+
0.0
+50.0
+150.0
+250.0
+350.0
+450.0
+550.0
+650.0
+750.0
+850.0
+950.0
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

+
+
+
  1. Left sorted
diff --git a/0.19.0/api/stats/SEM/index.html b/0.19.0/api/stats/SEM/index.html

SEM

+

Running standard error of the mean using Welford's algorithm.

+
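
The standard error of the mean is the standard deviation divided by the square root of the number of observations, i.e. sem = sqrt(var / n). As a sketch, the first outputs of the example below can be reproduced from a running variance:

from river import stats

var, n = stats.Var(ddof=1), 0
for x in [3, 5, 4]:
    var = var.update(x)
    n += 1
    print((var.get() / n) ** 0.5)  # 0.0, 1.0, 0.57735...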

Parameters

+
    +
  • +

    ddof

    +

    Default1

    +

    Delta Degrees of Freedom. The divisor used in calculations is n - ddof, where n is the number of seen elements.

    +
  • +
+

Attributes

+
    +
  • +

    n (int)

    +

    Number of observations.

    +
  • +
+

Examples

+

from river import stats
+
+X = [3, 5, 4, 7, 10, 12]
+
+sem = stats.SEM()
+for x in X:
+    print(sem.update(x).get())
+
+
0.0
+1.0
+0.577350
+0.853912
+1.240967
+1.447219
+

+

from river import utils
+
+X = [1, 4, 2, -4, -8, 0]
+
+rolling_sem = utils.Rolling(stats.SEM(ddof=1), window_size=3)
+for x in X:
+    print(rolling_sem.update(x).get())
+
+
0.0
+1.5
+0.881917
+2.403700
+2.905932
+2.309401
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+revert +
+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
  • w — defaults to 1.0
  • +
+
+

+
+update_many +
diff --git a/0.19.0/api/stats/Shift/index.html b/0.19.0/api/stats/Shift/index.html

Shift

+

Shifts a data stream by returning past values.

+

This can be used to compute statistics over past data. For instance, if you're computing daily averages, then shifting by 7 will be equivalent to computing averages from a week ago.

+

Shifting values is useful when you're calculating an average over a target value. Indeed, in this case it's important to shift the values in order not to introduce leakage. The recommended way to do this is to use feature_extraction.TargetAgg, which already takes care of shifting the target values once.

+

Parameters

+
    +
  • +

    amount

    +

    Default1

    +

    Shift amount. The get method will return the t - amount value, where t is the current moment.

    +
  • +
  • +

    fill_value

    +

    DefaultNone

    +

    This value will be returned by the get method if not enough values have been observed.

    +
  • +
+

Attributes

+
    +
  • name
  • +
+

Examples

+

It is rare to have to use Shift by itself. A more common usage is to compose it with other statistics. This can be done via the | operator.

+

from river import stats
+
+stat = stats.Shift(1) | stats.Mean()
+
+for i in range(5):
+    stat = stat.update(i)
+    print(stat.get())
+
+
0.0
+0.0
+0.5
+1.0
+1.5
+

+

A common use case for Shift is computing statistics on shifted data. For instance, say you have a dataset which records the amount of sales for a set of shops. You might then have a shop field and a sales field. Let's say you want to look at the average amount of sales per shop. You can do this by using a feature_extraction.Agg. When you call transform_one, you're expecting it to return the average amount of sales, without including today's sales. You can do this by prepending an instance of stats.Mean with an instance of stats.Shift.

+
from river import feature_extraction
+
+agg = feature_extraction.Agg(
+    on='sales',
+    how=stats.Shift(1) | stats.Mean(),
+    by='shop'
+)
+
+

Let's define a little example dataset.

+
X = iter([
+    {'shop': 'Ikea', 'sales': 10},
+    {'shop': 'Ikea', 'sales': 15},
+    {'shop': 'Ikea', 'sales': 20}
+])
+
+

Now let's call the learn_one method to update our feature extractor.

+
x = next(X)
+agg = agg.learn_one(x)
+
+

At this point, the average defaults to the initial value of stats.Mean, which is 0.

+

agg.transform_one(x)
+
+
{'sales_mean_of_shift_1_by_shop': 0.0}
+

+

We can now update our feature extractor with the next data point and check the output.

+

agg = agg.learn_one(next(X))
+agg.transform_one(x)
+
+
{'sales_mean_of_shift_1_by_shop': 10.0}
+

+

agg = agg.learn_one(next(X))
+agg.transform_one(x)
+
+
{'sales_mean_of_shift_1_by_shop': 12.5}
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/Skew/index.html b/0.19.0/api/stats/Skew/index.html

Skew

+

Running skew using Welford's algorithm.

+

Parameters

+
    +
  • +

    bias

    +

    DefaultFalse

    +

    If False, then the calculations are corrected for statistical bias.

    +
  • +
+

Attributes

+
    +
  • name
  • +
+

Examples

+

from river import stats
+import numpy as np
+
+np.random.seed(42)
+X = np.random.normal(loc=0, scale=1, size=10)
+
+skew = stats.Skew(bias=False)
+for x in X:
+    print(skew.update(x).get())
+
+
0.0
+0.0
+-1.4802398132849872
+0.5127437186677888
+0.7803466510704751
+1.056115628922055
+0.5057840774320389
+0.3478402420400934
+0.4536710660918704
+0.4123070197493227
+

+

skew = stats.Skew(bias=True)
+for x in X:
+    print(skew.update(x).get())
+
+
0.0
+0.0
+-0.6043053732501439
+0.2960327239981376
+0.5234724473423674
+0.7712778043924866
+0.39022088752624845
+0.278892645224261
+0.37425953513864063
+0.3476878073823696
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/Sum/index.html b/0.19.0/api/stats/Sum/index.html

Sum

+

Running sum.

+

Attributes

+
    +
  • +

    sum (float)

    +

    The running sum.

    +
  • +
+

Examples

+

from river import stats
+
+X = [-5, -3, -1, 1, 3, 5]
+running_sum = stats.Sum()
+for x in X:
+    print(running_sum.update(x).get())
+
+
-5.0
+-8.0
+-9.0
+-8.0
+-5.0
+0.0
+

+

from river import utils
+
+X = [1, -4, 3, -2, 2, 1]
+rolling_sum = utils.Rolling(stats.Sum(), window_size=2)
+for x in X:
+    print(rolling_sum.update(x).get())
+
+
1.0
+-3.0
+-1.0
+1.0
+0.0
+3.0
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+revert +
+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stats/Var/index.html b/0.19.0/api/stats/Var/index.html

Var

+

Running variance using Welford's algorithm.

+
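
For intuition, here is a minimal standalone sketch of Welford's recurrence with ddof=1; it reproduces the first outputs of the example below.

n = 0
mean = 0.0
m2 = 0.0  # running sum of squared deviations from the current mean
for x in [3, 5, 4]:
    n += 1
    delta = x - mean
    mean += delta / n
    m2 += delta * (x - mean)
    print(m2 / (n - 1) if n > 1 else 0.0)  # 0.0, 2.0, 1.0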

Parameters

+
    +
  • +

    ddof

    +

    Default1

    +

    Delta Degrees of Freedom. The divisor used in calculations is n - ddof, where n represents the number of seen elements.

    +
  • +
+

Attributes

+
    +
  • +

    mean

    +

    It is necessary to calculate the mean of the data in order to calculate its variance.

    +
  • +
+

Examples

+

from river import stats
+
+X = [3, 5, 4, 7, 10, 12]
+
+var = stats.Var()
+for x in X:
+    print(var.update(x).get())
+
+
0.0
+2.0
+1.0
+2.916666
+7.7
+12.56666
+

+

You can measure a rolling variance by using a utils.Rolling wrapper:

+

from river import utils
+
+X = [1, 4, 2, -4, -8, 0]
+rvar = utils.Rolling(stats.Var(ddof=1), window_size=3)
+for x in X:
+    print(rvar.update(x).get())
+
+
0.0
+4.5
+2.333333
+17.333333
+25.333333
+16.0
+

+

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+revert +
+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
  • w — defaults to 1.0
  • +
+
+

+
+update_many +
+

Notes

+

The outcomes of the incremental and parallel updates are consistent with numpy's batch processing when \(\text{ddof} \le 1\).

+
+
+
  1. Wikipedia article on algorithms for calculating variance
  2. Chan, T.F., Golub, G.H. and LeVeque, R.J., 1983. Algorithms for computing the sample variance: Analysis and recommendations. The American Statistician, 37(3), pp.242-247.
  3. Schubert, E. and Gertz, M., 2018, July. Numerically stable parallel computation of (co-)variance. In Proceedings of the 30th International Conference on Scientific and Statistical Database Management (pp. 1-12).
diff --git a/0.19.0/api/stats/base/Bivariate/index.html b/0.19.0/api/stats/base/Bivariate/index.html

Bivariate

+

A bivariate statistic measures a relationship between two variables.

+
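
Concrete bivariate statistics in this module include stats.Cov and stats.PearsonCorr: their update method takes two values rather than one. A minimal sketch:

from river import stats

cov = stats.Cov()  # Cov is a concrete Bivariate statistic
for x, y in [(1, 2), (2, 4), (3, 6)]:
    cov = cov.update(x, y)
cov.get()  # 2.0 for this perfectly linear data (ddof=1)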

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
+
+

diff --git a/0.19.0/api/stats/base/Univariate/index.html b/0.19.0/api/stats/base/Univariate/index.html

Univariate

+

A univariate statistic measures a property of a variable.

+

Attributes

+
    +
  • name
  • +
+
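
Most statistics in this module, such as stats.Mean, are concrete Univariate implementations: their update method takes a single value. A minimal sketch:

from river import stats

mean = stats.Mean()  # Mean is a concrete Univariate statistic
for x in [1, 2, 3]:
    mean = mean.update(x)
mean.get()  # 2.0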

Methods

+
+get +

Return the current value of the statistic.

+
+

+
+update +

Update and return the called instance.

+

Parameters

+
    +
  • x'numbers.Number'
  • +
+
+

diff --git a/0.19.0/api/stream/Cache/index.html b/0.19.0/api/stream/Cache/index.html

Cache

+

Utility for caching iterables.

+

This can be used to save a stream of data to the disk in order to iterate over it faster the following time. This can save time depending on the nature of the stream. The more processing happens in a stream, the more time will be saved. Even in the case where no processing is done apart from reading the data, the cache will save some time because it uses the pickle binary protocol. It can thus improve the speed in common cases such as reading from a CSV file.

+

Parameters

+
    +
  • +

    directory

    +

    DefaultNone

    +

    The path where to store the pickled data streams. If not provided, then it will be automatically inferred whenever possible, if not an exception will be raised.

    +
  • +
+

Attributes

+
    +
  • +

    keys (set)

    +

    The set of keys that are being cached.

    +
  • +
+

Examples

+
import time
+from river import datasets
+from river import stream
+
+dataset = datasets.Phishing()
+cache = stream.Cache()
+
+

The cache can be used by wrapping it around an iterable. Because this is the first time we are iterating over the data, nothing is cached.

+

tic = time.time()
+for x, y in cache(dataset, key='phishing'):
+    pass
+toc = time.time()
+print(toc - tic)  # doctest: +SKIP
+
+
0.012813
+

+

If we do the same thing again, we can see the loop is now faster.

+

tic = time.time()
+for x, y in cache(dataset, key='phishing'):
+    pass
+toc = time.time()
+print(toc - tic)  # doctest: +SKIP
+
+
0.001927
+

+

We can see an overview of the cache. The first line indicates the location of the cache.

+

cache  # doctest: +SKIP
+
+
/tmp
+phishing - 125.2KiB
+

+

Finally, we can clear the stream from the cache.

+

cache.clear('phishing')
+cache  # doctest: +SKIP
+
+
/tmp
+

+

There is also a clear_all method to remove all the items in the cache.

+
cache.clear_all()
+
+

Methods

+
+call +

Call self as a function.

+

Parameters

+
    +
  • stream
  • +
  • key — defaults to None
  • +
+
+

+
+clear +

Delete the cached stream associated with the given key.

+

Parameters

+
    +
  • key'str'
  • +
+
+

+
+clear_all +

Delete all the cached streams.

+
+

diff --git a/0.19.0/api/stream/TwitchChatStream/index.html b/0.19.0/api/stream/TwitchChatStream/index.html

TwitchChatStream

+

Twitch chat stream client.

+

This client gives access to a live stream of chat messages in Twitch channels using the IRC protocol. You need to have a Twitch account and receive an OAuth token from https://twitchapps.com/tmi/.

+

Parameters

+
    +
  • +

    nickname

    +

    Typestr

    +

    The nickname of your account.

    +
  • +
  • +

    token

    +

    Typestr

    +

    OAuth token which has been generated.

    +
  • +
  • +

    channels

    +

    Typelist[str]

    +

    A list of channel names like ["asmongold", "shroud"] you want to collect messages from.

    +
  • +
  • +

    buffer_size

    +

    Typeint

    +

    Default2048

    +

    Size of buffer in bytes used for receiving responses from Twitch with IRC (default 2 kB).

    +
  • +
  • +

    timeout

    +

    Typeint

    +

    Default60

    +

    A timeout value in seconds for waiting response from Twitch (default 60s). It can be useful if all requested channels are offline or chat is not active enough.

    +
  • +
+

Examples

+

The live stream is instantiated by passing your Twitch account nickname, OAuth token and list of channels. Other parameters are optional.

+
from river import stream
+
+twitch_chat = stream.TwitchChatStream(
+    nickname="twitch_user1",
+    token="oauth:okrip6j6fjio8n5xpy2oum1lph4fbve",
+    channels=["asmongold", "shroud"]
+)
+
+

The stream can be iterated over like this:

+
for item in twitch_chat:
+    print(item)
+
+

Here's a single stream item example: +

{
+    'dt': datetime.datetime(2022, 9, 14, 10, 33, 37, 989560),
+    'channel': 'asmongold',
+    'username': 'moojiejaa',
+    'msg': 'damn this chat mod are wild'
+}
+

+
+
+
  1. Twitch IRC doc
diff --git a/0.19.0/api/stream/TwitterLiveStream/index.html b/0.19.0/api/stream/TwitterLiveStream/index.html

TwitterLiveStream

+

Twitter API v2 live stream client.

+

This client gives access to a live stream of Tweets. That is, Tweets that have just been published. This is different to stream.TwitterRecentStream, which also covers Tweets that have been published over recent days, and not necessarily in real-time.

+

A list of filtering rules has to be provided. For instance, this allows focusing on a subset of topics and/or users.

+
+

Note

+

Using this requires having the requests package installed.

+
+

Parameters

+
    +
  • +

    rules

    +

    See the documentation[^2] for a comprehensive overview of filtering rules.

    +
  • +
  • +

    bearer_token

    +

    A bearer token that is available in each account's developer portal.

    +
  • +
+

Examples

+

The live stream is instantiated by passing a list of filtering rules, as well as a bearer + token. For instance, we can listen to all the breaking news Tweets from the BBC and CNN.

+
from river import stream
+
+tweets = stream.TwitterLiveStream(
+    rules=["from:BBCBreaking", "from:cnnbrk"],
+    bearer_token="<insert_bearer_token>"
+)
+
+
The stream can then be iterated over, possibly in an infinite loop. This will listen to the
+live feed of Tweets and produce a Tweet right after it's been published.

+
import logging
+import time
+
+import requests
+
+while True:
+    try:
+        for tweet in tweets:
+            print(tweet)
+    except requests.exceptions.RequestException as e:
+        logging.warning(str(e))
+        time.sleep(10)
+

+

Here's a Tweet example:

+
{
+    'data': {
+        'author_id': '428333',
+        'created_at': '2022-08-26T12:59:48.000Z',
+        'id': '1563149212774445058',
+        'text': "Ukraine's Zaporizhzhia nuclear power plant, which is currently held by
+Russian forces, has been reconnected to Ukraine's electricity grid, according to the
+country's nuclear operator https://t.co/xfylkBs4JR"
+    },
+    'includes': {
+        'users': [
+            {
+                'created_at': '2007-01-02T01:48:14.000Z',
+                'id': '428333',
+                'name': 'CNN Breaking News',
+                'username': 'cnnbrk'
+            }
+        ]
+    },
+    'matching_rules': [{'id': '1563148866333151233', 'tag': 'from:cnnbrk'}]
+}
+

+
+
    +
  1. +

    Filtered stream introduction 

    +
  2. +
  3. +

    Building rules for filtered stream 

    +
  4. +
  5. +

    Stream Tweets in real-time 

    +
  6. +

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/stream/iter-arff/index.html b/0.19.0/api/stream/iter-arff/index.html new file mode 100644 index 0000000000..76205cc81b --- /dev/null +++ b/0.19.0/api/stream/iter-arff/index.html @@ -0,0 +1,3736 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iter_arff - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

iter_arff

+

Iterates over rows from an ARFF file.

+

Parameters

+
    +
  • +

    filepath_or_buffer

    +

    Either a string indicating the location of a file, or a buffer object that has a read method.

    +
  • +
  • +

    target

    +

    Typestr | list[str] | None

    +

    DefaultNone

    +

    Name(s) of the target field. If None, then the target field is ignored. If a list of names is passed, then a dictionary is returned instead of a single value.

    +
  • +
  • +

    compression

    +

    Defaultinfer

    +

    For on-the-fly decompression of on-disk data. If this is set to 'infer' and filepath_or_buffer is a path, then the decompression method is inferred for the following extensions: '.gz', '.zip'.

    +
  • +
  • +

    sparse

    +

    DefaultFalse

    +

    Whether the data is sparse or not.

    +
  • +
+

Examples

+

cars = '''
+@relation CarData
+@attribute make {Toyota, Honda, Ford, Chevrolet}
+@attribute model string
+@attribute year numeric
+@attribute price numeric
+@attribute mpg numeric
+@data
+Toyota, Corolla, 2018, 15000, 30.5
+Honda, Civic, 2019, 16000, 32.2
+Ford, Mustang, 2020, 25000, 25.0
+Chevrolet, Malibu, 2017, 18000, 28.9
+Toyota, Camry, 2019, 22000, 29.8
+'''
+with open('cars.arff', mode='w') as f:
+    _ = f.write(cars)
+
+from river import stream
+
+for x, y in stream.iter_arff('cars.arff', target='price'):
+    print(x, y)
+
+
{'make': 'Toyota', 'model': ' Corolla', 'year': 2018.0, 'mpg': 30.5} 15000.0
+{'make': 'Honda', 'model': ' Civic', 'year': 2019.0, 'mpg': 32.2} 16000.0
+{'make': 'Ford', 'model': ' Mustang', 'year': 2020.0, 'mpg': 25.0} 25000.0
+{'make': 'Chevrolet', 'model': ' Malibu', 'year': 2017.0, 'mpg': 28.9} 18000.0
+{'make': 'Toyota', 'model': ' Camry', 'year': 2019.0, 'mpg': 29.8} 22000.0
+

+

Finally, let's delete the example file.

+
import os; os.remove('cars.arff')
+
+

ARFF files support sparse data. Let's create a sparse ARFF file.

+
sparse = '''
+% traindata
+@RELATION "traindata: -C 6"
+@ATTRIBUTE y0 {0, 1}
+@ATTRIBUTE y1 {0, 1}
+@ATTRIBUTE y2 {0, 1}
+@ATTRIBUTE y3 {0, 1}
+@ATTRIBUTE y4 {0, 1}
+@ATTRIBUTE y5 {0, 1}
+@ATTRIBUTE X0 NUMERIC
+@ATTRIBUTE X1 NUMERIC
+@ATTRIBUTE X2 NUMERIC
+@DATA
+{ 3 1,6 0.863382,8 0.820094 }
+{ 2 1,6 0.659761 }
+{ 0 1,3 1,6 0.437881,8 0.818882 }
+{ 2 1,6 0.676477,7 0.724635,8 0.755123 }
+'''
+
+with open('sparse.arff', mode='w') as f:
+    _ = f.write(sparse)
+
+

In addition, we'll specify that there are several target fields.

+

arff_stream = stream.iter_arff(
+    'sparse.arff',
+    target=['y0', 'y1', 'y2', 'y3', 'y4', 'y5'],
+    sparse=True
+)
+
+for x, y in arff_stream:
+    print(x)
+    print(y)
+
+
{'X0': '0.863382', 'X2': '0.820094'}
+{'y0': 0, 'y1': 0, 'y2': 0, 'y3': '1', 'y4': 0, 'y5': 0}
+{'X0': '0.659761'}
+{'y0': 0, 'y1': 0, 'y2': '1', 'y3': 0, 'y4': 0, 'y5': 0}
+{'X0': '0.437881', 'X2': '0.818882'}
+{'y0': '1', 'y1': 0, 'y2': 0, 'y3': '1', 'y4': 0, 'y5': 0}
+{'X0': '0.676477', 'X1': '0.724635', 'X2': '0.755123'}
+{'y0': 0, 'y1': 0, 'y2': '1', 'y3': 0, 'y4': 0, 'y5': 0}
+

+

This function can also deal with missing features in non-sparse data. These are indicated with +a question mark.

+

data = '''
+@relation giveMeLoan-weka.filters.unsupervised.attribute.Remove-R1
+@attribute RevolvingUtilizationOfUnsecuredLines numeric
+@attribute age numeric
+@attribute NumberOfTime30-59DaysPastDueNotWorse numeric
+@attribute DebtRatio numeric
+@attribute MonthlyIncome numeric
+@attribute NumberOfOpenCreditLinesAndLoans numeric
+@attribute NumberOfTimes90DaysLate numeric
+@attribute NumberRealEstateLoansOrLines numeric
+@attribute NumberOfTime60-89DaysPastDueNotWorse numeric
+@attribute NumberOfDependents numeric
+@attribute isFraud {0,1}
+@data
+0.213179,74,0,0.375607,3500,3,0,1,0,1,0
+0.305682,57,0,5710,?,8,0,3,0,0,0
+0.754464,39,0,0.20994,3500,8,0,0,0,0,0
+0.116951,27,0,46,?,2,0,0,0,0,0
+0.189169,57,0,0.606291,23684,9,0,4,0,2,0
+'''
+
+with open('data.arff', mode='w') as f:
+    _ = f.write(data)
+
+for x, y in stream.iter_arff('data.arff', target='isFraud'):
+    print(len(x))
+
+
10
+9
+10
+9
+10
+
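
Compressed files can be read too; with compression='infer', the decompression method is deduced from the extension. A small sketch reusing the data snippet from above (the file name is arbitrary):

import gzip
+
+with gzip.open('data.arff.gz', mode='wt') as f:
+    _ = f.write(data)
+
+for x, y in stream.iter_arff('data.arff.gz', target='isFraud'):
+    print(len(x))
+    break
+
+import os; os.remove('data.arff.gz')
+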

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/stream/iter-array/index.html b/0.19.0/api/stream/iter-array/index.html new file mode 100644 index 0000000000..32f9abcb03 --- /dev/null +++ b/0.19.0/api/stream/iter-array/index.html @@ -0,0 +1,3663 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iter_array - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

iter_array

+

Iterates over the rows from an array of features and an array of targets.

+

This method is intended to work with numpy arrays, but should also work with Python lists.

+

Parameters

+
    +
  • +

    X

    +

    Typenp.ndarray

    +

    A 2D array of features. This can also be a 1D array of strings, which can be the case if you're working with text.

    +
  • +
  • +

    y

    +

    Typenp.ndarray | None

    +

    DefaultNone

    +

    An optional array of targets.

    +
  • +
  • +

    feature_names

    +

    Typelist[base.typing.FeatureName] | None

    +

    DefaultNone

    +

    An optional list of feature names. The features will be labeled with integers if no names are provided.

    +
  • +
  • +

    target_names

    +

    Typelist[base.typing.FeatureName] | None

    +

    DefaultNone

    +

    An optional list of output names. The outputs will be labeled with integers if no names are provided. Only applies if there are multiple outputs, i.e. if y is a 2D array.

    +
  • +
  • +

    shuffle

    +

    Typebool

    +

    DefaultFalse

    +

    Indicates whether or not to shuffle the input arrays before iterating over them.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed used for shuffling the data.

    +
  • +
+

Examples

+

from river import stream
+import numpy as np
+
+X = np.array([[1, 2, 3], [11, 12, 13]])
+Y = np.array([True, False])
+
+dataset = stream.iter_array(
+    X, Y,
+    feature_names=['x1', 'x2', 'x3']
+)
+for x, y in dataset:
+    print(x, y)
+
+
{'x1': 1, 'x2': 2, 'x3': 3} True
+{'x1': 11, 'x2': 12, 'x3': 13} False
+

+

This also works with an array of texts:

+

X = ["foo", "bar"]
+dataset = stream.iter_array(
+    X, Y,
+    feature_names=['x1', 'x2', 'x3']
+)
+for x, y in dataset:
+    print(x, y)
+
+
foo True
+bar False
+
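
The shuffle and seed parameters randomize the iteration order in a reproducible way. A brief sketch; the exact order depends on the seed:

X = np.array([[1, 2], [3, 4], [5, 6]])
+Y = np.array([0, 1, 2])
+
+for x, y in stream.iter_array(X, Y, shuffle=True, seed=42):
+    print(x, y)
+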

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/stream/iter-csv/index.html b/0.19.0/api/stream/iter-csv/index.html new file mode 100644 index 0000000000..bf97f5a277 --- /dev/null +++ b/0.19.0/api/stream/iter-csv/index.html @@ -0,0 +1,3704 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iter_csv - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

iter_csv

+

Iterates over rows from a CSV file.

+

Reading CSV files can be quite slow. If, for whatever reason, you're going to loop through the same file multiple times, then we recommend that you use the stream.Cache utility.

+

Parameters

+
    +
  • +

    filepath_or_buffer

    +

    Either a string indicating the location of a file, or a buffer object that has a read method.

    +
  • +
  • +

    target

    +

    Typestr | list[str] | None

    +

    DefaultNone

    +

    A single target column is assumed if a string is passed. A multiple output scenario is assumed if a list of strings is passed. A None value will be assigned to each y if this parameter is omitted.

    +
  • +
  • +

    converters

    +

    Typedict | None

    +

    DefaultNone

    +

    All values in the CSV are interpreted as strings by default. You can use this parameter to cast values to the desired type. This should be a dict mapping feature names to callables used to parse their associated values. Note that a callable may be a type, such as float and int.

    +
  • +
  • +

    parse_dates

    +

    Typedict | None

    +

    DefaultNone

    +

    A dict mapping feature names to a format passed to the datetime.datetime.strptime method.

    +
  • +
  • +

    drop

    +

    Typelist[str] | None

    +

    DefaultNone

    +

    Fields to ignore.

    +
  • +
  • +

    drop_nones

    +

    DefaultFalse

    +

    Whether or not to drop fields where the value is a None.

    +
  • +
  • +

    fraction

    +

    Default1.0

    +

    Sampling fraction.

    +
  • +
  • +

    compression

    +

    Defaultinfer

    +

    For on-the-fly decompression of on-disk data. If this is set to 'infer' and filepath_or_buffer is a path, then the decompression method is inferred for the following extensions: '.gz', '.zip'.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    If specified, the sampling will be deterministic.

    +
  • +
  • +

    field_size_limit

    +

    Typeint | None

    +

    DefaultNone

    +

    If not None, this will be passed to the csv.field_size_limit function.

    +
  • +
  • +

    kwargs

    +

    All other keyword arguments are passed to the underlying csv.DictReader.

    +
  • +
+

Examples

+

Although this function is designed to handle different kinds of inputs, the most common +use case is to read a file on the disk. We'll first create a little CSV file to illustrate.

+
tv_shows = '''name,year,rating
+Planet Earth II,2016,9.5
+Planet Earth,2006,9.4
+Band of Brothers,2001,9.4
+Breaking Bad,2008,9.4
+Chernobyl,2019,9.4
+'''
+with open('tv_shows.csv', mode='w') as f:
+    _ = f.write(tv_shows)
+
+

We can now go through the rows one by one. We can use the converters parameter to cast +the rating field value as a float. We can also convert the year to a datetime via +the parse_dates parameter.

+

from river import stream
+
+params = {
+    'converters': {'rating': float},
+    'parse_dates': {'year': '%Y'}
+}
+for x, y in stream.iter_csv('tv_shows.csv', **params):
+    print(x, y)
+
+
{'name': 'Planet Earth II', 'year': datetime.datetime(2016, 1, 1, 0, 0), 'rating': 9.5} None
+{'name': 'Planet Earth', 'year': datetime.datetime(2006, 1, 1, 0, 0), 'rating': 9.4} None
+{'name': 'Band of Brothers', 'year': datetime.datetime(2001, 1, 1, 0, 0), 'rating': 9.4} None
+{'name': 'Breaking Bad', 'year': datetime.datetime(2008, 1, 1, 0, 0), 'rating': 9.4} None
+{'name': 'Chernobyl', 'year': datetime.datetime(2019, 1, 1, 0, 0), 'rating': 9.4} None
+

+

The value of y is always None because we haven't provided a value for the target +parameter. Here is an example where a target is provided:

+

dataset = stream.iter_csv('tv_shows.csv', target='rating', **params)
+for x, y in dataset:
+    print(x, y)
+
+
{'name': 'Planet Earth II', 'year': datetime.datetime(2016, 1, 1, 0, 0)} 9.5
+{'name': 'Planet Earth', 'year': datetime.datetime(2006, 1, 1, 0, 0)} 9.4
+{'name': 'Band of Brothers', 'year': datetime.datetime(2001, 1, 1, 0, 0)} 9.4
+{'name': 'Breaking Bad', 'year': datetime.datetime(2008, 1, 1, 0, 0)} 9.4
+{'name': 'Chernobyl', 'year': datetime.datetime(2019, 1, 1, 0, 0)} 9.4
+

+
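
Fields can also be ignored on the fly via the drop parameter. A small sketch on the same file:

for x, y in stream.iter_csv('tv_shows.csv', target='rating', drop=['year'], converters={'rating': float}):
+    print(x, y)
+    break
+
+
{'name': 'Planet Earth II'} 9.5
+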

Finally, let's delete the example file.

+
import os; os.remove('tv_shows.csv')
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/stream/iter-libsvm/index.html b/0.19.0/api/stream/iter-libsvm/index.html new file mode 100644 index 0000000000..53d7b54391 --- /dev/null +++ b/0.19.0/api/stream/iter-libsvm/index.html @@ -0,0 +1,3638 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iter_libsvm - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

iter_libsvm

+

Iterates over a dataset in LIBSVM format.

+

The LIBSVM format is a popular way in the machine learning community to store sparse datasets. Only numerical feature values are supported. The feature names will be considered as strings.

+

Parameters

+
    +
  • +

    filepath_or_buffer

    +

    Typestr

    +

    Either a string indicating the location of a file, or a buffer object that has a read method.

    +
  • +
  • +

    target_type

    +

    Default<class 'float'>

    +

    The type of the target value.

    +
  • +
  • +

    compression

    +

    Defaultinfer

    +

    For on-the-fly decompression of on-disk data. If this is set to 'infer' and filepath_or_buffer is a path, then the decompression method is inferred for the following extensions: '.gz', '.zip'.

    +
  • +
+

Examples

+

import io
+from river import stream
+
+data = io.StringIO('''+1 x:-134.26 y:0.2563
+1 x:-12 z:0.3
+-1 y:.25
+''')
+
+for x, y in stream.iter_libsvm(data, target_type=int):
+    print(y, x)
+
+
1 {'x': -134.26, 'y': 0.2563}
+1 {'x': -12.0, 'z': 0.3}
+-1 {'y': 0.25}
+

+
+
+
    +
  1. +

    LIBSVM documentation 

    +
  2. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/stream/iter-pandas/index.html b/0.19.0/api/stream/iter-pandas/index.html new file mode 100644 index 0000000000..7e7411356e --- /dev/null +++ b/0.19.0/api/stream/iter-pandas/index.html @@ -0,0 +1,3632 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iter_pandas - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

iter_pandas

+

Iterates over the rows of a pandas.DataFrame.

+

Parameters

+
    +
  • +

    X

    +

    Typepd.DataFrame

    +

    A dataframe of features.

    +
  • +
  • +

    y

    +

    Typepd.Series | pd.DataFrame | None

    +

    DefaultNone

    +

    A series or a dataframe with one column per target.

    +
  • +
  • +

    kwargs

    +

    Extra keyword arguments are passed to the underlying call to stream.iter_array.

    +
  • +
+

Examples

+

import pandas as pd
+from river import stream
+
+X = pd.DataFrame({
+    'x1': [1, 2, 3, 4],
+    'x2': ['blue', 'yellow', 'yellow', 'blue'],
+    'y': [True, False, False, True]
+})
+y = X.pop('y')
+
+for xi, yi in stream.iter_pandas(X, y):
+    print(xi, yi)
+
+
{'x1': 1, 'x2': 'blue'} True
+{'x1': 2, 'x2': 'yellow'} False
+{'x1': 3, 'x2': 'yellow'} False
+{'x1': 4, 'x2': 'blue'} True
+
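
When y is a dataframe, each target row is yielded as a dictionary keyed by column name, as per the underlying call to stream.iter_array. A minimal sketch:

Y = pd.DataFrame({'y1': [1, 2], 'y2': [3, 4]})
+X = pd.DataFrame({'x1': [5, 6]})
+
+for xi, yi in stream.iter_pandas(X, Y):
+    print(xi, yi)
+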

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/stream/iter-sklearn-dataset/index.html b/0.19.0/api/stream/iter-sklearn-dataset/index.html new file mode 100644 index 0000000000..2c90b66ebc --- /dev/null +++ b/0.19.0/api/stream/iter-sklearn-dataset/index.html @@ -0,0 +1,3632 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iter_sklearn_dataset - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

iter_sklearn_dataset

+

Iterates rows from one of the datasets provided by scikit-learn.

+

This allows you to use any dataset from scikit-learn's datasets module. For instance, you can use the fetch_openml function to get access to all of the datasets from the OpenML website.

+

Parameters

+
    +
  • +

    dataset

    +

    Typesklearn.utils.Bunch

    +

    A scikit-learn dataset.

    +
  • +
  • +

    kwargs

    +

    Extra keyword arguments are passed to the underlying call to stream.iter_array.

    +
  • +
+

Examples

+

import pprint
+from sklearn import datasets
+from river import stream
+
+dataset = datasets.load_diabetes()
+
+for xi, yi in stream.iter_sklearn_dataset(dataset):
+    pprint.pprint(xi)
+    print(yi)
+    break
+
+
{'age': 0.038075906433423026,
+ 'bmi': 0.061696206518683294,
+ 'bp': 0.0218723855140367,
+ 's1': -0.04422349842444599,
+ 's2': -0.03482076283769895,
+ 's3': -0.04340084565202491,
+ 's4': -0.002592261998183278,
+ 's5': 0.019907486170462722,
+ 's6': -0.01764612515980379,
+ 'sex': 0.05068011873981862}
+151.0
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/stream/iter-sql/index.html b/0.19.0/api/stream/iter-sql/index.html new file mode 100644 index 0000000000..a60769de79 --- /dev/null +++ b/0.19.0/api/stream/iter-sql/index.html @@ -0,0 +1,3672 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iter_sql - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

iter_sql

+

Iterates over the results from an SQL query.

+

By default, SQLAlchemy prefetches results. Therefore, even though you can iterate over the resulting rows one by one, the results are in fact loaded in batch. You can modify this behavior by configuring the connection you pass to iter_sql. For instance, you can set the stream_results parameter to True, as explained in SQLAlchemy's documentation. Note, however, that this isn't available for all database engines.

+

Parameters

+
    +
  • +

    query

    +

    Typestr | sqlalchemy.TextClause | sqlalchemy.Select

    +

    SQL query to be executed.

    +
  • +
  • +

    conn

    +

    Typesqlalchemy.Connection

    +

    An SQLAlchemy construct which has an execute method. In other words you can pass an engine, a connection, or a session.

    +
  • +
  • +

    target_name

    +

    Typestr | None

    +

    DefaultNone

    +

    The name of the target field. If this is None, then y will also be None.

    +
  • +
+

Examples

+

As an example we'll create an in-memory database with SQLAlchemy.

+
import datetime as dt
+import sqlalchemy
+
+engine = sqlalchemy.create_engine('sqlite://')
+
+metadata = sqlalchemy.MetaData()
+
+t_sales = sqlalchemy.Table('sales', metadata,
+    sqlalchemy.Column('shop', sqlalchemy.String, primary_key=True),
+    sqlalchemy.Column('date', sqlalchemy.Date, primary_key=True),
+    sqlalchemy.Column('amount', sqlalchemy.Integer)
+)
+
+metadata.create_all(engine)
+
+sales = [
+    {'shop': 'Hema', 'date': dt.date(2016, 8, 2), 'amount': 20},
+    {'shop': 'Ikea', 'date': dt.date(2016, 8, 2), 'amount': 18},
+    {'shop': 'Hema', 'date': dt.date(2016, 8, 3), 'amount': 22},
+    {'shop': 'Ikea', 'date': dt.date(2016, 8, 3), 'amount': 14},
+    {'shop': 'Hema', 'date': dt.date(2016, 8, 4), 'amount': 12},
+    {'shop': 'Ikea', 'date': dt.date(2016, 8, 4), 'amount': 16}
+]
+
+with engine.connect() as conn:
+    _ = conn.execute(t_sales.insert(), sales)
+    conn.commit()
+
+

We can now query the database. We will set amount to be the target field.

+

from river import stream
+
+with engine.connect() as conn:
+    query = sqlalchemy.sql.select(t_sales)
+    dataset = stream.iter_sql(query, conn, target_name='amount')
+    for x, y in dataset:
+        print(x, y)
+
+
{'shop': 'Hema', 'date': datetime.date(2016, 8, 2)} 20
+{'shop': 'Ikea', 'date': datetime.date(2016, 8, 2)} 18
+{'shop': 'Hema', 'date': datetime.date(2016, 8, 3)} 22
+{'shop': 'Ikea', 'date': datetime.date(2016, 8, 3)} 14
+{'shop': 'Hema', 'date': datetime.date(2016, 8, 4)} 12
+{'shop': 'Ikea', 'date': datetime.date(2016, 8, 4)} 16
+

+

This also works with raw SQL queries.

+

with engine.connect() as conn:
+    query = "SELECT * FROM sales WHERE shop = 'Hema'"
+    dataset = stream.iter_sql(query, conn, target_name='amount')
+    for x, y in dataset:
+        print(x, y)
+
+
{'shop': 'Hema', 'date': '2016-08-02'} 20
+{'shop': 'Hema', 'date': '2016-08-03'} 22
+{'shop': 'Hema', 'date': '2016-08-04'} 12
+
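
As mentioned above, prefetching can be avoided on engines that support server-side cursors by setting stream_results on the connection. SQLite ignores this option, so the following is purely illustrative:

with engine.connect().execution_options(stream_results=True) as conn:
+    dataset = stream.iter_sql("SELECT * FROM sales", conn, target_name='amount')
+    for x, y in dataset:
+        pass
+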

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/stream/iter-vaex/index.html b/0.19.0/api/stream/iter-vaex/index.html new file mode 100644 index 0000000000..800b9bbe22 --- /dev/null +++ b/0.19.0/api/stream/iter-vaex/index.html @@ -0,0 +1,3601 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iter_vaex - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

iter_vaex

+

Yields rows from a vaex.DataFrame.

+

Parameters

+
    +
  • +

    X

    +

    Typevaex.dataframe.DataFrame

    +

    A vaex DataFrame housing the training features.

    +
  • +
  • +

    y

    +

    Typestr | vaex.expression.Expression | None

    +

    DefaultNone

    +

    The column or expression containing the target variable.

    +
  • +
  • +

    features

    +

    Typelist[str] | vaex.expression.Expression | None

    +

    DefaultNone

    +

    A list of features used for training. If None, all columns in X will be used. Features specified in y are ignored.

    +
  • +
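+

Examples

+

A minimal usage sketch, assuming the vaex package is installed:

import vaex
+from river import stream
+
+df = vaex.from_arrays(x1=[1, 2], x2=[3.0, 4.0], y=[True, False])
+
+for xi, yi in stream.iter_vaex(df, y='y'):
+    print(xi, yi)
+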
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/stream/shuffle/index.html b/0.19.0/api/stream/shuffle/index.html new file mode 100644 index 0000000000..5ac16fd628 --- /dev/null +++ b/0.19.0/api/stream/shuffle/index.html @@ -0,0 +1,3646 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + shuffle - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

shuffle

+

Shuffles a stream of data.

+

This works by maintaining a buffer of elements. The first buffer_size elements are stored in memory. Once the buffer is full, a random element inside the buffer is yielded. Every time an element is yielded, the next element in the stream replaces it and the buffer is sampled again. Increasing buffer_size will improve the quality of the shuffling.

+

If you really want to stream over your dataset in a "good" random order, the best way is to split your dataset into smaller datasets and loop over them in a round-robin fashion. You may do this by using the roundrobin recipe from the itertools module.

+
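
As a sketch of that alternative, here is the roundrobin recipe from the itertools documentation, applied to two dataset shards:

from itertools import cycle, islice
+
+def roundrobin(*iterables):
+    # roundrobin('ABC', 'D', 'EF') --> A D E B F C
+    num_active = len(iterables)
+    nexts = cycle(iter(it).__next__ for it in iterables)
+    while num_active:
+        try:
+            for nxt in nexts:
+                yield nxt()
+        except StopIteration:
+            num_active -= 1
+            nexts = cycle(islice(nexts, num_active))
+
+list(roundrobin(range(0, 3), range(10, 13)))
+
+
[0, 10, 1, 11, 2, 12]
+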

Parameters

+
    +
  • +

    stream

    +

    Typetyping.Iterator

    +

    The stream to shuffle.

    +
  • +
  • +

    buffer_size

    +

    Typeint

    +

    The size of the buffer containing the elements held in memory. Increasing this will increase randomness but will incur more memory usage.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed used for sampling.

    +
  • +
+

Examples

+

from river import stream
+
+for i in stream.shuffle(range(15), buffer_size=5, seed=42):
+    print(i)
+
+
0
+5
+2
+1
+8
+9
+6
+4
+11
+12
+10
+7
+14
+13
+3
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/stream/simulate-qa/index.html b/0.19.0/api/stream/simulate-qa/index.html new file mode 100644 index 0000000000..f323ac1427 --- /dev/null +++ b/0.19.0/api/stream/simulate-qa/index.html @@ -0,0 +1,3671 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + simulate_qa - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

simulate_qa

+

Simulate a time-ordered question and answer session.

+

This method allows looping through a dataset in the order in which it arrived. Indeed, it usually is the case that labels arrive after features. Being able to go through a dataset in arrival order enables assessing a model's performance in a reliable manner. For instance, the evaluate.progressive_val_score is a high-level method that can be used to score a model on a dataset. Under the hood it uses this method to determine the correct arrival order.

+

Parameters

+
    +
  • +

    dataset

    +

    Typebase.typing.Dataset

    +

    A stream of (features, target) tuples.

    +
  • +
  • +

    moment

    +

    Typestr | typing.Callable[[dict], dt.datetime] | None

    +

    The attribute used for measuring time. If a callable is passed, then it is expected to take as input a dict of features. If None, then the observations are implicitly timestamped in the order in which they arrive. If a str is passed, then it will be used to obtain the time from the input features.

    +
  • +
  • +

    delay

    +

    Typestr | int | dt.timedelta | typing.Callable | None

    +

    The amount of time to wait before revealing the target associated with each observation to the model. This value is expected to be summable with the moment value. For instance, if moment is a datetime.date, then delay is expected to be a datetime.timedelta. If a callable is passed, then it is expected to take as input a dict of features and the target. If a str is passed, then it will be used to access the relevant field from the features. If None is passed, then no delay will be used, which leads to doing standard online validation. If a scalar is passed, such as an int or a datetime.timedelta, then the delay is constant.

    +
  • +
  • +

    copy

    +

    Typebool

    +

    DefaultTrue

    +

    If True, then a separate copy of the features is yielded the second time around. This ensures that inadvertent modifications in downstream code don't have any effect.

    +
  • +
+

Examples

+

The arrival delay isn't usually indicated in a dataset, but it can sometimes be inferred +from the features. As an example, we'll simulate the departure and arrival times of taxi +trips. Let's first create a time table which records the departure time and the duration in +seconds of several taxi trips.

+
import datetime as dt
+time_table = [
+    (dt.datetime(2020, 1, 1, 20,  0, 0),  900),
+    (dt.datetime(2020, 1, 1, 20, 10, 0), 1800),
+    (dt.datetime(2020, 1, 1, 20, 20, 0),  300),
+    (dt.datetime(2020, 1, 1, 20, 45, 0),  400),
+    (dt.datetime(2020, 1, 1, 20, 50, 0),  240),
+    (dt.datetime(2020, 1, 1, 20, 55, 0),  450)
+]
+
+

We can now create a streaming dataset where the features are the departure dates and the +targets are the durations.

+
dataset = (
+    ({'date': date}, duration)
+    for date, duration in time_table
+)
+
+

Now, we can use simulate_qa to iterate over the events in the order in which they are +meant to occur.

+

from river import stream
+
+delay = lambda _, y: dt.timedelta(seconds=y)
+
+for i, x, y in stream.simulate_qa(dataset, moment='date', delay=delay):
+    if y is None:
+        print(f'{x["date"]} - trip #{i} departs')
+    else:
+        arrival_date = x['date'] + dt.timedelta(seconds=y)
+        print(f'{arrival_date} - trip #{i} arrives after {y} seconds')
+
+
2020-01-01 20:00:00 - trip #0 departs
+2020-01-01 20:10:00 - trip #1 departs
+2020-01-01 20:15:00 - trip #0 arrives after 900 seconds
+2020-01-01 20:20:00 - trip #2 departs
+2020-01-01 20:25:00 - trip #2 arrives after 300 seconds
+2020-01-01 20:40:00 - trip #1 arrives after 1800 seconds
+2020-01-01 20:45:00 - trip #3 departs
+2020-01-01 20:50:00 - trip #4 departs
+2020-01-01 20:51:40 - trip #3 arrives after 400 seconds
+2020-01-01 20:54:00 - trip #4 arrives after 240 seconds
+2020-01-01 20:55:00 - trip #5 departs
+2020-01-01 21:02:30 - trip #5 arrives after 450 seconds
+

+

This function is extremely practical because it provides a reliable way to evaluate the +performance of a model in a real scenario. Indeed, it allows making predictions and +performing model updates in exactly the same manner as they would happen live. For instance, it +is used in evaluate.progressive_val_score, which is a higher-level function for +evaluating models in an online manner.

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/time-series/ForecastingMetric/index.html b/0.19.0/api/time-series/ForecastingMetric/index.html new file mode 100644 index 0000000000..9395b59ce7 --- /dev/null +++ b/0.19.0/api/time-series/ForecastingMetric/index.html @@ -0,0 +1,3520 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ForecastingMetric - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

ForecastingMetric

+

Methods

+
+get +

Return the current performance along the horizon.

+

Returns

+

list[float]: The current performance.

+
+

+
+update +

Update the metric at each step along the horizon.

+

Parameters

+
    +
  • y_true'list[Number]'
  • +
  • y_pred'list[Number]'
  • +
+

Returns

+

ForecastingMetric: self

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/time-series/HoltWinters/index.html b/0.19.0/api/time-series/HoltWinters/index.html new file mode 100644 index 0000000000..946a2c24f5 --- /dev/null +++ b/0.19.0/api/time-series/HoltWinters/index.html @@ -0,0 +1,3636 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + HoltWinters - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

HoltWinters

+

Holt-Winters forecaster.

+

This is a standard implementation of the Holt-Winters forecasting method. Certain parametrisations result in special cases, such as simple exponential smoothing.

+
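
For instance, leaving beta and gamma unset yields simple exponential smoothing. A quick sketch, with arbitrary data values:

from river import time_series
+
+ses = time_series.HoltWinters(alpha=0.3)
+for y in [10.0, 12.0, 11.0, 13.0]:
+    ses = ses.learn_one(y)
+
+ses.forecast(horizon=1)
+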

Optimal parameters and initialisation values can be determined in a batch setting. However, in an online setting, it is necessary to wait and observe enough values. The first k = max(2, seasonality) values are indeed used to initialize the components.

+

Level initialization

+
\[l = \frac{1}{k} \sum_{i=1}^{k} y_i\]
+

Trend initialization

+
\[t = \frac{1}{k - 1} \sum_{i=2}^{k} (y_i - y_{i-1})\]
+

Seasonality initialization

+
\[s_i = \frac{y_i}{k}\]
+

Parameters

+
    +
  • +

    alpha

    +

    Smoothing parameter for the level.

    +
  • +
  • +

    beta

    +

    DefaultNone

    +

    Smoothing parameter for the trend.

    +
  • +
  • +

    gamma

    +

    DefaultNone

    +

    Smoothing parameter for the seasonality.

    +
  • +
  • +

    seasonality

    +

    Default0

    +

    The number of periods in a season. For instance, this should be 4 for quarterly data, and 12 for yearly data.

    +
  • +
  • +

    multiplicative

    +

    DefaultFalse

    +

    Whether or not to use a multiplicative formulation.

    +
  • +
+

Examples

+

from river import datasets
+from river import metrics
+from river import time_series
+
+dataset = datasets.AirlinePassengers()
+
+model = time_series.HoltWinters(
+    alpha=0.3,
+    beta=0.1,
+    gamma=0.6,
+    seasonality=12,
+    multiplicative=True
+)
+
+metric = metrics.MAE()
+
+time_series.evaluate(
+    dataset,
+    model,
+    metric,
+    horizon=12
+)
+
+
+1  MAE: 25.899087
++2  MAE: 26.26131
++3  MAE: 25.735903
++4  MAE: 25.625678
++5  MAE: 26.093842
++6  MAE: 26.90249
++7  MAE: 28.634398
++8  MAE: 29.284769
++9  MAE: 31.018351
++10 MAE: 32.252349
++11 MAE: 33.518946
++12 MAE: 33.975057
+

+

Methods

+
+forecast +

Makes forecast at each step of the given horizon.

+

Parameters

+
    +
  • horizon'int'
  • +
  • xs'list[dict] | None' — defaults to None
  • +
+
+

+
+learn_one +

Updates the model.

+

Parameters

+
    +
  • y'float'
  • +
  • x'dict | None' — defaults to None
  • +
+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/time-series/HorizonAggMetric/index.html b/0.19.0/api/time-series/HorizonAggMetric/index.html new file mode 100644 index 0000000000..2deae34232 --- /dev/null +++ b/0.19.0/api/time-series/HorizonAggMetric/index.html @@ -0,0 +1,3582 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + HorizonAggMetric - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

HorizonAggMetric

+

Same as HorizonMetric, but aggregates the result with a provided function.

+

This allows, for instance, to measure the average performance of a forecasting model along the horizon.

+

Parameters

+
    +
  • +

    metric

    +

    Typemetrics.base.RegressionMetric

    +

    A regression metric.

    +
  • +
  • +

    agg_func

    +

    Typetyping.Callable[[list[float]], float]

    +

    A function that takes as input a list of floats and outputs a single float. You may want to use min, max, as well as statistics.mean and statistics.median.

    +
  • +
+

Examples

+

This is used internally by the time_series.evaluate function when you pass an agg_func.

+

import statistics
+from river import datasets
+from river import metrics
+from river import time_series
+
+metric = time_series.evaluate(
+    dataset=datasets.AirlinePassengers(),
+    model=time_series.HoltWinters(alpha=0.1),
+    metric=metrics.MAE(),
+    agg_func=statistics.mean,
+    horizon=4
+)
+
+metric
+
+
mean(MAE): 42.901748
+

+

Methods

+
+get +

Return the current performance along the horizon.

+

Returns

+

list[float]: The current performance.

+
+

+
+update +

Update the metric at each step along the horizon.

+

Parameters

+
    +
  • y_true'list[Number]'
  • +
  • y_pred'list[Number]'
  • +
+

Returns

+

ForecastingMetric: self

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/time-series/HorizonMetric/index.html b/0.19.0/api/time-series/HorizonMetric/index.html new file mode 100644 index 0000000000..40666d539f --- /dev/null +++ b/0.19.0/api/time-series/HorizonMetric/index.html @@ -0,0 +1,3578 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + HorizonMetric - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

HorizonMetric

+

Measures performance at each time step ahead.

+

This makes it possible to measure the performance of a model at each time step along the horizon. A copy of the provided regression metric is made for each time step. At each time step ahead, the metric is thus evaluated on the predictions for that time step only, and not on those for the time steps before or after it.

+

Parameters

+ +

Examples

+

This is used internally by the time_series.evaluate function.

+

from river import datasets
+from river import metrics
+from river import time_series
+
+metric = time_series.evaluate(
+    dataset=datasets.AirlinePassengers(),
+    model=time_series.HoltWinters(alpha=0.1),
+    metric=metrics.MAE(),
+    horizon=4
+)
+
+metric
+
+
+1 MAE: 40.931286
++2 MAE: 42.667998
++3 MAE: 44.158092
++4 MAE: 43.849617
+

+

Methods

+
+get +

Return the current performance along the horizon.

+

Returns

+

list[float]: The current performance.

+
+

+
+update +

Update the metric at each step along the horizon.

+

Parameters

+
    +
  • y_true'list[Number]'
  • +
  • y_pred'list[Number]'
  • +
+

Returns

+

ForecastingMetric: self

+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/time-series/SNARIMAX/index.html b/0.19.0/api/time-series/SNARIMAX/index.html new file mode 100644 index 0000000000..b715183066 --- /dev/null +++ b/0.19.0/api/time-series/SNARIMAX/index.html @@ -0,0 +1,3767 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SNARIMAX - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

SNARIMAX

+

SNARIMAX model.

+

SNARIMAX stands for (S)easonal (N)on-linear (A)uto(R)egressive (I)ntegrated (M)oving-(A)verage with e(X)ogenous inputs model.

+

This model generalizes many established time series models in a single interface that can be trained online. It assumes that the provided training data is ordered in time and is uniformly spaced. It is made up of the following components:

+
    +
  • +

    S (Seasonal)

    +
  • +
  • +

    N (Non-linear): Any online regression model can be used, not necessarily a linear regression as is done in textbooks.

    +
  • +
  • +

    AR (Autoregressive): Lags of the target variable are used as features.

    +
  • +
  • +

    I (Integrated): The model can be fitted on a differenced version of a time series. In this context, integration is the reverse of differencing.

    +
  • +
  • +

    MA (Moving average): Lags of the errors are used as features.

    +
  • +
  • +

    X (Exogenous): Users can provide additional features. Care has to be taken to include features that will be available both at training and prediction time.

    +
  • +
+

Each of these components can be switched on and off by specifying the appropriate parameters. Classical time series models such as AR, MA, ARMA, and ARIMA can thus be seen as special parametrizations of the SNARIMAX model.

+
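
As a sketch, here is how a few classical models may be expressed; the orders chosen are arbitrary:

from river import time_series
+
+ar2    = time_series.SNARIMAX(p=2, d=0, q=0)  # AR(2)
+ma2    = time_series.SNARIMAX(p=0, d=0, q=2)  # MA(2)
+arma22 = time_series.SNARIMAX(p=2, d=0, q=2)  # ARMA(2, 2)
+arima  = time_series.SNARIMAX(p=2, d=1, q=2)  # ARIMA(2, 1, 2)
+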

This model is tailored for time series that are homoskedastic. In other words, it might not work well if the variance of the time series varies widely over time.

+

Parameters

+
    +
  • +

    p

    +

    Typeint

    +

    Order of the autoregressive part. This is the number of past target values that will be included as features.

    +
  • +
  • +

    d

    +

    Typeint

    +

    Differencing order.

    +
  • +
  • +

    q

    +

    Typeint

    +

    Order of the moving average part. This is the number of past error terms that will be included as features.

    +
  • +
  • +

    m

    +

    Typeint

    +

    Default1

    +

    Season length used for extracting seasonal features. If you believe your data has a seasonal pattern, then set this accordingly. For instance, if the data seems to exhibit a yearly seasonality, and that your data is spaced by month, then you should set this to 12. Note that for this parameter to have any impact you should also set at least one of the p, d, and q parameters.

    +
  • +
  • +

    sp

    +

    Typeint

    +

    Default0

    +

    Seasonal order of the autoregressive part. This is the number of past target values that will be included as features.

    +
  • +
  • +

    sd

    +

    Typeint

    +

    Default0

    +

    Seasonal differencing order.

    +
  • +
  • +

    sq

    +

    Typeint

    +

    Default0

    +

    Seasonal order of the moving average part. This is the number of past error terms that will be included as features.

    +
  • +
  • +

    regressor

    +

    Typebase.Regressor | None

    +

    DefaultNone

    +

    The online regression model to use. By default, a preprocessing.StandardScaler piped with a linear_model.LinearRegression will be used.

    +
  • +
+

Attributes

+
    +
  • +

    differencer (Differencer)

    +
  • +
  • +

    y_trues (collections.deque)

    +

    The p past target values.

    +
  • +
  • +

    errors (collections.deque)

    +

    The q past error values.

    +
  • +
+

Examples

+

import datetime as dt
+from river import datasets
+from river import time_series
+from river import utils
+
+period = 12
+model = time_series.SNARIMAX(
+    p=period,
+    d=1,
+    q=period,
+    m=period,
+    sd=1
+)
+
+for t, (x, y) in enumerate(datasets.AirlinePassengers()):
+    model = model.learn_one(y)
+
+horizon = 12
+future = [
+    {'month': dt.date(year=1961, month=m, day=1)}
+    for m in range(1, horizon + 1)
+]
+forecast = model.forecast(horizon=horizon)
+for x, y_pred in zip(future, forecast):
+    print(x['month'], f'{y_pred:.3f}')
+
+
1961-01-01 494.542
+1961-02-01 450.825
+1961-03-01 484.972
+1961-04-01 576.401
+1961-05-01 559.489
+1961-06-01 612.251
+1961-07-01 722.410
+1961-08-01 674.604
+1961-09-01 575.716
+1961-10-01 562.808
+1961-11-01 477.049
+1961-12-01 515.191
+

+

Classic ARIMA models learn solely from the time series values. You can also include features +built at each step.

+

import calendar
+import math
+from river import compose
+from river import linear_model
+from river import optim
+from river import preprocessing
+
+def get_month_distances(x):
+    return {
+        calendar.month_name[month]: math.exp(-(x['month'].month - month) ** 2)
+        for month in range(1, 13)
+    }
+
+def get_ordinal_date(x):
+    return {'ordinal_date': x['month'].toordinal()}
+
+extract_features = compose.TransformerUnion(
+    get_ordinal_date,
+    get_month_distances
+)
+
+model = (
+    extract_features |
+    time_series.SNARIMAX(
+        p=1,
+        d=0,
+        q=0,
+        m=12,
+        sp=3,
+        sq=6,
+        regressor=(
+            preprocessing.StandardScaler() |
+            linear_model.LinearRegression(
+                intercept_init=110,
+                optimizer=optim.SGD(0.01),
+                intercept_lr=0.3
+            )
+        )
+    )
+)
+
+for x, y in datasets.AirlinePassengers():
+    model = model.learn_one(x, y)
+
+forecast = model.forecast(horizon=horizon)
+for x, y_pred in zip(future, forecast):
+    print(x['month'], f'{y_pred:.3f}')
+
+
1961-01-01 444.821
+1961-02-01 432.612
+1961-03-01 457.739
+1961-04-01 465.544
+1961-05-01 476.575
+1961-06-01 516.255
+1961-07-01 565.405
+1961-08-01 572.470
+1961-09-01 512.645
+1961-10-01 475.919
+1961-11-01 438.033
+1961-12-01 456.892
+

+

Methods

+
+forecast +

Makes forecast at each step of the given horizon.

+

Parameters

+
    +
  • horizon'int'
  • +
  • xs'list[dict] | None' — defaults to None
  • +
+
+

+
+learn_one +

Updates the model.

+

Parameters

+
    +
  • y'float'
  • +
  • x'dict | None' — defaults to None
  • +
+
+

+ + + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/time-series/base/Forecaster/index.html b/0.19.0/api/time-series/base/Forecaster/index.html new file mode 100644 index 0000000000..37b01939cb --- /dev/null +++ b/0.19.0/api/time-series/base/Forecaster/index.html @@ -0,0 +1,3549 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Forecaster - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Forecaster

+

Methods

+
+forecast +

Makes forecast at each step of the given horizon.

+

Parameters

+
    +
  • horizon'int'
  • +
  • xs'list[dict] | None' — defaults to None
  • +
+
+

+
+learn_one +

Updates the model.

+

Parameters

+
    +
  • y'float'
  • +
  • x'dict | None' — defaults to None
  • +
+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/time-series/evaluate/index.html b/0.19.0/api/time-series/evaluate/index.html new file mode 100644 index 0000000000..0e9409e84c --- /dev/null +++ b/0.19.0/api/time-series/evaluate/index.html @@ -0,0 +1,3536 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + evaluate - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

evaluate

+

Evaluates the performance of a forecaster on a time series dataset.

+

To understand why this method is useful, it's important to understand the difference between nowcasting and forecasting. Nowcasting is about predicting a value at the next time step. This can be seen as a special case of regression, where the value to predict is the value at the next time step. In this case, the evaluate.progressive_val_score function may be used to evaluate a model via progressive validation.

+

Forecasting models can also be evaluated via progressive validation. This is the purpose of this function. At each time step t, the forecaster is asked to predict the values at t + 1, t + 2, ..., t + horizon. The performance at each time step is measured and returned.

+

Parameters

+
    +
  • +

    dataset

    +

    Typebase.typing.Dataset

    +

    A sequential time series.

    +
  • +
  • +

    model

    +

    Typetime_series.base.Forecaster

    +

    A forecaster.

    +
  • +
  • +

    metric

    +

    Typemetrics.base.RegressionMetric

    +

    A regression metric.

    +
  • +
  • +

    horizon

    +

    Typeint

    +

    The forecast horizon, i.e. the number of time steps ahead that are predicted and evaluated at each step.

    +
  • +
  • +

    agg_func

    +

    Typetyping.Callable[[list[float]], float] | None

    +

    DefaultNone

    +

    A function that takes as input a list of floats and outputs a single float, used to aggregate the performance measured at each step along the horizon. If None, the performance at each step is reported separately.

    +
  • +
  • +

    grace_period

    +

    Typeint | None

    +

    DefaultNone

    +

    Initial period during which the metric is not updated. This is to fairly evaluate models which need a warming up period to start producing meaningful forecasts. The value of this parameter is equal to the horizon by default.

    +
  • +
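+

Examples

+

The example given for time_series.HorizonMetric applies verbatim here:

from river import datasets
+from river import metrics
+from river import time_series
+
+metric = time_series.evaluate(
+    dataset=datasets.AirlinePassengers(),
+    model=time_series.HoltWinters(alpha=0.1),
+    metric=metrics.MAE(),
+    horizon=4
+)
+
+metric
+
+
+1 MAE: 40.931286
++2 MAE: 42.667998
++3 MAE: 44.158092
++4 MAE: 43.849617
+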
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/time-series/iter-evaluate/index.html b/0.19.0/api/time-series/iter-evaluate/index.html new file mode 100644 index 0000000000..95a025d0df --- /dev/null +++ b/0.19.0/api/time-series/iter-evaluate/index.html @@ -0,0 +1,3535 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iter_evaluate - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

iter_evaluate

+

Evaluates the performance of a forecaster on a time series dataset and yields results.

+

This does exactly the same as time_series.evaluate. The only difference is that this function returns an iterator, yielding results at every step. This can be useful if you want to have control over what you do with the results. For instance, you might want to plot the results.

+

Parameters

+
    +
  • +

    dataset

    +

    Typebase.typing.Dataset

    +

    A sequential time series.

    +
  • +
  • +

    model

    +

    Typetime_series.base.Forecaster

    +

    A forecaster.

    +
  • +
  • +

    metric

    +

    Typemetrics.base.RegressionMetric

    +

    A regression metric.

    +
  • +
  • +

    horizon

    +

    Typeint

    +

    The forecast horizon, i.e. the number of time steps ahead that are predicted and evaluated at each step.

    +
  • +
  • +

    agg_func

    +

    Typetyping.Callable[[list[float]], float] | None

    +

    DefaultNone

    +

    A function that takes as input a list of floats and outputs a single float, used to aggregate the performance measured at each step along the horizon. If None, the performance at each step is reported separately.

    +
  • +
  • +

    grace_period

    +

    Typeint | None

    +

    DefaultNone

    +

    Initial period during which the metric is not updated. This is to fairly evaluate models which need a warming up period to start producing meaningful forecasts. The value of this parameter is equal to the horizon by default.

    +
  • +
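+

Examples

+

A minimal usage sketch; here the generator is simply consumed, but each yielded step could just as well be collected for plotting:

from river import datasets
+from river import metrics
+from river import time_series
+
+steps = time_series.iter_evaluate(
+    dataset=datasets.AirlinePassengers(),
+    model=time_series.HoltWinters(alpha=0.1),
+    metric=metrics.MAE(),
+    horizon=4
+)
+
+for step in steps:
+    pass  # do something with each intermediate result
+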
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/tree/ExtremelyFastDecisionTreeClassifier/index.html b/0.19.0/api/tree/ExtremelyFastDecisionTreeClassifier/index.html new file mode 100644 index 0000000000..6fda03b393 --- /dev/null +++ b/0.19.0/api/tree/ExtremelyFastDecisionTreeClassifier/index.html @@ -0,0 +1,3892 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ExtremelyFastDecisionTreeClassifier - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

ExtremelyFastDecisionTreeClassifier

+

Extremely Fast Decision Tree classifier.

+

Also referred to as Hoeffding AnyTime Tree (HATT) classifier.

+

Parameters

+
    +
  • +

    grace_period

    +

    Typeint

    +

    Default200

    +

    Number of instances a leaf should observe between split attempts.

    +
  • +
  • +

    max_depth

    +

    Typeint | None

    +

    DefaultNone

    +

    The maximum depth a tree can reach. If None, the tree will grow indefinitely.

    +
  • +
  • +

    min_samples_reevaluate

    +

    Typeint

    +

    Default20

    +

    Number of instances a node should observe before reevaluating the best split.

    +
  • +
  • +

    split_criterion

    +

    Typestr

    +

    Defaultinfo_gain

    +

    Split criterion to use.
    - 'gini' - Gini
    - 'info_gain' - Information Gain
    - 'hellinger' - Hellinger Distance

    +
  • +
  • +

    delta

    +

    Typefloat

    +

    Default1e-07

    +

    Significance level to calculate the Hoeffding bound. The significance level is given by 1 - delta. Values closer to zero imply longer split decision delays.

    +
  • +
  • +

    tau

    +

    Typefloat

    +

    Default0.05

    +

    Threshold below which a split will be forced to break ties.

    +
  • +
  • +

    leaf_prediction

    +

    Typestr

    +

    Defaultnba

    +

    Prediction mechanism used at leafs.
    - 'mc' - Majority Class
    - 'nb' - Naive Bayes
    - 'nba' - Naive Bayes Adaptive

    +
  • +
  • +

    nb_threshold

    +

    Typeint

    +

    Default0

    +

    Number of instances a leaf should observe before allowing Naive Bayes.

    +
  • +
  • +

    nominal_attributes

    +

    Typelist | None

    +

    DefaultNone

    +

    List of Nominal attributes identifiers. If empty, then assume that all numeric attributes should be treated as continuous.

    +
  • +
  • +

    splitter

    +

    TypeSplitter | None

    +

    DefaultNone

    +

    The Splitter or Attribute Observer (AO) used to monitor the class statistics of numeric features and perform splits. Splitters are available in the tree.splitter module. Different splitters are available for classification and regression tasks. Classification and regression splitters can be distinguished by their property is_target_class. This is an advanced option. Special care must be taken when choosing different splitters. By default, tree.splitter.GaussianSplitter is used if splitter is None.

    +
  • +
  • +

    binary_split

    +

    Typebool

    +

    DefaultFalse

    +

    If True, only allow binary splits.

    +
  • +
  • +

    min_branch_fraction

    +

    Typefloat

    +

    Default0.01

    +

    The minimum percentage of observed data required for branches resulting from split candidates. To validate a split candidate, at least two resulting branches must have a percentage of samples greater than min_branch_fraction. This criterion prevents unnecessary splits when the majority of instances are concentrated in a single branch.

    +
  • +
  • +

    max_share_to_split

    +

    Typefloat

    +

    Default0.99

    +

    Only perform a split in a leaf if the proportion of elements in the majority class is smaller than this parameter value. This parameter avoids performing splits when most of the data belongs to a single class.

    +
  • +
  • +

    max_size

    +

    Typefloat

    +

    Default100.0

    +

    The max size of the tree, in Megabytes (MB).

    +
  • +
  • +

    memory_estimate_period

    +

    Typeint

    +

    Default1000000

    +

    Interval (number of processed instances) between memory consumption checks.

    +
  • +
  • +

    stop_mem_management

    +

    Typebool

    +

    DefaultFalse

    +

    If True, stop growing as soon as memory limit is hit.

    +
  • +
  • +

    remove_poor_attrs

    +

    Typebool

    +

    DefaultFalse

    +

    If True, disable poor attributes to reduce memory usage.

    +
  • +
  • +

    merit_preprune

    +

    Typebool

    +

    DefaultTrue

    +

    If True, enable merit-based tree pre-pruning.

    +
  • +
+

Attributes

+
    +
  • +

    height

    +
  • +
  • +

    leaf_prediction

    +

    Return the prediction strategy used by the tree at its leaves.

    +
  • +
  • +

    max_size

    +

    Max allowed size tree can reach (in MB).

    +
  • +
  • +

    n_active_leaves

    +
  • +
  • +

    n_branches

    +
  • +
  • +

    n_inactive_leaves

    +
  • +
  • +

    n_leaves

    +
  • +
  • +

    n_nodes

    +
  • +
  • +

    split_criterion

    +

    Return a string with the name of the split criterion being used by the tree.

    +
  • +
  • +

    summary

    +

    Collect metrics corresponding to the current status of the tree in a string buffer.

    +
  • +
+

Examples

+

from river.datasets import synth
+from river import evaluate
+from river import metrics
+from river import tree
+
+gen = synth.Agrawal(classification_function=0, seed=42)
+dataset = iter(gen.take(1000))
+
+model = tree.ExtremelyFastDecisionTreeClassifier(
+    grace_period=100,
+    delta=1e-5,
+    nominal_attributes=['elevel', 'car', 'zipcode'],
+    min_samples_reevaluate=100
+)
+
+metric = metrics.Accuracy()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
Accuracy: 87.29%
+

+

Methods

+
+debug_one +

Print an explanation of how x is predicted.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

str | None: A representation of the path followed by the tree to predict x; None if the tree is empty.

+
+

+
+draw +

Draw the tree using the graphviz library.

+

Since the tree is drawn without passing incoming samples, classification trees will show the majority class in their leaves, whereas regression trees will use the target mean.

+

Parameters

+
    +
  • max_depth'int | None' — defaults to None
    + The maximum depth a tree can reach. If None, the tree will grow indefinitely.
  • +
+
+

+
+learn_one +

Incrementally train the model.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • sample_weight — defaults to 1.0
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

+
+to_dataframe +

Return a representation of the current tree structure organized in a pandas.DataFrame object.

+

In case the tree is empty or it only contains a single node (a leaf), None is returned.

+

Returns

+

df

+
+

+

Notes

+

The Extremely Fast Decision Tree (EFDT) 1 constructs a tree incrementally. The EFDT seeks to select and deploy a split as soon as it is confident the split is useful, and then revisits that decision, replacing the split if it subsequently becomes evident that a better split is available. The EFDT learns rapidly and, provided the distribution from which the data are drawn is stationary, eventually converges to the asymptotic batch tree.
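The structural counters listed under Attributes make it possible to watch this revision process at work. A minimal sketch, reusing the stream from the example above (the exact counts depend on the data):

from river.datasets import synth
from river import tree

model = tree.ExtremelyFastDecisionTreeClassifier(grace_period=100, delta=1e-5)
for x, y in synth.Agrawal(classification_function=0, seed=42).take(1000):
    model.learn_one(x, y)

# Structural counters exposed by the tree; values vary with the stream.
print(model.n_nodes, model.n_leaves, model.height)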

+
+
+
    +
  1. +

    C. Manapragada, G. Webb, and M. Salehi. Extremely Fast Decision Tree. +In Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data +Mining (KDD '18). ACM, New York, NY, USA, 1953-1962. +DOI: https://doi.org/10.1145/3219819.3220005 

    +
  2. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/tree/HoeffdingAdaptiveTreeClassifier/index.html b/0.19.0/api/tree/HoeffdingAdaptiveTreeClassifier/index.html new file mode 100644 index 0000000000..5dc830d7b2 --- /dev/null +++ b/0.19.0/api/tree/HoeffdingAdaptiveTreeClassifier/index.html @@ -0,0 +1,3925 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + HoeffdingAdaptiveTreeClassifier - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

HoeffdingAdaptiveTreeClassifier

+

Hoeffding Adaptive Tree classifier.

+

Parameters

+
    +
  • +

    grace_period

    +

    Typeint

    +

    Default200

    +

    Number of instances a leaf should observe between split attempts.

    +
  • +
  • +

    max_depth

    +

    Typeint | None

    +

    DefaultNone

    +

    The maximum depth a tree can reach. If None, the tree will grow indefinitely.

    +
  • +
  • +

    split_criterion

    +

    Typestr

    +

    Defaultinfo_gain

    +

    Split criterion to use.
    - 'gini' - Gini
    - 'info_gain' - Information Gain
- 'hellinger' - Hellinger Distance

    +
  • +
  • +

    delta

    +

    Typefloat

    +

    Default1e-07

    +

    Significance level to calculate the Hoeffding bound. The significance level is given by 1 - delta. Values closer to zero imply longer split decision delays.

    +
  • +
  • +

    tau

    +

    Typefloat

    +

    Default0.05

    +

    Threshold below which a split will be forced to break ties.

    +
  • +
  • +

    leaf_prediction

    +

    Typestr

    +

    Defaultnba

    +

Prediction mechanism used at the leaves.
    - 'mc' - Majority Class
    - 'nb' - Naive Bayes
    - 'nba' - Naive Bayes Adaptive

    +
  • +
  • +

    nb_threshold

    +

    Typeint

    +

    Default0

    +

    Number of instances a leaf should observe before allowing Naive Bayes.

    +
  • +
  • +

    nominal_attributes

    +

    Typelist | None

    +

    DefaultNone

    +

List of nominal attributes. If empty, then assume that all numeric attributes should be treated as continuous.

    +
  • +
  • +

    splitter

    +

    TypeSplitter | None

    +

    DefaultNone

    +

    The Splitter or Attribute Observer (AO) used to monitor the class statistics of numeric features and perform splits. Splitters are available in the tree.splitter module. Different splitters are available for classification and regression tasks. Classification and regression splitters can be distinguished by their property is_target_class. This is an advanced option. Special care must be taken when choosing different splitters. By default, tree.splitter.GaussianSplitter is used if splitter is None.

    +
  • +
  • +

    bootstrap_sampling

    +

    Typebool

    +

    DefaultTrue

    +

    If True, perform bootstrap sampling in the leaf nodes.

    +
  • +
  • +

    drift_window_threshold

    +

    Typeint

    +

    Default300

    +

Minimum number of examples an alternate tree must observe before being considered a potential replacement for the current one.

    +
  • +
  • +

    drift_detector

    +

    Typebase.DriftDetector | None

    +

    DefaultNone

    +

    The drift detector used to build the tree. If None then drift.ADWIN is used.

    +
  • +
  • +

    switch_significance

    +

    Typefloat

    +

    Default0.05

    +

    The significance level to assess whether alternate subtrees are significantly better than their main subtree counterparts.

    +
  • +
  • +

    binary_split

    +

    Typebool

    +

    DefaultFalse

    +

    If True, only allow binary splits.

    +
  • +
  • +

    min_branch_fraction

    +

    Typefloat

    +

    Default0.01

    +

    The minimum percentage of observed data required for branches resulting from split candidates. To validate a split candidate, at least two resulting branches must have a percentage of samples greater than min_branch_fraction. This criterion prevents unnecessary splits when the majority of instances are concentrated in a single branch.

    +
  • +
  • +

    max_share_to_split

    +

    Typefloat

    +

    Default0.99

    +

    Only perform a split in a leaf if the proportion of elements in the majority class is smaller than this parameter value. This parameter avoids performing splits when most of the data belongs to a single class.

    +
  • +
  • +

    max_size

    +

    Typefloat

    +

    Default100.0

    +

    The max size of the tree, in Megabytes (MB).

    +
  • +
  • +

    memory_estimate_period

    +

    Typeint

    +

    Default1000000

    +

    Interval (number of processed instances) between memory consumption checks.

    +
  • +
  • +

    stop_mem_management

    +

    Typebool

    +

    DefaultFalse

    +

    If True, stop growing as soon as memory limit is hit.

    +
  • +
  • +

    remove_poor_attrs

    +

    Typebool

    +

    DefaultFalse

    +

    If True, disable poor attributes to reduce memory usage.

    +
  • +
  • +

    merit_preprune

    +

    Typebool

    +

    DefaultTrue

    +

    If True, enable merit-based tree pre-pruning.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    height

    +
  • +
  • +

    leaf_prediction

    +

    Return the prediction strategy used by the tree at its leaves.

    +
  • +
  • +

    max_size

    +

Max allowed size the tree can reach (in MB).

    +
  • +
  • +

    n_active_leaves

    +
  • +
  • +

    n_alternate_trees

    +
  • +
  • +

    n_branches

    +
  • +
  • +

    n_inactive_leaves

    +
  • +
  • +

    n_leaves

    +
  • +
  • +

    n_nodes

    +
  • +
  • +

    n_pruned_alternate_trees

    +
  • +
  • +

    n_switch_alternate_trees

    +
  • +
  • +

    split_criterion

    +

    Return a string with the name of the split criterion being used by the tree.

    +
  • +
  • +

    summary

    +

    Collect metrics corresponding to the current status of the tree in a string buffer.

    +
  • +
+

Examples

+

from river.datasets import synth
+from river import evaluate
+from river import metrics
+from river import tree
+
+gen = synth.ConceptDriftStream(stream=synth.SEA(seed=42, variant=0),
+                               drift_stream=synth.SEA(seed=42, variant=1),
+                               seed=1, position=500, width=50)
+dataset = iter(gen.take(1000))
+
+model = tree.HoeffdingAdaptiveTreeClassifier(
+    grace_period=100,
+    delta=1e-5,
+    leaf_prediction='nb',
+    nb_threshold=10,
+    seed=0
+)
+
+metric = metrics.Accuracy()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
Accuracy: 91.49%
+
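Because the stream above contains a concept drift, it can be instructive to inspect the adaptive-tree counters once training is done. A minimal sketch reusing the fitted model from the example (the exact values depend on the stream):

# Counters listed under Attributes above.
print(model.n_alternate_trees, model.n_pruned_alternate_trees, model.n_switch_alternate_trees)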

+

Methods

+
+debug_one +

Print an explanation of how x is predicted.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

str | None: A representation of the path followed by the tree to predict x; None if the tree is empty.

+
+

+
+draw +

Draw the tree using the graphviz library.

+

Since the tree is drawn without passing incoming samples, classification trees will show the majority class in their leaves, whereas regression trees will use the target mean.

+

Parameters

+
    +
  • max_depth'int | None' — defaults to None
    + The maximum depth a tree can reach. If None, the tree will grow indefinitely.
  • +
+
+

+
+learn_one +

Train the model on instance x and corresponding target y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • sample_weight — defaults to 1.0
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

+
+to_dataframe +

Return a representation of the current tree structure organized in a pandas.DataFrame object.

+

In case the tree is empty or it only contains a single node (a leaf), None is returned.

+

Returns

+

df

+
+

+

Notes

+

The Hoeffding Adaptive Tree 1 uses a drift detector to monitor performance of branches in +the tree and to replace them with new branches when their accuracy decreases.

+

The bootstrap sampling strategy is an improvement over the original Hoeffding Adaptive Tree +algorithm. It is enabled by default since, in general, it results in better performance.
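The drift detector itself can also be swapped out. A minimal sketch, assuming the default drift.ADWIN is replaced with a custom configuration (the delta value is illustrative, not a recommendation):

from river import drift
from river import tree

model = tree.HoeffdingAdaptiveTreeClassifier(
    grace_period=100,
    drift_detector=drift.ADWIN(delta=0.001),  # smaller delta => more conservative detection
    seed=0
)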

+
+
+
    +
  1. +

    Bifet, Albert, and Ricard Gavaldà. "Adaptive learning from evolving data streams." + In International Symposium on Intelligent Data Analysis, pp. 249-260. Springer, Berlin, + Heidelberg, 2009. 

    +
  2. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/tree/HoeffdingAdaptiveTreeRegressor/index.html b/0.19.0/api/tree/HoeffdingAdaptiveTreeRegressor/index.html new file mode 100644 index 0000000000..90c3840503 --- /dev/null +++ b/0.19.0/api/tree/HoeffdingAdaptiveTreeRegressor/index.html @@ -0,0 +1,3913 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + HoeffdingAdaptiveTreeRegressor - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

HoeffdingAdaptiveTreeRegressor

+

Hoeffding Adaptive Tree regressor (HATR).

+

This class implements a regression version of the Hoeffding Adaptive Tree Classifier. Hence, it also uses an ADWIN concept-drift detector instance at each decision node to monitor possible changes in the data distribution. If a drift is detected in a node, an alternate tree begins to be induced in the background. When enough information is gathered, HATR replaces the node where the change was detected with its alternate tree.

+

Parameters

+
    +
  • +

    grace_period

    +

    Typeint

    +

    Default200

    +

    Number of instances a leaf should observe between split attempts.

    +
  • +
  • +

    max_depth

    +

    Typeint | None

    +

    DefaultNone

    +

    The maximum depth a tree can reach. If None, the tree will grow indefinitely.

    +
  • +
  • +

    delta

    +

    Typefloat

    +

    Default1e-07

    +

    Significance level to calculate the Hoeffding bound. The significance level is given by 1 - delta. Values closer to zero imply longer split decision delays.

    +
  • +
  • +

    tau

    +

    Typefloat

    +

    Default0.05

    +

    Threshold below which a split will be forced to break ties.

    +
  • +
  • +

    leaf_prediction

    +

    Typestr

    +

    Defaultadaptive

    +

Prediction mechanism used at the leaves.
    - 'mean' - Target mean
    - 'model' - Uses the model defined in leaf_model
    - 'adaptive' - Chooses between 'mean' and 'model' dynamically

    +
  • +
  • +

    leaf_model

    +

    Typebase.Regressor | None

    +

    DefaultNone

    +

    The regression model used to provide responses if leaf_prediction='model'. If not provided an instance of linear_model.LinearRegression with the default hyperparameters is used.

    +
  • +
  • +

    model_selector_decay

    +

    Typefloat

    +

    Default0.95

    +

The exponential decaying factor applied to the learning models' squared errors, which are monitored if leaf_prediction='adaptive'. Must be between 0 and 1. The closer to 1, the more importance past observations receive. Conversely, the closer the value is to 0, the more influence recently observed errors have on the final decision.

    +
  • +
  • +

    nominal_attributes

    +

    Typelist | None

    +

    DefaultNone

    +

List of nominal attributes. If empty, then assume that all numeric attributes should be treated as continuous.

    +
  • +
  • +

    splitter

    +

    TypeSplitter | None

    +

    DefaultNone

    +

    The Splitter or Attribute Observer (AO) used to monitor the class statistics of numeric features and perform splits. Splitters are available in the tree.splitter module. Different splitters are available for classification and regression tasks. Classification and regression splitters can be distinguished by their property is_target_class. This is an advanced option. Special care must be taken when choosing different splitters. By default, tree.splitter.TEBSTSplitter is used if splitter is None.

    +
  • +
  • +

    min_samples_split

    +

    Typeint

    +

    Default5

    +

    The minimum number of samples every branch resulting from a split candidate must have to be considered valid.

    +
  • +
  • +

    bootstrap_sampling

    +

    Typebool

    +

    DefaultTrue

    +

    If True, perform bootstrap sampling in the leaf nodes.

    +
  • +
  • +

    drift_window_threshold

    +

    Typeint

    +

    Default300

    +

Minimum number of examples an alternate tree must observe before being considered a potential replacement for the current one.

    +
  • +
  • +

    drift_detector

    +

    Typebase.DriftDetector | None

    +

    DefaultNone

    +

    The drift detector used to build the tree. If None then drift.ADWIN is used. Only detectors that support arbitrarily valued continuous data can be used for regression.

    +
  • +
  • +

    switch_significance

    +

    Typefloat

    +

    Default0.05

    +

    The significance level to assess whether alternate subtrees are significantly better than their main subtree counterparts.

    +
  • +
  • +

    binary_split

    +

    Typebool

    +

    DefaultFalse

    +

    If True, only allow binary splits.

    +
  • +
  • +

    max_size

    +

    Typefloat

    +

    Default500.0

    +

    The max size of the tree, in Megabytes (MB).

    +
  • +
  • +

    memory_estimate_period

    +

    Typeint

    +

    Default1000000

    +

    Interval (number of processed instances) between memory consumption checks.

    +
  • +
  • +

    stop_mem_management

    +

    Typebool

    +

    DefaultFalse

    +

    If True, stop growing as soon as memory limit is hit.

    +
  • +
  • +

    remove_poor_attrs

    +

    Typebool

    +

    DefaultFalse

    +

    If True, disable poor attributes to reduce memory usage.

    +
  • +
  • +

    merit_preprune

    +

    Typebool

    +

    DefaultTrue

    +

    If True, enable merit-based tree pre-pruning.

    +
  • +
  • +

    seed

    +

    Typeint | None

    +

    DefaultNone

    +

    Random seed for reproducibility.

    +
  • +
+

Attributes

+
    +
  • +

    height

    +
  • +
  • +

    leaf_prediction

    +

    Return the prediction strategy used by the tree at its leaves.

    +
  • +
  • +

    max_size

    +

Max allowed size the tree can reach (in MB).

    +
  • +
  • +

    n_active_leaves

    +
  • +
  • +

    n_alternate_trees

    +
  • +
  • +

    n_branches

    +
  • +
  • +

    n_inactive_leaves

    +
  • +
  • +

    n_leaves

    +
  • +
  • +

    n_nodes

    +
  • +
  • +

    n_pruned_alternate_trees

    +
  • +
  • +

    n_switch_alternate_trees

    +
  • +
  • +

    split_criterion

    +

    Return a string with the name of the split criterion being used by the tree.

    +
  • +
  • +

    summary

    +

    Collect metrics corresponding to the current status of the tree in a string buffer.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import metrics
+from river import tree
+from river import preprocessing
+
+dataset = datasets.TrumpApproval()
+
+model = (
+    preprocessing.StandardScaler() |
+    tree.HoeffdingAdaptiveTreeRegressor(
+        grace_period=50,
+        model_selector_decay=0.3,
+        seed=0
+    )
+)
+
+metric = metrics.MAE()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MAE: 0.823026
+

+

Methods

+
+debug_one +

Print an explanation of how x is predicted.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

str | None: A representation of the path followed by the tree to predict x; None if the tree is empty.

+
+

+
+draw +

Draw the tree using the graphviz library.

+

Since the tree is drawn without passing incoming samples, classification trees will show the majority class in their leaves, whereas regression trees will use the target mean.

+

Parameters

+
    +
  • max_depth'int | None' — defaults to None
    + The maximum depth a tree can reach. If None, the tree will grow indefinitely.
  • +
+
+

+
+learn_one +

Train the tree model on sample x and corresponding target y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • sample_weight — defaults to 1.0
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the target value using one of the leaf prediction strategies.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

Predicted target value.

+
+

+
+to_dataframe +

Return a representation of the current tree structure organized in a pandas.DataFrame object.

+

In case the tree is empty or it only contains a single node (a leaf), None is returned.

+

Returns

+

df

+
+

+

Notes

+

The Hoeffding Adaptive Tree 1 uses drift detectors to monitor performance of branches +in the tree and to replace them with new branches when their accuracy decreases.

+

The bootstrap sampling strategy is an improvement over the original Hoeffding Adaptive Tree +algorithm. It is enabled by default since, in general, it results in better performance.

+

To cope with ADWIN's requirements of bounded input data, HATR uses a novel error normalization strategy based on the empirical rule of Gaussian distributions. We assume the deviations of the predictions from the expected values follow a normal distribution. Hence, we subject these errors to a min-max normalization assuming that most of the data lies in the \(\left[-3\sigma, 3\sigma\right]\) range. These normalized errors are passed to the ADWIN instances. This is the same strategy used by the Adaptive Random Forest Regressor.
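A minimal sketch of that normalization step (illustrative only; the names below are not part of the River API):

def normalize_error(error, mean, std):
    # Map errors assumed to lie in [mean - 3*std, mean + 3*std] onto [0, 1],
    # clipping the rare values that fall outside that range.
    if std == 0:
        return 0.0
    scaled = (error - (mean - 3 * std)) / (6 * std)
    return min(max(scaled, 0.0), 1.0)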

+
+
+
    +
  1. +

    Bifet, Albert, and Ricard Gavaldà. "Adaptive learning from evolving data streams." +In International Symposium on Intelligent Data Analysis, pp. 249-260. Springer, Berlin, +Heidelberg, 2009. 

    +
  2. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/tree/HoeffdingTreeClassifier/index.html b/0.19.0/api/tree/HoeffdingTreeClassifier/index.html new file mode 100644 index 0000000000..6ac7aa272c --- /dev/null +++ b/0.19.0/api/tree/HoeffdingTreeClassifier/index.html @@ -0,0 +1,3892 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + HoeffdingTreeClassifier - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

HoeffdingTreeClassifier

+

Hoeffding Tree or Very Fast Decision Tree classifier.

+

Parameters

+
    +
  • +

    grace_period

    +

    Typeint

    +

    Default200

    +

    Number of instances a leaf should observe between split attempts.

    +
  • +
  • +

    max_depth

    +

    Typeint | None

    +

    DefaultNone

    +

    The maximum depth a tree can reach. If None, the tree will grow indefinitely.

    +
  • +
  • +

    split_criterion

    +

    Typestr

    +

    Defaultinfo_gain

    +

    Split criterion to use.
    - 'gini' - Gini
    - 'info_gain' - Information Gain
- 'hellinger' - Hellinger Distance

    +
  • +
  • +

    delta

    +

    Typefloat

    +

    Default1e-07

    +

    Significance level to calculate the Hoeffding bound. The significance level is given by 1 - delta. Values closer to zero imply longer split decision delays.

    +
  • +
  • +

    tau

    +

    Typefloat

    +

    Default0.05

    +

    Threshold below which a split will be forced to break ties.

    +
  • +
  • +

    leaf_prediction

    +

    Typestr

    +

    Defaultnba

    +

Prediction mechanism used at the leaves.
    - 'mc' - Majority Class
    - 'nb' - Naive Bayes
    - 'nba' - Naive Bayes Adaptive

    +
  • +
  • +

    nb_threshold

    +

    Typeint

    +

    Default0

    +

    Number of instances a leaf should observe before allowing Naive Bayes.

    +
  • +
  • +

    nominal_attributes

    +

    Typelist | None

    +

    DefaultNone

    +

List of nominal attribute identifiers. If empty, then assume that all numeric attributes should be treated as continuous.

    +
  • +
  • +

    splitter

    +

    TypeSplitter | None

    +

    DefaultNone

    +

    The Splitter or Attribute Observer (AO) used to monitor the class statistics of numeric features and perform splits. Splitters are available in the tree.splitter module. Different splitters are available for classification and regression tasks. Classification and regression splitters can be distinguished by their property is_target_class. This is an advanced option. Special care must be taken when choosing different splitters. By default, tree.splitter.GaussianSplitter is used if splitter is None.

    +
  • +
  • +

    binary_split

    +

    Typebool

    +

    DefaultFalse

    +

    If True, only allow binary splits.

    +
  • +
  • +

    min_branch_fraction

    +

    Typefloat

    +

    Default0.01

    +

    The minimum percentage of observed data required for branches resulting from split candidates. To validate a split candidate, at least two resulting branches must have a percentage of samples greater than min_branch_fraction. This criterion prevents unnecessary splits when the majority of instances are concentrated in a single branch.

    +
  • +
  • +

    max_share_to_split

    +

    Typefloat

    +

    Default0.99

    +

    Only perform a split in a leaf if the proportion of elements in the majority class is smaller than this parameter value. This parameter avoids performing splits when most of the data belongs to a single class.

    +
  • +
  • +

    max_size

    +

    Typefloat

    +

    Default100.0

    +

    The max size of the tree, in Megabytes (MB).

    +
  • +
  • +

    memory_estimate_period

    +

    Typeint

    +

    Default1000000

    +

    Interval (number of processed instances) between memory consumption checks.

    +
  • +
  • +

    stop_mem_management

    +

    Typebool

    +

    DefaultFalse

    +

    If True, stop growing as soon as memory limit is hit.

    +
  • +
  • +

    remove_poor_attrs

    +

    Typebool

    +

    DefaultFalse

    +

    If True, disable poor attributes to reduce memory usage.

    +
  • +
  • +

    merit_preprune

    +

    Typebool

    +

    DefaultTrue

    +

    If True, enable merit-based tree pre-pruning.

    +
  • +
+

Attributes

+
    +
  • +

    height

    +
  • +
  • +

    leaf_prediction

    +

    Return the prediction strategy used by the tree at its leaves.

    +
  • +
  • +

    max_size

    +

Max allowed size the tree can reach (in MB).

    +
  • +
  • +

    n_active_leaves

    +
  • +
  • +

    n_branches

    +
  • +
  • +

    n_inactive_leaves

    +
  • +
  • +

    n_leaves

    +
  • +
  • +

    n_nodes

    +
  • +
  • +

    split_criterion

    +

    Return a string with the name of the split criterion being used by the tree.

    +
  • +
  • +

    summary

    +

    Collect metrics corresponding to the current status of the tree in a string buffer.

    +
  • +
+

Examples

+

from river.datasets import synth
+from river import evaluate
+from river import metrics
+from river import tree
+
+gen = synth.Agrawal(classification_function=0, seed=42)
+dataset = iter(gen.take(1000))
+
+model = tree.HoeffdingTreeClassifier(
+    grace_period=100,
+    delta=1e-5,
+    nominal_attributes=['elevel', 'car', 'zipcode']
+)
+
+metric = metrics.Accuracy()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
Accuracy: 84.58%
+

+

Methods

+
+debug_one +

Print an explanation of how x is predicted.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

str | None: A representation of the path followed by the tree to predict x; None if the tree is empty.

+
+

+
+draw +

Draw the tree using the graphviz library.

+

Since the tree is drawn without passing incoming samples, classification trees will show the majority class in their leaves, whereas regression trees will use the target mean.

+

Parameters

+
    +
  • max_depth'int | None' — defaults to None
    + The maximum depth a tree can reach. If None, the tree will grow indefinitely.
  • +
+
+

+
+learn_one +

Train the model on instance x and corresponding target y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • sample_weight — defaults to 1.0
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

A dictionary that associates a probability with each label.

+
+

+
+to_dataframe +

Return a representation of the current tree structure organized in a pandas.DataFrame object.

+

In case the tree is empty or it only contains a single node (a leaf), None is returned.

+

Returns

+

df

+
+

+

Notes

+

A Hoeffding Tree 1 is an incremental, anytime decision tree induction algorithm that is +capable of learning from massive data streams, assuming that the distribution generating +examples does not change over time. Hoeffding trees exploit the fact that a small sample can +often be enough to choose an optimal splitting attribute. This idea is supported mathematically +by the Hoeffding bound, which quantifies the number of observations (in our case, examples) +needed to estimate some statistics within a prescribed precision (in our case, the goodness of +an attribute).
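For reference, the Hoeffding bound states that, with probability \(1 - \delta\), the true mean of a random variable with range \(R\) does not differ from the empirical mean computed from \(n\) observations by more than:

\[\epsilon = \sqrt{\frac{R^2 \ln(1/\delta)}{2n}}\]

This is the quantity controlled by the delta parameter listed above.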

+

A theoretically appealing feature of Hoeffding Trees, not shared by other incremental decision tree learners, is their sound performance guarantees. Using the Hoeffding bound one can show that their output is asymptotically nearly identical to that of a non-incremental learner using infinitely many examples. This implementation is based on MOA 2.

+
+
+
    +
  1. +

    G. Hulten, L. Spencer, and P. Domingos. Mining time-changing data streams. + In KDD’01, pages 97–106, San Francisco, CA, 2001. ACM Press. 

    +
  2. +
  3. +

    Albert Bifet, Geoff Holmes, Richard Kirkby, Bernhard Pfahringer. + MOA: Massive Online Analysis; Journal of Machine Learning Research 11: 1601-1604, 2010. 

    +
  4. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/tree/HoeffdingTreeRegressor/index.html b/0.19.0/api/tree/HoeffdingTreeRegressor/index.html new file mode 100644 index 0000000000..4ad68609ac --- /dev/null +++ b/0.19.0/api/tree/HoeffdingTreeRegressor/index.html @@ -0,0 +1,3859 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + HoeffdingTreeRegressor - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

HoeffdingTreeRegressor

+

Hoeffding Tree regressor.

+

Parameters

+
    +
  • +

    grace_period

    +

    Typeint

    +

    Default200

    +

    Number of instances a leaf should observe between split attempts.

    +
  • +
  • +

    max_depth

    +

    Typeint | None

    +

    DefaultNone

    +

    The maximum depth a tree can reach. If None, the tree will grow indefinitely.

    +
  • +
  • +

    delta

    +

    Typefloat

    +

    Default1e-07

    +

    Significance level to calculate the Hoeffding bound. The significance level is given by 1 - delta. Values closer to zero imply longer split decision delays.

    +
  • +
  • +

    tau

    +

    Typefloat

    +

    Default0.05

    +

    Threshold below which a split will be forced to break ties.

    +
  • +
  • +

    leaf_prediction

    +

    Typestr

    +

    Defaultadaptive

    +

Prediction mechanism used at the leaves.
    - 'mean' - Target mean
    - 'model' - Uses the model defined in leaf_model
    - 'adaptive' - Chooses between 'mean' and 'model' dynamically

    +
  • +
  • +

    leaf_model

    +

    Typebase.Regressor | None

    +

    DefaultNone

    +

    The regression model used to provide responses if leaf_prediction='model'. If not provided an instance of linear_model.LinearRegression with the default hyperparameters is used.

    +
  • +
  • +

    model_selector_decay

    +

    Typefloat

    +

    Default0.95

    +

The exponential decaying factor applied to the learning models' squared errors, which are monitored if leaf_prediction='adaptive'. Must be between 0 and 1. The closer to 1, the more importance past observations receive. Conversely, the closer the value is to 0, the more influence recently observed errors have on the final decision.

    +
  • +
  • +

    nominal_attributes

    +

    Typelist | None

    +

    DefaultNone

    +

List of nominal attribute identifiers. If empty, then assume that all numeric attributes should be treated as continuous.

    +
  • +
  • +

    splitter

    +

    TypeSplitter | None

    +

    DefaultNone

    +

    The Splitter or Attribute Observer (AO) used to monitor the class statistics of numeric features and perform splits. Splitters are available in the tree.splitter module. Different splitters are available for classification and regression tasks. Classification and regression splitters can be distinguished by their property is_target_class. This is an advanced option. Special care must be taken when choosing different splitters. By default, tree.splitter.TEBSTSplitter is used if splitter is None.

    +
  • +
  • +

    min_samples_split

    +

    Typeint

    +

    Default5

    +

    The minimum number of samples every branch resulting from a split candidate must have to be considered valid.

    +
  • +
  • +

    binary_split

    +

    Typebool

    +

    DefaultFalse

    +

    If True, only allow binary splits.

    +
  • +
  • +

    max_size

    +

    Typefloat

    +

    Default500.0

    +

    The max size of the tree, in Megabytes (MB).

    +
  • +
  • +

    memory_estimate_period

    +

    Typeint

    +

    Default1000000

    +

    Interval (number of processed instances) between memory consumption checks.

    +
  • +
  • +

    stop_mem_management

    +

    Typebool

    +

    DefaultFalse

    +

    If True, stop growing as soon as memory limit is hit.

    +
  • +
  • +

    remove_poor_attrs

    +

    Typebool

    +

    DefaultFalse

    +

    If True, disable poor attributes to reduce memory usage.

    +
  • +
  • +

    merit_preprune

    +

    Typebool

    +

    DefaultTrue

    +

    If True, enable merit-based tree pre-pruning.

    +
  • +
+

Attributes

+
    +
  • +

    height

    +
  • +
  • +

    leaf_prediction

    +

    Return the prediction strategy used by the tree at its leaves.

    +
  • +
  • +

    max_size

    +

Max allowed size the tree can reach (in MB).

    +
  • +
  • +

    n_active_leaves

    +
  • +
  • +

    n_branches

    +
  • +
  • +

    n_inactive_leaves

    +
  • +
  • +

    n_leaves

    +
  • +
  • +

    n_nodes

    +
  • +
  • +

    split_criterion

    +

    Return a string with the name of the split criterion being used by the tree.

    +
  • +
  • +

    summary

    +

    Collect metrics corresponding to the current status of the tree in a string buffer.

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import metrics
+from river import tree
+from river import preprocessing
+
+dataset = datasets.TrumpApproval()
+
+model = (
+    preprocessing.StandardScaler() |
+    tree.HoeffdingTreeRegressor(
+        grace_period=100,
+        model_selector_decay=0.9
+    )
+)
+
+metric = metrics.MAE()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MAE: 0.793345
+

+

Methods

+
+debug_one +

Print an explanation of how x is predicted.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

str | None: A representation of the path followed by the tree to predict x; None if the tree is empty.

+
+

+
+draw +

Draw the tree using the graphviz library.

+

Since the tree is drawn without passing incoming samples, classification trees will show the majority class in their leaves, whereas regression trees will use the target mean.

+

Parameters

+
    +
  • max_depth'int | None' — defaults to None
    + The maximum depth a tree can reach. If None, the tree will grow indefinitely.
  • +
+
+

+
+learn_one +

Train the tree model on sample x and corresponding target y.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • sample_weight — defaults to 1.0
  • +
+

Returns

+

self

+
+

+
+predict_one +

Predict the target value using one of the leaf prediction strategies.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

Predicted target value.

+
+

+
+to_dataframe +

Return a representation of the current tree structure organized in a pandas.DataFrame object.

+

In case the tree is empty or it only contains a single node (a leaf), None is returned.

+

Returns

+

df

+
+

+

Notes

+

The Hoeffding Tree Regressor (HTR) is an adaptation of the incremental tree algorithm of the same name for classification. Similarly to its classification counterpart, HTR uses the Hoeffding bound to control its split decisions. Unlike the classification algorithm, HTR relies on calculating the reduction of variance in the target space to decide among the split candidates. The smaller the variance at its leaf nodes, the more homogeneous the partitions are. At its leaf nodes, HTR fits either linear models or uses the target average as the predictor.
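For reference, the variance reduction merit of a split candidate that partitions the set \(D\) of examples reaching a leaf into subsets \(D_1, \ldots, D_k\) is usually written as:

\[\mathrm{VR} = \mathrm{Var}(D) - \sum_{i=1}^{k} \frac{|D_i|}{|D|}\, \mathrm{Var}(D_i)\]

The split candidate with the largest reduction yields the most homogeneous partitions.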

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/tree/SGTClassifier/index.html b/0.19.0/api/tree/SGTClassifier/index.html new file mode 100644 index 0000000000..723bcda28f --- /dev/null +++ b/0.19.0/api/tree/SGTClassifier/index.html @@ -0,0 +1,3764 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SGTClassifier - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

SGTClassifier

+

Stochastic Gradient Tree1 for binary classification.

+

Binary decision tree classifier that minimizes the binary cross-entropy to guide its growth.

+

Stochastic Gradient Trees (SGT) directly minimize a loss function to guide tree growth and update their predictions. Thus, they differ from other incremental tree learners, which do not directly optimize the loss but rather data impurity-related heuristics.
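For a binary label \(y \in \{0, 1\}\) and a predicted probability \(p\), the binary cross-entropy mentioned above is the usual:

\[\ell(y, p) = -\left[\, y \log p + (1 - y) \log(1 - p) \,\right]\]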

+

Parameters

+
    +
  • +

    delta

    +

    Typefloat

    +

    Default1e-07

    +

Defines the significance level of the F-tests performed to decide upon creating splits or updating predictions.

    +
  • +
  • +

    grace_period

    +

    Typeint

    +

    Default200

    +

    Interval between split attempts or prediction updates.

    +
  • +
  • +

    init_pred

    +

    Typefloat

    +

    Default0.0

    +

    Initial value predicted by the tree.

    +
  • +
  • +

    max_depth

    +

    Typeint | None

    +

    DefaultNone

    +

The maximum depth the tree might reach. If set to None, the tree will grow indefinitely.

    +
  • +
  • +

    lambda_value

    +

    Typefloat

    +

    Default0.1

    +

    Positive float value used to impose a penalty over the tree's predictions and force them to become smaller. The greater the lambda value, the more constrained are the predictions.

    +
  • +
  • +

    gamma

    +

    Typefloat

    +

    Default1.0

    +

    Positive float value used to impose a penalty over the tree's splits and force them to be avoided when possible. The greater the gamma value, the smaller the chance of a split occurring.

    +
  • +
  • +

    nominal_attributes

    +

    Typelist | None

    +

    DefaultNone

    +

    List with identifiers of the nominal attributes. If None, all features containing numbers are assumed to be numeric.

    +
  • +
  • +

    feature_quantizer

    +

    Typetree.splitter.Quantizer | None

    +

    DefaultNone

    +

The algorithm used to quantize numeric features. Either a static quantizer (as in the original implementation) or a dynamic quantizer can be used. The correct choice and setup of the feature quantizer is a crucial step in determining the performance of SGTs. Feature quantizers are akin to the attribute observers used in Hoeffding Trees. By default, an instance of tree.splitter.StaticQuantizer (with default parameters) is used if this parameter is not set.

    +
  • +
+

Attributes

+
    +
  • +

    height

    +
  • +
  • +

    n_branches

    +
  • +
  • +

    n_leaves

    +
  • +
  • +

    n_node_updates

    +
  • +
  • +

    n_nodes

    +
  • +
  • +

    n_observations

    +
  • +
  • +

    n_splits

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import metrics
+from river import tree
+
+dataset = datasets.Phishing()
+model = tree.SGTClassifier(
+    feature_quantizer=tree.splitter.StaticQuantizer(
+        n_bins=32, warm_start=10
+    )
+)
+metric = metrics.Accuracy()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
Accuracy: 82.24%
+

+

Methods

+
+learn_one +

Update the model with a set of features x and a label y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.ClfTarget'
  • +
  • w — defaults to 1.0
  • +
+

Returns

+

Classifier: self

+
+

+
+predict_one +

Predict the label of a set of features x.

+

Parameters

+
    +
  • x'dict'
  • +
  • kwargs
  • +
+

Returns

+

base.typing.ClfTarget | None: The predicted label.

+
+

+
+predict_proba_one +

Predict the probability of each label for a dictionary of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

dict[base.typing.ClfTarget, float]: A dictionary that associates a probability with each label.

+
+

+
+
+
    +
  1. +

    Gouk, H., Pfahringer, B., & Frank, E. (2019, October). Stochastic Gradient Trees. +In Asian Conference on Machine Learning (pp. 1094-1109). 

    +
  2. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/tree/SGTRegressor/index.html b/0.19.0/api/tree/SGTRegressor/index.html new file mode 100644 index 0000000000..cdf9094304 --- /dev/null +++ b/0.19.0/api/tree/SGTRegressor/index.html @@ -0,0 +1,3779 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SGTRegressor - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

SGTRegressor

+

Stochastic Gradient Tree for regression.

+

Incremental decision tree regressor that minimizes the mean square error to guide its growth.

+

Stochastic Gradient Trees (SGT) directly minimize a loss function to guide tree growth and update their predictions. Thus, they differ from other incremental tree learners, which do not directly optimize the loss but rather a data impurity-related heuristic.

+

Parameters

+
    +
  • +

    delta

    +

    Typefloat

    +

    Default1e-07

    +

Defines the significance level of the F-tests performed to decide upon creating splits or updating predictions.

    +
  • +
  • +

    grace_period

    +

    Typeint

    +

    Default200

    +

    Interval between split attempts or prediction updates.

    +
  • +
  • +

    init_pred

    +

    Typefloat

    +

    Default0.0

    +

    Initial value predicted by the tree.

    +
  • +
  • +

    max_depth

    +

    Typeint | None

    +

    DefaultNone

    +

The maximum depth the tree might reach. If set to None, the tree will grow indefinitely.

    +
  • +
  • +

    lambda_value

    +

    Typefloat

    +

    Default0.1

    +

    Positive float value used to impose a penalty over the tree's predictions and force them to become smaller. The greater the lambda value, the more constrained are the predictions.

    +
  • +
  • +

    gamma

    +

    Typefloat

    +

    Default1.0

    +

    Positive float value used to impose a penalty over the tree's splits and force them to be avoided when possible. The greater the gamma value, the smaller the chance of a split occurring.

    +
  • +
  • +

    nominal_attributes

    +

    Typelist | None

    +

    DefaultNone

    +

    List with identifiers of the nominal attributes. If None, all features containing numbers are assumed to be numeric.

    +
  • +
  • +

    feature_quantizer

    +

    Typetree.splitter.Quantizer | None

    +

    DefaultNone

    +

The algorithm used to quantize numeric features. Either a static quantizer (as in the original implementation) or a dynamic quantizer can be used. The correct choice and setup of the feature quantizer is a crucial step in determining the performance of SGTs. Feature quantizers are akin to the attribute observers used in Hoeffding Trees. By default, an instance of tree.splitter.StaticQuantizer (with default parameters) is used if this parameter is not set.

    +
  • +
+

Attributes

+
    +
  • +

    height

    +
  • +
  • +

    n_branches

    +
  • +
  • +

    n_leaves

    +
  • +
  • +

    n_node_updates

    +
  • +
  • +

    n_nodes

    +
  • +
  • +

    n_observations

    +
  • +
  • +

    n_splits

    +
  • +
+

Examples

+

from river import datasets
+from river import evaluate
+from river import metrics
+from river import tree
+
+dataset = datasets.TrumpApproval()
+model = tree.SGTRegressor(
+    delta=0.01,
+    lambda_value=0.01,
+    grace_period=20,
+    feature_quantizer=tree.splitter.DynamicQuantizer(std_prop=0.1)
+)
+metric = metrics.MAE()
+
+evaluate.progressive_val_score(dataset, model, metric)
+
+
MAE: 1.721818
+

+

Methods

+
+learn_one +

Fits to a set of features x and a real-valued target y.

+

Parameters

+
    +
  • x'dict'
  • +
  • y'base.typing.RegTarget'
  • +
  • w — defaults to 1.0
  • +
+

Returns

+

Regressor: self

+
+

+
+predict_one +

Predict the output of features x.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

base.typing.RegTarget: The prediction.

+
+

+

Notes

+

This implementation enhances the original proposal 1 by using an incremental strategy to discretize numerical features dynamically, rather than relying on a calibration set and a parameterized number of bins. The strategy used is an adaptation of the Quantization Observer (QO) 2. Different bin size setting policies are available for selection. They are directly related to the number of split candidates the tree is going to explore and, thus, to how accurate its split decisions are going to be. Besides, the number of stored bins per feature is directly related to the tree's memory usage and runtime.
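Both quantization strategies are plugged in through the feature_quantizer parameter. A minimal sketch contrasting the two (parameter values are illustrative):

from river import tree

# Static quantization, as in the original SGT proposal.
static_sgt = tree.SGTRegressor(
    feature_quantizer=tree.splitter.StaticQuantizer(n_bins=32, warm_start=10)
)

# Dynamic quantization: the radius adapts to the observed feature spread.
dynamic_sgt = tree.SGTRegressor(
    feature_quantizer=tree.splitter.DynamicQuantizer(std_prop=0.1)
)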

+
+
+
    +
  1. +

    Gouk, H., Pfahringer, B., & Frank, E. (2019, October). Stochastic Gradient Trees. +In Asian Conference on Machine Learning (pp. 1094-1109). 

    +
  2. +
  3. +

    Mastelini, S.M. and de Leon Ferreira, A.C.P., 2021. Using dynamical quantization +to perform split attempts in online tree regressors. Pattern Recognition Letters. 

    +
  4. +
+
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/tree/base/Branch/index.html b/0.19.0/api/tree/base/Branch/index.html new file mode 100644 index 0000000000..f914cd5233 --- /dev/null +++ b/0.19.0/api/tree/base/Branch/index.html @@ -0,0 +1,3753 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Branch - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Branch

+

A generic tree branch.

+

Parameters

+
    +
  • +

    children

    +

    Child branches and/or leaves.

    +
  • +
+

Attributes

+
    +
  • +

    height

    +

    Distance to the deepest descendant.

    +
  • +
  • +

    n_branches

    +

    Number of branches, including thyself.

    +
  • +
  • +

    n_leaves

    +

    Number of leaves.

    +
  • +
  • +

    n_nodes

    +

    Number of descendants, including thyself.

    +
  • +
  • +

    repr_split

    +

    String representation of the split.

    +
  • +
+

Methods

+
+iter_bfs +

Iterate over nodes in breadth-first order.

+
+

+
+iter_branches +

Iterate over branches in depth-first order.

+
+

+
+iter_dfs +

Iterate over nodes in depth-first order.

+
+

+
+iter_edges +

Iterate over edges in depth-first order.

+
+

+
+iter_leaves +

Iterate over leaves from the left-most one to the right-most one.

+
+

+
+most_common_path +

Return a tuple with the branch index and the child node related to the most traversed path.

+

Used in case the split feature is missing from an instance.

+
+

+
+next +

Move to the next node down the tree.

+

Parameters

+
    +
  • x
  • +
+
+

+
+to_dataframe +

Build a DataFrame containing one record for each node.

+
+

+
+traverse +

Return the leaf corresponding to the given input.

+

Parameters

+
    +
  • x
  • +
  • until_leaf — defaults to True
  • +
+
+

+
+walk +

Iterate over the nodes of the path induced by x.

+

Parameters

+
    +
  • x
  • +
  • until_leaf — defaults to True
  • +
+
+

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/tree/base/Leaf/index.html b/0.19.0/api/tree/base/Leaf/index.html new file mode 100644 index 0000000000..18b3a62f05 --- /dev/null +++ b/0.19.0/api/tree/base/Leaf/index.html @@ -0,0 +1,3695 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Leaf - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

Leaf

+

A generic tree node.

+

Parameters

+
    +
  • +

    kwargs

    +

    Each provided keyword argument is stored in the leaf as an attribute.

    +
  • +
+

Attributes

+
    +
  • +

    height

    +
  • +
  • +

    n_branches

    +
  • +
  • +

    n_leaves

    +
  • +
  • +

    n_nodes

    +
  • +
+

Methods

+
+iter_branches +
+
+iter_dfs +
+
+iter_edges +
+
+iter_leaves +
+
+walk +
+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/tree/iSOUPTreeRegressor/index.html b/0.19.0/api/tree/iSOUPTreeRegressor/index.html new file mode 100644 index 0000000000..9cb50c8e84 --- /dev/null +++ b/0.19.0/api/tree/iSOUPTreeRegressor/index.html @@ -0,0 +1,3855 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iSOUPTreeRegressor - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + +

iSOUPTreeRegressor

+

Incremental Structured Output Prediction Tree (iSOUP-Tree) for multi-target regression.

+

This is an implementation of the iSOUP-Tree proposed by A. Osojnik, P. Panov, and S. Džeroski 1.

+

Parameters

+
    +
  • +

    grace_period

    +

    Typeint

    +

    Default200

    +

    Number of instances a leaf should observe between split attempts.

    +
  • +
  • +

    max_depth

    +

    Typeint | None

    +

    DefaultNone

    +

    The maximum depth a tree can reach. If None, the tree will grow indefinitely.

    +
  • +
  • +

    delta

    +

    Typefloat

    +

    Default1e-07

    +

Allowed error in the split decision; a value closer to 0 takes longer to decide.

    +
  • +
  • +

    tau

    +

    Typefloat

    +

    Default0.05

    +

    Threshold below which a split will be forced to break ties.

    +
  • +
  • +

    leaf_prediction

    +

    Typestr

    +

    Defaultadaptive

    +

Prediction mechanism used at the leaves.
    - 'mean' - Target mean
    - 'model' - Uses the model defined in leaf_model
    - 'adaptive' - Chooses between 'mean' and 'model' dynamically

    +
  • +
  • +

    leaf_model

    +

    Typebase.Regressor | dict | None

    +

    DefaultNone

    +

The regression model(s) used to provide responses if leaf_prediction='model'. It can be either a regressor (in which case it is going to be replicated to all the targets) or a dictionary whose keys are target identifiers and whose values are instances of base.Regressor. If not provided, instances of linear_model.LinearRegression with the default hyperparameters are used for all the targets. If a dictionary is passed and not all target models are specified, copies of the first model match in the dictionary will be used for the remaining targets.

    +
  • +
  • +

    model_selector_decay

    +

    Typefloat

    +

    Default0.95

    +

The exponential decaying factor applied to the learning models' squared errors, which are monitored if leaf_prediction='adaptive'. Must be between 0 and 1. The closer to 1, the more importance past observations receive. Conversely, the closer the value is to 0, the more influence recently observed errors have on the final decision.

    +
  • +
  • +

    nominal_attributes

    +

    Typelist | None

    +

    DefaultNone

    +

List of nominal attribute identifiers. If empty, then assume that all numeric attributes should be treated as continuous.

    +
  • +
  • +

    splitter

    +

    TypeSplitter | None

    +

    DefaultNone

    +

    The Splitter or Attribute Observer (AO) used to monitor the class statistics of numeric features and perform splits. Splitters are available in the tree.splitter module. Different splitters are available for classification and regression tasks. Classification and regression splitters can be distinguished by their property is_target_class. This is an advanced option. Special care must be taken when choosing different splitters. By default, tree.splitter.TEBSTSplitter is used if splitter is None.

    +
  • +
  • +

    min_samples_split

    +

    Typeint

    +

    Default5

    +

    The minimum number of samples every branch resulting from a split candidate must have to be considered valid.

    +
  • +
  • +

    binary_split

    +

    Typebool

    +

    DefaultFalse

    +

    If True, only allow binary splits.

    +
  • +
  • +

    max_size

    +

    Typefloat

    +

    Default500.0

    +

    The max size of the tree, in Megabytes (MB).

    +
  • +
  • +

    memory_estimate_period

    +

    Typeint

    +

    Default1000000

    +

    Interval (number of processed instances) between memory consumption checks.

    +
  • +
  • +

    stop_mem_management

    +

    Typebool

    +

    DefaultFalse

    +

    If True, stop growing as soon as memory limit is hit.

    +
  • +
  • +

    remove_poor_attrs

    +

    Typebool

    +

    DefaultFalse

    +

    If True, disable poor attributes to reduce memory usage.

    +
  • +
  • +

    merit_preprune

    +

    Typebool

    +

    DefaultTrue

    +

    If True, enable merit-based tree pre-pruning.

    +
  • +
+

Attributes

+
    +
  • +

    height

    +
  • +
  • +

    leaf_prediction

    +

    Return the prediction strategy used by the tree at its leaves.

    +
  • +
  • +

    max_size

    +

Max allowed size the tree can reach (in MB).

    +
  • +
  • +

    n_active_leaves

    +
  • +
  • +

    n_branches

    +
  • +
  • +

    n_inactive_leaves

    +
  • +
  • +

    n_leaves

    +
  • +
  • +

    n_nodes

    +
  • +
  • +

    split_criterion

    +

    Return a string with the name of the split criterion being used by the tree.

    +
  • +
  • +

    summary

    +

    Collect metrics corresponding to the current status of the tree in a string buffer.

    +
  • +
+

Examples

+

import numbers
+from river import compose
+from river import datasets
+from river import evaluate
+from river import linear_model
+from river import metrics
+from river import preprocessing
+from river import tree
+
+dataset = datasets.SolarFlare()
+
+num = compose.SelectType(numbers.Number) | preprocessing.MinMaxScaler()
+cat = compose.SelectType(str) | preprocessing.OneHotEncoder()
+
+model = tree.iSOUPTreeRegressor(
+    grace_period=100,
+    leaf_prediction='model',
+    leaf_model={
+        'c-class-flares': linear_model.LinearRegression(l2=0.02),
+        'm-class-flares': linear_model.PARegressor(),
+        'x-class-flares': linear_model.LinearRegression(l2=0.1)
+    }
+)
+
+pipeline = (num + cat) | model
+metric = metrics.multioutput.MicroAverage(metrics.MAE())
+
+evaluate.progressive_val_score(dataset, pipeline, metric)
+
+
MicroAverage(MAE): 0.426177
+

+

Methods

+
+debug_one +

Print an explanation of how x is predicted.

+

Parameters

+
    +
  • x'dict'
  • +
+

Returns

+

str | None: A representation of the path followed by the tree to predict x; None if the tree is empty.

+
+

+
+draw +

Draw the tree using the graphviz library.

+

Since the tree is drawn without passing incoming samples, classification trees will show the majority class in their leaves, whereas regression trees will use the target mean.

+

Parameters

+
    +
  • max_depth'int | None' — defaults to None
    + The maximum depth a tree can reach. If None, the tree will grow indefinitely.
  • +
+
+

+
+learn_one +

Incrementally train the model with one sample.

+

Training tasks:

  • If the tree is empty, create a leaf node as the root.
  • If the tree is already initialized, find the leaf corresponding to the instance and update the leaf node statistics.
  • If growth is allowed and the number of instances that the leaf has observed between split attempts exceeds the grace period, then attempt to split.

+

Parameters

+
    +
  • x
  • +
  • y
  • +
  • sample_weight'float' — defaults to 1.0
  • +
+
+

+
+predict_one +

Predict the target value using one of the leaf prediction strategies.

+

Parameters

+
    +
  • x
  • +
+

Returns

+

Predicted target value.

+
+

+
+to_dataframe +

Return a representation of the current tree structure organized in a pandas.DataFrame object.

+

In case the tree is empty or it only contains a single node (a leaf), None is returned.

+

Returns

+

df

+
+

+
+
+
    +
  1. +

    Aljaž Osojnik, Panče Panov, and Sašo Džeroski. "Tree-based methods for online multi-target regression." Journal of Intelligent Information Systems 50.2 (2018): 315-339.

    +
  2. +
+
diff --git a/0.19.0/api/tree/splitter/DynamicQuantizer/index.html b/0.19.0/api/tree/splitter/DynamicQuantizer/index.html new file mode 100644

DynamicQuantizer

+

Adapted version of the Quantizer Observer (QO)1 that is applied to Stochastic Gradient Trees (SGT).

+

This feature quantizer starts by partitioning the inputs using the passed radius value. As more splits are created in the SGTs, new feature quantizers will use std * std_prop as the quantization radius. In the expression, std represents the standard deviation of the input data, which is calculated incrementally.

+
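As a hedged sketch of where this quantizer plugs in (the dataset and parameter values below are illustrative assumptions, not recommendations), it can be handed to a Stochastic Gradient Tree through the feature_quantizer argument:

from river import datasets
+from river import evaluate
+from river import metrics
+from river import tree
+
+model = tree.SGTRegressor(
+    feature_quantizer=tree.splitter.DynamicQuantizer(radius=0.5, std_prop=0.25)
+)
+
+metric = metrics.MAE()
+evaluate.progressive_val_score(datasets.TrumpApproval(), model, metric)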

Parameters

+
    +
  • +

    radius

    +

    Typefloat

    +

    Default0.5

    +

    The initial quantization radius.

    +
  • +
  • +

    std_prop

    +

    Typefloat

    +

    Default0.25

    +

    The proportion of the standard deviation that is going to be used to define the radius value for new quantizer instances following the initial one.

    +
  • +
+

Methods

+
+update +
+
+
+
    +
  1. +

    Mastelini, S.M. and de Leon Ferreira, A.C.P., 2021. Using dynamical quantization to perform split attempts in online tree regressors. Pattern Recognition Letters.

    +
  2. +
+
diff --git a/0.19.0/api/tree/splitter/EBSTSplitter/index.html b/0.19.0/api/tree/splitter/EBSTSplitter/index.html new file mode 100644

EBSTSplitter

+

iSOUP-Tree's Extended Binary Search Tree (E-BST).

+

This class implements the Extended Binary Search Tree1 (E-BST) structure, using the variant employed by Osojnik et al.2 in the iSOUP-Tree algorithm. This structure is employed to observe the target space distribution.

+

Proposed along with Fast Incremental Model Tree with Drift Detection1 (FIMT-DD), E-BST was the first attribute observer (AO) proposed for incremental Hoeffding Tree regressors. This AO works by storing all observations between splits in an extended binary search tree structure. E-BST stores the input feature realizations and statistics of the target(s) that enable calculating the split heuristic at any time. To alleviate time and memory costs, E-BST implements a memory management routine, where the worst split candidates are pruned from the binary tree.

+

In this variant, only the left branch statistics are stored and the complete split-enabling statistics are calculated with an in-order traversal of the binary search tree.

+
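As a hedged usage sketch (the tree and dataset choices are illustrative assumptions), the observer is typically passed to a Hoeffding tree regressor via its splitter argument:

from river import datasets
+from river import evaluate
+from river import metrics
+from river import tree
+
+model = tree.HoeffdingTreeRegressor(
+    grace_period=100,
+    splitter=tree.splitter.EBSTSplitter()  # observe numeric features with an E-BST
+)
+
+metric = metrics.MAE()
+evaluate.progressive_val_score(datasets.TrumpApproval(), model, metric)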

Attributes

+
    +
  • +

    is_numeric

    +

    Determine whether or not the splitter works with numerical features.

    +
  • +
  • +

    is_target_class

    +

    Check on which kind of learning task the splitter is designed to work. If True, the splitter works with classification trees, otherwise it is designed for regression trees.

    +
  • +
+

Methods

+
+best_evaluated_split_suggestion +

Get the best split suggestion given a criterion and the target's statistics.

+

Parameters

+
    +
  • criterion'SplitCriterion'
  • +
  • pre_split_dist'list | dict'
  • +
  • att_idx'base.typing.FeatureName'
  • +
  • binary_only'bool' — defaults to True
  • +
+

Returns

+

BranchFactory: Suggestion of the best attribute split.

+
+

+
+cond_proba +

Not implemented in regression splitters.

+

Parameters

+
    +
  • att_val
  • +
  • target_val'base.typing.ClfTarget'
  • +
+
+

+
+remove_bad_splits +

Remove bad splits.

+

Based on FIMT-DD's 1 procedure to remove bad split candidates from the E-BST. This mechanism is triggered every time a split attempt fails. The rationale is to remove points whose split merit is much worse than the best candidate overall (for which the growth decision already failed). Let \(m_1\) be the merit of the best split point and \(m_2\) be the merit of the second best split candidate. The ratio \(r = m_2/m_1\) along with the Hoeffding bound (\(\epsilon\)) are used to decide upon creating a split. A split occurs when \(r < 1 - \epsilon\). A split candidate, with merit \(m_i\), is considered bad if \(m_i / m_1 < r - 2\epsilon\). The rationale is the following: if the merit ratio for this point is smaller than the lower bound of \(r\), then the true merit of that split relative to the best one is small. Hence, this candidate can be safely removed. To avoid excessive and costly manipulations of the E-BST to update the stored statistics, only the nodes whose children are all bad split points are pruned, as defined in 1.

+

Parameters

+
    +
  • criterion
  • +
  • last_check_ratio'float'
  • +
  • last_check_vr'float'
  • +
  • last_check_e'float'
  • +
  • pre_split_dist'list | dict'
  • +
+
+

+
+update +

Update statistics of this observer given an attribute value, its target value and the weight of the instance observed.

+

Parameters

+
    +
  • att_val
  • +
  • target_val'base.typing.Target'
  • +
  • sample_weight'float'
  • +
+
+

+
+
+
    +
  1. +

    Ikonomovska, E., Gama, J., & Džeroski, S. (2011). Learning model trees from evolving data streams. Data mining and knowledge discovery, 23(1), 128-168.

    +
  2. +
  3. +

    Osojnik, Aljaž. 2017. Structured output prediction on Data Streams (Doctoral Dissertation).

    +
  4. +
+
diff --git a/0.19.0/api/tree/splitter/ExhaustiveSplitter/index.html b/0.19.0/api/tree/splitter/ExhaustiveSplitter/index.html new file mode 100644

ExhaustiveSplitter

+

Numeric attribute observer for classification tasks that is based on a Binary Search Tree.

+

This algorithm1 is also referred to as exhaustive attribute observer, since it ends up storing all the observations between split attempts2.

+

This splitter cannot perform probability density estimations, so it does not work well when coupled with tree leaves using naive Bayes models.

+
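Given the caveat above, a hedged usage sketch would pair this splitter with majority-class leaves rather than naive Bayes ones (the dataset choice is an illustrative assumption):

from river import datasets
+from river import evaluate
+from river import metrics
+from river import tree
+
+model = tree.HoeffdingTreeClassifier(
+    splitter=tree.splitter.ExhaustiveSplitter(),
+    leaf_prediction='mc'  # no density estimates, so avoid naive Bayes leaves
+)
+
+metric = metrics.Accuracy()
+evaluate.progressive_val_score(datasets.Phishing(), model, metric)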

Attributes

+
    +
  • +

    is_numeric

    +

    Determine whether or not the splitter works with numerical features.

    +
  • +
  • +

    is_target_class

    +

    Check on which kind of learning task the splitter is designed to work. If True, the splitter works with classification trees, otherwise it is designed for regression trees.

    +
  • +
+

Methods

+
+best_evaluated_split_suggestion +

Get the best split suggestion given a criterion and the target's statistics.

+

Parameters

+
    +
  • criterion'SplitCriterion'
  • +
  • pre_split_dist'list | dict'
  • +
  • att_idx'base.typing.FeatureName'
  • +
  • binary_only'bool'
  • +
+

Returns

+

BranchFactory: Suggestion of the best attribute split.

+
+

+
+cond_proba +

The underlying data structure used to monitor the input does not allow probability density estimations. Hence, it always returns zero for any given input.

+

Parameters

+
    +
  • att_val
  • +
  • target_val'base.typing.ClfTarget'
  • +
+
+

+
+update +

Update statistics of this observer given an attribute value, its target value and the weight of the instance observed.

+

Parameters

+
    +
  • att_val
  • +
  • target_val'base.typing.Target'
  • +
  • sample_weight'float'
  • +
+
+

+
+
+
    +
  1. +

    Domingos, P. and Hulten, G., 2000, August. Mining high-speed data streams. In Proceedings of the sixth ACM SIGKDD international conference on Knowledge discovery and data mining (pp. 71-80).

    +
  2. +
  3. +

    Pfahringer, B., Holmes, G. and Kirkby, R., 2008, May. Handling numeric attributes in Hoeffding trees. In Pacific-Asia Conference on Knowledge Discovery and Data Mining (pp. 296-307). Springer, Berlin, Heidelberg.

    +
  4. +
+
diff --git a/0.19.0/api/tree/splitter/GaussianSplitter/index.html b/0.19.0/api/tree/splitter/GaussianSplitter/index.html new file mode 100644

GaussianSplitter

+

Numeric attribute observer for classification tasks that is based on Gaussian estimators.

+

The distribution of each class is approximated using a Gaussian distribution. Hence, the probability density function can be easily calculated.

+
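Since each class is summarized by a Gaussian, density-based leaf predictors remain usable. A hedged sketch (parameter and dataset choices are illustrative assumptions):

from river import datasets
+from river import evaluate
+from river import metrics
+from river import tree
+
+model = tree.HoeffdingTreeClassifier(
+    splitter=tree.splitter.GaussianSplitter(n_splits=20),
+    leaf_prediction='nba'  # Gaussian estimates make naive Bayes leaves viable
+)
+
+metric = metrics.Accuracy()
+evaluate.progressive_val_score(datasets.Phishing(), model, metric)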

Parameters

+
    +
  • +

    n_splits

    +

    Typeint

    +

    Default10

    +

    The number of partitions to consider when querying for split candidates.

    +
  • +
+

Attributes

+
    +
  • +

    is_numeric

    +

    Determine whether or not the splitter works with numerical features.

    +
  • +
  • +

    is_target_class

    +

    Check on which kind of learning task the splitter is designed to work. If True, the splitter works with classification trees, otherwise it is designed for regression trees.

    +
  • +
+

Methods

+
+best_evaluated_split_suggestion +

Get the best split suggestion given a criterion and the target's statistics.

+

Parameters

+
    +
  • criterion'SplitCriterion'
  • +
  • pre_split_dist'list | dict'
  • +
  • att_idx'base.typing.FeatureName'
  • +
  • binary_only'bool'
  • +
+

Returns

+

BranchFactory: Suggestion of the best attribute split.

+
+

+
+cond_proba +

Get the probability for an attribute value given a class.

+

Parameters

+
    +
  • att_val
  • +
  • target_val'base.typing.ClfTarget'
  • +
+

Returns

+

float: Probability for an attribute value given a class.

+
+

+
+update +

Update statistics of this observer given an attribute value, its target value and the weight of the instance observed.

+

Parameters

+
    +
  • att_val
  • +
  • target_val'base.typing.Target'
  • +
  • sample_weight'float'
  • +
+
+

diff --git a/0.19.0/api/tree/splitter/HistogramSplitter/index.html b/0.19.0/api/tree/splitter/HistogramSplitter/index.html new file mode 100644

HistogramSplitter

+

Numeric attribute observer for classification tasks that discretizes features using histograms.

+

Parameters

+
    +
  • +

    n_bins

    +

    Typeint

    +

    Default256

    +

    The maximum number of bins in the histogram.

    +
  • +
  • +

    n_splits

    +

    Typeint

    +

    Default32

    +

    The number of split points to evaluate when querying for the best split candidate.

    +
  • +
+
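A hedged usage sketch (the values below are illustrative assumptions): coarser histograms trade split quality for memory.

from river import tree
+
+splitter = tree.splitter.HistogramSplitter(n_bins=128, n_splits=32)
+model = tree.HoeffdingTreeClassifier(splitter=splitter)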

Attributes

+
    +
  • +

    is_numeric

    +

    Determine whether or not the splitter works with numerical features.

    +
  • +
  • +

    is_target_class

    +

    Check on which kind of learning task the splitter is designed to work. If True, the splitter works with classification trees, otherwise it is designed for regression trees.

    +
  • +
+

Methods

+
+best_evaluated_split_suggestion +

Get the best split suggestion given a criterion and the target's statistics.

+

Parameters

+
    +
  • criterion'SplitCriterion'
  • +
  • pre_split_dist'list | dict'
  • +
  • att_idx'base.typing.FeatureName'
  • +
  • binary_only'bool'
  • +
+

Returns

+

BranchFactory: Suggestion of the best attribute split.

+
+

+
+cond_proba +

Get the probability for an attribute value given a class.

+

Parameters

+
    +
  • att_val
  • +
  • target_val'base.typing.ClfTarget'
  • +
+

Returns

+

float: Probability for an attribute value given a class.

+
+

+
+update +

Update statistics of this observer given an attribute value, its target value and the weight of the instance observed.

+

Parameters

+
    +
  • att_val
  • +
  • target_val'base.typing.Target'
  • +
  • sample_weight'float'
  • +
+
+

diff --git a/0.19.0/api/tree/splitter/QOSplitter/index.html b/0.19.0/api/tree/splitter/QOSplitter/index.html new file mode 100644

QOSplitter

+

Quantization observer (QO).

+

This splitter utilizes a hash-based quantization algorithm to keep track of the target statistics and evaluate split candidates. QO relies on the radius parameter to define discretization intervals for each incoming feature. Split candidates are defined as the midpoints between two consecutive hash slots. Both binary splits and multi-way splits can be created by this attribute observer. This class implements the algorithm described in 1.

+

The smaller the quantization radius, the more hash slots will be created to accommodate the discretized data. Hence, both the running time and memory consumption increase, but the resulting splits ought to be closer to the ones obtained by a batch exhaustive approach. On the other hand, if the radius is too large, fewer slots will be created, less memory and running time will be required, but at the cost of coarse split suggestions.

+

QO assumes that all features have the same range. It is always advised to scale the features to apply this splitter. That can be done using the preprocessing module. A good "rule of thumb" is to scale data using preprocessing.StandardScaler and define the radius as a proportion of the features' standard deviation. For instance, the default radius value would correspond to one quarter of the normalized features' standard deviation (since the scaled data has zero mean and unit variance). If the features come from normal distributions, by following the empirical rule, roughly 32 hash slots will be created.

+
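The rule of thumb above translates into something like the following hedged sketch, where features are standardized upstream and the default radius then amounts to a quarter of a standard deviation (the dataset choice is an illustrative assumption):

from river import datasets
+from river import evaluate
+from river import metrics
+from river import preprocessing
+from river import tree
+
+model = (
+    preprocessing.StandardScaler() |
+    tree.HoeffdingTreeRegressor(splitter=tree.splitter.QOSplitter(radius=0.25))
+)
+
+metric = metrics.MAE()
+evaluate.progressive_val_score(datasets.TrumpApproval(), model, metric)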

Parameters

+
    +
  • +

    radius

    +

    Typefloat

    +

    Default0.25

    +

    The quantization radius. QO discretizes the incoming feature in intervals of equal length that are defined by this parameter.

    +
  • +
  • +

    allow_multiway_splits

    +

    DefaultFalse

    +

    Whether or not to allow multi-way splits to be evaluated. Numeric multi-way splits use the same quantization strategy of QO to create multiple tree branches. The same quantization radius is used, and each stored slot represents the split enabling statistics of one branch.

    +
  • +
+

Attributes

+
    +
  • +

    is_numeric

    +

    Determine whether or not the splitter works with numerical features.

    +
  • +
  • +

    is_target_class

    +

    Check on which kind of learning task the splitter is designed to work. If True, the splitter works with classification trees, otherwise it is designed for regression trees.

    +
  • +
+

Methods

+
+best_evaluated_split_suggestion +

Get the best split suggestion given a criterion and the target's statistics.

+

Parameters

+
    +
  • criterion'SplitCriterion'
  • +
  • pre_split_dist'list | dict'
  • +
  • att_idx'base.typing.FeatureName'
  • +
  • binary_only'bool' — defaults to True
  • +
+

Returns

+

BranchFactory: Suggestion of the best attribute split.

+
+

+
+cond_proba +

Get the probability for an attribute value given a class.

+

Parameters

+
    +
  • att_val
  • +
  • target_val'base.typing.ClfTarget'
  • +
+

Returns

+

float: Probability for an attribute value given a class.

+
+

+
+update +

Update statistics of this observer given an attribute value, its target value and the weight of the instance observed.

+

Parameters

+
    +
  • att_val
  • +
  • target_val'base.typing.Target'
  • +
  • sample_weight'float'
  • +
+
+

+
+
+
    +
  1. +

    Mastelini, S.M. and de Leon Ferreira, A.C.P., 2021. Using dynamical quantization to perform split attempts in online tree regressors. Pattern Recognition Letters.

    +
  2. +
+
diff --git a/0.19.0/api/tree/splitter/Quantizer/index.html b/0.19.0/api/tree/splitter/Quantizer/index.html new file mode 100644

Quantizer

+

Base class for the feature quantizers used in Stochastic Gradient Trees1.

+

Methods

+
+update +
+
+
+
    +
  1. +

    Gouk, H., Pfahringer, B., & Frank, E. (2019, October). Stochastic Gradient Trees. In Asian Conference on Machine Learning (pp. 1094-1109).

    +
  2. +
+
diff --git a/0.19.0/api/tree/splitter/Splitter/index.html b/0.19.0/api/tree/splitter/Splitter/index.html new file mode 100644

Splitter

+

Base class for the tree splitters.

+

Each Attribute Observer (AO) or Splitter monitors one input feature and finds the best split point for this attribute. AOs can also perform other tasks related to the monitored feature, such as estimating its probability density function (classification case).

+

This class should not be instantiated, as none of its methods are implemented.

+

Attributes

+
    +
  • +

    is_numeric

    +

    Determine whether or not the splitter works with numerical features.

    +
  • +
  • +

    is_target_class

    +

    Check on which kind of learning task the splitter is designed to work. If True, the splitter works with classification trees, otherwise it is designed for regression trees.

    +
  • +
+

Methods

+
+best_evaluated_split_suggestion +

Get the best split suggestion given a criterion and the target's statistics.

+

Parameters

+
    +
  • criterion'SplitCriterion'
  • +
  • pre_split_dist'list | dict'
  • +
  • att_idx'base.typing.FeatureName'
  • +
  • binary_only'bool'
  • +
+

Returns

+

BranchFactory: Suggestion of the best attribute split.

+
+

+
+cond_proba +

Get the probability for an attribute value given a class.

+

Parameters

+
    +
  • att_val
  • +
  • target_val'base.typing.ClfTarget'
  • +
+

Returns

+

float: Probability for an attribute value given a class.

+
+

+
+update +

Update statistics of this observer given an attribute value, its target value and the weight of the instance observed.

+

Parameters

+
    +
  • att_val
  • +
  • target_val'base.typing.Target'
  • +
  • sample_weight'float'
  • +
+
+

diff --git a/0.19.0/api/tree/splitter/StaticQuantizer/index.html b/0.19.0/api/tree/splitter/StaticQuantizer/index.html new file mode 100644

StaticQuantizer

+

Quantization strategy originally used in Stochastic Gradient Trees (SGT)1.

+

First, a buffer of warm_start observations is stored. The data stored in the buffer is then used to quantize the input feature into n_bins intervals. These intervals will be replicated to every new quantizer. Feature values lying outside of the limits defined by the initial buffer will be mapped to the head or tail of the list of intervals.

+
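A hedged sketch of how this strategy is wired into an SGT (parameter values are illustrative assumptions; buckets is left unset, as required):

from river import tree
+
+quantizer = tree.splitter.StaticQuantizer(n_bins=64, warm_start=100)
+model = tree.SGTRegressor(feature_quantizer=quantizer)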

Parameters

+
    +
  • +

    n_bins

    +

    Typeint

    +

    Default64

    +

    The number of bins (intervals) to divide the input feature.

    +
  • +
  • +

    warm_start

    +

    Typeint

    +

    Default100

    +

    The number of observations used to initialize the quantization intervals.

    +
  • +
  • +

    buckets

    +

    Typelist | None

    +

    DefaultNone

    +

    This parameter is only used internally by the quantizer, so it must not be set. Once the intervals are defined, new instances of this quantizer will receive the quantization information via this parameter.

    +
  • +
+

Methods

+
+update +
+
+
+
    +
  1. +

    Gouk, H., Pfahringer, B., & Frank, E. (2019, October). Stochastic Gradient Trees. In Asian Conference on Machine Learning (pp. 1094-1109).

    +
  2. +
+
diff --git a/0.19.0/api/tree/splitter/TEBSTSplitter/index.html b/0.19.0/api/tree/splitter/TEBSTSplitter/index.html new file mode 100644

TEBSTSplitter

+

Truncated E-BST.

+

Variation of E-BST that rounds the incoming feature values before passing them to the binary search tree (BST). By doing so, the attribute observer might reduce its processing time and memory usage since small variations in the input values will end up being mapped to the same BST node.

+
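As a hedged sketch, digits controls how aggressively the incoming values are rounded before entering the BST (the value below is an illustrative assumption):

from river import tree
+
+# Rounding to 2 decimal places maps nearby feature values to the same BST node.
+model = tree.HoeffdingTreeRegressor(splitter=tree.splitter.TEBSTSplitter(digits=2))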

Parameters

+
    +
  • +

    digits

    +

    Typeint

    +

    Default1

    +

    The number of decimal places used to round the input feature values.

    +
  • +
+

Attributes

+
    +
  • +

    is_numeric

    +

    Determine whether or not the splitter works with numerical features.

    +
  • +
  • +

    is_target_class

    +

    Check on which kind of learning task the splitter is designed to work. If True, the splitter works with classification trees, otherwise it is designed for regression trees.

    +
  • +
+

Methods

+
+best_evaluated_split_suggestion +

Get the best split suggestion given a criterion and the target's statistics.

+

Parameters

+
    +
  • criterion'SplitCriterion'
  • +
  • pre_split_dist'list | dict'
  • +
  • att_idx'base.typing.FeatureName'
  • +
  • binary_only'bool' — defaults to True
  • +
+

Returns

+

BranchFactory: Suggestion of the best attribute split.

+
+

+
+cond_proba +

Not implemented in regression splitters.

+

Parameters

+
    +
  • att_val
  • +
  • target_val'base.typing.ClfTarget'
  • +
+
+

+
+remove_bad_splits +

Remove bad splits.

+

Based on FIMT-DD's procedure (Ikonomovska et al., 2011) to remove bad split candidates from the E-BST. This mechanism is triggered every time a split attempt fails. The rationale is to remove points whose split merit is much worse than the best candidate overall (for which the growth decision already failed). Let \(m_1\) be the merit of the best split point and \(m_2\) be the merit of the second best split candidate. The ratio \(r = m_2/m_1\) along with the Hoeffding bound (\(\epsilon\)) are used to decide upon creating a split. A split occurs when \(r < 1 - \epsilon\). A split candidate, with merit \(m_i\), is considered bad if \(m_i / m_1 < r - 2\epsilon\). The rationale is the following: if the merit ratio for this point is smaller than the lower bound of \(r\), then the true merit of that split relative to the best one is small. Hence, this candidate can be safely removed. To avoid excessive and costly manipulations of the E-BST to update the stored statistics, only the nodes whose children are all bad split points are pruned, as defined in the same paper.

+

Parameters

+
    +
  • criterion
  • +
  • last_check_ratio'float'
  • +
  • last_check_vr'float'
  • +
  • last_check_e'float'
  • +
  • pre_split_dist'list | dict'
  • +
+
+

+
+update +

Update statistics of this observer given an attribute value, its target value and the weight of the instance observed.

+

Parameters

+
    +
  • att_val
  • +
  • target_val'base.typing.Target'
  • +
  • sample_weight'float'
  • +
+
+

diff --git a/0.19.0/api/utils/Rolling/index.html b/0.19.0/api/utils/Rolling/index.html new file mode 100644

Rolling

+

A generic wrapper for performing rolling computations.

+

This can be wrapped around any object which implements both an update and a revert method. Inputs to update are stored in a queue. Elements of the queue are popped when the window is full.

+

Parameters

+
    +
  • +

    obj

    +

    TypeRollable

    +

    An object that implements both an update method and a revert method.

    +
  • +
  • +

    window_size

    +

    Typeint

    +

    Size of the window.

    +
  • +
+

Attributes

+
    +
  • window_size
  • +
+

Examples

+

For instance, here is how you can compute a rolling average over a window of size 3:

+

from river import stats, utils
+
+X = [1, 3, 5, 7]
+rmean = utils.Rolling(stats.Mean(), window_size=3)
+
+for x in X:
+    print(rmean.update(x).get())
+
+
1.0
+2.0
+3.0
+5.0
+

+

Methods

+
+update +
diff --git a/0.19.0/api/utils/SortedWindow/index.html b/0.19.0/api/utils/SortedWindow/index.html new file mode 100644

SortedWindow

+

Sorted running window data structure.

+

Parameters

+
    +
  • +

    size

    +

    Typeint

    +

    Size of the window to compute the rolling quantile.

    +
  • +
+

Attributes

+
    +
  • size
  • +
+

Examples

+

from river import utils
+
+window = utils.SortedWindow(size=3)
+
+for i in reversed(range(9)):
+    print(window.append(i))
+
+
[8]
+[7, 8]
+[6, 7, 8]
+[5, 6, 7]
+[4, 5, 6]
+[3, 4, 5]
+[2, 3, 4]
+[1, 2, 3]
+[0, 1, 2]
+

+

Methods

diff --git a/0.19.0/api/utils/TimeRolling/index.html b/0.19.0/api/utils/TimeRolling/index.html new file mode 100644

TimeRolling

+

A generic wrapper for performing time rolling computations.

+

This can be wrapped around any object which implements both an update and a revert method. Inputs to update are stored in a queue. Elements of the queue are popped when they are too old.

+

Parameters

+
    +
  • +

    obj

    +

    TypeRollable

    +

    An object that implements both an update method and a revert method.

    +
  • +
  • +

    period

    +

    Typedt.timedelta

    +

    A duration of time, expressed as a datetime.timedelta.

    +
  • +
+

Examples

+

For instance, here is how you can compute a rolling average over a period of 3 days:

+

import datetime as dt
+from river import stats, utils
+
+X = {
+    dt.datetime(2019, 1, 1): 1,
+    dt.datetime(2019, 1, 2): 5,
+    dt.datetime(2019, 1, 3): 9,
+    dt.datetime(2019, 1, 4): 13
+}
+
+rmean = utils.TimeRolling(stats.Mean(), period=dt.timedelta(days=3))
+for t, x in X.items():
+    print(rmean.update(x, t=t).get())
+
+
1.0
+3.0
+5.0
+9.0
+

+

Methods

+
+update +
diff --git a/0.19.0/api/utils/VectorDict/index.html b/0.19.0/api/utils/VectorDict/index.html new file mode 100644

VectorDict

+
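This page carries no prose description; as a hedged illustration of the dict-with-vector-semantics behavior suggested by the method list (the arithmetic shown is an assumption), missing keys are treated as zeros:

from river import utils
+
+u = utils.VectorDict({'a': 1, 'b': 2})
+v = utils.VectorDict({'b': 3, 'c': 4})
+
+(u + v).to_dict()  # {'a': 1, 'b': 5, 'c': 4}
+u @ v              # dot product over shared keys: 2 * 3 = 6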

Methods

+
+abs +
+

+
+clear +
+

+
+get +

Parameters

+
    +
  • key
  • +
  • args
  • +
  • kwargs
  • +
+
+

+
+items +
+

+
+keys +
+

+
+max +
+

+
+maximum +

Parameters

+
    +
  • other
  • +
+
+

+
+min +
+

+
+minimum +

Parameters

+
    +
  • other
  • +
+
+

+
+pop +

Parameters

+
    +
  • args
  • +
  • kwargs
  • +
+
+

+
+popitem +
+

+
+setdefault +

Parameters

+
    +
  • key
  • +
  • args
  • +
  • kwargs
  • +
+
+

+
+to_dict +
+

+
+to_numpy +

Parameters

+
    +
  • fields
  • +
+
+

+
+update +

Parameters

+
    +
  • args
  • +
  • kwargs
  • +
+
+

+
+values +
+

+
+with_mask +

Parameters

+
    +
  • mask
  • +
  • copy — defaults to False
  • +
+
+

diff --git a/0.19.0/api/utils/dict2numpy/index.html b/0.19.0/api/utils/dict2numpy/index.html new file mode 100644

dict2numpy

+

Convert a dictionary containing data to a numpy array.

+

There is no restriction on the type of keys in data, but values must be strictly numeric. To make sure random permutations of the features do not impact the learning algorithms, keys are first converted to strings and then sorted prior to the conversion.

+

Parameters

+
    +
  • +

    data

    +

    A dictionary whose keys represent input attributes and the values represent their observed contents.

    +
  • +
+

Examples

+

from river.utils import dict2numpy
+dict2numpy({'a': 1, 'b': 2, 3: 3})
+
+
array([3, 1, 2])
+

diff --git a/0.19.0/api/utils/expand-param-grid/index.html b/0.19.0/api/utils/expand-param-grid/index.html new file mode 100644

expand_param_grid

+

Expands a grid of parameters.

+

This method can be used to generate a list of model parametrizations from a dictionary where each parameter is associated with a list of possible parameters. In other words, it expands a grid of parameters.

+

Typically, this method can be used to create copies of a given model with different parameter choices. The models can then be used as part of a model selection process, such as a selection.SuccessiveHalvingClassifier or a selection.EWARegressor.

+

The syntax for the parameter grid is quite flexible. It allows nesting parameters and can therefore be used to generate parameters for a pipeline.

+

Parameters

+
    +
  • +

    model

    +

    Typebase.Estimator

    +
  • +
  • +

    grid

    +

    Typedict

    +

    The grid of parameters to expand. The provided dictionary can be nested. The only requirement is that the values at the leaves need to be lists.

    +
  • +
+

Examples

+

As an initial example, we can expand a grid of parameters for a single model.

+

from river import linear_model
+from river import optim
+from river import utils
+
+model = linear_model.LinearRegression()
+
+grid = {'optimizer': [optim.SGD(.1), optim.SGD(.01), optim.SGD(.001)]}
+models = utils.expand_param_grid(model, grid)
+len(models)
+
+
3
+

+

models[0]
+
+
LinearRegression (
+  optimizer=SGD (
+    lr=Constant (
+      learning_rate=0.1
+    )
+  )
+  loss=Squared ()
+  l2=0.
+  l1=0.
+  intercept_init=0.
+  intercept_lr=Constant (
+    learning_rate=0.01
+  )
+  clip_gradient=1e+12
+  initializer=Zeros ()
+)
+

+

You can expand parameters for multiple choices like so:

+

grid = {
+    'optimizer': [
+        (optim.SGD, {'lr': [.1, .01, .001]}),
+        (optim.Adam, {'lr': [.1, .01, .01]})
+    ]
+}
+models = utils.expand_param_grid(model, grid)
+len(models)
+
+
6
+

+

You may specify a grid of parameters for a pipeline via nesting:

+

from river import feature_extraction
+
+model = (
+    feature_extraction.BagOfWords() |
+    linear_model.LinearRegression()
+)
+
+grid = {
+    'BagOfWords': {
+        'strip_accents': [False, True]
+    },
+    'LinearRegression': {
+        'optimizer': [
+            (optim.SGD, {'lr': [.1, .01]}),
+            (optim.Adam, {'lr': [.1, .01]})
+        ]
+    }
+}
+
+models = utils.expand_param_grid(model, grid)
+len(models)
+
+
8
+

diff --git a/0.19.0/api/utils/log-method-calls/index.html b/0.19.0/api/utils/log-method-calls/index.html new file mode 100644

log_method_calls

+

A context manager to log method calls.

+

All method calls will be logged by default. This behavior can be overridden by passing filtering functions.

+

Parameters

+
    +
  • +

    class_condition

    +

    Typetyping.Callable[[typing.Any], bool] | None

    +

    DefaultNone

    +

    A function which determines if a class should be logged or not.

    +
  • +
  • +

    method_condition

    +

    Typetyping.Callable[[typing.Any], bool] | None

    +

    DefaultNone

    +

    A function which determines if a method should be logged or not.

    +
  • +
+

Examples

+

import io
+import logging
+from river import anomaly
+from river import compose
+from river import datasets
+from river import preprocessing
+from river import utils
+
+model = compose.Pipeline(
+    preprocessing.MinMaxScaler(),
+    anomaly.HalfSpaceTrees(seed=42)
+)
+
+class_condition = lambda x: x.__class__.__name__ in ('MinMaxScaler', 'HalfSpaceTrees')
+
+logger = logging.getLogger()
+logger.setLevel(logging.DEBUG)
+
+logs = io.StringIO()
+sh = logging.StreamHandler(logs)
+sh.setLevel(logging.DEBUG)
+logger.addHandler(sh)
+
+with utils.log_method_calls(class_condition):
+    for x, y in datasets.CreditCard().take(1):
+        score = model.score_one(x)
+        model = model.learn_one(x)
+
+print(logs.getvalue())
+
+
MinMaxScaler.transform_one
+HalfSpaceTrees.score_one
+MinMaxScaler.learn_one
+MinMaxScaler.transform_one
+HalfSpaceTrees.learn_one
+

+
logs.close()
+
diff --git a/0.19.0/api/utils/math/argmax/index.html b/0.19.0/api/utils/math/argmax/index.html new file mode 100644

argmax

+

Argmax function.

+

Parameters

+
    +
  • +

    lst

    +

    Typelist

    +
  • +
+ + + + + + + + +
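A minimal illustration, assuming the function returns the index of the largest element:

from river import utils
+
+utils.math.argmax([5, 2, 9])  # 2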
diff --git a/0.19.0/api/utils/math/chain-dot/index.html b/0.19.0/api/utils/math/chain-dot/index.html new file mode 100644

chain_dot

+

Returns the dot product of multiple vectors represented as dicts.

+

Parameters

+
    +
  • xs
  • +
+

Examples

+

from river import utils
+
+x = {'x0': 1, 'x1': 2, 'x2': 1}
+y = {'x1': 21, 'x2': 3}
+z = {'x1': 2, 'x2': 1 / 3}
+
+utils.math.chain_dot(x, y, z)
+
+
85.0
+

diff --git a/0.19.0/api/utils/math/clamp/index.html b/0.19.0/api/utils/math/clamp/index.html new file mode 100644

clamp

+

Clamp a number.

+

This is a synonym of clipping.

+

Parameters

+
    +
  • +

    x

    +

    Typefloat

    +
  • +
  • +

    minimum

    +

    Default0.0

    +
  • +
  • +

    maximum

    +

    Default1.0

    +
  • +
+ + + + + + + + +
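A minimal illustration of the default [0, 1] clipping range:

from river import utils
+
+utils.math.clamp(1.5)  # 1.0
+utils.math.clamp(-3, minimum=-1, maximum=1)  # -1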
diff --git a/0.19.0/api/utils/math/dot/index.html b/0.19.0/api/utils/math/dot/index.html new file mode 100644

dot

+

Returns the dot product of two vectors represented as dicts.

+

Parameters

+
    +
  • +

    x

    +

    Typedict

    +
  • +
  • +

    y

    +

    Typedict

    +
  • +
+

Examples

+

from river import utils
+
+x = {'x0': 1, 'x1': 2}
+y = {'x1': 21, 'x2': 3}
+
+utils.math.dot(x, y)
+
+
42
+

diff --git a/0.19.0/api/utils/math/dotvecmat/index.html b/0.19.0/api/utils/math/dotvecmat/index.html new file mode 100644

dotvecmat

+

Vector times matrix from the left side, i.e. transpose(x)A.

+

Parameters

+
    +
  • +

    x

    +
  • +
  • +

    A

    +
  • +
+

Examples

+

from river import utils
+
+x = {0: 4, 1: 5}
+
+A = {
+    (0, 0): 0, (0, 1): 1,
+    (1, 0): 2, (1, 1): 3
+}
+
+C = utils.math.dotvecmat(x, A)
+print(C)
+
+
{0: 10.0, 1: 19.0}
+

diff --git a/0.19.0/api/utils/math/log-sum-2-exp/index.html b/0.19.0/api/utils/math/log-sum-2-exp/index.html new file mode 100644

log_sum_2_exp

+

Computation of log((e^a + e^b) / 2) in an overflow-proof way.

+

Parameters

+
    +
  • +

    a

    +

    Typefloat

    +

    First number

    +
  • +
  • +

    b

    +

    Typefloat

    +

    Second number

    +
  • +
+ + + + + + + + +
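A minimal illustration of the overflow protection: computing log((e^1000 + e^1000) / 2) naively would overflow, but since (e^a + e^a) / 2 = e^a the result is simply a.

from river import utils
+
+utils.math.log_sum_2_exp(1000.0, 1000.0)  # 1000.0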
diff --git a/0.19.0/api/utils/math/matmul2d/index.html b/0.19.0/api/utils/math/matmul2d/index.html new file mode 100644

matmul2d

+

Multiplication for 2D matrices.

+

Parameters

+
    +
  • +

    A

    +
  • +
  • +

    B

    +
  • +
+

Examples

+

import pprint
+from river import utils
+
+A = {
+    (0, 0): 2, (0, 1): 0, (0, 2): 4,
+    (1, 0): 5, (1, 1): 6, (1, 2): 0
+}
+
+B = {
+    (0, 0): 1, (0, 1): 1, (0, 2): 0, (0, 3): 0,
+    (1, 0): 2, (1, 1): 0, (1, 2): 1, (1, 3): 3,
+    (2, 0): 4, (2, 1): 0, (2, 2): 0, (2, 3): 0
+}
+
+C = utils.math.matmul2d(A, B)
+pprint.pprint(C)
+
+
{(0, 0): 18.0,
+    (0, 1): 2.0,
+    (0, 2): 0.0,
+    (0, 3): 0.0,
+    (1, 0): 17.0,
+    (1, 1): 5.0,
+    (1, 2): 6.0,
+    (1, 3): 18.0}
+

diff --git a/0.19.0/api/utils/math/minkowski-distance/index.html b/0.19.0/api/utils/math/minkowski-distance/index.html new file mode 100644

minkowski_distance

+

Minkowski distance.

+

Parameters

+
    +
  • +

    a

    +

    Typedict

    +
  • +
  • +

    b

    +

    Typedict

    +
  • +
  • +

    p

    +

    Typeint

    +

    Parameter for the Minkowski distance. When p=1, this is equivalent to using the Manhattan distance. When p=2, this is equivalent to using the Euclidean distance.

    +
  • +
+ + + + + + + + +
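A minimal illustration with p=1, i.e. the Manhattan distance (keys missing from either dict are assumed to count as zero):

from river import utils
+
+a = {'x': 1, 'y': 2}
+b = {'x': 4, 'y': 6}
+
+utils.math.minkowski_distance(a, b, p=1)  # |1 - 4| + |2 - 6| = 7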
diff --git a/0.19.0/api/utils/math/norm/index.html b/0.19.0/api/utils/math/norm/index.html new file mode 100644

norm

+

Compute the norm of a dictionary's values.

+

Parameters

+
    +
  • +

    x

    +

    Typedict

    +
  • +
  • +

    order

    +

    DefaultNone

    +
  • +
+ + + + + + + + +
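A minimal illustration, assuming order is the usual vector-norm order (e.g. 2 for the Euclidean norm):

from river import utils
+
+utils.math.norm({'x': 3, 'y': 4}, order=2)  # 5.0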
diff --git a/0.19.0/api/utils/math/outer/index.html b/0.19.0/api/utils/math/outer/index.html new file mode 100644

outer

+

Outer-product between two vectors.

+

Parameters

+
    +
  • +

    u

    +

    Typedict

    +
  • +
  • +

    v

    +

    Typedict

    +
  • +
+

Examples

+

import pprint
+from river import utils
+
+u = dict(enumerate((1, 2, 3)))
+v = dict(enumerate((2, 4, 8)))
+
+uTv = utils.math.outer(u, v)
+pprint.pprint(uTv)
+
+
{(0, 0): 2,
+    (0, 1): 4,
+    (0, 2): 8,
+    (1, 0): 4,
+    (1, 1): 8,
+    (1, 2): 16,
+    (2, 0): 6,
+    (2, 1): 12,
+    (2, 2): 24}
+

diff --git a/0.19.0/api/utils/math/prod/index.html b/0.19.0/api/utils/math/prod/index.html new file mode 100644

prod

+

Product function.

+

Parameters

+
    +
  • iterable
  • +
+ + + + + + + + +
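A minimal illustration:

from river import utils
+
+utils.math.prod([2, 3, 4])  # 2 * 3 * 4 = 24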
diff --git a/0.19.0/api/utils/math/sherman-morrison/index.html b/0.19.0/api/utils/math/sherman-morrison/index.html new file mode 100644

sherman_morrison

diff --git a/0.19.0/api/utils/math/sigmoid/index.html b/0.19.0/api/utils/math/sigmoid/index.html new file mode 100644

sigmoid

+

Sigmoid function.

+

Parameters

+
    +
  • +

    x

    +

    Typefloat

    +
  • +
+ + + + + + + + +
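A minimal illustration; the sigmoid maps 0 to 0.5:

from river import utils
+
+utils.math.sigmoid(0.0)  # 0.5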
diff --git a/0.19.0/api/utils/math/sign/index.html b/0.19.0/api/utils/math/sign/index.html new file mode 100644

sign

+

Sign function.

+

Parameters

+
    +
  • +

    x

    +

    Typefloat

    +
  • +
+ + + + + + + + +
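A minimal illustration, assuming the usual -1/0/1 convention:

from river import utils
+
+utils.math.sign(-42.0)  # -1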
diff --git a/0.19.0/api/utils/math/softmax/index.html b/0.19.0/api/utils/math/softmax/index.html new file mode 100644

softmax

+

Normalizes a dictionary of predicted probabilities, in-place.

+

Parameters

+
    +
  • +

    y_pred

    +

    Typedict

    +
  • +
+ + + + + + + + +
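A minimal illustration; equal scores normalize to equal probabilities, and the input dictionary is modified in place:

from river import utils
+
+y_pred = {'cat': 1.0, 'dog': 1.0}
+utils.math.softmax(y_pred)
+y_pred  # {'cat': 0.5, 'dog': 0.5}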
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/utils/math/woodbury-matrix/index.html b/0.19.0/api/utils/math/woodbury-matrix/index.html new file mode 100644 index 0000000000..dcc88be39a --- /dev/null +++ b/0.19.0/api/utils/math/woodbury-matrix/index.html @@ -0,0 +1,4029 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + woodbury_matrix - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+ +
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/api/utils/norm/normalize-values-in-dict/index.html b/0.19.0/api/utils/norm/normalize-values-in-dict/index.html new file mode 100644 index 0000000000..042a5702ba --- /dev/null +++ b/0.19.0/api/utils/norm/normalize-values-in-dict/index.html @@ -0,0 +1,3748 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + normalize_values_in_dict - River + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

normalize_values_in_dict

Normalize the values in a dictionary using the given factor.

For each element in the dictionary, applies value/factor.

Parameters

  • dictionary
    Dictionary to normalize.

  • factor
    Default: None
    Normalization factor value. If not set, the sum of the values is used.

  • inplace
    Default: True
    If True, perform the operation in-place.

  • raise_error
    Default: False
    Behavior when the normalization factor is either 0 or None:
    - True: raise an error.
    - False: return gracefully (a copy of the dictionary, if inplace=False).
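
Examples

A minimal usage sketch; the import path is inferred from this page's location and is an assumption. With the default factor, each value is divided by the sum of all values (8.0 here):

from river.utils.norm import normalize_values_in_dict

counts = {'a': 2.0, 'b': 6.0}
normalize_values_in_dict(counts)
{'a': 0.25, 'b': 0.75}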
\ No newline at end of file
diff --git a/0.19.0/api/utils/norm/scale-values-in-dict/index.html b/0.19.0/api/utils/norm/scale-values-in-dict/index.html
new file mode 100644
index 0000000000..893faf07b3
--- /dev/null
+++ b/0.19.0/api/utils/norm/scale-values-in-dict/index.html
@@ -0,0 +1,3742 @@

scale_values_in_dict

Scale the values in a dictionary.

For each element in the dictionary, applies value * multiplier.

Parameters

  • dictionary
    Dictionary to scale.

  • multiplier
    Scaling value.

  • inplace
    Default: True
    If True, perform the operation in-place.
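
Examples

A minimal usage sketch; the import path is inferred from this page's location and is an assumption. Since inplace defaults to True, the input dictionary is modified directly:

from river.utils.norm import scale_values_in_dict

weights = {'a': 1.0, 'b': 2.5}
scale_values_in_dict(weights, multiplier=2.0)
{'a': 2.0, 'b': 5.0}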
\ No newline at end of file
diff --git a/0.19.0/api/utils/numpy2dict/index.html b/0.19.0/api/utils/numpy2dict/index.html
new file mode 100644
index 0000000000..4b330ba4b5
--- /dev/null
+++ b/0.19.0/api/utils/numpy2dict/index.html
@@ -0,0 +1,3706 @@

numpy2dict

Convert a numpy array to a dictionary.

Parameters

  • data
    Type: np.ndarray
    A one-dimensional numpy array.

Examples

import numpy as np
from river.utils import numpy2dict

numpy2dict(np.array([1.0, 2.0, 3.0]))
{0: 1.0, 1: 2.0, 2: 3.0}

\ No newline at end of file
diff --git a/0.19.0/api/utils/pretty/humanize-bytes/index.html b/0.19.0/api/utils/pretty/humanize-bytes/index.html
new file mode 100644
index 0000000000..537c4e6bf5
--- /dev/null
+++ b/0.19.0/api/utils/pretty/humanize-bytes/index.html
@@ -0,0 +1,3732 @@

humanize_bytes

Returns a human-friendly byte size.

Parameters

  • n_bytes
    Type: int
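
Since the page does not show the output format, here is a sketch of what such a helper typically does; the suffixes and rounding are assumptions, not River's exact formatting:

def humanize_bytes(n_bytes: int) -> str:
    # Walk up the binary prefixes until the value drops below 1024.
    suffixes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
    size = float(n_bytes)
    for suffix in suffixes[:-1]:
        if size < 1024:
            break
        size /= 1024
    else:
        suffix = suffixes[-1]
    return f'{size:,.2f} {suffix}'

humanize_bytes(2_500_000)  # '2.38 MB'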
\ No newline at end of file
diff --git a/0.19.0/api/utils/pretty/print-table/index.html b/0.19.0/api/utils/pretty/print-table/index.html
new file mode 100644
index 0000000000..9bbe628384
--- /dev/null
+++ b/0.19.0/api/utils/pretty/print-table/index.html
@@ -0,0 +1,3744 @@

print_table

Pretty-prints a table.

Parameters

  • headers
    Type: list[str]
    The column names.

  • columns
    Type: list[list[str]]
    The column values.

  • order
    Type: list[int] | None
    Default: None
    Order in which to print the column values. Defaults to the order in which the values are given.
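
A sketch of the kind of formatting this helper performs; the alignment, the separators, and the choice to apply order to the rows are assumptions, not River's exact output:

def print_table(headers, columns, order=None):
    # Transpose the columns into rows, optionally reorder them, then
    # right-align every cell to the widest entry of its column.
    rows = list(zip(*columns))
    if order is not None:
        rows = [rows[i] for i in order]
    widths = [max([len(h)] + [len(row[i]) for row in rows]) for i, h in enumerate(headers)]
    print('  '.join(h.rjust(w) for h, w in zip(headers, widths)))
    for row in rows:
        print('  '.join(cell.rjust(w) for cell, w in zip(row, widths)))

print_table(['Model', 'MAE'], [['LinReg', 'KNN'], ['0.58', '0.72']])
#  Model   MAE
# LinReg  0.58
#    KNN  0.72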
\ No newline at end of file
diff --git a/0.19.0/api/utils/random/exponential/index.html b/0.19.0/api/utils/random/exponential/index.html
new file mode 100644
index 0000000000..b554ee6b6d
--- /dev/null
+++ b/0.19.0/api/utils/random/exponential/index.html
@@ -0,0 +1,3745 @@

exponential

Sample a random value from an exponential distribution.

Parameters

  • rate
    Type: float
    Default: 1.0

  • rng
    Default: the standard random module
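A sketch of how such a sample is typically drawn, via inverse-transform sampling; whether the actual implementation uses this exact scheme is an assumption:

import math
import random

def exponential(rate: float = 1.0, rng=random) -> float:
    # If U ~ Uniform(0, 1), then -ln(1 - U) / rate follows an
    # exponential distribution with the given rate (mean 1 / rate).
    return -math.log(1.0 - rng.random()) / rate

exponential(rate=2.0, rng=random.Random(42))  # a non-negative float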
  1. Wikipedia article
\ No newline at end of file
diff --git a/0.19.0/api/utils/random/poisson/index.html b/0.19.0/api/utils/random/poisson/index.html
new file mode 100644
index 0000000000..4f3cf5bd90
--- /dev/null
+++ b/0.19.0/api/utils/random/poisson/index.html
@@ -0,0 +1,3737 @@

poisson

Sample a random value from a Poisson distribution.

Parameters

  • rate
    Type: float

  • rng
    Default: the standard random module
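A sketch of a classic way to draw such a sample (Knuth's algorithm); whether the actual implementation uses this exact scheme is an assumption:

import math
import random

def poisson(rate: float, rng=random) -> int:
    # Knuth's algorithm: multiply uniform draws together until the running
    # product falls below exp(-rate); the count of draws minus one is
    # Poisson-distributed with the given rate.
    threshold = math.exp(-rate)
    k, product = 0, 1.0
    while True:
        product *= rng.random()
        if product <= threshold:
            return k
        k += 1

poisson(rate=3.0, rng=random.Random(42))  # a non-negative integer with mean 3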

  1. Wikipedia article

+ + + + + + + + +
+
+ + +
+ +
+ + + + + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/0.19.0/assets/images/favicon.png b/0.19.0/assets/images/favicon.png new file mode 100644 index 0000000000..1cf13b9f9d Binary files /dev/null and b/0.19.0/assets/images/favicon.png differ diff --git a/0.19.0/assets/javascripts/bundle.dff1b7c8.min.js b/0.19.0/assets/javascripts/bundle.dff1b7c8.min.js new file mode 100644 index 0000000000..a89e799ad1 --- /dev/null +++ b/0.19.0/assets/javascripts/bundle.dff1b7c8.min.js @@ -0,0 +1,29 @@ +"use strict";(()=>{var gi=Object.create;var dr=Object.defineProperty;var xi=Object.getOwnPropertyDescriptor;var yi=Object.getOwnPropertyNames,Ht=Object.getOwnPropertySymbols,Ei=Object.getPrototypeOf,hr=Object.prototype.hasOwnProperty,Xr=Object.prototype.propertyIsEnumerable;var Jr=(e,t,r)=>t in e?dr(e,t,{enumerable:!0,configurable:!0,writable:!0,value:r}):e[t]=r,I=(e,t)=>{for(var r in t||(t={}))hr.call(t,r)&&Jr(e,r,t[r]);if(Ht)for(var r of Ht(t))Xr.call(t,r)&&Jr(e,r,t[r]);return e};var Zr=(e,t)=>{var r={};for(var o in e)hr.call(e,o)&&t.indexOf(o)<0&&(r[o]=e[o]);if(e!=null&&Ht)for(var o of Ht(e))t.indexOf(o)<0&&Xr.call(e,o)&&(r[o]=e[o]);return r};var br=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports);var wi=(e,t,r,o)=>{if(t&&typeof t=="object"||typeof t=="function")for(let n of yi(t))!hr.call(e,n)&&n!==r&&dr(e,n,{get:()=>t[n],enumerable:!(o=xi(t,n))||o.enumerable});return e};var $t=(e,t,r)=>(r=e!=null?gi(Ei(e)):{},wi(t||!e||!e.__esModule?dr(r,"default",{value:e,enumerable:!0}):r,e));var to=br((vr,eo)=>{(function(e,t){typeof vr=="object"&&typeof eo!="undefined"?t():typeof define=="function"&&define.amd?define(t):t()})(vr,function(){"use strict";function e(r){var o=!0,n=!1,i=null,s={text:!0,search:!0,url:!0,tel:!0,email:!0,password:!0,number:!0,date:!0,month:!0,week:!0,time:!0,datetime:!0,"datetime-local":!0};function a(A){return!!(A&&A!==document&&A.nodeName!=="HTML"&&A.nodeName!=="BODY"&&"classList"in A&&"contains"in A.classList)}function c(A){var it=A.type,Ne=A.tagName;return!!(Ne==="INPUT"&&s[it]&&!A.readOnly||Ne==="TEXTAREA"&&!A.readOnly||A.isContentEditable)}function p(A){A.classList.contains("focus-visible")||(A.classList.add("focus-visible"),A.setAttribute("data-focus-visible-added",""))}function m(A){A.hasAttribute("data-focus-visible-added")&&(A.classList.remove("focus-visible"),A.removeAttribute("data-focus-visible-added"))}function f(A){A.metaKey||A.altKey||A.ctrlKey||(a(r.activeElement)&&p(r.activeElement),o=!0)}function u(A){o=!1}function d(A){a(A.target)&&(o||c(A.target))&&p(A.target)}function b(A){a(A.target)&&(A.target.classList.contains("focus-visible")||A.target.hasAttribute("data-focus-visible-added"))&&(n=!0,window.clearTimeout(i),i=window.setTimeout(function(){n=!1},100),m(A.target))}function _(A){document.visibilityState==="hidden"&&(n&&(o=!0),re())}function re(){document.addEventListener("mousemove",Y),document.addEventListener("mousedown",Y),document.addEventListener("mouseup",Y),document.addEventListener("pointermove",Y),document.addEventListener("pointerdown",Y),document.addEventListener("pointerup",Y),document.addEventListener("touchmove",Y),document.addEventListener("touchstart",Y),document.addEventListener("touchend",Y)}function 
r=`https://${e}/api/v4/projects/${encodeURIComponent(t)}`;return Ue(r).pipe(pe(()=>T),l(({star_count:o,forks_count:n})=>({stars:o,forks:n})),He({}))}function Gn(e){let t=e.match(/^.+github\.com\/([^/]+)\/?([^/]+)?/i);if(t){let[,r,o]=t;return Yn(r,o)}if(t=e.match(/^.+?([^/]*gitlab[^/]+)\/(.+?)\/?$/i),t){let[,r,o]=t;return Bn(r,o)}return T}var Ra;function Ia(e){return Ra||(Ra=$(()=>{let t=__md_get("__source",sessionStorage);if(t)return H(t);if(ne("consent").length){let o=__md_get("__consent");if(!(o&&o.github))return T}return Gn(e.href).pipe(w(o=>__md_set("__source",o,sessionStorage)))}).pipe(pe(()=>T),L(t=>Object.keys(t).length>0),l(t=>({facts:t})),B(1)))}function Jn(e){let t=N(":scope > :last-child",e);return $(()=>{let r=new x;return r.subscribe(({facts:o})=>{t.appendChild(ln(o)),t.classList.add("md-source__repository--active")}),Ia(e).pipe(w(o=>r.next(o)),k(()=>r.complete()),l(o=>I({ref:e},o)))})}function Pa(e,{viewport$:t,header$:r}){return xe(document.body).pipe(v(()=>sr(e,{header$:r,viewport$:t})),l(({offset:{y:o}})=>({hidden:o>=10})),X("hidden"))}function Xn(e,t){return $(()=>{let r=new x;return r.subscribe({next({hidden:o}){e.hidden=o},complete(){e.hidden=!1}}),(te("navigation.tabs.sticky")?H({hidden:!1}):Pa(e,t)).pipe(w(o=>r.next(o)),k(()=>r.complete()),l(o=>I({ref:e},o)))})}function Fa(e,{viewport$:t,header$:r}){let o=new Map,n=z("[href^=\\#]",e);for(let a of n){let c=decodeURIComponent(a.hash.substring(1)),p=ce(`[id="${c}"]`);typeof p!="undefined"&&o.set(a,p)}let i=r.pipe(X("height"),l(({height:a})=>{let c=ye("main"),p=N(":scope > :first-child",c);return a+.8*(p.offsetTop-c.offsetTop)}),le());return xe(document.body).pipe(X("height"),v(a=>$(()=>{let c=[];return H([...o].reduce((p,[m,f])=>{for(;c.length&&o.get(c[c.length-1]).tagName>=f.tagName;)c.pop();let u=f.offsetTop;for(;!u&&f.parentElement;)f=f.parentElement,u=f.offsetTop;let d=f.offsetParent;for(;d;d=d.offsetParent)u+=d.offsetTop;return p.set([...c=[...c,m]].reverse(),u)},new Map))}).pipe(l(c=>new Map([...c].sort(([,p],[,m])=>p-m))),Ge(i),v(([c,p])=>t.pipe(Cr(([m,f],{offset:{y:u},size:d})=>{let b=u+d.height>=Math.floor(a.height);for(;f.length;){let[,_]=f[0];if(_-p=u&&!b)f=[m.pop(),...f];else break}return[m,f]},[[],[...c]]),G((m,f)=>m[0]===f[0]&&m[1]===f[1])))))).pipe(l(([a,c])=>({prev:a.map(([p])=>p),next:c.map(([p])=>p)})),V({prev:[],next:[]}),Ce(2,1),l(([a,c])=>a.prev.length{let i=new x,s=i.pipe(J(),ee(!0));if(i.subscribe(({prev:a,next:c})=>{for(let[p]of c)p.classList.remove("md-nav__link--passed"),p.classList.remove("md-nav__link--active");for(let[p,[m]]of a.entries())m.classList.add("md-nav__link--passed"),m.classList.toggle("md-nav__link--active",p===a.length-1)}),te("toc.follow")){let a=M(t.pipe(ke(1),l(()=>{})),t.pipe(ke(250),l(()=>"smooth")));i.pipe(L(({prev:c})=>c.length>0),Ge(o.pipe(_e(ae))),oe(a)).subscribe(([[{prev:c}],p])=>{let[m]=c[c.length-1];if(m.offsetHeight){let f=or(m);if(typeof f!="undefined"){let u=m.offsetTop-f.offsetTop,{height:d}=he(f);f.scrollTo({top:u-d/2,behavior:p})}}})}return te("navigation.tracking")&&t.pipe(K(s),X("offset"),ke(250),je(1),K(n.pipe(je(1))),Ot({delay:250}),oe(i)).subscribe(([,{prev:a}])=>{let c=fe(),p=a[a.length-1];if(p&&p.length){let[m]=p,{hash:f}=new URL(m.href);c.hash!==f&&(c.hash=f,history.replaceState({},"",`${c}`))}else c.hash="",history.replaceState({},"",`${c}`)}),Fa(e,{viewport$:t,header$:r}).pipe(w(a=>i.next(a)),k(()=>i.complete()),l(a=>I({ref:e},a)))})}function ja(e,{viewport$:t,main$:r,target$:o}){let 
n=t.pipe(l(({offset:{y:s}})=>s),Ce(2,1),l(([s,a])=>s>a&&a>0),G()),i=r.pipe(l(({active:s})=>s));return Q([i,n]).pipe(l(([s,a])=>!(s&&a)),G(),K(o.pipe(je(1))),ee(!0),Ot({delay:250}),l(s=>({hidden:s})))}function ei(e,{viewport$:t,header$:r,main$:o,target$:n}){let i=new x,s=i.pipe(J(),ee(!0));return i.subscribe({next({hidden:a}){e.hidden=a,a?(e.setAttribute("tabindex","-1"),e.blur()):e.removeAttribute("tabindex")},complete(){e.style.top="",e.hidden=!0,e.removeAttribute("tabindex")}}),r.pipe(K(s),X("height")).subscribe(({height:a})=>{e.style.top=`${a+16}px`}),h(e,"click").subscribe(a=>{a.preventDefault(),window.scrollTo({top:0})}),ja(e,{viewport$:t,main$:o,target$:n}).pipe(w(a=>i.next(a)),k(()=>i.complete()),l(a=>I({ref:e},a)))}function ti({document$:e,tablet$:t}){e.pipe(v(()=>z(".md-toggle--indeterminate")),w(r=>{r.indeterminate=!0,r.checked=!1}),se(r=>h(r,"change").pipe($r(()=>r.classList.contains("md-toggle--indeterminate")),l(()=>r))),oe(t)).subscribe(([r,o])=>{r.classList.remove("md-toggle--indeterminate"),o&&(r.checked=!1)})}function Wa(){return/(iPad|iPhone|iPod)/.test(navigator.userAgent)}function ri({document$:e}){e.pipe(v(()=>z("[data-md-scrollfix]")),w(t=>t.removeAttribute("data-md-scrollfix")),L(Wa),se(t=>h(t,"touchstart").pipe(l(()=>t)))).subscribe(t=>{let r=t.scrollTop;r===0?t.scrollTop=1:r+t.offsetHeight===t.scrollHeight&&(t.scrollTop=r-1)})}function oi({viewport$:e,tablet$:t}){Q([We("search"),t]).pipe(l(([r,o])=>r&&!o),v(r=>H(r).pipe(ze(r?400:100))),oe(e)).subscribe(([r,{offset:{y:o}}])=>{if(r)document.body.setAttribute("data-md-scrolllock",""),document.body.style.top=`-${o}px`;else{let n=-1*parseInt(document.body.style.top,10);document.body.removeAttribute("data-md-scrolllock"),document.body.style.top="",n&&window.scrollTo(0,n)}})}Object.entries||(Object.entries=function(e){let t=[];for(let r of Object.keys(e))t.push([r,e[r]]);return t});Object.values||(Object.values=function(e){let t=[];for(let r of Object.keys(e))t.push(e[r]);return t});typeof Element!="undefined"&&(Element.prototype.scrollTo||(Element.prototype.scrollTo=function(e,t){typeof e=="object"?(this.scrollLeft=e.left,this.scrollTop=e.top):(this.scrollLeft=e,this.scrollTop=t)}),Element.prototype.replaceWith||(Element.prototype.replaceWith=function(...e){let t=this.parentNode;if(t){e.length===0&&t.removeChild(this);for(let r=e.length-1;r>=0;r--){let o=e[r];typeof o=="string"?o=document.createTextNode(o):o.parentNode&&o.parentNode.removeChild(o),r?t.insertBefore(this.previousSibling,o):t.replaceChild(o,this)}}}));function Ua(){return location.protocol==="file:"?ht(`${new URL("search/search_index.js",Qr.base)}`).pipe(l(()=>__index),B(1)):Ue(new URL("search/search_index.json",Qr.base))}document.documentElement.classList.remove("no-js");document.documentElement.classList.add("js");var nt=Wo(),At=Qo(),gt=Bo(At),Yr=Ko(),Se=rn(),lr=Fr("(min-width: 960px)"),ii=Fr("(min-width: 1220px)"),ai=Go(),Qr=ue(),si=document.forms.namedItem("search")?Ua():Ve,Br=new x;Rn({alert$:Br});te("navigation.instant")&&In({location$:At,viewport$:Se}).subscribe(nt);var ni;((ni=Qr.version)==null?void 0:ni.provider)==="mike"&&Nn({document$:nt});M(At,gt).pipe(ze(125)).subscribe(()=>{Ke("drawer",!1),Ke("search",!1)});Yr.pipe(L(({mode:e})=>e==="global")).subscribe(e=>{switch(e.type){case"p":case",":let t=ce("link[rel=prev]");typeof t!="undefined"&&ot(t);break;case"n":case".":let r=ce("link[rel=next]");typeof r!="undefined"&&ot(r);break;case"Enter":let o=Re();o instanceof 
HTMLLabelElement&&o.click()}});ti({document$:nt,tablet$:lr});ri({document$:nt});oi({viewport$:Se,tablet$:lr});var Xe=An(ye("header"),{viewport$:Se}),_t=nt.pipe(l(()=>ye("main")),v(e=>Hn(e,{viewport$:Se,header$:Xe})),B(1)),Na=M(...ne("consent").map(e=>an(e,{target$:gt})),...ne("dialog").map(e=>_n(e,{alert$:Br})),...ne("header").map(e=>Cn(e,{viewport$:Se,header$:Xe,main$:_t})),...ne("palette").map(e=>$n(e)),...ne("search").map(e=>Kn(e,{index$:si,keyboard$:Yr})),...ne("source").map(e=>Jn(e))),Da=$(()=>M(...ne("announce").map(e=>nn(e)),...ne("content").map(e=>Ln(e,{viewport$:Se,target$:gt,print$:ai})),...ne("content").map(e=>te("search.highlight")?Qn(e,{index$:si,location$:At}):T),...ne("header-title").map(e=>kn(e,{viewport$:Se,header$:Xe})),...ne("sidebar").map(e=>e.getAttribute("data-md-type")==="navigation"?jr(ii,()=>Kr(e,{viewport$:Se,header$:Xe,main$:_t})):jr(lr,()=>Kr(e,{viewport$:Se,header$:Xe,main$:_t}))),...ne("tabs").map(e=>Xn(e,{viewport$:Se,header$:Xe})),...ne("toc").map(e=>Zn(e,{viewport$:Se,header$:Xe,main$:_t,target$:gt})),...ne("top").map(e=>ei(e,{viewport$:Se,header$:Xe,main$:_t,target$:gt})))),ci=nt.pipe(v(()=>Da),qe(Na),B(1));ci.subscribe();window.document$=nt;window.location$=At;window.target$=gt;window.keyboard$=Yr;window.viewport$=Se;window.tablet$=lr;window.screen$=ii;window.print$=ai;window.alert$=Br;window.component$=ci;})(); +//# sourceMappingURL=bundle.dff1b7c8.min.js.map + diff --git a/0.19.0/assets/javascripts/bundle.dff1b7c8.min.js.map b/0.19.0/assets/javascripts/bundle.dff1b7c8.min.js.map new file mode 100644 index 0000000000..82d902384b --- /dev/null +++ b/0.19.0/assets/javascripts/bundle.dff1b7c8.min.js.map @@ -0,0 +1,8 @@ +{ + "version": 3, + "sources": ["node_modules/focus-visible/dist/focus-visible.js", "node_modules/clipboard/dist/clipboard.js", "node_modules/escape-html/index.js", "src/assets/javascripts/bundle.ts", "node_modules/rxjs/node_modules/tslib/tslib.es6.js", "node_modules/rxjs/src/internal/util/isFunction.ts", "node_modules/rxjs/src/internal/util/createErrorClass.ts", "node_modules/rxjs/src/internal/util/UnsubscriptionError.ts", "node_modules/rxjs/src/internal/util/arrRemove.ts", "node_modules/rxjs/src/internal/Subscription.ts", "node_modules/rxjs/src/internal/config.ts", "node_modules/rxjs/src/internal/scheduler/timeoutProvider.ts", "node_modules/rxjs/src/internal/util/reportUnhandledError.ts", "node_modules/rxjs/src/internal/util/noop.ts", "node_modules/rxjs/src/internal/NotificationFactories.ts", "node_modules/rxjs/src/internal/util/errorContext.ts", "node_modules/rxjs/src/internal/Subscriber.ts", "node_modules/rxjs/src/internal/symbol/observable.ts", "node_modules/rxjs/src/internal/util/identity.ts", "node_modules/rxjs/src/internal/util/pipe.ts", "node_modules/rxjs/src/internal/Observable.ts", "node_modules/rxjs/src/internal/util/lift.ts", "node_modules/rxjs/src/internal/operators/OperatorSubscriber.ts", "node_modules/rxjs/src/internal/scheduler/animationFrameProvider.ts", "node_modules/rxjs/src/internal/util/ObjectUnsubscribedError.ts", "node_modules/rxjs/src/internal/Subject.ts", "node_modules/rxjs/src/internal/scheduler/dateTimestampProvider.ts", "node_modules/rxjs/src/internal/ReplaySubject.ts", "node_modules/rxjs/src/internal/scheduler/Action.ts", "node_modules/rxjs/src/internal/scheduler/intervalProvider.ts", "node_modules/rxjs/src/internal/scheduler/AsyncAction.ts", "node_modules/rxjs/src/internal/Scheduler.ts", "node_modules/rxjs/src/internal/scheduler/AsyncScheduler.ts", "node_modules/rxjs/src/internal/scheduler/async.ts", 
"node_modules/rxjs/src/internal/scheduler/AnimationFrameAction.ts", "node_modules/rxjs/src/internal/scheduler/AnimationFrameScheduler.ts", "node_modules/rxjs/src/internal/scheduler/animationFrame.ts", "node_modules/rxjs/src/internal/observable/empty.ts", "node_modules/rxjs/src/internal/util/isScheduler.ts", "node_modules/rxjs/src/internal/util/args.ts", "node_modules/rxjs/src/internal/util/isArrayLike.ts", "node_modules/rxjs/src/internal/util/isPromise.ts", "node_modules/rxjs/src/internal/util/isInteropObservable.ts", "node_modules/rxjs/src/internal/util/isAsyncIterable.ts", "node_modules/rxjs/src/internal/util/throwUnobservableError.ts", "node_modules/rxjs/src/internal/symbol/iterator.ts", "node_modules/rxjs/src/internal/util/isIterable.ts", "node_modules/rxjs/src/internal/util/isReadableStreamLike.ts", "node_modules/rxjs/src/internal/observable/innerFrom.ts", "node_modules/rxjs/src/internal/util/executeSchedule.ts", "node_modules/rxjs/src/internal/operators/observeOn.ts", "node_modules/rxjs/src/internal/operators/subscribeOn.ts", "node_modules/rxjs/src/internal/scheduled/scheduleObservable.ts", "node_modules/rxjs/src/internal/scheduled/schedulePromise.ts", "node_modules/rxjs/src/internal/scheduled/scheduleArray.ts", "node_modules/rxjs/src/internal/scheduled/scheduleIterable.ts", "node_modules/rxjs/src/internal/scheduled/scheduleAsyncIterable.ts", "node_modules/rxjs/src/internal/scheduled/scheduleReadableStreamLike.ts", "node_modules/rxjs/src/internal/scheduled/scheduled.ts", "node_modules/rxjs/src/internal/observable/from.ts", "node_modules/rxjs/src/internal/observable/of.ts", "node_modules/rxjs/src/internal/observable/throwError.ts", "node_modules/rxjs/src/internal/util/EmptyError.ts", "node_modules/rxjs/src/internal/util/isDate.ts", "node_modules/rxjs/src/internal/operators/map.ts", "node_modules/rxjs/src/internal/util/mapOneOrManyArgs.ts", "node_modules/rxjs/src/internal/util/argsArgArrayOrObject.ts", "node_modules/rxjs/src/internal/util/createObject.ts", "node_modules/rxjs/src/internal/observable/combineLatest.ts", "node_modules/rxjs/src/internal/operators/mergeInternals.ts", "node_modules/rxjs/src/internal/operators/mergeMap.ts", "node_modules/rxjs/src/internal/operators/mergeAll.ts", "node_modules/rxjs/src/internal/operators/concatAll.ts", "node_modules/rxjs/src/internal/observable/concat.ts", "node_modules/rxjs/src/internal/observable/defer.ts", "node_modules/rxjs/src/internal/observable/fromEvent.ts", "node_modules/rxjs/src/internal/observable/fromEventPattern.ts", "node_modules/rxjs/src/internal/observable/timer.ts", "node_modules/rxjs/src/internal/observable/merge.ts", "node_modules/rxjs/src/internal/observable/never.ts", "node_modules/rxjs/src/internal/util/argsOrArgArray.ts", "node_modules/rxjs/src/internal/operators/filter.ts", "node_modules/rxjs/src/internal/observable/zip.ts", "node_modules/rxjs/src/internal/operators/audit.ts", "node_modules/rxjs/src/internal/operators/auditTime.ts", "node_modules/rxjs/src/internal/operators/bufferCount.ts", "node_modules/rxjs/src/internal/operators/catchError.ts", "node_modules/rxjs/src/internal/operators/scanInternals.ts", "node_modules/rxjs/src/internal/operators/combineLatest.ts", "node_modules/rxjs/src/internal/operators/combineLatestWith.ts", "node_modules/rxjs/src/internal/operators/debounceTime.ts", "node_modules/rxjs/src/internal/operators/defaultIfEmpty.ts", "node_modules/rxjs/src/internal/operators/take.ts", "node_modules/rxjs/src/internal/operators/ignoreElements.ts", "node_modules/rxjs/src/internal/operators/mapTo.ts", 
"node_modules/rxjs/src/internal/operators/delayWhen.ts", "node_modules/rxjs/src/internal/operators/delay.ts", "node_modules/rxjs/src/internal/operators/distinctUntilChanged.ts", "node_modules/rxjs/src/internal/operators/distinctUntilKeyChanged.ts", "node_modules/rxjs/src/internal/operators/throwIfEmpty.ts", "node_modules/rxjs/src/internal/operators/endWith.ts", "node_modules/rxjs/src/internal/operators/finalize.ts", "node_modules/rxjs/src/internal/operators/first.ts", "node_modules/rxjs/src/internal/operators/merge.ts", "node_modules/rxjs/src/internal/operators/mergeWith.ts", "node_modules/rxjs/src/internal/operators/repeat.ts", "node_modules/rxjs/src/internal/operators/scan.ts", "node_modules/rxjs/src/internal/operators/share.ts", "node_modules/rxjs/src/internal/operators/shareReplay.ts", "node_modules/rxjs/src/internal/operators/skip.ts", "node_modules/rxjs/src/internal/operators/skipUntil.ts", "node_modules/rxjs/src/internal/operators/startWith.ts", "node_modules/rxjs/src/internal/operators/switchMap.ts", "node_modules/rxjs/src/internal/operators/takeUntil.ts", "node_modules/rxjs/src/internal/operators/takeWhile.ts", "node_modules/rxjs/src/internal/operators/tap.ts", "node_modules/rxjs/src/internal/operators/throttle.ts", "node_modules/rxjs/src/internal/operators/throttleTime.ts", "node_modules/rxjs/src/internal/operators/withLatestFrom.ts", "node_modules/rxjs/src/internal/operators/zip.ts", "node_modules/rxjs/src/internal/operators/zipWith.ts", "src/assets/javascripts/browser/document/index.ts", "src/assets/javascripts/browser/element/_/index.ts", "src/assets/javascripts/browser/element/focus/index.ts", "src/assets/javascripts/browser/element/offset/_/index.ts", "src/assets/javascripts/browser/element/offset/content/index.ts", "src/assets/javascripts/utilities/h/index.ts", "src/assets/javascripts/utilities/round/index.ts", "src/assets/javascripts/browser/script/index.ts", "src/assets/javascripts/browser/element/size/_/index.ts", "src/assets/javascripts/browser/element/size/content/index.ts", "src/assets/javascripts/browser/element/visibility/index.ts", "src/assets/javascripts/browser/toggle/index.ts", "src/assets/javascripts/browser/keyboard/index.ts", "src/assets/javascripts/browser/location/_/index.ts", "src/assets/javascripts/browser/location/hash/index.ts", "src/assets/javascripts/browser/media/index.ts", "src/assets/javascripts/browser/request/index.ts", "src/assets/javascripts/browser/viewport/offset/index.ts", "src/assets/javascripts/browser/viewport/size/index.ts", "src/assets/javascripts/browser/viewport/_/index.ts", "src/assets/javascripts/browser/viewport/at/index.ts", "src/assets/javascripts/browser/worker/index.ts", "src/assets/javascripts/_/index.ts", "src/assets/javascripts/components/_/index.ts", "src/assets/javascripts/components/announce/index.ts", "src/assets/javascripts/components/consent/index.ts", "src/assets/javascripts/components/content/annotation/_/index.ts", "src/assets/javascripts/templates/tooltip/index.tsx", "src/assets/javascripts/templates/annotation/index.tsx", "src/assets/javascripts/templates/clipboard/index.tsx", "src/assets/javascripts/templates/search/index.tsx", "src/assets/javascripts/templates/source/index.tsx", "src/assets/javascripts/templates/tabbed/index.tsx", "src/assets/javascripts/templates/table/index.tsx", "src/assets/javascripts/templates/version/index.tsx", "src/assets/javascripts/components/content/annotation/list/index.ts", "src/assets/javascripts/components/content/annotation/block/index.ts", 
"src/assets/javascripts/components/content/code/_/index.ts", "src/assets/javascripts/components/content/details/index.ts", "src/assets/javascripts/components/content/mermaid/index.css", "src/assets/javascripts/components/content/mermaid/index.ts", "src/assets/javascripts/components/content/table/index.ts", "src/assets/javascripts/components/content/tabs/index.ts", "src/assets/javascripts/components/content/_/index.ts", "src/assets/javascripts/components/dialog/index.ts", "src/assets/javascripts/components/header/_/index.ts", "src/assets/javascripts/components/header/title/index.ts", "src/assets/javascripts/components/main/index.ts", "src/assets/javascripts/components/palette/index.ts", "src/assets/javascripts/integrations/clipboard/index.ts", "src/assets/javascripts/integrations/sitemap/index.ts", "src/assets/javascripts/integrations/instant/index.ts", "src/assets/javascripts/integrations/search/highlighter/index.ts", "src/assets/javascripts/integrations/search/worker/message/index.ts", "src/assets/javascripts/integrations/search/worker/_/index.ts", "src/assets/javascripts/integrations/version/index.ts", "src/assets/javascripts/components/search/query/index.ts", "src/assets/javascripts/components/search/result/index.ts", "src/assets/javascripts/components/search/share/index.ts", "src/assets/javascripts/components/search/suggest/index.ts", "src/assets/javascripts/components/search/_/index.ts", "src/assets/javascripts/components/search/highlight/index.ts", "src/assets/javascripts/components/sidebar/index.ts", "src/assets/javascripts/components/source/facts/github/index.ts", "src/assets/javascripts/components/source/facts/gitlab/index.ts", "src/assets/javascripts/components/source/facts/_/index.ts", "src/assets/javascripts/components/source/_/index.ts", "src/assets/javascripts/components/tabs/index.ts", "src/assets/javascripts/components/toc/index.ts", "src/assets/javascripts/components/top/index.ts", "src/assets/javascripts/patches/indeterminate/index.ts", "src/assets/javascripts/patches/scrollfix/index.ts", "src/assets/javascripts/patches/scrolllock/index.ts", "src/assets/javascripts/polyfills/index.ts"], + "sourceRoot": "../../..", + "sourcesContent": ["(function (global, factory) {\n typeof exports === 'object' && typeof module !== 'undefined' ? factory() :\n typeof define === 'function' && define.amd ? 
define(factory) :\n (factory());\n}(this, (function () { 'use strict';\n\n /**\n * Applies the :focus-visible polyfill at the given scope.\n * A scope in this case is either the top-level Document or a Shadow Root.\n *\n * @param {(Document|ShadowRoot)} scope\n * @see https://github.com/WICG/focus-visible\n */\n function applyFocusVisiblePolyfill(scope) {\n var hadKeyboardEvent = true;\n var hadFocusVisibleRecently = false;\n var hadFocusVisibleRecentlyTimeout = null;\n\n var inputTypesAllowlist = {\n text: true,\n search: true,\n url: true,\n tel: true,\n email: true,\n password: true,\n number: true,\n date: true,\n month: true,\n week: true,\n time: true,\n datetime: true,\n 'datetime-local': true\n };\n\n /**\n * Helper function for legacy browsers and iframes which sometimes focus\n * elements like document, body, and non-interactive SVG.\n * @param {Element} el\n */\n function isValidFocusTarget(el) {\n if (\n el &&\n el !== document &&\n el.nodeName !== 'HTML' &&\n el.nodeName !== 'BODY' &&\n 'classList' in el &&\n 'contains' in el.classList\n ) {\n return true;\n }\n return false;\n }\n\n /**\n * Computes whether the given element should automatically trigger the\n * `focus-visible` class being added, i.e. whether it should always match\n * `:focus-visible` when focused.\n * @param {Element} el\n * @return {boolean}\n */\n function focusTriggersKeyboardModality(el) {\n var type = el.type;\n var tagName = el.tagName;\n\n if (tagName === 'INPUT' && inputTypesAllowlist[type] && !el.readOnly) {\n return true;\n }\n\n if (tagName === 'TEXTAREA' && !el.readOnly) {\n return true;\n }\n\n if (el.isContentEditable) {\n return true;\n }\n\n return false;\n }\n\n /**\n * Add the `focus-visible` class to the given element if it was not added by\n * the author.\n * @param {Element} el\n */\n function addFocusVisibleClass(el) {\n if (el.classList.contains('focus-visible')) {\n return;\n }\n el.classList.add('focus-visible');\n el.setAttribute('data-focus-visible-added', '');\n }\n\n /**\n * Remove the `focus-visible` class from the given element if it was not\n * originally added by the author.\n * @param {Element} el\n */\n function removeFocusVisibleClass(el) {\n if (!el.hasAttribute('data-focus-visible-added')) {\n return;\n }\n el.classList.remove('focus-visible');\n el.removeAttribute('data-focus-visible-added');\n }\n\n /**\n * If the most recent user interaction was via the keyboard;\n * and the key press did not include a meta, alt/option, or control key;\n * then the modality is keyboard. 
Otherwise, the modality is not keyboard.\n * Apply `focus-visible` to any current active element and keep track\n * of our keyboard modality state with `hadKeyboardEvent`.\n * @param {KeyboardEvent} e\n */\n function onKeyDown(e) {\n if (e.metaKey || e.altKey || e.ctrlKey) {\n return;\n }\n\n if (isValidFocusTarget(scope.activeElement)) {\n addFocusVisibleClass(scope.activeElement);\n }\n\n hadKeyboardEvent = true;\n }\n\n /**\n * If at any point a user clicks with a pointing device, ensure that we change\n * the modality away from keyboard.\n * This avoids the situation where a user presses a key on an already focused\n * element, and then clicks on a different element, focusing it with a\n * pointing device, while we still think we're in keyboard modality.\n * @param {Event} e\n */\n function onPointerDown(e) {\n hadKeyboardEvent = false;\n }\n\n /**\n * On `focus`, add the `focus-visible` class to the target if:\n * - the target received focus as a result of keyboard navigation, or\n * - the event target is an element that will likely require interaction\n * via the keyboard (e.g. a text box)\n * @param {Event} e\n */\n function onFocus(e) {\n // Prevent IE from focusing the document or HTML element.\n if (!isValidFocusTarget(e.target)) {\n return;\n }\n\n if (hadKeyboardEvent || focusTriggersKeyboardModality(e.target)) {\n addFocusVisibleClass(e.target);\n }\n }\n\n /**\n * On `blur`, remove the `focus-visible` class from the target.\n * @param {Event} e\n */\n function onBlur(e) {\n if (!isValidFocusTarget(e.target)) {\n return;\n }\n\n if (\n e.target.classList.contains('focus-visible') ||\n e.target.hasAttribute('data-focus-visible-added')\n ) {\n // To detect a tab/window switch, we look for a blur event followed\n // rapidly by a visibility change.\n // If we don't see a visibility change within 100ms, it's probably a\n // regular focus change.\n hadFocusVisibleRecently = true;\n window.clearTimeout(hadFocusVisibleRecentlyTimeout);\n hadFocusVisibleRecentlyTimeout = window.setTimeout(function() {\n hadFocusVisibleRecently = false;\n }, 100);\n removeFocusVisibleClass(e.target);\n }\n }\n\n /**\n * If the user changes tabs, keep track of whether or not the previously\n * focused element had .focus-visible.\n * @param {Event} e\n */\n function onVisibilityChange(e) {\n if (document.visibilityState === 'hidden') {\n // If the tab becomes active again, the browser will handle calling focus\n // on the element (Safari actually calls it twice).\n // If this tab change caused a blur on an element with focus-visible,\n // re-apply the class when the user switches back to the tab.\n if (hadFocusVisibleRecently) {\n hadKeyboardEvent = true;\n }\n addInitialPointerMoveListeners();\n }\n }\n\n /**\n * Add a group of listeners to detect usage of any pointing devices.\n * These listeners will be added when the polyfill first loads, and anytime\n * the window is blurred, so that they are active when the window regains\n * focus.\n */\n function addInitialPointerMoveListeners() {\n document.addEventListener('mousemove', onInitialPointerMove);\n document.addEventListener('mousedown', onInitialPointerMove);\n document.addEventListener('mouseup', onInitialPointerMove);\n document.addEventListener('pointermove', onInitialPointerMove);\n document.addEventListener('pointerdown', onInitialPointerMove);\n document.addEventListener('pointerup', onInitialPointerMove);\n document.addEventListener('touchmove', onInitialPointerMove);\n document.addEventListener('touchstart', onInitialPointerMove);\n 
document.addEventListener('touchend', onInitialPointerMove);\n }\n\n function removeInitialPointerMoveListeners() {\n document.removeEventListener('mousemove', onInitialPointerMove);\n document.removeEventListener('mousedown', onInitialPointerMove);\n document.removeEventListener('mouseup', onInitialPointerMove);\n document.removeEventListener('pointermove', onInitialPointerMove);\n document.removeEventListener('pointerdown', onInitialPointerMove);\n document.removeEventListener('pointerup', onInitialPointerMove);\n document.removeEventListener('touchmove', onInitialPointerMove);\n document.removeEventListener('touchstart', onInitialPointerMove);\n document.removeEventListener('touchend', onInitialPointerMove);\n }\n\n /**\n * When the polfyill first loads, assume the user is in keyboard modality.\n * If any event is received from a pointing device (e.g. mouse, pointer,\n * touch), turn off keyboard modality.\n * This accounts for situations where focus enters the page from the URL bar.\n * @param {Event} e\n */\n function onInitialPointerMove(e) {\n // Work around a Safari quirk that fires a mousemove on whenever the\n // window blurs, even if you're tabbing out of the page. \u00AF\\_(\u30C4)_/\u00AF\n if (e.target.nodeName && e.target.nodeName.toLowerCase() === 'html') {\n return;\n }\n\n hadKeyboardEvent = false;\n removeInitialPointerMoveListeners();\n }\n\n // For some kinds of state, we are interested in changes at the global scope\n // only. For example, global pointer input, global key presses and global\n // visibility change should affect the state at every scope:\n document.addEventListener('keydown', onKeyDown, true);\n document.addEventListener('mousedown', onPointerDown, true);\n document.addEventListener('pointerdown', onPointerDown, true);\n document.addEventListener('touchstart', onPointerDown, true);\n document.addEventListener('visibilitychange', onVisibilityChange, true);\n\n addInitialPointerMoveListeners();\n\n // For focus and blur, we specifically care about state changes in the local\n // scope. This is because focus / blur events that originate from within a\n // shadow root are not re-dispatched from the host element if it was already\n // the active element in its own scope:\n scope.addEventListener('focus', onFocus, true);\n scope.addEventListener('blur', onBlur, true);\n\n // We detect that a node is a ShadowRoot by ensuring that it is a\n // DocumentFragment and also has a host property. This check covers native\n // implementation and polyfill implementation transparently. If we only cared\n // about the native implementation, we could just check if the scope was\n // an instance of a ShadowRoot.\n if (scope.nodeType === Node.DOCUMENT_FRAGMENT_NODE && scope.host) {\n // Since a ShadowRoot is a special kind of DocumentFragment, it does not\n // have a root element to add a class to. So, we add this attribute to the\n // host element instead:\n scope.host.setAttribute('data-js-focus-visible', '');\n } else if (scope.nodeType === Node.DOCUMENT_NODE) {\n document.documentElement.classList.add('js-focus-visible');\n document.documentElement.setAttribute('data-js-focus-visible', '');\n }\n }\n\n // It is important to wrap all references to global window and document in\n // these checks to support server-side rendering use cases\n // @see https://github.com/WICG/focus-visible/issues/199\n if (typeof window !== 'undefined' && typeof document !== 'undefined') {\n // Make the polyfill helper globally available. 
This can be used as a signal\n // to interested libraries that wish to coordinate with the polyfill for e.g.,\n // applying the polyfill to a shadow root:\n window.applyFocusVisiblePolyfill = applyFocusVisiblePolyfill;\n\n // Notify interested libraries of the polyfill's presence, in case the\n // polyfill was loaded lazily:\n var event;\n\n try {\n event = new CustomEvent('focus-visible-polyfill-ready');\n } catch (error) {\n // IE11 does not support using CustomEvent as a constructor directly:\n event = document.createEvent('CustomEvent');\n event.initCustomEvent('focus-visible-polyfill-ready', false, false, {});\n }\n\n window.dispatchEvent(event);\n }\n\n if (typeof document !== 'undefined') {\n // Apply the polyfill to the global document, so that no JavaScript\n // coordination is required to use the polyfill in the top-level document:\n applyFocusVisiblePolyfill(document);\n }\n\n})));\n", "/*!\n * clipboard.js v2.0.11\n * https://clipboardjs.com/\n *\n * Licensed MIT \u00A9 Zeno Rocha\n */\n(function webpackUniversalModuleDefinition(root, factory) {\n\tif(typeof exports === 'object' && typeof module === 'object')\n\t\tmodule.exports = factory();\n\telse if(typeof define === 'function' && define.amd)\n\t\tdefine([], factory);\n\telse if(typeof exports === 'object')\n\t\texports[\"ClipboardJS\"] = factory();\n\telse\n\t\troot[\"ClipboardJS\"] = factory();\n})(this, function() {\nreturn /******/ (function() { // webpackBootstrap\n/******/ \tvar __webpack_modules__ = ({\n\n/***/ 686:\n/***/ (function(__unused_webpack_module, __webpack_exports__, __webpack_require__) {\n\n\"use strict\";\n\n// EXPORTS\n__webpack_require__.d(__webpack_exports__, {\n \"default\": function() { return /* binding */ clipboard; }\n});\n\n// EXTERNAL MODULE: ./node_modules/tiny-emitter/index.js\nvar tiny_emitter = __webpack_require__(279);\nvar tiny_emitter_default = /*#__PURE__*/__webpack_require__.n(tiny_emitter);\n// EXTERNAL MODULE: ./node_modules/good-listener/src/listen.js\nvar listen = __webpack_require__(370);\nvar listen_default = /*#__PURE__*/__webpack_require__.n(listen);\n// EXTERNAL MODULE: ./node_modules/select/src/select.js\nvar src_select = __webpack_require__(817);\nvar select_default = /*#__PURE__*/__webpack_require__.n(src_select);\n;// CONCATENATED MODULE: ./src/common/command.js\n/**\n * Executes a given operation type.\n * @param {String} type\n * @return {Boolean}\n */\nfunction command(type) {\n try {\n return document.execCommand(type);\n } catch (err) {\n return false;\n }\n}\n;// CONCATENATED MODULE: ./src/actions/cut.js\n\n\n/**\n * Cut action wrapper.\n * @param {String|HTMLElement} target\n * @return {String}\n */\n\nvar ClipboardActionCut = function ClipboardActionCut(target) {\n var selectedText = select_default()(target);\n command('cut');\n return selectedText;\n};\n\n/* harmony default export */ var actions_cut = (ClipboardActionCut);\n;// CONCATENATED MODULE: ./src/common/create-fake-element.js\n/**\n * Creates a fake textarea element with a value.\n * @param {String} value\n * @return {HTMLElement}\n */\nfunction createFakeElement(value) {\n var isRTL = document.documentElement.getAttribute('dir') === 'rtl';\n var fakeElement = document.createElement('textarea'); // Prevent zooming on iOS\n\n fakeElement.style.fontSize = '12pt'; // Reset box model\n\n fakeElement.style.border = '0';\n fakeElement.style.padding = '0';\n fakeElement.style.margin = '0'; // Move element out of screen horizontally\n\n fakeElement.style.position = 'absolute';\n fakeElement.style[isRTL ? 
'right' : 'left'] = '-9999px'; // Move element to the same position vertically\n\n var yPosition = window.pageYOffset || document.documentElement.scrollTop;\n fakeElement.style.top = \"\".concat(yPosition, \"px\");\n fakeElement.setAttribute('readonly', '');\n fakeElement.value = value;\n return fakeElement;\n}\n;// CONCATENATED MODULE: ./src/actions/copy.js\n\n\n\n/**\n * Create fake copy action wrapper using a fake element.\n * @param {String} target\n * @param {Object} options\n * @return {String}\n */\n\nvar fakeCopyAction = function fakeCopyAction(value, options) {\n var fakeElement = createFakeElement(value);\n options.container.appendChild(fakeElement);\n var selectedText = select_default()(fakeElement);\n command('copy');\n fakeElement.remove();\n return selectedText;\n};\n/**\n * Copy action wrapper.\n * @param {String|HTMLElement} target\n * @param {Object} options\n * @return {String}\n */\n\n\nvar ClipboardActionCopy = function ClipboardActionCopy(target) {\n var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {\n container: document.body\n };\n var selectedText = '';\n\n if (typeof target === 'string') {\n selectedText = fakeCopyAction(target, options);\n } else if (target instanceof HTMLInputElement && !['text', 'search', 'url', 'tel', 'password'].includes(target === null || target === void 0 ? void 0 : target.type)) {\n // If input type doesn't support `setSelectionRange`. Simulate it. https://developer.mozilla.org/en-US/docs/Web/API/HTMLInputElement/setSelectionRange\n selectedText = fakeCopyAction(target.value, options);\n } else {\n selectedText = select_default()(target);\n command('copy');\n }\n\n return selectedText;\n};\n\n/* harmony default export */ var actions_copy = (ClipboardActionCopy);\n;// CONCATENATED MODULE: ./src/actions/default.js\nfunction _typeof(obj) { \"@babel/helpers - typeof\"; if (typeof Symbol === \"function\" && typeof Symbol.iterator === \"symbol\") { _typeof = function _typeof(obj) { return typeof obj; }; } else { _typeof = function _typeof(obj) { return obj && typeof Symbol === \"function\" && obj.constructor === Symbol && obj !== Symbol.prototype ? \"symbol\" : typeof obj; }; } return _typeof(obj); }\n\n\n\n/**\n * Inner function which performs selection from either `text` or `target`\n * properties and then executes copy or cut operations.\n * @param {Object} options\n */\n\nvar ClipboardActionDefault = function ClipboardActionDefault() {\n var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};\n // Defines base properties passed from constructor.\n var _options$action = options.action,\n action = _options$action === void 0 ? 'copy' : _options$action,\n container = options.container,\n target = options.target,\n text = options.text; // Sets the `action` to be performed which can be either 'copy' or 'cut'.\n\n if (action !== 'copy' && action !== 'cut') {\n throw new Error('Invalid \"action\" value, use either \"copy\" or \"cut\"');\n } // Sets the `target` property using an element that will be have its content copied.\n\n\n if (target !== undefined) {\n if (target && _typeof(target) === 'object' && target.nodeType === 1) {\n if (action === 'copy' && target.hasAttribute('disabled')) {\n throw new Error('Invalid \"target\" attribute. Please use \"readonly\" instead of \"disabled\" attribute');\n }\n\n if (action === 'cut' && (target.hasAttribute('readonly') || target.hasAttribute('disabled'))) {\n throw new Error('Invalid \"target\" attribute. 
You can\\'t cut text from elements with \"readonly\" or \"disabled\" attributes');\n }\n } else {\n throw new Error('Invalid \"target\" value, use a valid Element');\n }\n } // Define selection strategy based on `text` property.\n\n\n if (text) {\n return actions_copy(text, {\n container: container\n });\n } // Defines which selection strategy based on `target` property.\n\n\n if (target) {\n return action === 'cut' ? actions_cut(target) : actions_copy(target, {\n container: container\n });\n }\n};\n\n/* harmony default export */ var actions_default = (ClipboardActionDefault);\n;// CONCATENATED MODULE: ./src/clipboard.js\nfunction clipboard_typeof(obj) { \"@babel/helpers - typeof\"; if (typeof Symbol === \"function\" && typeof Symbol.iterator === \"symbol\") { clipboard_typeof = function _typeof(obj) { return typeof obj; }; } else { clipboard_typeof = function _typeof(obj) { return obj && typeof Symbol === \"function\" && obj.constructor === Symbol && obj !== Symbol.prototype ? \"symbol\" : typeof obj; }; } return clipboard_typeof(obj); }\n\nfunction _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError(\"Cannot call a class as a function\"); } }\n\nfunction _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if (\"value\" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } }\n\nfunction _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); return Constructor; }\n\nfunction _inherits(subClass, superClass) { if (typeof superClass !== \"function\" && superClass !== null) { throw new TypeError(\"Super expression must either be null or a function\"); } subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: { value: subClass, writable: true, configurable: true } }); if (superClass) _setPrototypeOf(subClass, superClass); }\n\nfunction _setPrototypeOf(o, p) { _setPrototypeOf = Object.setPrototypeOf || function _setPrototypeOf(o, p) { o.__proto__ = p; return o; }; return _setPrototypeOf(o, p); }\n\nfunction _createSuper(Derived) { var hasNativeReflectConstruct = _isNativeReflectConstruct(); return function _createSuperInternal() { var Super = _getPrototypeOf(Derived), result; if (hasNativeReflectConstruct) { var NewTarget = _getPrototypeOf(this).constructor; result = Reflect.construct(Super, arguments, NewTarget); } else { result = Super.apply(this, arguments); } return _possibleConstructorReturn(this, result); }; }\n\nfunction _possibleConstructorReturn(self, call) { if (call && (clipboard_typeof(call) === \"object\" || typeof call === \"function\")) { return call; } return _assertThisInitialized(self); }\n\nfunction _assertThisInitialized(self) { if (self === void 0) { throw new ReferenceError(\"this hasn't been initialised - super() hasn't been called\"); } return self; }\n\nfunction _isNativeReflectConstruct() { if (typeof Reflect === \"undefined\" || !Reflect.construct) return false; if (Reflect.construct.sham) return false; if (typeof Proxy === \"function\") return true; try { Date.prototype.toString.call(Reflect.construct(Date, [], function () {})); return true; } catch (e) { return false; } }\n\nfunction _getPrototypeOf(o) { _getPrototypeOf = Object.setPrototypeOf ? 
Object.getPrototypeOf : function _getPrototypeOf(o) { return o.__proto__ || Object.getPrototypeOf(o); }; return _getPrototypeOf(o); }\n\n\n\n\n\n\n/**\n * Helper function to retrieve attribute value.\n * @param {String} suffix\n * @param {Element} element\n */\n\nfunction getAttributeValue(suffix, element) {\n var attribute = \"data-clipboard-\".concat(suffix);\n\n if (!element.hasAttribute(attribute)) {\n return;\n }\n\n return element.getAttribute(attribute);\n}\n/**\n * Base class which takes one or more elements, adds event listeners to them,\n * and instantiates a new `ClipboardAction` on each click.\n */\n\n\nvar Clipboard = /*#__PURE__*/function (_Emitter) {\n _inherits(Clipboard, _Emitter);\n\n var _super = _createSuper(Clipboard);\n\n /**\n * @param {String|HTMLElement|HTMLCollection|NodeList} trigger\n * @param {Object} options\n */\n function Clipboard(trigger, options) {\n var _this;\n\n _classCallCheck(this, Clipboard);\n\n _this = _super.call(this);\n\n _this.resolveOptions(options);\n\n _this.listenClick(trigger);\n\n return _this;\n }\n /**\n * Defines if attributes would be resolved using internal setter functions\n * or custom functions that were passed in the constructor.\n * @param {Object} options\n */\n\n\n _createClass(Clipboard, [{\n key: \"resolveOptions\",\n value: function resolveOptions() {\n var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};\n this.action = typeof options.action === 'function' ? options.action : this.defaultAction;\n this.target = typeof options.target === 'function' ? options.target : this.defaultTarget;\n this.text = typeof options.text === 'function' ? options.text : this.defaultText;\n this.container = clipboard_typeof(options.container) === 'object' ? options.container : document.body;\n }\n /**\n * Adds a click event listener to the passed trigger.\n * @param {String|HTMLElement|HTMLCollection|NodeList} trigger\n */\n\n }, {\n key: \"listenClick\",\n value: function listenClick(trigger) {\n var _this2 = this;\n\n this.listener = listen_default()(trigger, 'click', function (e) {\n return _this2.onClick(e);\n });\n }\n /**\n * Defines a new `ClipboardAction` on each click event.\n * @param {Event} e\n */\n\n }, {\n key: \"onClick\",\n value: function onClick(e) {\n var trigger = e.delegateTarget || e.currentTarget;\n var action = this.action(trigger) || 'copy';\n var text = actions_default({\n action: action,\n container: this.container,\n target: this.target(trigger),\n text: this.text(trigger)\n }); // Fires an event based on the copy operation result.\n\n this.emit(text ? 
'success' : 'error', {\n action: action,\n text: text,\n trigger: trigger,\n clearSelection: function clearSelection() {\n if (trigger) {\n trigger.focus();\n }\n\n window.getSelection().removeAllRanges();\n }\n });\n }\n /**\n * Default `action` lookup function.\n * @param {Element} trigger\n */\n\n }, {\n key: \"defaultAction\",\n value: function defaultAction(trigger) {\n return getAttributeValue('action', trigger);\n }\n /**\n * Default `target` lookup function.\n * @param {Element} trigger\n */\n\n }, {\n key: \"defaultTarget\",\n value: function defaultTarget(trigger) {\n var selector = getAttributeValue('target', trigger);\n\n if (selector) {\n return document.querySelector(selector);\n }\n }\n /**\n * Allow fire programmatically a copy action\n * @param {String|HTMLElement} target\n * @param {Object} options\n * @returns Text copied.\n */\n\n }, {\n key: \"defaultText\",\n\n /**\n * Default `text` lookup function.\n * @param {Element} trigger\n */\n value: function defaultText(trigger) {\n return getAttributeValue('text', trigger);\n }\n /**\n * Destroy lifecycle.\n */\n\n }, {\n key: \"destroy\",\n value: function destroy() {\n this.listener.destroy();\n }\n }], [{\n key: \"copy\",\n value: function copy(target) {\n var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {\n container: document.body\n };\n return actions_copy(target, options);\n }\n /**\n * Allow fire programmatically a cut action\n * @param {String|HTMLElement} target\n * @returns Text cutted.\n */\n\n }, {\n key: \"cut\",\n value: function cut(target) {\n return actions_cut(target);\n }\n /**\n * Returns the support of the given action, or all actions if no action is\n * given.\n * @param {String} [action]\n */\n\n }, {\n key: \"isSupported\",\n value: function isSupported() {\n var action = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : ['copy', 'cut'];\n var actions = typeof action === 'string' ? 
[action] : action;\n var support = !!document.queryCommandSupported;\n actions.forEach(function (action) {\n support = support && !!document.queryCommandSupported(action);\n });\n return support;\n }\n }]);\n\n return Clipboard;\n}((tiny_emitter_default()));\n\n/* harmony default export */ var clipboard = (Clipboard);\n\n/***/ }),\n\n/***/ 828:\n/***/ (function(module) {\n\nvar DOCUMENT_NODE_TYPE = 9;\n\n/**\n * A polyfill for Element.matches()\n */\nif (typeof Element !== 'undefined' && !Element.prototype.matches) {\n var proto = Element.prototype;\n\n proto.matches = proto.matchesSelector ||\n proto.mozMatchesSelector ||\n proto.msMatchesSelector ||\n proto.oMatchesSelector ||\n proto.webkitMatchesSelector;\n}\n\n/**\n * Finds the closest parent that matches a selector.\n *\n * @param {Element} element\n * @param {String} selector\n * @return {Function}\n */\nfunction closest (element, selector) {\n while (element && element.nodeType !== DOCUMENT_NODE_TYPE) {\n if (typeof element.matches === 'function' &&\n element.matches(selector)) {\n return element;\n }\n element = element.parentNode;\n }\n}\n\nmodule.exports = closest;\n\n\n/***/ }),\n\n/***/ 438:\n/***/ (function(module, __unused_webpack_exports, __webpack_require__) {\n\nvar closest = __webpack_require__(828);\n\n/**\n * Delegates event to a selector.\n *\n * @param {Element} element\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @param {Boolean} useCapture\n * @return {Object}\n */\nfunction _delegate(element, selector, type, callback, useCapture) {\n var listenerFn = listener.apply(this, arguments);\n\n element.addEventListener(type, listenerFn, useCapture);\n\n return {\n destroy: function() {\n element.removeEventListener(type, listenerFn, useCapture);\n }\n }\n}\n\n/**\n * Delegates event to a selector.\n *\n * @param {Element|String|Array} [elements]\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @param {Boolean} useCapture\n * @return {Object}\n */\nfunction delegate(elements, selector, type, callback, useCapture) {\n // Handle the regular Element usage\n if (typeof elements.addEventListener === 'function') {\n return _delegate.apply(null, arguments);\n }\n\n // Handle Element-less usage, it defaults to global delegation\n if (typeof type === 'function') {\n // Use `document` as the first parameter, then apply arguments\n // This is a short way to .unshift `arguments` without running into deoptimizations\n return _delegate.bind(null, document).apply(null, arguments);\n }\n\n // Handle Selector-based usage\n if (typeof elements === 'string') {\n elements = document.querySelectorAll(elements);\n }\n\n // Handle Array-like based usage\n return Array.prototype.map.call(elements, function (element) {\n return _delegate(element, selector, type, callback, useCapture);\n });\n}\n\n/**\n * Finds closest match and invokes callback.\n *\n * @param {Element} element\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @return {Function}\n */\nfunction listener(element, selector, type, callback) {\n return function(e) {\n e.delegateTarget = closest(e.target, selector);\n\n if (e.delegateTarget) {\n callback.call(element, e);\n }\n }\n}\n\nmodule.exports = delegate;\n\n\n/***/ }),\n\n/***/ 879:\n/***/ (function(__unused_webpack_module, exports) {\n\n/**\n * Check if argument is a HTML element.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.node = function(value) {\n return value !== undefined\n && 
value instanceof HTMLElement\n && value.nodeType === 1;\n};\n\n/**\n * Check if argument is a list of HTML elements.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.nodeList = function(value) {\n var type = Object.prototype.toString.call(value);\n\n return value !== undefined\n && (type === '[object NodeList]' || type === '[object HTMLCollection]')\n && ('length' in value)\n && (value.length === 0 || exports.node(value[0]));\n};\n\n/**\n * Check if argument is a string.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.string = function(value) {\n return typeof value === 'string'\n || value instanceof String;\n};\n\n/**\n * Check if argument is a function.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.fn = function(value) {\n var type = Object.prototype.toString.call(value);\n\n return type === '[object Function]';\n};\n\n\n/***/ }),\n\n/***/ 370:\n/***/ (function(module, __unused_webpack_exports, __webpack_require__) {\n\nvar is = __webpack_require__(879);\nvar delegate = __webpack_require__(438);\n\n/**\n * Validates all params and calls the right\n * listener function based on its target type.\n *\n * @param {String|HTMLElement|HTMLCollection|NodeList} target\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listen(target, type, callback) {\n if (!target && !type && !callback) {\n throw new Error('Missing required arguments');\n }\n\n if (!is.string(type)) {\n throw new TypeError('Second argument must be a String');\n }\n\n if (!is.fn(callback)) {\n throw new TypeError('Third argument must be a Function');\n }\n\n if (is.node(target)) {\n return listenNode(target, type, callback);\n }\n else if (is.nodeList(target)) {\n return listenNodeList(target, type, callback);\n }\n else if (is.string(target)) {\n return listenSelector(target, type, callback);\n }\n else {\n throw new TypeError('First argument must be a String, HTMLElement, HTMLCollection, or NodeList');\n }\n}\n\n/**\n * Adds an event listener to a HTML element\n * and returns a remove listener function.\n *\n * @param {HTMLElement} node\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenNode(node, type, callback) {\n node.addEventListener(type, callback);\n\n return {\n destroy: function() {\n node.removeEventListener(type, callback);\n }\n }\n}\n\n/**\n * Add an event listener to a list of HTML elements\n * and returns a remove listener function.\n *\n * @param {NodeList|HTMLCollection} nodeList\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenNodeList(nodeList, type, callback) {\n Array.prototype.forEach.call(nodeList, function(node) {\n node.addEventListener(type, callback);\n });\n\n return {\n destroy: function() {\n Array.prototype.forEach.call(nodeList, function(node) {\n node.removeEventListener(type, callback);\n });\n }\n }\n}\n\n/**\n * Add an event listener to a selector\n * and returns a remove listener function.\n *\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenSelector(selector, type, callback) {\n return delegate(document.body, selector, type, callback);\n}\n\nmodule.exports = listen;\n\n\n/***/ }),\n\n/***/ 817:\n/***/ (function(module) {\n\nfunction select(element) {\n var selectedText;\n\n if (element.nodeName === 'SELECT') {\n element.focus();\n\n selectedText = element.value;\n }\n else if (element.nodeName === 'INPUT' || element.nodeName 
=== 'TEXTAREA') {\n var isReadOnly = element.hasAttribute('readonly');\n\n if (!isReadOnly) {\n element.setAttribute('readonly', '');\n }\n\n element.select();\n element.setSelectionRange(0, element.value.length);\n\n if (!isReadOnly) {\n element.removeAttribute('readonly');\n }\n\n selectedText = element.value;\n }\n else {\n if (element.hasAttribute('contenteditable')) {\n element.focus();\n }\n\n var selection = window.getSelection();\n var range = document.createRange();\n\n range.selectNodeContents(element);\n selection.removeAllRanges();\n selection.addRange(range);\n\n selectedText = selection.toString();\n }\n\n return selectedText;\n}\n\nmodule.exports = select;\n\n\n/***/ }),\n\n/***/ 279:\n/***/ (function(module) {\n\nfunction E () {\n // Keep this empty so it's easier to inherit from\n // (via https://github.com/lipsmack from https://github.com/scottcorgan/tiny-emitter/issues/3)\n}\n\nE.prototype = {\n on: function (name, callback, ctx) {\n var e = this.e || (this.e = {});\n\n (e[name] || (e[name] = [])).push({\n fn: callback,\n ctx: ctx\n });\n\n return this;\n },\n\n once: function (name, callback, ctx) {\n var self = this;\n function listener () {\n self.off(name, listener);\n callback.apply(ctx, arguments);\n };\n\n listener._ = callback\n return this.on(name, listener, ctx);\n },\n\n emit: function (name) {\n var data = [].slice.call(arguments, 1);\n var evtArr = ((this.e || (this.e = {}))[name] || []).slice();\n var i = 0;\n var len = evtArr.length;\n\n for (i; i < len; i++) {\n evtArr[i].fn.apply(evtArr[i].ctx, data);\n }\n\n return this;\n },\n\n off: function (name, callback) {\n var e = this.e || (this.e = {});\n var evts = e[name];\n var liveEvents = [];\n\n if (evts && callback) {\n for (var i = 0, len = evts.length; i < len; i++) {\n if (evts[i].fn !== callback && evts[i].fn._ !== callback)\n liveEvents.push(evts[i]);\n }\n }\n\n // Remove event from queue to prevent memory leak\n // Suggested by https://github.com/lazd\n // Ref: https://github.com/scottcorgan/tiny-emitter/commit/c6ebfaa9bc973b33d110a84a307742b7cf94c953#commitcomment-5024910\n\n (liveEvents.length)\n ? 
e[name] = liveEvents\n : delete e[name];\n\n return this;\n }\n};\n\nmodule.exports = E;\nmodule.exports.TinyEmitter = E;\n\n\n/***/ })\n\n/******/ \t});\n/************************************************************************/\n/******/ \t// The module cache\n/******/ \tvar __webpack_module_cache__ = {};\n/******/ \t\n/******/ \t// The require function\n/******/ \tfunction __webpack_require__(moduleId) {\n/******/ \t\t// Check if module is in cache\n/******/ \t\tif(__webpack_module_cache__[moduleId]) {\n/******/ \t\t\treturn __webpack_module_cache__[moduleId].exports;\n/******/ \t\t}\n/******/ \t\t// Create a new module (and put it into the cache)\n/******/ \t\tvar module = __webpack_module_cache__[moduleId] = {\n/******/ \t\t\t// no module.id needed\n/******/ \t\t\t// no module.loaded needed\n/******/ \t\t\texports: {}\n/******/ \t\t};\n/******/ \t\n/******/ \t\t// Execute the module function\n/******/ \t\t__webpack_modules__[moduleId](module, module.exports, __webpack_require__);\n/******/ \t\n/******/ \t\t// Return the exports of the module\n/******/ \t\treturn module.exports;\n/******/ \t}\n/******/ \t\n/************************************************************************/\n/******/ \t/* webpack/runtime/compat get default export */\n/******/ \t!function() {\n/******/ \t\t// getDefaultExport function for compatibility with non-harmony modules\n/******/ \t\t__webpack_require__.n = function(module) {\n/******/ \t\t\tvar getter = module && module.__esModule ?\n/******/ \t\t\t\tfunction() { return module['default']; } :\n/******/ \t\t\t\tfunction() { return module; };\n/******/ \t\t\t__webpack_require__.d(getter, { a: getter });\n/******/ \t\t\treturn getter;\n/******/ \t\t};\n/******/ \t}();\n/******/ \t\n/******/ \t/* webpack/runtime/define property getters */\n/******/ \t!function() {\n/******/ \t\t// define getter functions for harmony exports\n/******/ \t\t__webpack_require__.d = function(exports, definition) {\n/******/ \t\t\tfor(var key in definition) {\n/******/ \t\t\t\tif(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n/******/ \t\t\t\t\tObject.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n/******/ \t\t\t\t}\n/******/ \t\t\t}\n/******/ \t\t};\n/******/ \t}();\n/******/ \t\n/******/ \t/* webpack/runtime/hasOwnProperty shorthand */\n/******/ \t!function() {\n/******/ \t\t__webpack_require__.o = function(obj, prop) { return Object.prototype.hasOwnProperty.call(obj, prop); }\n/******/ \t}();\n/******/ \t\n/************************************************************************/\n/******/ \t// module exports must be returned from runtime so entry inlining is disabled\n/******/ \t// startup\n/******/ \t// Load entry module and return exports\n/******/ \treturn __webpack_require__(686);\n/******/ })()\n.default;\n});", "/*!\n * escape-html\n * Copyright(c) 2012-2013 TJ Holowaychuk\n * Copyright(c) 2015 Andreas Lubbe\n * Copyright(c) 2015 Tiancheng \"Timothy\" Gu\n * MIT Licensed\n */\n\n'use strict';\n\n/**\n * Module variables.\n * @private\n */\n\nvar matchHtmlRegExp = /[\"'&<>]/;\n\n/**\n * Module exports.\n * @public\n */\n\nmodule.exports = escapeHtml;\n\n/**\n * Escape special characters in the given string of html.\n *\n * @param {string} string The string to escape for inserting into HTML\n * @return {string}\n * @public\n */\n\nfunction escapeHtml(string) {\n var str = '' + string;\n var match = matchHtmlRegExp.exec(str);\n\n if (!match) {\n return str;\n }\n\n var escape;\n var html = '';\n var index = 0;\n 
var lastIndex = 0;\n\n for (index = match.index; index < str.length; index++) {\n switch (str.charCodeAt(index)) {\n case 34: // \"\n escape = '"';\n break;\n case 38: // &\n escape = '&';\n break;\n case 39: // '\n escape = ''';\n break;\n case 60: // <\n escape = '<';\n break;\n case 62: // >\n escape = '>';\n break;\n default:\n continue;\n }\n\n if (lastIndex !== index) {\n html += str.substring(lastIndex, index);\n }\n\n lastIndex = index + 1;\n html += escape;\n }\n\n return lastIndex !== index\n ? html + str.substring(lastIndex, index)\n : html;\n}\n", "/*\n * Copyright (c) 2016-2023 Martin Donath \n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to\n * deal in the Software without restriction, including without limitation the\n * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or\n * sell copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS\n * IN THE SOFTWARE.\n */\n\nimport \"focus-visible\"\n\nimport {\n EMPTY,\n NEVER,\n Observable,\n Subject,\n defer,\n delay,\n filter,\n map,\n merge,\n mergeWith,\n shareReplay,\n switchMap\n} from \"rxjs\"\n\nimport { configuration, feature } from \"./_\"\nimport {\n at,\n getActiveElement,\n getOptionalElement,\n requestJSON,\n setLocation,\n setToggle,\n watchDocument,\n watchKeyboard,\n watchLocation,\n watchLocationTarget,\n watchMedia,\n watchPrint,\n watchScript,\n watchViewport\n} from \"./browser\"\nimport {\n getComponentElement,\n getComponentElements,\n mountAnnounce,\n mountBackToTop,\n mountConsent,\n mountContent,\n mountDialog,\n mountHeader,\n mountHeaderTitle,\n mountPalette,\n mountSearch,\n mountSearchHiglight,\n mountSidebar,\n mountSource,\n mountTableOfContents,\n mountTabs,\n watchHeader,\n watchMain\n} from \"./components\"\nimport {\n SearchIndex,\n setupClipboardJS,\n setupInstantLoading,\n setupVersionSelector\n} from \"./integrations\"\nimport {\n patchIndeterminate,\n patchScrollfix,\n patchScrolllock\n} from \"./patches\"\nimport \"./polyfills\"\n\n/* ----------------------------------------------------------------------------\n * Functions - @todo refactor\n * ------------------------------------------------------------------------- */\n\n/**\n * Fetch search index\n *\n * @returns Search index observable\n */\nfunction fetchSearchIndex(): Observable {\n if (location.protocol === \"file:\") {\n return watchScript(\n `${new URL(\"search/search_index.js\", config.base)}`\n )\n .pipe(\n // @ts-ignore - @todo fix typings\n map(() => __index),\n shareReplay(1)\n )\n } else {\n return requestJSON(\n new URL(\"search/search_index.json\", config.base)\n )\n }\n}\n\n/* ----------------------------------------------------------------------------\n * Application\n * 
------------------------------------------------------------------------- */\n\n/* Yay, JavaScript is available */\ndocument.documentElement.classList.remove(\"no-js\")\ndocument.documentElement.classList.add(\"js\")\n\n/* Set up navigation observables and subjects */\nconst document$ = watchDocument()\nconst location$ = watchLocation()\nconst target$ = watchLocationTarget(location$)\nconst keyboard$ = watchKeyboard()\n\n/* Set up media observables */\nconst viewport$ = watchViewport()\nconst tablet$ = watchMedia(\"(min-width: 960px)\")\nconst screen$ = watchMedia(\"(min-width: 1220px)\")\nconst print$ = watchPrint()\n\n/* Retrieve search index, if search is enabled */\nconst config = configuration()\nconst index$ = document.forms.namedItem(\"search\")\n ? fetchSearchIndex()\n : NEVER\n\n/* Set up Clipboard.js integration */\nconst alert$ = new Subject()\nsetupClipboardJS({ alert$ })\n\n/* Set up instant loading, if enabled */\nif (feature(\"navigation.instant\"))\n setupInstantLoading({ location$, viewport$ })\n .subscribe(document$)\n\n/* Set up version selector */\nif (config.version?.provider === \"mike\")\n setupVersionSelector({ document$ })\n\n/* Always close drawer and search on navigation */\nmerge(location$, target$)\n .pipe(\n delay(125)\n )\n .subscribe(() => {\n setToggle(\"drawer\", false)\n setToggle(\"search\", false)\n })\n\n/* Set up global keyboard handlers */\nkeyboard$\n .pipe(\n filter(({ mode }) => mode === \"global\")\n )\n .subscribe(key => {\n switch (key.type) {\n\n /* Go to previous page */\n case \"p\":\n case \",\":\n const prev = getOptionalElement(\"link[rel=prev]\")\n if (typeof prev !== \"undefined\")\n setLocation(prev)\n break\n\n /* Go to next page */\n case \"n\":\n case \".\":\n const next = getOptionalElement(\"link[rel=next]\")\n if (typeof next !== \"undefined\")\n setLocation(next)\n break\n\n /* Expand navigation, see https://bit.ly/3ZjG5io */\n case \"Enter\":\n const active = getActiveElement()\n if (active instanceof HTMLLabelElement)\n active.click()\n }\n })\n\n/* Set up patches */\npatchIndeterminate({ document$, tablet$ })\npatchScrollfix({ document$ })\npatchScrolllock({ viewport$, tablet$ })\n\n/* Set up header and main area observable */\nconst header$ = watchHeader(getComponentElement(\"header\"), { viewport$ })\nconst main$ = document$\n .pipe(\n map(() => getComponentElement(\"main\")),\n switchMap(el => watchMain(el, { viewport$, header$ })),\n shareReplay(1)\n )\n\n/* Set up control component observables */\nconst control$ = merge(\n\n /* Consent */\n ...getComponentElements(\"consent\")\n .map(el => mountConsent(el, { target$ })),\n\n /* Dialog */\n ...getComponentElements(\"dialog\")\n .map(el => mountDialog(el, { alert$ })),\n\n /* Header */\n ...getComponentElements(\"header\")\n .map(el => mountHeader(el, { viewport$, header$, main$ })),\n\n /* Color palette */\n ...getComponentElements(\"palette\")\n .map(el => mountPalette(el)),\n\n /* Search */\n ...getComponentElements(\"search\")\n .map(el => mountSearch(el, { index$, keyboard$ })),\n\n /* Repository information */\n ...getComponentElements(\"source\")\n .map(el => mountSource(el))\n)\n\n/* Set up content component observables */\nconst content$ = defer(() => merge(\n\n /* Announcement bar */\n ...getComponentElements(\"announce\")\n .map(el => mountAnnounce(el)),\n\n /* Content */\n ...getComponentElements(\"content\")\n .map(el => mountContent(el, { viewport$, target$, print$ })),\n\n /* Search highlighting */\n ...getComponentElements(\"content\")\n .map(el => 
feature(\"search.highlight\")\n ? mountSearchHiglight(el, { index$, location$ })\n : EMPTY\n ),\n\n /* Header title */\n ...getComponentElements(\"header-title\")\n .map(el => mountHeaderTitle(el, { viewport$, header$ })),\n\n /* Sidebar */\n ...getComponentElements(\"sidebar\")\n .map(el => el.getAttribute(\"data-md-type\") === \"navigation\"\n ? at(screen$, () => mountSidebar(el, { viewport$, header$, main$ }))\n : at(tablet$, () => mountSidebar(el, { viewport$, header$, main$ }))\n ),\n\n /* Navigation tabs */\n ...getComponentElements(\"tabs\")\n .map(el => mountTabs(el, { viewport$, header$ })),\n\n /* Table of contents */\n ...getComponentElements(\"toc\")\n .map(el => mountTableOfContents(el, {\n viewport$, header$, main$, target$\n })),\n\n /* Back-to-top button */\n ...getComponentElements(\"top\")\n .map(el => mountBackToTop(el, { viewport$, header$, main$, target$ }))\n))\n\n/* Set up component observables */\nconst component$ = document$\n .pipe(\n switchMap(() => content$),\n mergeWith(control$),\n shareReplay(1)\n )\n\n/* Subscribe to all components */\ncomponent$.subscribe()\n\n/* ----------------------------------------------------------------------------\n * Exports\n * ------------------------------------------------------------------------- */\n\nwindow.document$ = document$ /* Document observable */\nwindow.location$ = location$ /* Location subject */\nwindow.target$ = target$ /* Location target observable */\nwindow.keyboard$ = keyboard$ /* Keyboard observable */\nwindow.viewport$ = viewport$ /* Viewport observable */\nwindow.tablet$ = tablet$ /* Media tablet observable */\nwindow.screen$ = screen$ /* Media screen observable */\nwindow.print$ = print$ /* Media print observable */\nwindow.alert$ = alert$ /* Alert subject */\nwindow.component$ = component$ /* Component observable */\n", "/*! *****************************************************************************\r\nCopyright (c) Microsoft Corporation.\r\n\r\nPermission to use, copy, modify, and/or distribute this software for any\r\npurpose with or without fee is hereby granted.\r\n\r\nTHE SOFTWARE IS PROVIDED \"AS IS\" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH\r\nREGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY\r\nAND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,\r\nINDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM\r\nLOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR\r\nOTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR\r\nPERFORMANCE OF THIS SOFTWARE.\r\n***************************************************************************** */\r\n/* global Reflect, Promise */\r\n\r\nvar extendStatics = function(d, b) {\r\n extendStatics = Object.setPrototypeOf ||\r\n ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||\r\n function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };\r\n return extendStatics(d, b);\r\n};\r\n\r\nexport function __extends(d, b) {\r\n if (typeof b !== \"function\" && b !== null)\r\n throw new TypeError(\"Class extends value \" + String(b) + \" is not a constructor or null\");\r\n extendStatics(d, b);\r\n function __() { this.constructor = d; }\r\n d.prototype = b === null ? 
Object.create(b) : (__.prototype = b.prototype, new __());\r\n}\r\n\r\nexport var __assign = function() {\r\n __assign = Object.assign || function __assign(t) {\r\n for (var s, i = 1, n = arguments.length; i < n; i++) {\r\n s = arguments[i];\r\n for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p)) t[p] = s[p];\r\n }\r\n return t;\r\n }\r\n return __assign.apply(this, arguments);\r\n}\r\n\r\nexport function __rest(s, e) {\r\n var t = {};\r\n for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p) && e.indexOf(p) < 0)\r\n t[p] = s[p];\r\n if (s != null && typeof Object.getOwnPropertySymbols === \"function\")\r\n for (var i = 0, p = Object.getOwnPropertySymbols(s); i < p.length; i++) {\r\n if (e.indexOf(p[i]) < 0 && Object.prototype.propertyIsEnumerable.call(s, p[i]))\r\n t[p[i]] = s[p[i]];\r\n }\r\n return t;\r\n}\r\n\r\nexport function __decorate(decorators, target, key, desc) {\r\n var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;\r\n if (typeof Reflect === \"object\" && typeof Reflect.decorate === \"function\") r = Reflect.decorate(decorators, target, key, desc);\r\n else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;\r\n return c > 3 && r && Object.defineProperty(target, key, r), r;\r\n}\r\n\r\nexport function __param(paramIndex, decorator) {\r\n return function (target, key) { decorator(target, key, paramIndex); }\r\n}\r\n\r\nexport function __metadata(metadataKey, metadataValue) {\r\n if (typeof Reflect === \"object\" && typeof Reflect.metadata === \"function\") return Reflect.metadata(metadataKey, metadataValue);\r\n}\r\n\r\nexport function __awaiter(thisArg, _arguments, P, generator) {\r\n function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }\r\n return new (P || (P = Promise))(function (resolve, reject) {\r\n function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }\r\n function rejected(value) { try { step(generator[\"throw\"](value)); } catch (e) { reject(e); } }\r\n function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }\r\n step((generator = generator.apply(thisArg, _arguments || [])).next());\r\n });\r\n}\r\n\r\nexport function __generator(thisArg, body) {\r\n var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;\r\n return g = { next: verb(0), \"throw\": verb(1), \"return\": verb(2) }, typeof Symbol === \"function\" && (g[Symbol.iterator] = function() { return this; }), g;\r\n function verb(n) { return function (v) { return step([n, v]); }; }\r\n function step(op) {\r\n if (f) throw new TypeError(\"Generator is already executing.\");\r\n while (_) try {\r\n if (f = 1, y && (t = op[0] & 2 ? y[\"return\"] : op[0] ? 
y[\"throw\"] || ((t = y[\"return\"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;\r\n if (y = 0, t) op = [op[0] & 2, t.value];\r\n switch (op[0]) {\r\n case 0: case 1: t = op; break;\r\n case 4: _.label++; return { value: op[1], done: false };\r\n case 5: _.label++; y = op[1]; op = [0]; continue;\r\n case 7: op = _.ops.pop(); _.trys.pop(); continue;\r\n default:\r\n if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }\r\n if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }\r\n if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }\r\n if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }\r\n if (t[2]) _.ops.pop();\r\n _.trys.pop(); continue;\r\n }\r\n op = body.call(thisArg, _);\r\n } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }\r\n if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };\r\n }\r\n}\r\n\r\nexport var __createBinding = Object.create ? (function(o, m, k, k2) {\r\n if (k2 === undefined) k2 = k;\r\n Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });\r\n}) : (function(o, m, k, k2) {\r\n if (k2 === undefined) k2 = k;\r\n o[k2] = m[k];\r\n});\r\n\r\nexport function __exportStar(m, o) {\r\n for (var p in m) if (p !== \"default\" && !Object.prototype.hasOwnProperty.call(o, p)) __createBinding(o, m, p);\r\n}\r\n\r\nexport function __values(o) {\r\n var s = typeof Symbol === \"function\" && Symbol.iterator, m = s && o[s], i = 0;\r\n if (m) return m.call(o);\r\n if (o && typeof o.length === \"number\") return {\r\n next: function () {\r\n if (o && i >= o.length) o = void 0;\r\n return { value: o && o[i++], done: !o };\r\n }\r\n };\r\n throw new TypeError(s ? \"Object is not iterable.\" : \"Symbol.iterator is not defined.\");\r\n}\r\n\r\nexport function __read(o, n) {\r\n var m = typeof Symbol === \"function\" && o[Symbol.iterator];\r\n if (!m) return o;\r\n var i = m.call(o), r, ar = [], e;\r\n try {\r\n while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);\r\n }\r\n catch (error) { e = { error: error }; }\r\n finally {\r\n try {\r\n if (r && !r.done && (m = i[\"return\"])) m.call(i);\r\n }\r\n finally { if (e) throw e.error; }\r\n }\r\n return ar;\r\n}\r\n\r\n/** @deprecated */\r\nexport function __spread() {\r\n for (var ar = [], i = 0; i < arguments.length; i++)\r\n ar = ar.concat(__read(arguments[i]));\r\n return ar;\r\n}\r\n\r\n/** @deprecated */\r\nexport function __spreadArrays() {\r\n for (var s = 0, i = 0, il = arguments.length; i < il; i++) s += arguments[i].length;\r\n for (var r = Array(s), k = 0, i = 0; i < il; i++)\r\n for (var a = arguments[i], j = 0, jl = a.length; j < jl; j++, k++)\r\n r[k] = a[j];\r\n return r;\r\n}\r\n\r\nexport function __spreadArray(to, from, pack) {\r\n if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {\r\n if (ar || !(i in from)) {\r\n if (!ar) ar = Array.prototype.slice.call(from, 0, i);\r\n ar[i] = from[i];\r\n }\r\n }\r\n return to.concat(ar || Array.prototype.slice.call(from));\r\n}\r\n\r\nexport function __await(v) {\r\n return this instanceof __await ? 
(this.v = v, this) : new __await(v);\r\n}\r\n\r\nexport function __asyncGenerator(thisArg, _arguments, generator) {\r\n if (!Symbol.asyncIterator) throw new TypeError(\"Symbol.asyncIterator is not defined.\");\r\n var g = generator.apply(thisArg, _arguments || []), i, q = [];\r\n return i = {}, verb(\"next\"), verb(\"throw\"), verb(\"return\"), i[Symbol.asyncIterator] = function () { return this; }, i;\r\n function verb(n) { if (g[n]) i[n] = function (v) { return new Promise(function (a, b) { q.push([n, v, a, b]) > 1 || resume(n, v); }); }; }\r\n function resume(n, v) { try { step(g[n](v)); } catch (e) { settle(q[0][3], e); } }\r\n function step(r) { r.value instanceof __await ? Promise.resolve(r.value.v).then(fulfill, reject) : settle(q[0][2], r); }\r\n function fulfill(value) { resume(\"next\", value); }\r\n function reject(value) { resume(\"throw\", value); }\r\n function settle(f, v) { if (f(v), q.shift(), q.length) resume(q[0][0], q[0][1]); }\r\n}\r\n\r\nexport function __asyncDelegator(o) {\r\n var i, p;\r\n return i = {}, verb(\"next\"), verb(\"throw\", function (e) { throw e; }), verb(\"return\"), i[Symbol.iterator] = function () { return this; }, i;\r\n function verb(n, f) { i[n] = o[n] ? function (v) { return (p = !p) ? { value: __await(o[n](v)), done: n === \"return\" } : f ? f(v) : v; } : f; }\r\n}\r\n\r\nexport function __asyncValues(o) {\r\n if (!Symbol.asyncIterator) throw new TypeError(\"Symbol.asyncIterator is not defined.\");\r\n var m = o[Symbol.asyncIterator], i;\r\n return m ? m.call(o) : (o = typeof __values === \"function\" ? __values(o) : o[Symbol.iterator](), i = {}, verb(\"next\"), verb(\"throw\"), verb(\"return\"), i[Symbol.asyncIterator] = function () { return this; }, i);\r\n function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }\r\n function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }\r\n}\r\n\r\nexport function __makeTemplateObject(cooked, raw) {\r\n if (Object.defineProperty) { Object.defineProperty(cooked, \"raw\", { value: raw }); } else { cooked.raw = raw; }\r\n return cooked;\r\n};\r\n\r\nvar __setModuleDefault = Object.create ? (function(o, v) {\r\n Object.defineProperty(o, \"default\", { enumerable: true, value: v });\r\n}) : function(o, v) {\r\n o[\"default\"] = v;\r\n};\r\n\r\nexport function __importStar(mod) {\r\n if (mod && mod.__esModule) return mod;\r\n var result = {};\r\n if (mod != null) for (var k in mod) if (k !== \"default\" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);\r\n __setModuleDefault(result, mod);\r\n return result;\r\n}\r\n\r\nexport function __importDefault(mod) {\r\n return (mod && mod.__esModule) ? mod : { default: mod };\r\n}\r\n\r\nexport function __classPrivateFieldGet(receiver, state, kind, f) {\r\n if (kind === \"a\" && !f) throw new TypeError(\"Private accessor was defined without a getter\");\r\n if (typeof state === \"function\" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError(\"Cannot read private member from an object whose class did not declare it\");\r\n return kind === \"m\" ? f : kind === \"a\" ? f.call(receiver) : f ? 
f.value : state.get(receiver);\r\n}\r\n\r\nexport function __classPrivateFieldSet(receiver, state, value, kind, f) {\r\n if (kind === \"m\") throw new TypeError(\"Private method is not writable\");\r\n if (kind === \"a\" && !f) throw new TypeError(\"Private accessor was defined without a setter\");\r\n if (typeof state === \"function\" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError(\"Cannot write private member to an object whose class did not declare it\");\r\n return (kind === \"a\" ? f.call(receiver, value) : f ? f.value = value : state.set(receiver, value)), value;\r\n}\r\n", "/**\n * Returns true if the object is a function.\n * @param value The value to check\n */\nexport function isFunction(value: any): value is (...args: any[]) => any {\n return typeof value === 'function';\n}\n", "/**\n * Used to create Error subclasses until the community moves away from ES5.\n *\n * This is because compiling from TypeScript down to ES5 has issues with subclassing Errors\n * as well as other built-in types: https://github.com/Microsoft/TypeScript/issues/12123\n *\n * @param createImpl A factory function to create the actual constructor implementation. The returned\n * function should be a named function that calls `_super` internally.\n */\nexport function createErrorClass(createImpl: (_super: any) => any): T {\n const _super = (instance: any) => {\n Error.call(instance);\n instance.stack = new Error().stack;\n };\n\n const ctorFunc = createImpl(_super);\n ctorFunc.prototype = Object.create(Error.prototype);\n ctorFunc.prototype.constructor = ctorFunc;\n return ctorFunc;\n}\n", "import { createErrorClass } from './createErrorClass';\n\nexport interface UnsubscriptionError extends Error {\n readonly errors: any[];\n}\n\nexport interface UnsubscriptionErrorCtor {\n /**\n * @deprecated Internal implementation detail. Do not construct error instances.\n * Cannot be tagged as internal: https://github.com/ReactiveX/rxjs/issues/6269\n */\n new (errors: any[]): UnsubscriptionError;\n}\n\n/**\n * An error thrown when one or more errors have occurred during the\n * `unsubscribe` of a {@link Subscription}.\n */\nexport const UnsubscriptionError: UnsubscriptionErrorCtor = createErrorClass(\n (_super) =>\n function UnsubscriptionErrorImpl(this: any, errors: (Error | string)[]) {\n _super(this);\n this.message = errors\n ? `${errors.length} errors occurred during unsubscription:\n${errors.map((err, i) => `${i + 1}) ${err.toString()}`).join('\\n ')}`\n : '';\n this.name = 'UnsubscriptionError';\n this.errors = errors;\n }\n);\n", "/**\n * Removes an item from an array, mutating it.\n * @param arr The array to remove the item from\n * @param item The item to remove\n */\nexport function arrRemove(arr: T[] | undefined | null, item: T) {\n if (arr) {\n const index = arr.indexOf(item);\n 0 <= index && arr.splice(index, 1);\n }\n}\n", "import { isFunction } from './util/isFunction';\nimport { UnsubscriptionError } from './util/UnsubscriptionError';\nimport { SubscriptionLike, TeardownLogic, Unsubscribable } from './types';\nimport { arrRemove } from './util/arrRemove';\n\n/**\n * Represents a disposable resource, such as the execution of an Observable. 
A\n * Subscription has one important method, `unsubscribe`, that takes no argument\n * and just disposes the resource held by the subscription.\n *\n * Additionally, subscriptions may be grouped together through the `add()`\n * method, which will attach a child Subscription to the current Subscription.\n * When a Subscription is unsubscribed, all its children (and its grandchildren)\n * will be unsubscribed as well.\n *\n * @class Subscription\n */\nexport class Subscription implements SubscriptionLike {\n /** @nocollapse */\n public static EMPTY = (() => {\n const empty = new Subscription();\n empty.closed = true;\n return empty;\n })();\n\n /**\n * A flag to indicate whether this Subscription has already been unsubscribed.\n */\n public closed = false;\n\n private _parentage: Subscription[] | Subscription | null = null;\n\n /**\n * The list of registered finalizers to execute upon unsubscription. Adding and removing from this\n * list occurs in the {@link #add} and {@link #remove} methods.\n */\n private _finalizers: Exclude[] | null = null;\n\n /**\n * @param initialTeardown A function executed first as part of the finalization\n * process that is kicked off when {@link #unsubscribe} is called.\n */\n constructor(private initialTeardown?: () => void) {}\n\n /**\n * Disposes the resources held by the subscription. May, for instance, cancel\n * an ongoing Observable execution or cancel any other type of work that\n * started when the Subscription was created.\n * @return {void}\n */\n unsubscribe(): void {\n let errors: any[] | undefined;\n\n if (!this.closed) {\n this.closed = true;\n\n // Remove this from it's parents.\n const { _parentage } = this;\n if (_parentage) {\n this._parentage = null;\n if (Array.isArray(_parentage)) {\n for (const parent of _parentage) {\n parent.remove(this);\n }\n } else {\n _parentage.remove(this);\n }\n }\n\n const { initialTeardown: initialFinalizer } = this;\n if (isFunction(initialFinalizer)) {\n try {\n initialFinalizer();\n } catch (e) {\n errors = e instanceof UnsubscriptionError ? e.errors : [e];\n }\n }\n\n const { _finalizers } = this;\n if (_finalizers) {\n this._finalizers = null;\n for (const finalizer of _finalizers) {\n try {\n execFinalizer(finalizer);\n } catch (err) {\n errors = errors ?? [];\n if (err instanceof UnsubscriptionError) {\n errors = [...errors, ...err.errors];\n } else {\n errors.push(err);\n }\n }\n }\n }\n\n if (errors) {\n throw new UnsubscriptionError(errors);\n }\n }\n }\n\n /**\n * Adds a finalizer to this subscription, so that finalization will be unsubscribed/called\n * when this subscription is unsubscribed. If this subscription is already {@link #closed},\n * because it has already been unsubscribed, then whatever finalizer is passed to it\n * will automatically be executed (unless the finalizer itself is also a closed subscription).\n *\n * Closed Subscriptions cannot be added as finalizers to any subscription. Adding a closed\n * subscription to a any subscription will result in no operation. (A noop).\n *\n * Adding a subscription to itself, or adding `null` or `undefined` will not perform any\n * operation at all. (A noop).\n *\n * `Subscription` instances that are added to this instance will automatically remove themselves\n * if they are unsubscribed. 
Functions and {@link Unsubscribable} objects that you wish to remove\n * will need to be removed manually with {@link #remove}\n *\n * @param teardown The finalization logic to add to this subscription.\n */\n add(teardown: TeardownLogic): void {\n // Only add the finalizer if it's not undefined\n // and don't add a subscription to itself.\n if (teardown && teardown !== this) {\n if (this.closed) {\n // If this subscription is already closed,\n // execute whatever finalizer is handed to it automatically.\n execFinalizer(teardown);\n } else {\n if (teardown instanceof Subscription) {\n // We don't add closed subscriptions, and we don't add the same subscription\n // twice. Subscription unsubscribe is idempotent.\n if (teardown.closed || teardown._hasParent(this)) {\n return;\n }\n teardown._addParent(this);\n }\n (this._finalizers = this._finalizers ?? []).push(teardown);\n }\n }\n }\n\n /**\n * Checks to see if a this subscription already has a particular parent.\n * This will signal that this subscription has already been added to the parent in question.\n * @param parent the parent to check for\n */\n private _hasParent(parent: Subscription) {\n const { _parentage } = this;\n return _parentage === parent || (Array.isArray(_parentage) && _parentage.includes(parent));\n }\n\n /**\n * Adds a parent to this subscription so it can be removed from the parent if it\n * unsubscribes on it's own.\n *\n * NOTE: THIS ASSUMES THAT {@link _hasParent} HAS ALREADY BEEN CHECKED.\n * @param parent The parent subscription to add\n */\n private _addParent(parent: Subscription) {\n const { _parentage } = this;\n this._parentage = Array.isArray(_parentage) ? (_parentage.push(parent), _parentage) : _parentage ? [_parentage, parent] : parent;\n }\n\n /**\n * Called on a child when it is removed via {@link #remove}.\n * @param parent The parent to remove\n */\n private _removeParent(parent: Subscription) {\n const { _parentage } = this;\n if (_parentage === parent) {\n this._parentage = null;\n } else if (Array.isArray(_parentage)) {\n arrRemove(_parentage, parent);\n }\n }\n\n /**\n * Removes a finalizer from this subscription that was previously added with the {@link #add} method.\n *\n * Note that `Subscription` instances, when unsubscribed, will automatically remove themselves\n * from every other `Subscription` they have been added to. 
This means that using the `remove` method\n * is not a common thing and should be used thoughtfully.\n *\n * If you add the same finalizer instance of a function or an unsubscribable object to a `Subscription` instance\n * more than once, you will need to call `remove` the same number of times to remove all instances.\n *\n * All finalizer instances are removed to free up memory upon unsubscription.\n *\n * @param teardown The finalizer to remove from this subscription\n */\n remove(teardown: Exclude): void {\n const { _finalizers } = this;\n _finalizers && arrRemove(_finalizers, teardown);\n\n if (teardown instanceof Subscription) {\n teardown._removeParent(this);\n }\n }\n}\n\nexport const EMPTY_SUBSCRIPTION = Subscription.EMPTY;\n\nexport function isSubscription(value: any): value is Subscription {\n return (\n value instanceof Subscription ||\n (value && 'closed' in value && isFunction(value.remove) && isFunction(value.add) && isFunction(value.unsubscribe))\n );\n}\n\nfunction execFinalizer(finalizer: Unsubscribable | (() => void)) {\n if (isFunction(finalizer)) {\n finalizer();\n } else {\n finalizer.unsubscribe();\n }\n}\n", "import { Subscriber } from './Subscriber';\nimport { ObservableNotification } from './types';\n\n/**\n * The {@link GlobalConfig} object for RxJS. It is used to configure things\n * like how to react on unhandled errors.\n */\nexport const config: GlobalConfig = {\n onUnhandledError: null,\n onStoppedNotification: null,\n Promise: undefined,\n useDeprecatedSynchronousErrorHandling: false,\n useDeprecatedNextContext: false,\n};\n\n/**\n * The global configuration object for RxJS, used to configure things\n * like how to react on unhandled errors. Accessible via {@link config}\n * object.\n */\nexport interface GlobalConfig {\n /**\n * A registration point for unhandled errors from RxJS. These are errors that\n * cannot were not handled by consuming code in the usual subscription path. For\n * example, if you have this configured, and you subscribe to an observable without\n * providing an error handler, errors from that subscription will end up here. This\n * will _always_ be called asynchronously on another job in the runtime. This is because\n * we do not want errors thrown in this user-configured handler to interfere with the\n * behavior of the library.\n */\n onUnhandledError: ((err: any) => void) | null;\n\n /**\n * A registration point for notifications that cannot be sent to subscribers because they\n * have completed, errored or have been explicitly unsubscribed. By default, next, complete\n * and error notifications sent to stopped subscribers are noops. However, sometimes callers\n * might want a different behavior. For example, with sources that attempt to report errors\n * to stopped subscribers, a caller can configure RxJS to throw an unhandled error instead.\n * This will _always_ be called asynchronously on another job in the runtime. This is because\n * we do not want errors thrown in this user-configured handler to interfere with the\n * behavior of the library.\n */\n onStoppedNotification: ((notification: ObservableNotification, subscriber: Subscriber) => void) | null;\n\n /**\n * The promise constructor used by default for {@link Observable#toPromise toPromise} and {@link Observable#forEach forEach}\n * methods.\n *\n * @deprecated As of version 8, RxJS will no longer support this sort of injection of a\n * Promise constructor. 
If you need a Promise implementation other than native promises,\n * please polyfill/patch Promise as you see appropriate. Will be removed in v8.\n */\n Promise?: PromiseConstructorLike;\n\n /**\n * If true, turns on synchronous error rethrowing, which is a deprecated behavior\n * in v6 and higher. This behavior enables bad patterns like wrapping a subscribe\n * call in a try/catch block. It also enables producer interference, a nasty bug\n * where a multicast can be broken for all observers by a downstream consumer with\n * an unhandled error. DO NOT USE THIS FLAG UNLESS IT'S NEEDED TO BUY TIME\n * FOR MIGRATION REASONS.\n *\n * @deprecated As of version 8, RxJS will no longer support synchronous throwing\n * of unhandled errors. All errors will be thrown on a separate call stack to prevent bad\n * behaviors described above. Will be removed in v8.\n */\n useDeprecatedSynchronousErrorHandling: boolean;\n\n /**\n * If true, enables an as-of-yet undocumented feature from v5: The ability to access\n * `unsubscribe()` via `this` context in `next` functions created in observers passed\n * to `subscribe`.\n *\n * This is being removed because the performance was severely problematic, and it could also cause\n * issues when types other than POJOs are passed to subscribe as subscribers, as they will likely have\n * their `this` context overwritten.\n *\n * @deprecated As of version 8, RxJS will no longer support altering the\n * context of next functions provided as part of an observer to Subscribe. Instead,\n * you will have access to a subscription or a signal or token that will allow you to do things like\n * unsubscribe and test closed status. Will be removed in v8.\n */\n useDeprecatedNextContext: boolean;\n}\n", "import type { TimerHandle } from './timerHandle';\ntype SetTimeoutFunction = (handler: () => void, timeout?: number, ...args: any[]) => TimerHandle;\ntype ClearTimeoutFunction = (handle: TimerHandle) => void;\n\ninterface TimeoutProvider {\n setTimeout: SetTimeoutFunction;\n clearTimeout: ClearTimeoutFunction;\n delegate:\n | {\n setTimeout: SetTimeoutFunction;\n clearTimeout: ClearTimeoutFunction;\n }\n | undefined;\n}\n\nexport const timeoutProvider: TimeoutProvider = {\n // When accessing the delegate, use the variable rather than `this` so that\n // the functions can be called without being bound to the provider.\n setTimeout(handler: () => void, timeout?: number, ...args) {\n const { delegate } = timeoutProvider;\n if (delegate?.setTimeout) {\n return delegate.setTimeout(handler, timeout, ...args);\n }\n return setTimeout(handler, timeout, ...args);\n },\n clearTimeout(handle) {\n const { delegate } = timeoutProvider;\n return (delegate?.clearTimeout || clearTimeout)(handle as any);\n },\n delegate: undefined,\n};\n", "import { config } from '../config';\nimport { timeoutProvider } from '../scheduler/timeoutProvider';\n\n/**\n * Handles an error on another job either with the user-configured {@link onUnhandledError},\n * or by throwing it on that new job so it can be picked up by `window.onerror`, `process.on('error')`, etc.\n *\n * This should be called whenever there is an error that is out-of-band with the subscription\n * or when an error hits a terminal boundary of the subscription and no error handler was provided.\n *\n * @param err the error to report\n */\nexport function reportUnhandledError(err: any) {\n timeoutProvider.setTimeout(() => {\n const { onUnhandledError } = config;\n if (onUnhandledError) {\n // Execute the user-configured error handler.\n 
onUnhandledError(err);\n } else {\n // Throw so it is picked up by the runtime's uncaught error mechanism.\n throw err;\n }\n });\n}\n", "/* tslint:disable:no-empty */\nexport function noop() { }\n", "import { CompleteNotification, NextNotification, ErrorNotification } from './types';\n\n/**\n * A completion object optimized for memory use and created to be the\n * same \"shape\" as other notifications in v8.\n * @internal\n */\nexport const COMPLETE_NOTIFICATION = (() => createNotification('C', undefined, undefined) as CompleteNotification)();\n\n/**\n * Internal use only. Creates an optimized error notification that is the same \"shape\"\n * as other notifications.\n * @internal\n */\nexport function errorNotification(error: any): ErrorNotification {\n return createNotification('E', undefined, error) as any;\n}\n\n/**\n * Internal use only. Creates an optimized next notification that is the same \"shape\"\n * as other notifications.\n * @internal\n */\nexport function nextNotification(value: T) {\n return createNotification('N', value, undefined) as NextNotification;\n}\n\n/**\n * Ensures that all notifications created internally have the same \"shape\" in v8.\n *\n * TODO: This is only exported to support a crazy legacy test in `groupBy`.\n * @internal\n */\nexport function createNotification(kind: 'N' | 'E' | 'C', value: any, error: any) {\n return {\n kind,\n value,\n error,\n };\n}\n", "import { config } from '../config';\n\nlet context: { errorThrown: boolean; error: any } | null = null;\n\n/**\n * Handles dealing with errors for super-gross mode. Creates a context, in which\n * any synchronously thrown errors will be passed to {@link captureError}. Which\n * will record the error such that it will be rethrown after the call back is complete.\n * TODO: Remove in v8\n * @param cb An immediately executed function.\n */\nexport function errorContext(cb: () => void) {\n if (config.useDeprecatedSynchronousErrorHandling) {\n const isRoot = !context;\n if (isRoot) {\n context = { errorThrown: false, error: null };\n }\n cb();\n if (isRoot) {\n const { errorThrown, error } = context!;\n context = null;\n if (errorThrown) {\n throw error;\n }\n }\n } else {\n // This is the general non-deprecated path for everyone that\n // isn't crazy enough to use super-gross mode (useDeprecatedSynchronousErrorHandling)\n cb();\n }\n}\n\n/**\n * Captures errors only in super-gross mode.\n * @param err the error to capture\n */\nexport function captureError(err: any) {\n if (config.useDeprecatedSynchronousErrorHandling && context) {\n context.errorThrown = true;\n context.error = err;\n }\n}\n", "import { isFunction } from './util/isFunction';\nimport { Observer, ObservableNotification } from './types';\nimport { isSubscription, Subscription } from './Subscription';\nimport { config } from './config';\nimport { reportUnhandledError } from './util/reportUnhandledError';\nimport { noop } from './util/noop';\nimport { nextNotification, errorNotification, COMPLETE_NOTIFICATION } from './NotificationFactories';\nimport { timeoutProvider } from './scheduler/timeoutProvider';\nimport { captureError } from './util/errorContext';\n\n/**\n * Implements the {@link Observer} interface and extends the\n * {@link Subscription} class. While the {@link Observer} is the public API for\n * consuming the values of an {@link Observable}, all Observers get converted to\n * a Subscriber, in order to provide Subscription-like capabilities such as\n * `unsubscribe`. 
Subscriber is a common type in RxJS, and crucial for\n * implementing operators, but it is rarely used as a public API.\n *\n * @class Subscriber\n */\nexport class Subscriber extends Subscription implements Observer {\n /**\n * A static factory for a Subscriber, given a (potentially partial) definition\n * of an Observer.\n * @param next The `next` callback of an Observer.\n * @param error The `error` callback of an\n * Observer.\n * @param complete The `complete` callback of an\n * Observer.\n * @return A Subscriber wrapping the (partially defined)\n * Observer represented by the given arguments.\n * @nocollapse\n * @deprecated Do not use. Will be removed in v8. There is no replacement for this\n * method, and there is no reason to be creating instances of `Subscriber` directly.\n * If you have a specific use case, please file an issue.\n */\n static create(next?: (x?: T) => void, error?: (e?: any) => void, complete?: () => void): Subscriber {\n return new SafeSubscriber(next, error, complete);\n }\n\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n protected isStopped: boolean = false;\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n protected destination: Subscriber | Observer; // this `any` is the escape hatch to erase extra type param (e.g. R)\n\n /**\n * @deprecated Internal implementation detail, do not use directly. Will be made internal in v8.\n * There is no reason to directly create an instance of Subscriber. This type is exported for typings reasons.\n */\n constructor(destination?: Subscriber | Observer) {\n super();\n if (destination) {\n this.destination = destination;\n // Automatically chain subscriptions together here.\n // if destination is a Subscription, then it is a Subscriber.\n if (isSubscription(destination)) {\n destination.add(this);\n }\n } else {\n this.destination = EMPTY_OBSERVER;\n }\n }\n\n /**\n * The {@link Observer} callback to receive notifications of type `next` from\n * the Observable, with a value. The Observable may call this method 0 or more\n * times.\n * @param {T} [value] The `next` value.\n * @return {void}\n */\n next(value?: T): void {\n if (this.isStopped) {\n handleStoppedNotification(nextNotification(value), this);\n } else {\n this._next(value!);\n }\n }\n\n /**\n * The {@link Observer} callback to receive notifications of type `error` from\n * the Observable, with an attached `Error`. Notifies the Observer that\n * the Observable has experienced an error condition.\n * @param {any} [err] The `error` exception.\n * @return {void}\n */\n error(err?: any): void {\n if (this.isStopped) {\n handleStoppedNotification(errorNotification(err), this);\n } else {\n this.isStopped = true;\n this._error(err);\n }\n }\n\n /**\n * The {@link Observer} callback to receive a valueless notification of type\n * `complete` from the Observable. 
Notifies the Observer that the Observable\n * has finished sending push-based notifications.\n * @return {void}\n */\n complete(): void {\n if (this.isStopped) {\n handleStoppedNotification(COMPLETE_NOTIFICATION, this);\n } else {\n this.isStopped = true;\n this._complete();\n }\n }\n\n unsubscribe(): void {\n if (!this.closed) {\n this.isStopped = true;\n super.unsubscribe();\n this.destination = null!;\n }\n }\n\n protected _next(value: T): void {\n this.destination.next(value);\n }\n\n protected _error(err: any): void {\n try {\n this.destination.error(err);\n } finally {\n this.unsubscribe();\n }\n }\n\n protected _complete(): void {\n try {\n this.destination.complete();\n } finally {\n this.unsubscribe();\n }\n }\n}\n\n/**\n * This bind is captured here because we want to be able to have\n * compatibility with monoid libraries that tend to use a method named\n * `bind`. In particular, a library called Monio requires this.\n */\nconst _bind = Function.prototype.bind;\n\nfunction bind any>(fn: Fn, thisArg: any): Fn {\n return _bind.call(fn, thisArg);\n}\n\n/**\n * Internal optimization only, DO NOT EXPOSE.\n * @internal\n */\nclass ConsumerObserver implements Observer {\n constructor(private partialObserver: Partial>) {}\n\n next(value: T): void {\n const { partialObserver } = this;\n if (partialObserver.next) {\n try {\n partialObserver.next(value);\n } catch (error) {\n handleUnhandledError(error);\n }\n }\n }\n\n error(err: any): void {\n const { partialObserver } = this;\n if (partialObserver.error) {\n try {\n partialObserver.error(err);\n } catch (error) {\n handleUnhandledError(error);\n }\n } else {\n handleUnhandledError(err);\n }\n }\n\n complete(): void {\n const { partialObserver } = this;\n if (partialObserver.complete) {\n try {\n partialObserver.complete();\n } catch (error) {\n handleUnhandledError(error);\n }\n }\n }\n}\n\nexport class SafeSubscriber extends Subscriber {\n constructor(\n observerOrNext?: Partial> | ((value: T) => void) | null,\n error?: ((e?: any) => void) | null,\n complete?: (() => void) | null\n ) {\n super();\n\n let partialObserver: Partial>;\n if (isFunction(observerOrNext) || !observerOrNext) {\n // The first argument is a function, not an observer. The next\n // two arguments *could* be observers, or they could be empty.\n partialObserver = {\n next: (observerOrNext ?? undefined) as (((value: T) => void) | undefined),\n error: error ?? undefined,\n complete: complete ?? undefined,\n };\n } else {\n // The first argument is a partial observer.\n let context: any;\n if (this && config.useDeprecatedNextContext) {\n // This is a deprecated path that made `this.unsubscribe()` available in\n // next handler functions passed to subscribe. This only exists behind a flag\n // now, as it is *very* slow.\n context = Object.create(observerOrNext);\n context.unsubscribe = () => this.unsubscribe();\n partialObserver = {\n next: observerOrNext.next && bind(observerOrNext.next, context),\n error: observerOrNext.error && bind(observerOrNext.error, context),\n complete: observerOrNext.complete && bind(observerOrNext.complete, context),\n };\n } else {\n // The \"normal\" path. 
Just use the partial observer directly.\n partialObserver = observerOrNext;\n }\n }\n\n // Wrap the partial observer to ensure it's a full observer, and\n // make sure proper error handling is accounted for.\n this.destination = new ConsumerObserver(partialObserver);\n }\n}\n\nfunction handleUnhandledError(error: any) {\n if (config.useDeprecatedSynchronousErrorHandling) {\n captureError(error);\n } else {\n // Ideal path, we report this as an unhandled error,\n // which is thrown on a new call stack.\n reportUnhandledError(error);\n }\n}\n\n/**\n * An error handler used when no error handler was supplied\n * to the SafeSubscriber -- meaning no error handler was supplied\n * do the `subscribe` call on our observable.\n * @param err The error to handle\n */\nfunction defaultErrorHandler(err: any) {\n throw err;\n}\n\n/**\n * A handler for notifications that cannot be sent to a stopped subscriber.\n * @param notification The notification being sent\n * @param subscriber The stopped subscriber\n */\nfunction handleStoppedNotification(notification: ObservableNotification, subscriber: Subscriber) {\n const { onStoppedNotification } = config;\n onStoppedNotification && timeoutProvider.setTimeout(() => onStoppedNotification(notification, subscriber));\n}\n\n/**\n * The observer used as a stub for subscriptions where the user did not\n * pass any arguments to `subscribe`. Comes with the default error handling\n * behavior.\n */\nexport const EMPTY_OBSERVER: Readonly> & { closed: true } = {\n closed: true,\n next: noop,\n error: defaultErrorHandler,\n complete: noop,\n};\n", "/**\n * Symbol.observable or a string \"@@observable\". Used for interop\n *\n * @deprecated We will no longer be exporting this symbol in upcoming versions of RxJS.\n * Instead polyfill and use Symbol.observable directly *or* use https://www.npmjs.com/package/symbol-observable\n */\nexport const observable: string | symbol = (() => (typeof Symbol === 'function' && Symbol.observable) || '@@observable')();\n", "/**\n * This function takes one parameter and just returns it. Simply put,\n * this is like `(x: T): T => x`.\n *\n * ## Examples\n *\n * This is useful in some cases when using things like `mergeMap`\n *\n * ```ts\n * import { interval, take, map, range, mergeMap, identity } from 'rxjs';\n *\n * const source$ = interval(1000).pipe(take(5));\n *\n * const result$ = source$.pipe(\n * map(i => range(i)),\n * mergeMap(identity) // same as mergeMap(x => x)\n * );\n *\n * result$.subscribe({\n * next: console.log\n * });\n * ```\n *\n * Or when you want to selectively apply an operator\n *\n * ```ts\n * import { interval, take, identity } from 'rxjs';\n *\n * const shouldLimit = () => Math.random() < 0.5;\n *\n * const source$ = interval(1000);\n *\n * const result$ = source$.pipe(shouldLimit() ? 
take(5) : identity);\n *\n * result$.subscribe({\n * next: console.log\n * });\n * ```\n *\n * @param x Any value that is returned by this function\n * @returns The value passed as the first parameter to this function\n */\nexport function identity(x: T): T {\n return x;\n}\n", "import { identity } from './identity';\nimport { UnaryFunction } from '../types';\n\nexport function pipe(): typeof identity;\nexport function pipe(fn1: UnaryFunction): UnaryFunction;\nexport function pipe(fn1: UnaryFunction, fn2: UnaryFunction): UnaryFunction;\nexport function pipe(fn1: UnaryFunction, fn2: UnaryFunction, fn3: UnaryFunction): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction,\n fn8: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction,\n fn8: UnaryFunction,\n fn9: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction,\n fn8: UnaryFunction,\n fn9: UnaryFunction,\n ...fns: UnaryFunction[]\n): UnaryFunction;\n\n/**\n * pipe() can be called on one or more functions, each of which can take one argument (\"UnaryFunction\")\n * and uses it to return a value.\n * It returns a function that takes one argument, passes it to the first UnaryFunction, and then\n * passes the result to the next one, passes that result to the next one, and so on. \n */\nexport function pipe(...fns: Array>): UnaryFunction {\n return pipeFromArray(fns);\n}\n\n/** @internal */\nexport function pipeFromArray(fns: Array>): UnaryFunction {\n if (fns.length === 0) {\n return identity as UnaryFunction;\n }\n\n if (fns.length === 1) {\n return fns[0];\n }\n\n return function piped(input: T): R {\n return fns.reduce((prev: any, fn: UnaryFunction) => fn(prev), input as any);\n };\n}\n", "import { Operator } from './Operator';\nimport { SafeSubscriber, Subscriber } from './Subscriber';\nimport { isSubscription, Subscription } from './Subscription';\nimport { TeardownLogic, OperatorFunction, Subscribable, Observer } from './types';\nimport { observable as Symbol_observable } from './symbol/observable';\nimport { pipeFromArray } from './util/pipe';\nimport { config } from './config';\nimport { isFunction } from './util/isFunction';\nimport { errorContext } from './util/errorContext';\n\n/**\n * A representation of any set of values over any amount of time. This is the most basic building block\n * of RxJS.\n *\n * @class Observable\n */\nexport class Observable implements Subscribable {\n /**\n * @deprecated Internal implementation detail, do not use directly. 
Will be made internal in v8.\n */\n source: Observable | undefined;\n\n /**\n * @deprecated Internal implementation detail, do not use directly. Will be made internal in v8.\n */\n operator: Operator | undefined;\n\n /**\n * @constructor\n * @param {Function} subscribe the function that is called when the Observable is\n * initially subscribed to. This function is given a Subscriber, to which new values\n * can be `next`ed, or an `error` method can be called to raise an error, or\n * `complete` can be called to notify of a successful completion.\n */\n constructor(subscribe?: (this: Observable, subscriber: Subscriber) => TeardownLogic) {\n if (subscribe) {\n this._subscribe = subscribe;\n }\n }\n\n // HACK: Since TypeScript inherits static properties too, we have to\n // fight against TypeScript here so Subject can have a different static create signature\n /**\n * Creates a new Observable by calling the Observable constructor\n * @owner Observable\n * @method create\n * @param {Function} subscribe? the subscriber function to be passed to the Observable constructor\n * @return {Observable} a new observable\n * @nocollapse\n * @deprecated Use `new Observable()` instead. Will be removed in v8.\n */\n static create: (...args: any[]) => any = (subscribe?: (subscriber: Subscriber) => TeardownLogic) => {\n return new Observable(subscribe);\n };\n\n /**\n * Creates a new Observable, with this Observable instance as the source, and the passed\n * operator defined as the new observable's operator.\n * @method lift\n * @param operator the operator defining the operation to take on the observable\n * @return a new observable with the Operator applied\n * @deprecated Internal implementation detail, do not use directly. Will be made internal in v8.\n * If you have implemented an operator using `lift`, it is recommended that you create an\n * operator by simply returning `new Observable()` directly. See \"Creating new operators from\n * scratch\" section here: https://rxjs.dev/guide/operators\n */\n lift(operator?: Operator): Observable {\n const observable = new Observable();\n observable.source = this;\n observable.operator = operator;\n return observable;\n }\n\n subscribe(observerOrNext?: Partial> | ((value: T) => void)): Subscription;\n /** @deprecated Instead of passing separate callback arguments, use an observer argument. Signatures taking separate callback arguments will be removed in v8. Details: https://rxjs.dev/deprecations/subscribe-arguments */\n subscribe(next?: ((value: T) => void) | null, error?: ((error: any) => void) | null, complete?: (() => void) | null): Subscription;\n /**\n * Invokes an execution of an Observable and registers Observer handlers for notifications it will emit.\n *\n * Use it when you have all these Observables, but still nothing is happening.\n *\n * `subscribe` is not a regular operator, but a method that calls Observable's internal `subscribe` function. It\n * might be for example a function that you passed to Observable's constructor, but most of the time it is\n * a library implementation, which defines what will be emitted by an Observable, and when it will be emitted. This means\n * that calling `subscribe` is actually the moment when Observable starts its work, not when it is created, as is often\n * thought.\n *\n * Apart from starting the execution of an Observable, this method allows you to listen for values\n * that an Observable emits, as well as for when it completes or errors. 
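// A minimal sketch of the behavior described above: the subscriber function
// given to the constructor runs only when `subscribe` is called, not when
// the Observable is created.
import { Observable } from 'rxjs';

const source$ = new Observable<number>((subscriber) => {
  console.log('producer runs'); // logged once per subscribe call
  subscriber.next(42);
  subscriber.complete();
});

// Nothing has been logged yet; execution starts here:
source$.subscribe((value) => console.log(value)); // 'producer runs', then 42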
You can achieve this in two\n * of the following ways.\n *\n * The first way is creating an object that implements {@link Observer} interface. It should have methods\n * defined by that interface, but note that it should be just a regular JavaScript object, which you can create\n * yourself in any way you want (ES6 class, classic function constructor, object literal etc.). In particular, do\n * not attempt to use any RxJS implementation details to create Observers - you don't need them. Remember also\n * that your object does not have to implement all methods. If you find yourself creating a method that doesn't\n * do anything, you can simply omit it. Note however, if the `error` method is not provided and an error happens,\n * it will be thrown asynchronously. Errors thrown asynchronously cannot be caught using `try`/`catch`. Instead,\n * use the {@link onUnhandledError} configuration option or use a runtime handler (like `window.onerror` or\n * `process.on('error')`) to be notified of unhandled errors. Because of this, it's recommended that you provide\n * an `error` method to avoid missing thrown errors.\n *\n * The second way is to give up on the Observer object altogether and simply provide callback functions in place of its methods.\n * This means you can provide three functions as arguments to `subscribe`, where the first function is the equivalent\n * of a `next` method, the second of an `error` method and the third of a `complete` method. Just as in the case of an Observer,\n * if you do not need to listen for something, you can omit a function by passing `undefined` or `null`,\n * since `subscribe` recognizes these functions by where they were placed in the function call. When it comes\n * to the `error` function, as with an Observer, if not provided, errors emitted by an Observable will be thrown asynchronously.\n *\n * You can, however, subscribe with no parameters at all. This may be the case where you're not interested in terminal events\n * and you also handled emissions internally by using operators (e.g. using `tap`).\n *\n * Whichever style of calling `subscribe` you use, in both cases it returns a Subscription object.\n * This object allows you to call `unsubscribe` on it, which in turn will stop the work that an Observable does and will clean\n * up all resources that an Observable used. Note that cancelling a subscription will not call `complete` callback\n * provided to `subscribe` function, which is reserved for a regular completion signal that comes from an Observable.\n *\n * Remember that callbacks provided to `subscribe` are not guaranteed to be called asynchronously.\n * It is an Observable itself that decides when these functions will be called. For example {@link of}\n * by default emits all its values synchronously. 
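// A short sketch of the recommendation above: supply an `error` handler so
// errors are delivered to your callback instead of being rethrown
// asynchronously, out of reach of `try`/`catch`.
import { throwError } from 'rxjs';

throwError(() => new Error('boom')).subscribe({
  next: (value) => console.log(value),
  error: (err) => console.error('handled:', err.message), // 'handled: boom'
});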
Always check documentation for how given Observable\n * will behave when subscribed and if its default behavior can be modified with a `scheduler`.\n *\n * #### Examples\n *\n * Subscribe with an {@link guide/observer Observer}\n *\n * ```ts\n * import { of } from 'rxjs';\n *\n * const sumObserver = {\n * sum: 0,\n * next(value) {\n * console.log('Adding: ' + value);\n * this.sum = this.sum + value;\n * },\n * error() {\n * // We actually could just remove this method,\n * // since we do not really care about errors right now.\n * },\n * complete() {\n * console.log('Sum equals: ' + this.sum);\n * }\n * };\n *\n * of(1, 2, 3) // Synchronously emits 1, 2, 3 and then completes.\n * .subscribe(sumObserver);\n *\n * // Logs:\n * // 'Adding: 1'\n * // 'Adding: 2'\n * // 'Adding: 3'\n * // 'Sum equals: 6'\n * ```\n *\n * Subscribe with functions ({@link deprecations/subscribe-arguments deprecated})\n *\n * ```ts\n * import { of } from 'rxjs'\n *\n * let sum = 0;\n *\n * of(1, 2, 3).subscribe(\n * value => {\n * console.log('Adding: ' + value);\n * sum = sum + value;\n * },\n * undefined,\n * () => console.log('Sum equals: ' + sum)\n * );\n *\n * // Logs:\n * // 'Adding: 1'\n * // 'Adding: 2'\n * // 'Adding: 3'\n * // 'Sum equals: 6'\n * ```\n *\n * Cancel a subscription\n *\n * ```ts\n * import { interval } from 'rxjs';\n *\n * const subscription = interval(1000).subscribe({\n * next(num) {\n * console.log(num)\n * },\n * complete() {\n * // Will not be called, even when cancelling subscription.\n * console.log('completed!');\n * }\n * });\n *\n * setTimeout(() => {\n * subscription.unsubscribe();\n * console.log('unsubscribed!');\n * }, 2500);\n *\n * // Logs:\n * // 0 after 1s\n * // 1 after 2s\n * // 'unsubscribed!' after 2.5s\n * ```\n *\n * @param {Observer|Function} observerOrNext (optional) Either an observer with methods to be called,\n * or the first of three possible handlers, which is the handler for each value emitted from the subscribed\n * Observable.\n * @param {Function} error (optional) A handler for a terminal event resulting from an error. If no error handler is provided,\n * the error will be thrown asynchronously as unhandled.\n * @param {Function} complete (optional) A handler for a terminal event resulting from successful completion.\n * @return {Subscription} a subscription reference to the registered handlers\n * @method subscribe\n */\n subscribe(\n observerOrNext?: Partial> | ((value: T) => void) | null,\n error?: ((error: any) => void) | null,\n complete?: (() => void) | null\n ): Subscription {\n const subscriber = isSubscriber(observerOrNext) ? observerOrNext : new SafeSubscriber(observerOrNext, error, complete);\n\n errorContext(() => {\n const { operator, source } = this;\n subscriber.add(\n operator\n ? // We're dealing with a subscription in the\n // operator chain to one of our lifted operators.\n operator.call(subscriber, source)\n : source\n ? // If `source` has a value, but `operator` does not, something that\n // had intimate knowledge of our API, like our `Subject`, must have\n // set it. 
We're going to just call `_subscribe` directly.\n this._subscribe(subscriber)\n : // In all other cases, we're likely wrapping a user-provided initializer\n // function, so we need to catch errors and handle them appropriately.\n this._trySubscribe(subscriber)\n );\n });\n\n return subscriber;\n }\n\n /** @internal */\n protected _trySubscribe(sink: Subscriber): TeardownLogic {\n try {\n return this._subscribe(sink);\n } catch (err) {\n // We don't need to return anything in this case,\n // because it's just going to try to `add()` to a subscription\n // above.\n sink.error(err);\n }\n }\n\n /**\n * Used as a NON-CANCELLABLE means of subscribing to an observable, for use with\n * APIs that expect promises, like `async/await`. You cannot unsubscribe from this.\n *\n * **WARNING**: Only use this with observables you *know* will complete. If the source\n * observable does not complete, you will end up with a promise that is hung up, and\n * potentially all of the state of an async function hanging out in memory. To avoid\n * this situation, look into adding something like {@link timeout}, {@link take},\n * {@link takeWhile}, or {@link takeUntil} amongst others.\n *\n * #### Example\n *\n * ```ts\n * import { interval, take } from 'rxjs';\n *\n * const source$ = interval(1000).pipe(take(4));\n *\n * async function getTotal() {\n * let total = 0;\n *\n * await source$.forEach(value => {\n * total += value;\n * console.log('observable -> ' + value);\n * });\n *\n * return total;\n * }\n *\n * getTotal().then(\n * total => console.log('Total: ' + total)\n * );\n *\n * // Expected:\n * // 'observable -> 0'\n * // 'observable -> 1'\n * // 'observable -> 2'\n * // 'observable -> 3'\n * // 'Total: 6'\n * ```\n *\n * @param next a handler for each value emitted by the observable\n * @return a promise that either resolves on observable completion or\n * rejects with the handled error\n */\n forEach(next: (value: T) => void): Promise;\n\n /**\n * @param next a handler for each value emitted by the observable\n * @param promiseCtor a constructor function used to instantiate the Promise\n * @return a promise that either resolves on observable completion or\n * rejects with the handled error\n * @deprecated Passing a Promise constructor will no longer be available\n * in upcoming versions of RxJS. This is because it adds weight to the library, for very\n * little benefit. If you need this functionality, it is recommended that you either\n * polyfill Promise, or you create an adapter to convert the returned native promise\n * to whatever promise implementation you wanted. 
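// A hedged sketch of the adapter suggested above: rather than passing a
// custom Promise constructor, convert the native promise that `forEach`
// returns. `MyPromise` is a hypothetical stand-in for your implementation.
import { of } from 'rxjs';

declare const MyPromise: PromiseConstructor; // assumption: your Promise flavor

const native = of(1, 2, 3).forEach((value) => console.log(value));
const adapted = MyPromise.resolve(native); // adapt to the desired implementation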
Will be removed in v8.\n */\n forEach(next: (value: T) => void, promiseCtor: PromiseConstructorLike): Promise;\n\n forEach(next: (value: T) => void, promiseCtor?: PromiseConstructorLike): Promise {\n promiseCtor = getPromiseCtor(promiseCtor);\n\n return new promiseCtor((resolve, reject) => {\n const subscriber = new SafeSubscriber({\n next: (value) => {\n try {\n next(value);\n } catch (err) {\n reject(err);\n subscriber.unsubscribe();\n }\n },\n error: reject,\n complete: resolve,\n });\n this.subscribe(subscriber);\n }) as Promise;\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): TeardownLogic {\n return this.source?.subscribe(subscriber);\n }\n\n /**\n * An interop point defined by the es7-observable spec https://github.com/zenparsing/es-observable\n * @method Symbol.observable\n * @return {Observable} this instance of the observable\n */\n [Symbol_observable]() {\n return this;\n }\n\n /* tslint:disable:max-line-length */\n pipe(): Observable;\n pipe(op1: OperatorFunction): Observable;\n pipe(op1: OperatorFunction, op2: OperatorFunction): Observable;\n pipe(op1: OperatorFunction, op2: OperatorFunction, op3: OperatorFunction): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction,\n op8: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction,\n op8: OperatorFunction,\n op9: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction,\n op8: OperatorFunction,\n op9: OperatorFunction,\n ...operations: OperatorFunction[]\n ): Observable;\n /* tslint:enable:max-line-length */\n\n /**\n * Used to stitch together functional operators into a chain.\n * @method pipe\n * @return {Observable} the Observable result of all of the operators having\n * been called in the order they were passed in.\n *\n * ## Example\n *\n * ```ts\n * import { interval, filter, map, scan } from 'rxjs';\n *\n * interval(1000)\n * .pipe(\n * filter(x => x % 2 === 0),\n * map(x => x + x),\n * scan((acc, x) => acc + x)\n * )\n * .subscribe(x => console.log(x));\n * ```\n */\n pipe(...operations: OperatorFunction[]): Observable {\n return pipeFromArray(operations)(this);\n }\n\n /* tslint:disable:max-line-length */\n /** @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. Details: https://rxjs.dev/deprecations/to-promise */\n toPromise(): Promise;\n /** @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. 
Details: https://rxjs.dev/deprecations/to-promise */\n toPromise(PromiseCtor: typeof Promise): Promise;\n /** @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. Details: https://rxjs.dev/deprecations/to-promise */\n toPromise(PromiseCtor: PromiseConstructorLike): Promise;\n /* tslint:enable:max-line-length */\n\n /**\n * Subscribe to this Observable and get a Promise resolving on\n * `complete` with the last emission (if any).\n *\n * **WARNING**: Only use this with observables you *know* will complete. If the source\n * observable does not complete, you will end up with a promise that is hung up, and\n * potentially all of the state of an async function hanging out in memory. To avoid\n * this situation, look into adding something like {@link timeout}, {@link take},\n * {@link takeWhile}, or {@link takeUntil} amongst others.\n *\n * @method toPromise\n * @param [promiseCtor] a constructor function used to instantiate\n * the Promise\n * @return A Promise that resolves with the last value emitted, or\n * rejects on an error. If there were no emissions, Promise\n * resolves with undefined.\n * @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. Details: https://rxjs.dev/deprecations/to-promise\n */\n toPromise(promiseCtor?: PromiseConstructorLike): Promise {\n promiseCtor = getPromiseCtor(promiseCtor);\n\n return new promiseCtor((resolve, reject) => {\n let value: T | undefined;\n this.subscribe(\n (x: T) => (value = x),\n (err: any) => reject(err),\n () => resolve(value)\n );\n }) as Promise;\n }\n}\n\n/**\n * Decides between a passed promise constructor from consuming code,\n * a default configured promise constructor, and the native promise\n * constructor and returns it. If nothing can be found, it will throw\n * an error.\n * @param promiseCtor The optional promise constructor passed by consuming code\n */\nfunction getPromiseCtor(promiseCtor: PromiseConstructorLike | undefined) {\n return promiseCtor ?? config.Promise ?? Promise;\n}\n\nfunction isObserver(value: any): value is Observer {\n return value && isFunction(value.next) && isFunction(value.error) && isFunction(value.complete);\n}\n\nfunction isSubscriber(value: any): value is Subscriber {\n return (value && value instanceof Subscriber) || (isObserver(value) && isSubscription(value));\n}\n", "import { Observable } from '../Observable';\nimport { Subscriber } from '../Subscriber';\nimport { OperatorFunction } from '../types';\nimport { isFunction } from './isFunction';\n\n/**\n * Used to determine if an object is an Observable with a lift function.\n */\nexport function hasLift(source: any): source is { lift: InstanceType['lift'] } {\n return isFunction(source?.lift);\n}\n\n/**\n * Creates an `OperatorFunction`. 
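// A migration sketch for the `toPromise` deprecation above: `firstValueFrom`
// and `lastValueFrom` replace it in current RxJS.
import { of, lastValueFrom } from 'rxjs';

async function main() {
  const last = await lastValueFrom(of(1, 2, 3));
  console.log(last); // 3, the last value emitted before completion
}
main();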
Used to define operators throughout the library in a concise way.\n * @param init The logic to connect the liftedSource to the subscriber at the moment of subscription.\n */\nexport function operate(\n init: (liftedSource: Observable, subscriber: Subscriber) => (() => void) | void\n): OperatorFunction {\n return (source: Observable) => {\n if (hasLift(source)) {\n return source.lift(function (this: Subscriber, liftedSource: Observable) {\n try {\n return init(liftedSource, this);\n } catch (err) {\n this.error(err);\n }\n });\n }\n throw new TypeError('Unable to lift unknown Observable type');\n };\n}\n", "import { Subscriber } from '../Subscriber';\n\n/**\n * Creates an instance of an `OperatorSubscriber`.\n * @param destination The downstream subscriber.\n * @param onNext Handles next values, only called if this subscriber is not stopped or closed. Any\n * error that occurs in this function is caught and sent to the `error` method of this subscriber.\n * @param onError Handles errors from the subscription, any errors that occur in this handler are caught\n * and sent to the `destination` error handler.\n * @param onComplete Handles completion notification from the subscription. Any errors that occur in\n * this handler are sent to the `destination` error handler.\n * @param onFinalize Additional teardown logic here. This will only be called on teardown if the\n * subscriber itself is not already closed. This is called after all other teardown logic is executed.\n */\nexport function createOperatorSubscriber(\n destination: Subscriber,\n onNext?: (value: T) => void,\n onComplete?: () => void,\n onError?: (err: any) => void,\n onFinalize?: () => void\n): Subscriber {\n return new OperatorSubscriber(destination, onNext, onComplete, onError, onFinalize);\n}\n\n/**\n * A generic helper for allowing operators to be created with a Subscriber and\n * use closures to capture necessary state from the operator function itself.\n */\nexport class OperatorSubscriber extends Subscriber {\n /**\n * Creates an instance of an `OperatorSubscriber`.\n * @param destination The downstream subscriber.\n * @param onNext Handles next values, only called if this subscriber is not stopped or closed. Any\n * error that occurs in this function is caught and sent to the `error` method of this subscriber.\n * @param onError Handles errors from the subscription, any errors that occur in this handler are caught\n * and sent to the `destination` error handler.\n * @param onComplete Handles completion notification from the subscription. Any errors that occur in\n * this handler are sent to the `destination` error handler.\n * @param onFinalize Additional finalization logic here. This will only be called on finalization if the\n * subscriber itself is not already closed. This is called after all other finalization logic is executed.\n * @param shouldUnsubscribe An optional check to see if an unsubscribe call should truly unsubscribe.\n * NOTE: This currently **ONLY** exists to support the strange behavior of {@link groupBy}, where unsubscription\n * to the resulting observable does not actually disconnect from the source if there are active subscriptions\n * to any grouped observable. 
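// A hedged sketch of a custom operator built on the internal `operate` and
// `createOperatorSubscriber` helpers above. Neither is public API; the
// import paths are assumptions about the internal file layout.
import { OperatorFunction } from '../types';
import { operate } from '../util/lift';
import { createOperatorSubscriber } from './OperatorSubscriber';

export function doubleValues(): OperatorFunction<number, number> {
  return operate<number, number>((source, subscriber) => {
    // Forward each value doubled; errors and completion pass through.
    source.subscribe(createOperatorSubscriber(subscriber, (value) => subscriber.next(value * 2)));
  });
}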
(DO NOT EXPOSE OR USE EXTERNALLY!!!)\n */\n constructor(\n destination: Subscriber,\n onNext?: (value: T) => void,\n onComplete?: () => void,\n onError?: (err: any) => void,\n private onFinalize?: () => void,\n private shouldUnsubscribe?: () => boolean\n ) {\n // It's important - for performance reasons - that all of this class's\n // members are initialized and that they are always initialized in the same\n // order. This will ensure that all OperatorSubscriber instances have the\n // same hidden class in V8. This, in turn, will help keep the number of\n // hidden classes involved in property accesses within the base class as\n // low as possible. If the number of hidden classes involved exceeds four,\n // the property accesses will become megamorphic and performance penalties\n // will be incurred - i.e. inline caches won't be used.\n //\n // The reasons for ensuring all instances have the same hidden class are\n // further discussed in this blog post from Benedikt Meurer:\n // https://benediktmeurer.de/2018/03/23/impact-of-polymorphism-on-component-based-frameworks-like-react/\n super(destination);\n this._next = onNext\n ? function (this: OperatorSubscriber, value: T) {\n try {\n onNext(value);\n } catch (err) {\n destination.error(err);\n }\n }\n : super._next;\n this._error = onError\n ? function (this: OperatorSubscriber, err: any) {\n try {\n onError(err);\n } catch (err) {\n // Send any errors that occur down stream.\n destination.error(err);\n } finally {\n // Ensure finalization.\n this.unsubscribe();\n }\n }\n : super._error;\n this._complete = onComplete\n ? function (this: OperatorSubscriber) {\n try {\n onComplete();\n } catch (err) {\n // Send any errors that occur down stream.\n destination.error(err);\n } finally {\n // Ensure finalization.\n this.unsubscribe();\n }\n }\n : super._complete;\n }\n\n unsubscribe() {\n if (!this.shouldUnsubscribe || this.shouldUnsubscribe()) {\n const { closed } = this;\n super.unsubscribe();\n // Execute additional teardown if we have any and we didn't already do so.\n !closed && this.onFinalize?.();\n }\n }\n}\n", "import { Subscription } from '../Subscription';\n\ninterface AnimationFrameProvider {\n schedule(callback: FrameRequestCallback): Subscription;\n requestAnimationFrame: typeof requestAnimationFrame;\n cancelAnimationFrame: typeof cancelAnimationFrame;\n delegate:\n | {\n requestAnimationFrame: typeof requestAnimationFrame;\n cancelAnimationFrame: typeof cancelAnimationFrame;\n }\n | undefined;\n}\n\nexport const animationFrameProvider: AnimationFrameProvider = {\n // When accessing the delegate, use the variable rather than `this` so that\n // the functions can be called without being bound to the provider.\n schedule(callback) {\n let request = requestAnimationFrame;\n let cancel: typeof cancelAnimationFrame | undefined = cancelAnimationFrame;\n const { delegate } = animationFrameProvider;\n if (delegate) {\n request = delegate.requestAnimationFrame;\n cancel = delegate.cancelAnimationFrame;\n }\n const handle = request((timestamp) => {\n // Clear the cancel function. 
The request has been fulfilled, so\n // attempting to cancel the request upon unsubscription would be\n // pointless.\n cancel = undefined;\n callback(timestamp);\n });\n return new Subscription(() => cancel?.(handle));\n },\n requestAnimationFrame(...args) {\n const { delegate } = animationFrameProvider;\n return (delegate?.requestAnimationFrame || requestAnimationFrame)(...args);\n },\n cancelAnimationFrame(...args) {\n const { delegate } = animationFrameProvider;\n return (delegate?.cancelAnimationFrame || cancelAnimationFrame)(...args);\n },\n delegate: undefined,\n};\n", "import { createErrorClass } from './createErrorClass';\n\nexport interface ObjectUnsubscribedError extends Error {}\n\nexport interface ObjectUnsubscribedErrorCtor {\n /**\n * @deprecated Internal implementation detail. Do not construct error instances.\n * Cannot be tagged as internal: https://github.com/ReactiveX/rxjs/issues/6269\n */\n new (): ObjectUnsubscribedError;\n}\n\n/**\n * An error thrown when an action is invalid because the object has been\n * unsubscribed.\n *\n * @see {@link Subject}\n * @see {@link BehaviorSubject}\n *\n * @class ObjectUnsubscribedError\n */\nexport const ObjectUnsubscribedError: ObjectUnsubscribedErrorCtor = createErrorClass(\n (_super) =>\n function ObjectUnsubscribedErrorImpl(this: any) {\n _super(this);\n this.name = 'ObjectUnsubscribedError';\n this.message = 'object unsubscribed';\n }\n);\n", "import { Operator } from './Operator';\nimport { Observable } from './Observable';\nimport { Subscriber } from './Subscriber';\nimport { Subscription, EMPTY_SUBSCRIPTION } from './Subscription';\nimport { Observer, SubscriptionLike, TeardownLogic } from './types';\nimport { ObjectUnsubscribedError } from './util/ObjectUnsubscribedError';\nimport { arrRemove } from './util/arrRemove';\nimport { errorContext } from './util/errorContext';\n\n/**\n * A Subject is a special type of Observable that allows values to be\n * multicasted to many Observers. Subjects are like EventEmitters.\n *\n * Every Subject is an Observable and an Observer. You can subscribe to a\n * Subject, and you can call next to feed values as well as error and complete.\n */\nexport class Subject extends Observable implements SubscriptionLike {\n closed = false;\n\n private currentObservers: Observer[] | null = null;\n\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n observers: Observer[] = [];\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n isStopped = false;\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n hasError = false;\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n thrownError: any = null;\n\n /**\n * Creates a \"subject\" by basically gluing an observer to an observable.\n *\n * @nocollapse\n * @deprecated Recommended you do not use. Will be removed at some point in the future. Plans for replacement still under discussion.\n */\n static create: (...args: any[]) => any = (destination: Observer, source: Observable): AnonymousSubject => {\n return new AnonymousSubject(destination, source);\n };\n\n constructor() {\n // NOTE: This must be here to obscure Observable's constructor.\n super();\n }\n\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. 
*/\n lift(operator: Operator): Observable {\n const subject = new AnonymousSubject(this, this);\n subject.operator = operator as any;\n return subject as any;\n }\n\n /** @internal */\n protected _throwIfClosed() {\n if (this.closed) {\n throw new ObjectUnsubscribedError();\n }\n }\n\n next(value: T) {\n errorContext(() => {\n this._throwIfClosed();\n if (!this.isStopped) {\n if (!this.currentObservers) {\n this.currentObservers = Array.from(this.observers);\n }\n for (const observer of this.currentObservers) {\n observer.next(value);\n }\n }\n });\n }\n\n error(err: any) {\n errorContext(() => {\n this._throwIfClosed();\n if (!this.isStopped) {\n this.hasError = this.isStopped = true;\n this.thrownError = err;\n const { observers } = this;\n while (observers.length) {\n observers.shift()!.error(err);\n }\n }\n });\n }\n\n complete() {\n errorContext(() => {\n this._throwIfClosed();\n if (!this.isStopped) {\n this.isStopped = true;\n const { observers } = this;\n while (observers.length) {\n observers.shift()!.complete();\n }\n }\n });\n }\n\n unsubscribe() {\n this.isStopped = this.closed = true;\n this.observers = this.currentObservers = null!;\n }\n\n get observed() {\n return this.observers?.length > 0;\n }\n\n /** @internal */\n protected _trySubscribe(subscriber: Subscriber): TeardownLogic {\n this._throwIfClosed();\n return super._trySubscribe(subscriber);\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): Subscription {\n this._throwIfClosed();\n this._checkFinalizedStatuses(subscriber);\n return this._innerSubscribe(subscriber);\n }\n\n /** @internal */\n protected _innerSubscribe(subscriber: Subscriber) {\n const { hasError, isStopped, observers } = this;\n if (hasError || isStopped) {\n return EMPTY_SUBSCRIPTION;\n }\n this.currentObservers = null;\n observers.push(subscriber);\n return new Subscription(() => {\n this.currentObservers = null;\n arrRemove(observers, subscriber);\n });\n }\n\n /** @internal */\n protected _checkFinalizedStatuses(subscriber: Subscriber) {\n const { hasError, thrownError, isStopped } = this;\n if (hasError) {\n subscriber.error(thrownError);\n } else if (isStopped) {\n subscriber.complete();\n }\n }\n\n /**\n * Creates a new Observable with this Subject as the source. You can do this\n * to create custom Observer-side logic of the Subject and conceal it from\n * code that uses the Observable.\n * @return {Observable} Observable that the Subject casts to\n */\n asObservable(): Observable {\n const observable: any = new Observable();\n observable.source = this;\n return observable;\n }\n}\n\n/**\n * @class AnonymousSubject\n */\nexport class AnonymousSubject extends Subject {\n constructor(\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n public destination?: Observer,\n source?: Observable\n ) {\n super();\n this.source = source;\n }\n\n next(value: T) {\n this.destination?.next?.(value);\n }\n\n error(err: any) {\n this.destination?.error?.(err);\n }\n\n complete() {\n this.destination?.complete?.();\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): Subscription {\n return this.source?.subscribe(subscriber) ?? 
EMPTY_SUBSCRIPTION;\n }\n}\n", "import { TimestampProvider } from '../types';\n\ninterface DateTimestampProvider extends TimestampProvider {\n delegate: TimestampProvider | undefined;\n}\n\nexport const dateTimestampProvider: DateTimestampProvider = {\n now() {\n // Use the variable rather than `this` so that the function can be called\n // without being bound to the provider.\n return (dateTimestampProvider.delegate || Date).now();\n },\n delegate: undefined,\n};\n", "import { Subject } from './Subject';\nimport { TimestampProvider } from './types';\nimport { Subscriber } from './Subscriber';\nimport { Subscription } from './Subscription';\nimport { dateTimestampProvider } from './scheduler/dateTimestampProvider';\n\n/**\n * A variant of {@link Subject} that \"replays\" old values to new subscribers by emitting them when they first subscribe.\n *\n * `ReplaySubject` has an internal buffer that will store a specified number of values that it has observed. Like `Subject`,\n * `ReplaySubject` \"observes\" values by having them passed to its `next` method. When it observes a value, it will store that\n * value for a time determined by the configuration of the `ReplaySubject`, as passed to its constructor.\n *\n * When a new subscriber subscribes to the `ReplaySubject` instance, it will synchronously emit all values in its buffer in\n * a First-In-First-Out (FIFO) manner. The `ReplaySubject` will also complete, if it has observed completion; and it will\n * error if it has observed an error.\n *\n * There are two main configuration items to be concerned with:\n *\n * 1. `bufferSize` - This will determine how many items are stored in the buffer, defaults to infinite.\n * 2. `windowTime` - The amount of time to hold a value in the buffer before removing it from the buffer.\n *\n * Both configurations may exist simultaneously. So if you would like to buffer a maximum of 3 values, as long as the values\n * are less than 2 seconds old, you could do so with a `new ReplaySubject(3, 2000)`.\n *\n * ### Differences with BehaviorSubject\n *\n * `BehaviorSubject` is similar to `new ReplaySubject(1)`, with a couple of exceptions:\n *\n * 1. `BehaviorSubject` comes \"primed\" with a single value upon construction.\n * 2. `ReplaySubject` will replay values, even after observing an error, where `BehaviorSubject` will not.\n *\n * @see {@link Subject}\n * @see {@link BehaviorSubject}\n * @see {@link shareReplay}\n */\nexport class ReplaySubject extends Subject {\n private _buffer: (T | number)[] = [];\n private _infiniteTimeWindow = true;\n\n /**\n * @param bufferSize The size of the buffer to replay on subscription\n * @param windowTime The amount of time the buffered items will stay buffered\n * @param timestampProvider An object with a `now()` method that provides the current timestamp. 
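// A minimal sketch of the multicast behavior implemented by `Subject` above:
// one `next` call reaches every registered observer.
import { Subject } from 'rxjs';

const subject = new Subject<number>();
subject.subscribe((v) => console.log('A', v));
subject.subscribe((v) => console.log('B', v));
subject.next(1); // logs 'A 1' then 'B 1'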
This is used to\n * calculate the amount of time something has been buffered.\n */\n constructor(\n private _bufferSize = Infinity,\n private _windowTime = Infinity,\n private _timestampProvider: TimestampProvider = dateTimestampProvider\n ) {\n super();\n this._infiniteTimeWindow = _windowTime === Infinity;\n this._bufferSize = Math.max(1, _bufferSize);\n this._windowTime = Math.max(1, _windowTime);\n }\n\n next(value: T): void {\n const { isStopped, _buffer, _infiniteTimeWindow, _timestampProvider, _windowTime } = this;\n if (!isStopped) {\n _buffer.push(value);\n !_infiniteTimeWindow && _buffer.push(_timestampProvider.now() + _windowTime);\n }\n this._trimBuffer();\n super.next(value);\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): Subscription {\n this._throwIfClosed();\n this._trimBuffer();\n\n const subscription = this._innerSubscribe(subscriber);\n\n const { _infiniteTimeWindow, _buffer } = this;\n // We use a copy here, so reentrant code does not mutate our array while we're\n // emitting it to a new subscriber.\n const copy = _buffer.slice();\n for (let i = 0; i < copy.length && !subscriber.closed; i += _infiniteTimeWindow ? 1 : 2) {\n subscriber.next(copy[i] as T);\n }\n\n this._checkFinalizedStatuses(subscriber);\n\n return subscription;\n }\n\n private _trimBuffer() {\n const { _bufferSize, _timestampProvider, _buffer, _infiniteTimeWindow } = this;\n // If we don't have an infinite buffer size, and we're over the length,\n // use splice to truncate the old buffer values off. Note that we have to\n // double the size for instances where we're not using an infinite time window\n // because we're storing the values and the timestamps in the same array.\n const adjustedBufferSize = (_infiniteTimeWindow ? 1 : 2) * _bufferSize;\n _bufferSize < Infinity && adjustedBufferSize < _buffer.length && _buffer.splice(0, _buffer.length - adjustedBufferSize);\n\n // Now, if we're not in an infinite time window, remove all values where the time is\n // older than what is allowed.\n if (!_infiniteTimeWindow) {\n const now = _timestampProvider.now();\n let last = 0;\n // Search the array for the first timestamp that isn't expired and\n // truncate the buffer up to that point.\n for (let i = 1; i < _buffer.length && (_buffer[i] as number) <= now; i += 2) {\n last = i;\n }\n last && _buffer.splice(0, last + 1);\n }\n }\n}\n", "import { Scheduler } from '../Scheduler';\nimport { Subscription } from '../Subscription';\nimport { SchedulerAction } from '../types';\n\n/**\n * A unit of work to be executed in a `scheduler`. An action is typically\n * created from within a {@link SchedulerLike} and an RxJS user does not need to concern\n * themselves about creating and manipulating an Action.\n *\n * ```ts\n * class Action extends Subscription {\n * new (scheduler: Scheduler, work: (state?: T) => void);\n * schedule(state?: T, delay: number = 0): Subscription;\n * }\n * ```\n *\n * @class Action\n */\nexport class Action extends Subscription {\n constructor(scheduler: Scheduler, work: (this: SchedulerAction, state?: T) => void) {\n super();\n }\n /**\n * Schedules this action on its parent {@link SchedulerLike} for execution. May be passed\n * some context object, `state`. 
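// A short sketch of the `ReplaySubject` buffer semantics implemented above:
// with a buffer size of 2, late subscribers synchronously receive the last
// two values.
import { ReplaySubject } from 'rxjs';

const replay$ = new ReplaySubject<number>(2);
replay$.next(1);
replay$.next(2);
replay$.next(3);
replay$.subscribe((v) => console.log(v)); // logs 2, then 3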
May happen at some point in the future,\n * according to the `delay` parameter, if specified.\n * @param {T} [state] Some contextual data that the `work` function uses when\n * called by the Scheduler.\n * @param {number} [delay] Time to wait before executing the work, where the\n * time unit is implicit and defined by the Scheduler.\n * @return {void}\n */\n public schedule(state?: T, delay: number = 0): Subscription {\n return this;\n }\n}\n", "import type { TimerHandle } from './timerHandle';\ntype SetIntervalFunction = (handler: () => void, timeout?: number, ...args: any[]) => TimerHandle;\ntype ClearIntervalFunction = (handle: TimerHandle) => void;\n\ninterface IntervalProvider {\n setInterval: SetIntervalFunction;\n clearInterval: ClearIntervalFunction;\n delegate:\n | {\n setInterval: SetIntervalFunction;\n clearInterval: ClearIntervalFunction;\n }\n | undefined;\n}\n\nexport const intervalProvider: IntervalProvider = {\n // When accessing the delegate, use the variable rather than `this` so that\n // the functions can be called without being bound to the provider.\n setInterval(handler: () => void, timeout?: number, ...args) {\n const { delegate } = intervalProvider;\n if (delegate?.setInterval) {\n return delegate.setInterval(handler, timeout, ...args);\n }\n return setInterval(handler, timeout, ...args);\n },\n clearInterval(handle) {\n const { delegate } = intervalProvider;\n return (delegate?.clearInterval || clearInterval)(handle as any);\n },\n delegate: undefined,\n};\n", "import { Action } from './Action';\nimport { SchedulerAction } from '../types';\nimport { Subscription } from '../Subscription';\nimport { AsyncScheduler } from './AsyncScheduler';\nimport { intervalProvider } from './intervalProvider';\nimport { arrRemove } from '../util/arrRemove';\nimport { TimerHandle } from './timerHandle';\n\nexport class AsyncAction extends Action {\n public id: TimerHandle | undefined;\n public state?: T;\n // @ts-ignore: Property has no initializer and is not definitely assigned\n public delay: number;\n protected pending: boolean = false;\n\n constructor(protected scheduler: AsyncScheduler, protected work: (this: SchedulerAction, state?: T) => void) {\n super(scheduler, work);\n }\n\n public schedule(state?: T, delay: number = 0): Subscription {\n if (this.closed) {\n return this;\n }\n\n // Always replace the current state with the new state.\n this.state = state;\n\n const id = this.id;\n const scheduler = this.scheduler;\n\n //\n // Important implementation note:\n //\n // Actions only execute once by default, unless rescheduled from within the\n // scheduled callback. This allows us to implement single and repeat\n // actions via the same code path, without adding API surface area, as well\n // as mimic traditional recursion but across asynchronous boundaries.\n //\n // However, JS runtimes and timers distinguish between intervals achieved by\n // serial `setTimeout` calls vs. a single `setInterval` call. An interval of\n // serial `setTimeout` calls can be individually delayed, which delays\n // scheduling the next `setTimeout`, and so on. `setInterval` attempts to\n // guarantee the interval callback will be invoked more precisely to the\n // interval period, regardless of load.\n //\n // Therefore, we use `setInterval` to schedule single and repeat actions.\n // If the action reschedules itself with the same delay, the interval is not\n // canceled. 
If the action doesn't reschedule, or reschedules with a\n // different delay, the interval will be canceled after scheduled callback\n // execution.\n //\n if (id != null) {\n this.id = this.recycleAsyncId(scheduler, id, delay);\n }\n\n // Set the pending flag indicating that this action has been scheduled, or\n // has recursively rescheduled itself.\n this.pending = true;\n\n this.delay = delay;\n // If this action has already an async Id, don't request a new one.\n this.id = this.id ?? this.requestAsyncId(scheduler, this.id, delay);\n\n return this;\n }\n\n protected requestAsyncId(scheduler: AsyncScheduler, _id?: TimerHandle, delay: number = 0): TimerHandle {\n return intervalProvider.setInterval(scheduler.flush.bind(scheduler, this), delay);\n }\n\n protected recycleAsyncId(_scheduler: AsyncScheduler, id?: TimerHandle, delay: number | null = 0): TimerHandle | undefined {\n // If this action is rescheduled with the same delay time, don't clear the interval id.\n if (delay != null && this.delay === delay && this.pending === false) {\n return id;\n }\n // Otherwise, if the action's delay time is different from the current delay,\n // or the action has been rescheduled before it's executed, clear the interval id\n if (id != null) {\n intervalProvider.clearInterval(id);\n }\n\n return undefined;\n }\n\n /**\n * Immediately executes this action and the `work` it contains.\n * @return {any}\n */\n public execute(state: T, delay: number): any {\n if (this.closed) {\n return new Error('executing a cancelled action');\n }\n\n this.pending = false;\n const error = this._execute(state, delay);\n if (error) {\n return error;\n } else if (this.pending === false && this.id != null) {\n // Dequeue if the action didn't reschedule itself. Don't call\n // unsubscribe(), because the action could reschedule later.\n // For example:\n // ```\n // scheduler.schedule(function doWork(counter) {\n // /* ... I'm a busy worker bee ... */\n // var originalAction = this;\n // /* wait 100ms before rescheduling the action */\n // setTimeout(function () {\n // originalAction.schedule(counter + 1);\n // }, 100);\n // }, 1000);\n // ```\n this.id = this.recycleAsyncId(this.scheduler, this.id, null);\n }\n }\n\n protected _execute(state: T, _delay: number): any {\n let errored: boolean = false;\n let errorValue: any;\n try {\n this.work(state);\n } catch (e) {\n errored = true;\n // HACK: Since code elsewhere is relying on the \"truthiness\" of the\n // return here, we can't have it return \"\" or 0 or false.\n // TODO: Clean this up when we refactor schedulers mid-version-8 or so.\n errorValue = e ? e : new Error('Scheduled action threw falsy error');\n }\n if (errored) {\n this.unsubscribe();\n return errorValue;\n }\n }\n\n unsubscribe() {\n if (!this.closed) {\n const { id, scheduler } = this;\n const { actions } = scheduler;\n\n this.work = this.state = this.scheduler = null!;\n this.pending = false;\n\n arrRemove(actions, this);\n if (id != null) {\n this.id = this.recycleAsyncId(scheduler, id, null);\n }\n\n this.delay = null!;\n super.unsubscribe();\n }\n }\n}\n", "import { Action } from './scheduler/Action';\nimport { Subscription } from './Subscription';\nimport { SchedulerLike, SchedulerAction } from './types';\nimport { dateTimestampProvider } from './scheduler/dateTimestampProvider';\n\n/**\n * An execution context and a data structure to order tasks and schedule their\n * execution. 
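// A small sketch of the Action contract described above: `schedule` returns
// a Subscription, so pending work can be cancelled before it executes.
import { asyncScheduler } from 'rxjs';

const subscription = asyncScheduler.schedule(() => console.log('runs later'), 1000);
subscription.unsubscribe(); // cancelled in time; nothing is logged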
Provides a notion of (potentially virtual) time, through the\n * `now()` getter method.\n *\n * Each unit of work in a Scheduler is called an `Action`.\n *\n * ```ts\n * class Scheduler {\n * now(): number;\n * schedule(work, delay?, state?): Subscription;\n * }\n * ```\n *\n * @class Scheduler\n * @deprecated Scheduler is an internal implementation detail of RxJS, and\n * should not be used directly. Rather, create your own class and implement\n * {@link SchedulerLike}. Will be made internal in v8.\n */\nexport class Scheduler implements SchedulerLike {\n public static now: () => number = dateTimestampProvider.now;\n\n constructor(private schedulerActionCtor: typeof Action, now: () => number = Scheduler.now) {\n this.now = now;\n }\n\n /**\n * A getter method that returns a number representing the current time\n * (at the time this function was called) according to the scheduler's own\n * internal clock.\n * @return {number} A number that represents the current time. May or may not\n * have a relation to wall-clock time. May or may not refer to a time unit\n * (e.g. milliseconds).\n */\n public now: () => number;\n\n /**\n * Schedules a function, `work`, for execution. May happen at some point in\n * the future, according to the `delay` parameter, if specified. May be passed\n * some context object, `state`, which will be passed to the `work` function.\n *\n * The given arguments will be processed and stored as an Action object in a\n * queue of actions.\n *\n * @param {function(state: ?T): ?Subscription} work A function representing a\n * task, or some unit of work to be executed by the Scheduler.\n * @param {number} [delay] Time to wait before executing the work, where the\n * time unit is implicit and defined by the Scheduler itself.\n * @param {T} [state] Some contextual data that the `work` function uses when\n * called by the Scheduler.\n * @return {Subscription} A subscription in order to be able to unsubscribe\n * the scheduled work.\n */\n public schedule(work: (this: SchedulerAction, state?: T) => void, delay: number = 0, state?: T): Subscription {\n return new this.schedulerActionCtor(this, work).schedule(state, delay);\n }\n}\n", "import { Scheduler } from '../Scheduler';\nimport { Action } from './Action';\nimport { AsyncAction } from './AsyncAction';\nimport { TimerHandle } from './timerHandle';\n\nexport class AsyncScheduler extends Scheduler {\n public actions: Array> = [];\n /**\n * A flag to indicate whether the Scheduler is currently executing a batch of\n * queued actions.\n * @type {boolean}\n * @internal\n */\n public _active: boolean = false;\n /**\n * An internal ID used to track the latest asynchronous task such as those\n * coming from `setTimeout`, `setInterval`, `requestAnimationFrame`, and\n * others.\n * @type {any}\n * @internal\n */\n public _scheduled: TimerHandle | undefined;\n\n constructor(SchedulerAction: typeof Action, now: () => number = Scheduler.now) {\n super(SchedulerAction, now);\n }\n\n public flush(action: AsyncAction): void {\n const { actions } = this;\n\n if (this._active) {\n actions.push(action);\n return;\n }\n\n let error: any;\n this._active = true;\n\n do {\n if ((error = action.execute(action.state, action.delay))) {\n break;\n }\n } while ((action = actions.shift()!)); // exhaust the scheduler queue\n\n this._active = false;\n\n if (error) {\n while ((action = actions.shift()!)) {\n action.unsubscribe();\n }\n throw error;\n }\n }\n}\n", "import { AsyncAction } from './AsyncAction';\nimport { AsyncScheduler } from 
'./AsyncScheduler';\n\n/**\n *\n * Async Scheduler\n *\n * Schedule task as if you used setTimeout(task, duration)\n *\n * `async` scheduler schedules tasks asynchronously, by putting them on the JavaScript\n * event loop queue. It is best used to delay tasks in time or to schedule tasks repeating\n * in intervals.\n *\n * If you just want to \"defer\" task, that is to perform it right after currently\n * executing synchronous code ends (commonly achieved by `setTimeout(deferredTask, 0)`),\n * better choice will be the {@link asapScheduler} scheduler.\n *\n * ## Examples\n * Use async scheduler to delay task\n * ```ts\n * import { asyncScheduler } from 'rxjs';\n *\n * const task = () => console.log('it works!');\n *\n * asyncScheduler.schedule(task, 2000);\n *\n * // After 2 seconds logs:\n * // \"it works!\"\n * ```\n *\n * Use async scheduler to repeat task in intervals\n * ```ts\n * import { asyncScheduler } from 'rxjs';\n *\n * function task(state) {\n * console.log(state);\n * this.schedule(state + 1, 1000); // `this` references currently executing Action,\n * // which we reschedule with new state and delay\n * }\n *\n * asyncScheduler.schedule(task, 3000, 0);\n *\n * // Logs:\n * // 0 after 3s\n * // 1 after 4s\n * // 2 after 5s\n * // 3 after 6s\n * ```\n */\n\nexport const asyncScheduler = new AsyncScheduler(AsyncAction);\n\n/**\n * @deprecated Renamed to {@link asyncScheduler}. Will be removed in v8.\n */\nexport const async = asyncScheduler;\n", "import { AsyncAction } from './AsyncAction';\nimport { AnimationFrameScheduler } from './AnimationFrameScheduler';\nimport { SchedulerAction } from '../types';\nimport { animationFrameProvider } from './animationFrameProvider';\nimport { TimerHandle } from './timerHandle';\n\nexport class AnimationFrameAction extends AsyncAction {\n constructor(protected scheduler: AnimationFrameScheduler, protected work: (this: SchedulerAction, state?: T) => void) {\n super(scheduler, work);\n }\n\n protected requestAsyncId(scheduler: AnimationFrameScheduler, id?: TimerHandle, delay: number = 0): TimerHandle {\n // If delay is greater than 0, request as an async action.\n if (delay !== null && delay > 0) {\n return super.requestAsyncId(scheduler, id, delay);\n }\n // Push the action to the end of the scheduler queue.\n scheduler.actions.push(this);\n // If an animation frame has already been requested, don't request another\n // one. If an animation frame hasn't been requested yet, request one. Return\n // the current animation frame request id.\n return scheduler._scheduled || (scheduler._scheduled = animationFrameProvider.requestAnimationFrame(() => scheduler.flush(undefined)));\n }\n\n protected recycleAsyncId(scheduler: AnimationFrameScheduler, id?: TimerHandle, delay: number = 0): TimerHandle | undefined {\n // If delay exists and is greater than 0, or if the delay is null (the\n // action wasn't rescheduled) but was originally scheduled as an async\n // action, then recycle as an async action.\n if (delay != null ? 
delay > 0 : this.delay > 0) {\n return super.recycleAsyncId(scheduler, id, delay);\n }\n // If the scheduler queue has no remaining actions with the same async id,\n // cancel the requested animation frame and set the scheduled flag to\n // undefined so the next AnimationFrameAction will request its own.\n const { actions } = scheduler;\n if (id != null && actions[actions.length - 1]?.id !== id) {\n animationFrameProvider.cancelAnimationFrame(id as number);\n scheduler._scheduled = undefined;\n }\n // Return undefined so the action knows to request a new async id if it's rescheduled.\n return undefined;\n }\n}\n", "import { AsyncAction } from './AsyncAction';\nimport { AsyncScheduler } from './AsyncScheduler';\n\nexport class AnimationFrameScheduler extends AsyncScheduler {\n public flush(action?: AsyncAction): void {\n this._active = true;\n // The async id that effects a call to flush is stored in _scheduled.\n // Before executing an action, it's necessary to check the action's async\n // id to determine whether it's supposed to be executed in the current\n // flush.\n // Previous implementations of this method used a count to determine this,\n // but that was unsound, as actions that are unsubscribed - i.e. cancelled -\n // are removed from the actions array and that can shift actions that are\n // scheduled to be executed in a subsequent flush into positions at which\n // they are executed within the current flush.\n const flushId = this._scheduled;\n this._scheduled = undefined;\n\n const { actions } = this;\n let error: any;\n action = action || actions.shift()!;\n\n do {\n if ((error = action.execute(action.state, action.delay))) {\n break;\n }\n } while ((action = actions[0]) && action.id === flushId && actions.shift());\n\n this._active = false;\n\n if (error) {\n while ((action = actions[0]) && action.id === flushId && actions.shift()) {\n action.unsubscribe();\n }\n throw error;\n }\n }\n}\n", "import { AnimationFrameAction } from './AnimationFrameAction';\nimport { AnimationFrameScheduler } from './AnimationFrameScheduler';\n\n/**\n *\n * Animation Frame Scheduler\n *\n * Perform task when `window.requestAnimationFrame` would fire\n *\n * When `animationFrame` scheduler is used with delay, it will fall back to {@link asyncScheduler} scheduler\n * behaviour.\n *\n * Without delay, `animationFrame` scheduler can be used to create smooth browser animations.\n * It makes sure scheduled task will happen just before next browser content repaint,\n * thus performing animations as efficiently as possible.\n *\n * ## Example\n * Schedule div height animation\n * ```ts\n * // html:
\n * import { animationFrameScheduler } from 'rxjs';\n *\n * const div = document.querySelector('div');\n *\n * animationFrameScheduler.schedule(function(height) {\n * div.style.height = height + \"px\";\n *\n * this.schedule(height + 1); // `this` references currently executing Action,\n * // which we reschedule with new state\n * }, 0, 0);\n *\n * // You will see a div element growing in height\n * ```\n */\n\nexport const animationFrameScheduler = new AnimationFrameScheduler(AnimationFrameAction);\n\n/**\n * @deprecated Renamed to {@link animationFrameScheduler}. Will be removed in v8.\n */\nexport const animationFrame = animationFrameScheduler;\n", "import { Observable } from '../Observable';\nimport { SchedulerLike } from '../types';\n\n/**\n * A simple Observable that emits no items to the Observer and immediately\n * emits a complete notification.\n *\n * Just emits 'complete', and nothing else.\n *\n * ![](empty.png)\n *\n * A simple Observable that only emits the complete notification. It can be used\n * for composing with other Observables, such as in a {@link mergeMap}.\n *\n * ## Examples\n *\n * Log complete notification\n *\n * ```ts\n * import { EMPTY } from 'rxjs';\n *\n * EMPTY.subscribe({\n * next: () => console.log('Next'),\n * complete: () => console.log('Complete!')\n * });\n *\n * // Outputs\n * // Complete!\n * ```\n *\n * Emit the number 7, then complete\n *\n * ```ts\n * import { EMPTY, startWith } from 'rxjs';\n *\n * const result = EMPTY.pipe(startWith(7));\n * result.subscribe(x => console.log(x));\n *\n * // Outputs\n * // 7\n * ```\n *\n * Map and flatten only odd numbers to the sequence `'a'`, `'b'`, `'c'`\n *\n * ```ts\n * import { interval, mergeMap, of, EMPTY } from 'rxjs';\n *\n * const interval$ = interval(1000);\n * const result = interval$.pipe(\n * mergeMap(x => x % 2 === 1 ? of('a', 'b', 'c') : EMPTY),\n * );\n * result.subscribe(x => console.log(x));\n *\n * // Results in the following to the console:\n * // x is equal to the count on the interval, e.g. (0, 1, 2, 3, ...)\n * // x will occur every 1000ms\n * // if x % 2 is equal to 1, print a, b, c (each on its own)\n * // if x % 2 is not equal to 1, nothing will be output\n * ```\n *\n * @see {@link Observable}\n * @see {@link NEVER}\n * @see {@link of}\n * @see {@link throwError}\n */\nexport const EMPTY = new Observable((subscriber) => subscriber.complete());\n\n/**\n * @param scheduler A {@link SchedulerLike} to use for scheduling\n * the emission of the complete notification.\n * @deprecated Replaced with the {@link EMPTY} constant or {@link scheduled} (e.g. `scheduled([], scheduler)`). Will be removed in v8.\n */\nexport function empty(scheduler?: SchedulerLike) {\n return scheduler ? emptyScheduled(scheduler) : EMPTY;\n}\n\nfunction emptyScheduled(scheduler: SchedulerLike) {\n return new Observable((subscriber) => scheduler.schedule(() => subscriber.complete()));\n}\n", "import { SchedulerLike } from '../types';\nimport { isFunction } from './isFunction';\n\nexport function isScheduler(value: any): value is SchedulerLike {\n return value && isFunction(value.schedule);\n}\n", "import { SchedulerLike } from '../types';\nimport { isFunction } from './isFunction';\nimport { isScheduler } from './isScheduler';\n\nfunction last(arr: T[]): T | undefined {\n return arr[arr.length - 1];\n}\n\nexport function popResultSelector(args: any[]): ((...args: unknown[]) => unknown) | undefined {\n return isFunction(last(args)) ? 
args.pop() : undefined;\n}\n\nexport function popScheduler(args: any[]): SchedulerLike | undefined {\n return isScheduler(last(args)) ? args.pop() : undefined;\n}\n\nexport function popNumber(args: any[], defaultValue: number): number {\n return typeof last(args) === 'number' ? args.pop()! : defaultValue;\n}\n", "export const isArrayLike = ((x: any): x is ArrayLike => x && typeof x.length === 'number' && typeof x !== 'function');", "import { isFunction } from \"./isFunction\";\n\n/**\n * Tests to see if the object is \"thennable\".\n * @param value the object to test\n */\nexport function isPromise(value: any): value is PromiseLike {\n return isFunction(value?.then);\n}\n", "import { InteropObservable } from '../types';\nimport { observable as Symbol_observable } from '../symbol/observable';\nimport { isFunction } from './isFunction';\n\n/** Identifies an input as being Observable (but not necessary an Rx Observable) */\nexport function isInteropObservable(input: any): input is InteropObservable {\n return isFunction(input[Symbol_observable]);\n}\n", "import { isFunction } from './isFunction';\n\nexport function isAsyncIterable(obj: any): obj is AsyncIterable {\n return Symbol.asyncIterator && isFunction(obj?.[Symbol.asyncIterator]);\n}\n", "/**\n * Creates the TypeError to throw if an invalid object is passed to `from` or `scheduled`.\n * @param input The object that was passed.\n */\nexport function createInvalidObservableTypeError(input: any) {\n // TODO: We should create error codes that can be looked up, so this can be less verbose.\n return new TypeError(\n `You provided ${\n input !== null && typeof input === 'object' ? 'an invalid object' : `'${input}'`\n } where a stream was expected. You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.`\n );\n}\n", "export function getSymbolIterator(): symbol {\n if (typeof Symbol !== 'function' || !Symbol.iterator) {\n return '@@iterator' as any;\n }\n\n return Symbol.iterator;\n}\n\nexport const iterator = getSymbolIterator();\n", "import { iterator as Symbol_iterator } from '../symbol/iterator';\nimport { isFunction } from './isFunction';\n\n/** Identifies an input as being an Iterable */\nexport function isIterable(input: any): input is Iterable {\n return isFunction(input?.[Symbol_iterator]);\n}\n", "import { ReadableStreamLike } from '../types';\nimport { isFunction } from './isFunction';\n\nexport async function* readableStreamLikeToAsyncGenerator(readableStream: ReadableStreamLike): AsyncGenerator {\n const reader = readableStream.getReader();\n try {\n while (true) {\n const { value, done } = await reader.read();\n if (done) {\n return;\n }\n yield value!;\n }\n } finally {\n reader.releaseLock();\n }\n}\n\nexport function isReadableStreamLike(obj: any): obj is ReadableStreamLike {\n // We don't want to use instanceof checks because they would return\n // false for instances from another Realm, like an