diff --git a/README.md b/README.md index 36c4644e..40eac2b8 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,10 @@ Python wrapper for MOA to allow efficient use of existing algorithms with a more modern API > [!IMPORTANT] -> * **[How to Install CapyMOA](docs/installation.md)** -> * **[How to Contribute Tests](docs/testing.md)** -> * **[How to Contribute Documentation](docs/README.md)** +> * **[How to install CapyMOA](docs/installation.md)** +> * **[How to add documentation](docs/contributing/docs.md)** +> * **[How to add tests](docs/contributing/tests.md)** +> * **[How to add new algorithms or methods](docs/contributing/learners.md)** # Functionality diff --git a/docs/api/classifiers.rst b/docs/api/classifiers.rst index d58707eb..cefeff58 100644 --- a/docs/api/classifiers.rst +++ b/docs/api/classifiers.rst @@ -1,9 +1,8 @@ Classifiers =========== -Classifiers implement the :class:`capymoa.learner.learners.Classifier` interface. +Classifiers implement the :class:`capymoa.base.Classifier` interface. -.. automodule:: capymoa.learner.classifier +.. automodule:: capymoa.classifier :members: :undoc-members: :show-inheritance: - :inherited-members: diff --git a/docs/api/datasets.rst b/docs/api/datasets.rst index b8efe4ec..6e213949 100644 --- a/docs/api/datasets.rst +++ b/docs/api/datasets.rst @@ -8,3 +8,9 @@ and used being downloaded the first time you use them. :undoc-members: :show-inheritance: :inherited-members: + +.. automodule:: capymoa.datasets.downloader + :members: + :undoc-members: + :show-inheritance: + :inherited-members: \ No newline at end of file diff --git a/docs/api/api.rst b/docs/api/index.rst similarity index 74% rename from docs/api/api.rst rename to docs/api/index.rst index a025fb2f..818a4962 100644 --- a/docs/api/api.rst +++ b/docs/api/index.rst @@ -16,12 +16,18 @@ with the :ref:`tutorials`. datasets instance +.. toctree:: + :maxdepth: 1 + :caption: Interfaces + + learner + moa_learner + .. toctree:: :maxdepth: 1 :caption: Learners regressor - learners ssl classifiers @@ -30,3 +36,10 @@ with the :ref:`tutorials`. :caption: Evaluation evaluation + + +.. toctree:: + :maxdepth: 1 + :caption: Other + + splitcriteria diff --git a/docs/api/instance.rst b/docs/api/instance.rst index 7e93f96f..26a113f6 100644 --- a/docs/api/instance.rst +++ b/docs/api/instance.rst @@ -2,7 +2,7 @@ Instance ======== Instances are the basic unit of data in CapyMOA. -.. automodule:: capymoa.stream.instance +.. automodule:: capymoa.instance :members: :undoc-members: :show-inheritance: diff --git a/docs/api/learners.rst b/docs/api/learner.rst similarity index 64% rename from docs/api/learners.rst rename to docs/api/learner.rst index 643dd941..6472ad92 100644 --- a/docs/api/learners.rst +++ b/docs/api/learner.rst @@ -3,17 +3,17 @@ Learners CapyMOA defines different interfaces for learners performing different machine learning tasks. -.. autoclass:: capymoa.learner.learners.Classifier +.. autoclass:: capymoa.base.Classifier :members: :undoc-members: :inherited-members: -.. autoclass:: capymoa.learner.learners.Regressor +.. autoclass:: capymoa.base.Regressor :members: :undoc-members: :inherited-members: -.. autoclass:: capymoa.learner.learners.ClassifierSSL +.. 
autoclass:: capymoa.base.ClassifierSSL
    :members:
    :undoc-members:
    :inherited-members:
\ No newline at end of file
diff --git a/docs/api/moa_learner.rst b/docs/api/moa_learner.rst
new file mode 100644
index 00000000..e10e9965
--- /dev/null
+++ b/docs/api/moa_learner.rst
@@ -0,0 +1,14 @@
+MOA Learners
+============
+Interfaces for objects that wrap MOA functionality.
+
+.. autoclass:: capymoa.base.MOAClassifier
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+.. autoclass:: capymoa.base.MOARegressor
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
diff --git a/docs/api/regressor.rst b/docs/api/regressor.rst
index 79a64286..93c01821 100644
--- a/docs/api/regressor.rst
+++ b/docs/api/regressor.rst
@@ -1,10 +1,9 @@
 Regressors
 ==========
-Regressors implement the :class:`capymoa.learner.learners.Regressor` interface.
+Regressors implement the :class:`capymoa.base.Regressor` interface.
 
-.. automodule:: capymoa.learner.regressor
+.. automodule:: capymoa.regressor
    :members:
    :undoc-members:
    :show-inheritance:
-   :inherited-members:
diff --git a/docs/api/splitcriteria.rst b/docs/api/splitcriteria.rst
new file mode 100644
index 00000000..9416fd37
--- /dev/null
+++ b/docs/api/splitcriteria.rst
@@ -0,0 +1,10 @@
+Split Criteria
+==============
+Decision trees are built by splitting the data into groups based on a split
+criterion. The split criterion is a function that measures the quality of a
+split.
+
+.. automodule:: capymoa.splitcriteria
+   :members:
+   :undoc-members:
+   :inherited-members:
\ No newline at end of file
diff --git a/docs/api/ssl.rst b/docs/api/ssl.rst
index 752d76f6..2719af93 100644
--- a/docs/api/ssl.rst
+++ b/docs/api/ssl.rst
@@ -1,8 +1,8 @@
-Semi-Supervised Classifiers
-===========================
-Semi-Supervised classifiers implement the :class:`capymoa.learner.learners.ClassifierSSL` interface.
+Semi-Supervised Learners (SSL)
+==============================
+Semi-supervised classifiers implement the :class:`capymoa.base.ClassifierSSL` interface.
 
-.. automodule:: capymoa.learner.ssl.classifier
+.. automodule:: capymoa.ssl.classifier
    :members:
    :undoc-members:
    :show-inheritance:
diff --git a/docs/conf.py b/docs/conf.py
index 30d2a854..51933aa2 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -25,6 +25,18 @@
     "myst_parser",
 ]
 
+nitpick_ignore_regex = [
+    ('py:class', r'sklearn\..*'),
+    ('py:class', r'numpy\..*'),
+    ('py:class', r'pathlib\..*'),
+    ('py:class', r'abc\..*'),
+    ('py:class', r'moa\..*'),
+    ('py:class', r'com\..*'),
+    ('py:class', r'java\..*'),
+    ('py:class', r'org\..*'),
+    ('py:class', r'torch\..*'),
+]
 bibtex_bibfiles = ['references.bib']
 
 autoclass_content = 'class'
@@ -45,11 +57,13 @@
 # -- Options for HTML output -------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
 
-html_theme = "sphinx_book_theme"
+html_theme = "pydata_sphinx_theme"
 html_static_path = ['_static']
 
 # Setup symbolic links for notebooks
+python_maximum_signature_line_length = 88
+
 notebooks = Path("../notebooks")
 notebook_doc_source = Path("notebooks")
 if not notebook_doc_source.exists():
diff --git a/docs/README.md b/docs/contributing/docs.md
similarity index 100%
rename from docs/README.md
rename to docs/contributing/docs.md
diff --git a/docs/contributing/index.rst b/docs/contributing/index.rst
new file mode 100644
index 00000000..8b552ca7
--- /dev/null
+++ b/docs/contributing/index.rst
@@ -0,0 +1,10 @@
+Contributing
+============
+This part of the documentation is for developers and contributors.
+
+.. toctree::
+   :maxdepth: 2
+
+   learners
+   tests
+   docs
diff --git a/docs/contributing/learners.md b/docs/contributing/learners.md
new file mode 100644
index 00000000..29cc5628
--- /dev/null
+++ b/docs/contributing/learners.md
@@ -0,0 +1,76 @@
+# Adding Learners
+This document describes adding a new classifier, regressor, or
+other learner to CapyMOA. Before doing this, you should have read the
+[installation guide](../installation.md) to set up your development environment.
+
+## Where does my new learner go?
+You should add your new learner to the appropriate directory:
+- Classifiers go in `src/capymoa/classifier`.
+- Regressors go in `src/capymoa/regressor`.
+- Semi-supervised classifiers go in `src/capymoa/ssl/classifier`.
+
+Each standalone learner should be in its own file, prefixed with `_` to indicate that it is not meant to be imported directly. Instead, learners are imported by an `__init__.py` file. The `__init__.py` file is a special file that tells Python to treat the directory as a package.
+
+For example, to add a new classifier class called `MyNewLearner`, you should implement it in `src/capymoa/classifier/_my_new_learner.py` and add it to the `src/capymoa/classifier/__init__.py` file. The `__init__.py` will look like this:
+```python
+from ._my_new_learner import MyNewLearner
+...
+__all__ = [
+    'MyNewLearner',
+    ...
+]
+```
+
+The prefix and init files allow users to import all classifiers, regressors,
+or semi-supervised classifiers from one package while splitting the code into multiple files. You can, for example, import your new learner with the following:
+```python
+from capymoa.classifier import MyNewLearner
+```
+
+## What does a learner implement?
+
+A learner should implement the appropriate interface:
+* `capymoa.base.Classifier` for classifiers.
+* `capymoa.base.Regressor` for regressors.
+* `capymoa.base.ClassifierSSL` for semi-supervised classifiers.
+
+If your method is a wrapper around a MOA learner, you should instead use the
+appropriate base class (see the sketch after this list):
+* `capymoa.base.MOAClassifier` for classifiers.
+* `capymoa.base.MOARegressor` for regressors.
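+
+As an illustrative sketch (not an actual CapyMOA learner), a minimal
+MOA-wrapping classifier only needs to hand a MOA object to the base class and
+override `__str__`; the import alias below is an assumption that follows the
+conventions used elsewhere in the codebase:
+```python
+from capymoa.base import MOAClassifier
+from moa.classifiers.bayes import NaiveBayes as _MOA_NaiveBayes
+
+
+class MyNewLearner(MOAClassifier):
+    """A hypothetical learner that wraps MOA's NaiveBayes."""
+
+    def __init__(self, schema=None, random_seed=1):
+        # The base class handles training and prediction via the MOA object.
+        super().__init__(
+            schema=schema, random_seed=random_seed, moa_learner=_MOA_NaiveBayes()
+        )
+
+    def __str__(self):
+        # Overrides the default class name reported by MOA.
+        return "MyNewLearner"
+```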
+
+## How do I test my new learner?
+You should add a test to ensure your learner achieves, and continues to achieve,
+the expected performance in future versions. CapyMOA provides parametrized
+tests for classifiers, regressors, and semi-supervised classifiers. You should
+not need to write any new test code. Instead, you should add your test's
+parameters to the appropriate test file:
+- `tests/test_classifiers.py` for classifiers.
+- `tests/test_regressors.py` for regressors.
+- `tests/test_ssl_classifiers.py` for semi-supervised classifiers.
+
+To run your tests, use the following command:
+```bash
+python -m pytest -k MyNewLearner
+```
+The `-k MyNewLearner` flag tells PyTest to run only the tests containing `MyNewLearner` in the test ID.
+
+* If you want to add documented example usage of your learner, you can add doctests.
+See the [testing guide](tests.md) for more information.
+
+* If you need custom test code for your learner, you can add a new test file in
+`tests`.
+
+## How do I document my new learner?
+You should add a docstring to your learner that describes the learner and its
+parameters. The docstring should be in the Sphinx format. Check the
+[documentation guide](docs.md) for more information and an example.
+
+## How to debug failed GitHub Actions?
+Before submitting your pull request, you may wish to run all tests to
+ensure your changes will succeed in GitHub Actions. You can run all tests with:
+```bash
+invoke test
+```
+If you run into issues with GitHub Actions failing to build the documentation,
+follow the instructions in the [documentation guide](docs.md) to build the
+documentation locally. The documentation build settings are intentionally
+strict to ensure the documentation builds correctly.
diff --git a/docs/testing.md b/docs/contributing/tests.md
similarity index 95%
rename from docs/testing.md
rename to docs/contributing/tests.md
index add6b1fd..1eacf348 100644
--- a/docs/testing.md
+++ b/docs/contributing/tests.md
@@ -1,6 +1,6 @@
 # Adding Tests
 Ensure you have installed the development dependencies by following the instructions
-in the [installation guide](installation.md). To run all tests, use the following command:
+in the [installation guide](../installation.md). To run all tests, use the following command:
 ```bash
 invoke test
 ```
diff --git a/docs/index.rst b/docs/index.rst
index a1b6f0a5..2a40b3f1 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -33,7 +33,7 @@ and modules.
 .. toctree::
    :maxdepth: 2
 
-   api/api
+   api/index
 
 Contributing
 ------------
@@ -41,10 +41,8 @@ This part of the documentation is for developers and contributors.
 .. toctree::
    :maxdepth: 2
-   :caption: Contributing
 
-   testing
-   README
+   contributing/index
 
 Indices and tables
 ==================
diff --git a/invoke.yml b/invoke.yml
index 7421975d..913af84c 100644
--- a/invoke.yml
+++ b/invoke.yml
@@ -5,8 +5,14 @@ moa_url: "https://homepages.ecs.vuw.ac.nz/~antonlee/capymoa/versions/240412_moa.
 # What notebooks to skip when running them as tests.
 test_skip_notebooks:
-  - notebooks/04_drift_streams.ipynb
-  - notebooks/02_learners_api_examples.ipynb
-  - notebooks/Basic_Classification_Examples.ipynb
   - notebooks/00_getting_started.ipynb
+  - notebooks/01_evaluation_and_data_reading.ipynb
+  - notebooks/02_learners_api_examples.ipynb
   - notebooks/03_using_sklearn_pytorch.ipynb
+  - notebooks/04_drift_streams.ipynb
+  - notebooks/Basic_Classification_Examples.ipynb
+  - notebooks/Creating_new_classifier.ipynb
+  - notebooks/Data_Reading.ipynb
+  - notebooks/Preprocessing.ipynb
+  - notebooks/SSL_example.ipynb
+
diff --git a/pyproject.toml b/pyproject.toml
index 805811b0..30f2ba08 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,14 +33,15 @@ dev=[
    "jupyter",
    "nbmake",
    "pytest-xdist",
-    "invoke"
+    "invoke",
+    "wget"
 ]
 doc=[
    # Documentation generator
    "sphinx",
    # Theme for the documentation
-    "sphinx-book-theme",
+    "pydata-sphinx-theme",
    # Allows to include Jupyter notebooks in the documentation
    "sphinx-autobuild",
    # Allows to include Jupyter notebooks in the documentation
diff --git a/src/capymoa/__init__.py b/src/capymoa/__init__.py
index 4f3a3a77..757193c7 100644
--- a/src/capymoa/__init__.py
+++ b/src/capymoa/__init__.py
@@ -1,6 +1,4 @@
 from .prepare_jpype import _start_jpype
-
+# It is important that this is called before importing any other module
 _start_jpype()
-
-"""Whenever capymoa is imported, start jpype.
-"""
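+
+# A sketch of why this ordering matters (illustrative): once the JVM is up,
+# Java-backed modules resolve on import, e.g.
+#   import capymoa                                          # boots the JVM via jpype
+#   from moa.classifiers.meta import AdaptiveRandomForest   # now importable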
diff --git a/src/capymoa/_utils.py b/src/capymoa/_utils.py
index 0a5f9609..e663904b 100644
--- a/src/capymoa/_utils.py
+++ b/src/capymoa/_utils.py
@@ -37,7 +37,7 @@ def _get_moa_creation_CLI(moa_object):
    >>> from moa.streams import ConceptDriftStream
    ...
-    >>> stream = ConceptDriftStream()
+    >>> stream = ConceptDriftStream()
    >>> _get_moa_creation_CLI(stream)
    'streams.ConceptDriftStream'
    """
diff --git a/src/capymoa/learner/learners.py b/src/capymoa/base.py
similarity index 97%
rename from src/capymoa/learner/learners.py
rename to src/capymoa/base.py
index b368e6db..1fa86019 100644
--- a/src/capymoa/learner/learners.py
+++ b/src/capymoa/base.py
@@ -5,9 +5,8 @@
 from moa.classifiers import Classifier as MOA_Classifier_Interface
 from moa.core import Utils
 
-from capymoa.stream.instance import (Instance, LabeledInstance,
-                                     RegressionInstance)
-from capymoa.stream.stream import Schema
+from capymoa.instance import Instance, LabeledInstance, RegressionInstance
+from capymoa.stream._stream import Schema
 from capymoa.type_alias import LabelIndex, LabelProbabilities, TargetValue
 
 ##############################################################
@@ -153,7 +152,9 @@ def train(self, instance):
        self.moa_learner.trainOnInstance(instance.java_instance)
 
    def predict(self, instance):
-        return Utils.maxIndex(self.moa_learner.getVotesForInstance(instance.java_instance))
+        return Utils.maxIndex(
+            self.moa_learner.getVotesForInstance(instance.java_instance)
+        )
 
    def predict_proba(self, instance):
        return self.moa_learner.getVotesForInstance(instance.java_instance)
diff --git a/src/capymoa/classifier/__init__.py b/src/capymoa/classifier/__init__.py
new file mode 100644
index 00000000..3fd5388d
--- /dev/null
+++ b/src/capymoa/classifier/__init__.py
@@ -0,0 +1,15 @@
+from ._adaptive_random_forest import AdaptiveRandomForest
+from ._efdt import EFDT
+from ._hoeffding_tree import HoeffdingTree
+from ._naive_bayes import NaiveBayes
+from ._online_bagging import OnlineBagging
+from ._passive_aggressive_classifier import PassiveAggressiveClassifier
+
+__all__ = [
+    "AdaptiveRandomForest",
+    "EFDT",
+    "HoeffdingTree",
+    "NaiveBayes",
+    "OnlineBagging",
+    "PassiveAggressiveClassifier",
+]
diff --git a/src/capymoa/learner/classifier/classifiers.py b/src/capymoa/classifier/_adaptive_random_forest.py
similarity index 71%
rename from src/capymoa/learner/classifier/classifiers.py
rename to src/capymoa/classifier/_adaptive_random_forest.py
index 8fe9cf95..50ed0549 100644
--- a/src/capymoa/learner/classifier/classifiers.py
+++ b/src/capymoa/classifier/_adaptive_random_forest.py
@@ -1,18 +1,9 @@
-# Library imports
-from capymoa.learner.learners import (
+from capymoa.base import (
     MOAClassifier,
-    MOARegressor,
-    _get_moa_creation_CLI,
     _extract_moa_learner_CLI,
 )
 
-# MOA/Java imports
-from moa.classifiers import Classifier
-from moa.classifiers.meta import AdaptiveRandomForest as MOA_AdaptiveRandomForest
-from moa.classifiers.meta import OzaBag as MOA_OzaBag
-from moa.classifiers.meta import (
-    AdaptiveRandomForestRegressor as MOA_AdaptiveRandomForestRegressor,
-)
+from moa.classifiers.meta import AdaptiveRandomForest as _MOA_AdaptiveRandomForest
 
 # TODO: replace the m_features_mode logic such that we can infer from m_features_per_tree_size, e.g.
if value is double between 0.0 and 1.0 = percentage @@ -85,29 +76,5 @@ def __init__( schema=schema, CLI=CLI, random_seed=random_seed, - moa_learner=MOA_AdaptiveRandomForest(), - ) - - -class OnlineBagging(MOAClassifier): - def __init__( - self, schema=None, CLI=None, random_seed=1, base_learner=None, ensemble_size=100 - ): - # This method basically configures the CLI, object creation is delegated to MOAClassifier (the super class, through super().__init___())) - # Initialize instance attributes with default values, if the CLI was not set. - if CLI is None: - self.base_learner = ( - "trees.HoeffdingTree" - if base_learner is None - else _extract_moa_learner_CLI(base_learner) - ) - self.ensemble_size = ensemble_size - CLI = f"-l {self.base_learner} -s {self.ensemble_size}" - - super().__init__( - schema=schema, CLI=CLI, random_seed=random_seed, moa_learner=MOA_OzaBag() + moa_learner=_MOA_AdaptiveRandomForest(), ) - - def __str__(self): - # Overrides the default class name from MOA (OzaBag) - return "OnlineBagging" diff --git a/src/capymoa/learner/classifier/efdt.py b/src/capymoa/classifier/_efdt.py similarity index 97% rename from src/capymoa/learner/classifier/efdt.py rename to src/capymoa/classifier/_efdt.py index 28eb13b4..ba399790 100644 --- a/src/capymoa/learner/classifier/efdt.py +++ b/src/capymoa/classifier/_efdt.py @@ -1,8 +1,8 @@ from __future__ import annotations from typing import Union -from capymoa.learner import MOAClassifier -from capymoa.learner.splitcriteria import SplitCriterion, _split_criterion_to_cli_str +from capymoa.base import MOAClassifier +from capymoa.splitcriteria import SplitCriterion, _split_criterion_to_cli_str from capymoa.stream import Schema from capymoa._utils import build_cli_str_from_mapping_and_locals diff --git a/src/capymoa/learner/classifier/hoeffding_tree.py b/src/capymoa/classifier/_hoeffding_tree.py similarity index 96% rename from src/capymoa/learner/classifier/hoeffding_tree.py rename to src/capymoa/classifier/_hoeffding_tree.py index 3e7656f8..0fc0fe5b 100644 --- a/src/capymoa/learner/classifier/hoeffding_tree.py +++ b/src/capymoa/classifier/_hoeffding_tree.py @@ -1,8 +1,8 @@ from __future__ import annotations from typing import Union -from capymoa.learner import MOAClassifier -from capymoa.learner.splitcriteria import SplitCriterion, _split_criterion_to_cli_str +from capymoa.base import MOAClassifier +from capymoa.splitcriteria import SplitCriterion, _split_criterion_to_cli_str from capymoa.stream import Schema from capymoa._utils import build_cli_str_from_mapping_and_locals diff --git a/src/capymoa/learner/classifier/naive_bayes.py b/src/capymoa/classifier/_naive_bayes.py similarity index 74% rename from src/capymoa/learner/classifier/naive_bayes.py rename to src/capymoa/classifier/_naive_bayes.py index 4320a4a0..f18bf68f 100644 --- a/src/capymoa/learner/classifier/naive_bayes.py +++ b/src/capymoa/classifier/_naive_bayes.py @@ -1,7 +1,7 @@ from __future__ import annotations import typing -from capymoa.learner import MOAClassifier +from capymoa.base import MOAClassifier from capymoa.stream import Schema import moa.classifiers.bayes as moa_bayes @@ -12,18 +12,14 @@ class NaiveBayes(MOAClassifier): Performs classic Bayesian prediction while making the naive assumption that all inputs are independent. Naive Bayes is a classifier algorithm known for its simplicity and low computational cost. Given n different classes, the trained Naive Bayes classifier predicts, for every unlabeled instance I, the class C to which it belongs with high accuracy. 
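+
+    A minimal usage sketch (illustrative; assumes the bundled ElectricityTiny
+    dataset is available for download):
+
+    >>> from capymoa.datasets import ElectricityTiny
+    >>> from capymoa.classifier import NaiveBayes
+    >>> stream = ElectricityTiny()
+    >>> learner = NaiveBayes(schema=stream.get_schema())
+    >>> learner.train(stream.next_instance())
+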
:param schema: The schema of the stream, defaults to None. - :type schema: object, optional :param random_seed: The random seed passed to the MOA learner, defaults to 0. - :type random_seed: int, optional """ - def __init__(self, schema: typing.Union[Schema, None] = None, random_seed: int = 0): - super(NaiveBayes, self).__init__(moa_learner=moa_bayes.NaiveBayes(), - schema=schema, - random_seed=random_seed) + super(NaiveBayes, self).__init__( + moa_learner=moa_bayes.NaiveBayes(), schema=schema, random_seed=random_seed + ) def __str__(self): # Overrides the default class name from MOA (OzaBag) return "Naive Bayes CapyMOA Classifier" - diff --git a/src/capymoa/classifier/_online_bagging.py b/src/capymoa/classifier/_online_bagging.py new file mode 100644 index 00000000..f27c1b34 --- /dev/null +++ b/src/capymoa/classifier/_online_bagging.py @@ -0,0 +1,29 @@ +from capymoa.base import ( + MOAClassifier, + _extract_moa_learner_CLI, +) + +from moa.classifiers.meta import OzaBag as _MOA_OzaBag + +class OnlineBagging(MOAClassifier): + def __init__( + self, schema=None, CLI=None, random_seed=1, base_learner=None, ensemble_size=100 + ): + # This method basically configures the CLI, object creation is delegated to MOAClassifier (the super class, through super().__init___())) + # Initialize instance attributes with default values, if the CLI was not set. + if CLI is None: + self.base_learner = ( + "trees.HoeffdingTree" + if base_learner is None + else _extract_moa_learner_CLI(base_learner) + ) + self.ensemble_size = ensemble_size + CLI = f"-l {self.base_learner} -s {self.ensemble_size}" + + super().__init__( + schema=schema, CLI=CLI, random_seed=random_seed, moa_learner=_MOA_OzaBag() + ) + + def __str__(self): + # Overrides the default class name from MOA (OzaBag) + return "OnlineBagging" diff --git a/src/capymoa/learner/classifier/sklearn.py b/src/capymoa/classifier/_passive_aggressive_classifier.py similarity index 95% rename from src/capymoa/learner/classifier/sklearn.py rename to src/capymoa/classifier/_passive_aggressive_classifier.py index 07acb964..30826e63 100644 --- a/src/capymoa/learner/classifier/sklearn.py +++ b/src/capymoa/classifier/_passive_aggressive_classifier.py @@ -1,10 +1,10 @@ from typing import Optional, Dict, Union, Literal -from capymoa.learner.learners import Classifier +from capymoa.base import Classifier from sklearn.linear_model import ( PassiveAggressiveClassifier as skPassiveAggressiveClassifier, ) -from capymoa.stream.instance import Instance, LabeledInstance -from capymoa.stream.stream import Schema +from capymoa.instance import Instance, LabeledInstance +from capymoa.stream._stream import Schema from capymoa.type_alias import LabelIndex, LabelProbabilities import numpy as np @@ -21,7 +21,7 @@ class PassiveAggressiveClassifier(Classifier): `_ >>> from capymoa.datasets import ElectricityTiny - >>> from capymoa.learner.classifier import PassiveAggressiveClassifier + >>> from capymoa.classifier import PassiveAggressiveClassifier >>> from capymoa.evaluation import prequential_evaluation >>> stream = ElectricityTiny() >>> schema = stream.get_schema() diff --git a/src/capymoa/datasets/__init__.py b/src/capymoa/datasets/__init__.py index c188cb33..cc1ee4cc 100644 --- a/src/capymoa/datasets/__init__.py +++ b/src/capymoa/datasets/__init__.py @@ -1,4 +1,4 @@ -from .datasets import ( +from ._datasets import ( CovtFD, Covtype, RBFm_100k, @@ -6,11 +6,9 @@ Hyper100k, Sensor, ElectricityTiny, - Fried -) -from .downloader import ( - get_download_dir + Fried, ) +from .downloader import 
get_download_dir __all__ = [ "Hyper100k", @@ -21,5 +19,5 @@ "Sensor", "ElectricityTiny", "Fried", - "get_download_dir" + "get_download_dir", ] diff --git a/src/capymoa/datasets/datasets.py b/src/capymoa/datasets/_datasets.py similarity index 99% rename from src/capymoa/datasets/datasets.py rename to src/capymoa/datasets/_datasets.py index 1d7d90e8..e006ca4e 100644 --- a/src/capymoa/datasets/datasets.py +++ b/src/capymoa/datasets/_datasets.py @@ -51,6 +51,7 @@ class ElectricityTiny(DownloadARFFGzip): filename = "electricity_tiny.arff" remote_url = ROOT_URL + class CovtypeTiny(DownloadARFFGzip): """A truncated version of the Covtype dataset with 1000 instances.""" diff --git a/src/capymoa/datasets/downloader.py b/src/capymoa/datasets/downloader.py index 83e5d94b..07c2d8e9 100644 --- a/src/capymoa/datasets/downloader.py +++ b/src/capymoa/datasets/downloader.py @@ -5,12 +5,12 @@ from pathlib import Path from tempfile import TemporaryDirectory from typing import Any, Optional -import shutil import wget from moa.streams import ArffFileStream -from capymoa.stream.stream import Stream +from capymoa.stream._stream import Stream + def get_download_dir(): """A default directory to store datasets in. Defaults to `./data` when the @@ -18,6 +18,7 @@ def get_download_dir(): """ return environ.get("CAPYMOA_DATASETS_DIR", "data") + class DownloadableDataset(ABC, Stream): filename: str = None """Name of the dataset in the capymoa dataset directory""" @@ -54,7 +55,7 @@ def _resolve_dataset(self, auto_download: bool, directory: Path): ) return stream - + def get_path(self): return self._path diff --git a/src/capymoa/evaluation/__init__.py b/src/capymoa/evaluation/__init__.py index 80e9dbe2..d87cba53 100644 --- a/src/capymoa/evaluation/__init__.py +++ b/src/capymoa/evaluation/__init__.py @@ -10,7 +10,7 @@ RegressionEvaluator, ) -__ALL__ = [ +__all__ = [ "prequential_evaluation", "prequential_SSL_evaluation", "test_then_train_evaluation", diff --git a/src/capymoa/evaluation/evaluation.py b/src/capymoa/evaluation/evaluation.py index 2732e536..a989e0ea 100644 --- a/src/capymoa/evaluation/evaluation.py +++ b/src/capymoa/evaluation/evaluation.py @@ -5,8 +5,8 @@ import warnings import random -from capymoa.stream.stream import Schema, Stream -from capymoa.learner.learners import ClassifierSSL +from capymoa.stream import Schema, Stream +from capymoa.base import ClassifierSSL from com.yahoo.labs.samoa.instances import Instances, Attribute, DenseInstance from moa.core import InstanceExample @@ -106,13 +106,19 @@ def update(self, y_target_index: int, y_pred_index: Optional[int]): :raises ValueError: If the values are not valid indexes in the schema. """ if not isinstance(y_target_index, (np.integer, int)): - raise ValueError(f"y_target_index must be an integer, not {type(y_target_index)}") + raise ValueError( + f"y_target_index must be an integer, not {type(y_target_index)}" + ) if not (y_pred_index is None or isinstance(y_pred_index, (np.integer, int))): - raise ValueError(f"y_pred_index must be an integer, not {type(y_pred_index)}") + raise ValueError( + f"y_pred_index must be an integer, not {type(y_pred_index)}" + ) - # If the prediction is invalid, it could mean the classifier is abstaining from making a prediction; + # If the prediction is invalid, it could mean the classifier is abstaining from making a prediction; # thus, it is allowed to continue (unless parameterized differently). 
- if y_pred_index is not None and not self.schema.is_y_index_in_range(y_pred_index): + if y_pred_index is not None and not self.schema.is_y_index_in_range( + y_pred_index + ): if self.allow_abstaining: y_pred_index = None else: @@ -131,7 +137,7 @@ def update(self, y_target_index: int, y_pred_index: Optional[int]): # if y_pred is None, it indicates the learner did not produce a prediction for this instance, # count as an error if y_pred_index is None: - # TODO: I'm not sure what the actual logic should be here, but for + # TODO: I'm not sure what the actual logic should be here, but for # now I'm just setting the prediction to the first class since this # does not break the tests. y_pred_index = 0 @@ -153,9 +159,7 @@ def update(self, y_target_index: int, y_pred_index: Optional[int]): # If the window_size is set, then check if it should record the intermediary results. if self.window_size is not None and self.instances_seen % self.window_size == 0: - performance_values = ( - self.metrics() - ) + performance_values = self.metrics() self.result_windows.append(performance_values) def metrics_header(self): @@ -172,7 +176,10 @@ def metrics(self): ] def metrics_dict(self): - return {header: value for header, value in zip(self.metrics_header(), self.metrics())} + return { + header: value + for header, value in zip(self.metrics_header(), self.metrics()) + } def metrics_per_window(self): return pd.DataFrame(self.result_windows, columns=self.metrics_header()) @@ -435,8 +442,8 @@ def test_then_train_evaluation( "cumulative": evaluator, "wallclock": elapsed_wallclock_time, "cpu_time": elapsed_cpu_time, - "max_instances":max_instances, - "stream":stream, + "max_instances": max_instances, + "stream": stream, } return results @@ -713,8 +720,8 @@ def prequential_SSL_evaluation( "windowed": evaluator_windowed, "wallclock": elapsed_wallclock_time, "cpu_time": elapsed_cpu_time, - "max_instances":max_instances, - "stream":stream, + "max_instances": max_instances, + "stream": stream, "unlabeled": unlabeled_counter, "unlabeled_ratio": unlabeled_counter / instancesProcessed, } @@ -788,8 +795,8 @@ def _test_then_train_evaluation_fast( "cumulative": evaluator, "wallclock": elapsed_wallclock_time, "cpu_time": elapsed_cpu_time, - "max_instances":max_instances, - "stream":stream, + "max_instances": max_instances, + "stream": stream, } return results @@ -855,8 +862,8 @@ def _prequential_evaluation_fast(stream, learner, max_instances=None, window_siz "windowed": windowed_evaluator, "wallclock": elapsed_wallclock_time, "cpu_time": elapsed_cpu_time, - "max_instances":max_instances, - "stream":stream, + "max_instances": max_instances, + "stream": stream, } return results @@ -942,8 +949,8 @@ def test_then_train_SSL_evaluation_fast( "cumulative": evaluator, "wallclock": elapsed_wallclock_time, "cpu_time": elapsed_cpu_time, - "max_instances":max_instances, - "stream":stream, + "max_instances": max_instances, + "stream": stream, } for measure in moa_results.otherMeasurements.keySet(): @@ -1019,8 +1026,8 @@ def prequential_SSL_evaluation_fast( "windowed": windowed_evaluator, "wallclock": elapsed_wallclock_time, "cpu_time": elapsed_cpu_time, - "max_instances":max_instances, - "stream":stream, + "max_instances": max_instances, + "stream": stream, "other_measurements": dict(moa_results.otherMeasurements), } @@ -1087,7 +1094,6 @@ def prequential_evaluation_multiple_learners( else: y = instance.y_value - results[learner_name]["cumulative"].update(y, prediction) if window_size is not None: results[learner_name]["windowed"].update(y, 
prediction) diff --git a/src/capymoa/evaluation/visualization.py b/src/capymoa/evaluation/visualization.py index ce0b9966..a49b849c 100644 --- a/src/capymoa/evaluation/visualization.py +++ b/src/capymoa/evaluation/visualization.py @@ -4,10 +4,18 @@ from com.yahoo.labs.samoa.instances import InstancesHeader -def plot_windowed_results(*results, metric="classifications correct (percent)", - plot_title=None, xlabel=None, ylabel=None, - figure_path="./", figure_name=None, save_only=True - ): +def plot_windowed_results( + *results, + metric="classifications correct (percent)", + plot_title=None, + xlabel=None, + ylabel=None, + figure_path="./", + figure_name=None, + save_only=True, + # , + # drift_locations=None, gradual_drift_window_lengths=None +): """ Plot a comparison of values from multiple evaluators based on a selected column using line plots. It assumes the results contain windowed results ('windowed') which often originate from metrics_per_window() @@ -17,34 +25,36 @@ def plot_windowed_results(*results, metric="classifications correct (percent)", """ dfs = [] labels = [] - - num_instances = results[0].get('max_instances', None) - stream = results[0].get('stream', None) + + num_instances = results[0].get("max_instances", None) + stream = results[0].get("stream", None) if num_instances is not None: - window_size = results[0]['windowed'].window_size - num_windows = results[0]['windowed'].metrics_per_window().shape[0] + window_size = results[0]["windowed"].window_size + num_windows = results[0]["windowed"].metrics_per_window().shape[0] x_values = [] - for i in range(1, num_windows+1): + for i in range(1, num_windows + 1): x_values.append(i * window_size) # print(f'x_values: {x_values}') # Check if the given metric exists in all DataFrames for result in results: - df = result['windowed'].metrics_per_window() + df = result["windowed"].metrics_per_window() if metric not in df.columns: - print(f"Column '{metric}' not found in metrics DataFrame for {result['learner']}. Skipping.") + print( + f"Column '{metric}' not found in metrics DataFrame for {result['learner']}. Skipping." 
+ ) else: dfs.append(df) - if 'experiment_id' in result: - labels.append(result['experiment_id']) + if "experiment_id" in result: + labels.append(result["experiment_id"]) else: - labels.append(result['learner']) - + labels.append(result["learner"]) + if not dfs: print("No valid DataFrames to plot.") return - + # Create a figure plt.figure(figsize=(12, 5)) @@ -52,10 +62,23 @@ def plot_windowed_results(*results, metric="classifications correct (percent)", for i, df in enumerate(dfs): # print(f'df.index: {df.index}') if num_instances is not None: - plt.plot(x_values, df[metric], label=labels[i], marker='o', linestyle='-', markersize=5) + plt.plot( + x_values, + df[metric], + label=labels[i], + marker="o", + linestyle="-", + markersize=5, + ) else: - plt.plot(df.index, df[metric], label=labels[i], marker='o', linestyle='-', markersize=5) - + plt.plot( + df.index, + df[metric], + label=labels[i], + marker="o", + linestyle="-", + markersize=5, + ) if stream is not None and isinstance(stream, DriftStream): drifts = stream.get_drifts() @@ -66,43 +89,52 @@ def plot_windowed_results(*results, metric="classifications correct (percent)", # Add vertical lines at drift locations if drift_locations: for location in drift_locations: - plt.axvline(location, color='red', linestyle='-') - + plt.axvline(location, color="red", linestyle="-") + # Add gradual drift windows as 70% transparent rectangles if gradual_drift_window_lengths: if not drift_locations: - print("Error: gradual_drift_window_lengths is provided, but drift_locations is not.") + print( + "Error: gradual_drift_window_lengths is provided, but drift_locations is not." + ) return - + if len(drift_locations) != len(gradual_drift_window_lengths): - print("Error: drift_locations and gradual_drift_window_lengths must have the same length.") + print( + "Error: drift_locations and gradual_drift_window_lengths must have the same length." + ) return - + for i in range(len(drift_locations)): location = drift_locations[i] window_length = gradual_drift_window_lengths[i] - + # Plot the 70% transparent rectangle - plt.axvspan(location - window_length / 2, location + window_length / 2, alpha=0.2, color='red') - + plt.axvspan( + location - window_length / 2, + location + window_length / 2, + alpha=0.2, + color="red", + ) + # Add labels and title - xlabel = xlabel if xlabel is not None else '# Instances' + xlabel = xlabel if xlabel is not None else "# Instances" plt.xlabel(xlabel) ylabel = ylabel if ylabel is not None else metric plt.ylabel(ylabel) plot_title = plot_title if plot_title is not None else metric plt.title(plot_title) - + # Add legend plt.legend() plt.grid(True) - + # Show the plot or save it to the specified path if save_only == False: plt.show() elif figure_path is not None: if figure_name is None: - figure_name = result['learner'] + "_" + ylabel.replace(' ', '') + figure_name = result["learner"] + "_" + ylabel.replace(" ", "") plt.savefig(figure_path + figure_name) diff --git a/src/capymoa/stream/instance.py b/src/capymoa/instance.py similarity index 97% rename from src/capymoa/stream/instance.py rename to src/capymoa/instance.py index f33b8091..d582919d 100644 --- a/src/capymoa/stream/instance.py +++ b/src/capymoa/instance.py @@ -62,7 +62,7 @@ def from_array(cls, schema: "Schema", instance: FeatureVector) -> "Instance": >>> from capymoa.stream import Schema ... - >>> from capymoa.stream.instance import Instance + >>> from capymoa.instance import Instance >>> import numpy as np >>> schema = Schema.from_custom( ... 
["f1", "f2"], @@ -146,7 +146,7 @@ class LabeledInstance(Instance): >>> from capymoa.datasets import ElectricityTiny ... - >>> from capymoa.stream.instance import LabeledInstance + >>> from capymoa.instance import LabeledInstance >>> stream = ElectricityTiny() >>> instance: LabeledInstance = stream.next_instance() >>> instance.y_label @@ -182,7 +182,7 @@ def from_array( >>> from capymoa.stream import Schema ... - >>> from capymoa.stream.instance import LabeledInstance + >>> from capymoa.instance import LabeledInstance >>> import numpy as np >>> schema = Schema.from_custom( ... ["f1", "f2"], @@ -253,7 +253,7 @@ class RegressionInstance(Instance): >>> from capymoa.datasets import Fried ... - >>> from capymoa.stream.instance import RegressionInstance + >>> from capymoa.instance import RegressionInstance >>> stream = Fried() >>> instance: RegressionInstance = stream.next_instance() >>> instance.y_value @@ -286,7 +286,7 @@ def from_array( >>> from capymoa.stream import Schema ... - >>> from capymoa.stream.instance import LabeledInstance + >>> from capymoa.instance import LabeledInstance >>> import numpy as np >>> schema = Schema.from_custom( ... ["f1", "f2"], diff --git a/src/capymoa/learner/__init__.py b/src/capymoa/learner/__init__.py deleted file mode 100644 index e8d2355e..00000000 --- a/src/capymoa/learner/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -from .learners import ( - Classifier, - MOAClassifier, - ClassifierSSL, - MOAClassifierSSL, - Regressor, - MOARegressor, - SKClassifier, -) - - -__ALL__ = [ - "Classifier", - "MOAClassifier", - "ClassifierSSL", - "MOAClassifierSSL", - "Regressor", - "MOARegressor", - "SKClassifier", -] diff --git a/src/capymoa/learner/classifier/__init__.py b/src/capymoa/learner/classifier/__init__.py deleted file mode 100644 index 51c8b531..00000000 --- a/src/capymoa/learner/classifier/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -from .classifiers import AdaptiveRandomForest, OnlineBagging, AdaptiveRandomForest -from .efdt import EFDT -from .sklearn import PassiveAggressiveClassifier -from .hoeffding_tree import HoeffdingTree -from .naive_bayes import NaiveBayes - -__all__ = [ - "AdaptiveRandomForest", - "OnlineBagging", - "AdaptiveRandomForest", - "EFDT", - "HoeffdingTree", - "NaiveBayes", - "PassiveAggressiveClassifier", -] diff --git a/src/capymoa/learner/regressor/__init__.py b/src/capymoa/learner/regressor/__init__.py deleted file mode 100644 index cead1fa7..00000000 --- a/src/capymoa/learner/regressor/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from .regressors import ( - KNNRegressor, - AdaptiveRandomForestRegressor, - FIMTDD, - ARFFIMTDD, - ORTO, - SOKNLBT, - SOKNL, -) - -__all__ = [ - "KNNRegressor", - "AdaptiveRandomForestRegressor", - "FIMTDD", - "ARFFIMTDD", - "ORTO", - "SOKNLBT", - "SOKNL", -] diff --git a/src/capymoa/learner/regressor/regressors.py b/src/capymoa/learner/regressor/regressors.py deleted file mode 100644 index d717ab48..00000000 --- a/src/capymoa/learner/regressor/regressors.py +++ /dev/null @@ -1,493 +0,0 @@ -# Library imports -from typing import Optional, Union - -from capymoa.learner.learners import ( - MOARegressor, -) - -from capymoa.learner.splitcriteria import SplitCriterion, _split_criterion_to_cli_str -from capymoa.stream.stream import Schema -from moa.classifiers.lazy import kNN as MOA_kNN -from moa.classifiers.meta import ( - AdaptiveRandomForestRegressor as MOA_AdaptiveRandomForestRegressor, - SelfOptimisingKNearestLeaves as MOA_SOKNL, -) -from moa.classifiers.trees import ( - FIMTDD as _MOA_FIMTDD, - ARFFIMTDD as 
_MOA_ARFFIMTDD, - ORTO as _MOA_ORTO, - SelfOptimisingBaseTree as _MOA_SelfOptimisingBaseTree, -) - - -######################## -######### TREES ######## -######################## -class FIMTDD(MOARegressor): - """Implementation of the FIMT-DD tree as described by Ikonomovska et al.""" - - def __init__( - self, - schema: Schema, - split_criterion: Union[SplitCriterion, str] = "VarianceReductionSplitCriterion", - grace_period: int = 200, - split_confidence: float = 1.0e-7, - tie_threshold: float = 0.05, - page_hinckley_alpha: float = 0.005, - page_hinckley_threshold: int = 50, - alternate_tree_fading_factor: float = 0.995, - alternate_tree_t_min: int = 150, - alternate_tree_time: int = 1500, - regression_tree: bool = False, - learning_ratio: float = 0.02, - learning_ratio_decay_factor: float = 0.001, - learning_ratio_const: bool = False, - random_seed: Optional[int] = None, - ) -> None: - """ - Construct FIMTDD. - - :param split_criterion: Split criterion to use. - :param grace_period: Number of instances a leaf should observe between split attempts. - :param split_confidence: Allowed error in split decision, values close to 0 will take long to decide. - :param tie_threshold: Threshold below which a split will be forced to break ties. - :param page_hinckley_alpha: Alpha value to use in the Page Hinckley change detection tests. - :param page_hinckley_threshold: Threshold value used in the Page Hinckley change detection tests. - :param alternate_tree_fading_factor: Fading factor used to decide if an alternate tree should replace an original. - :param alternate_tree_t_min: Tmin value used to decide if an alternate tree should replace an original. - :param alternate_tree_time: The number of instances used to decide if an alternate tree should be discarded. - :param regression_tree: Build a regression tree instead of a model tree. - :param learning_ratio: Learning ratio to used for training the Perceptrons in the leaves. - :param learning_ratio_decay_factor: Learning rate decay factor (not used when learning rate is constant). - :param learning_ratio_const: Keep learning rate constant instead of decaying. 
- """ - cli = [] - - cli.append(f"-s ({_split_criterion_to_cli_str(split_criterion)})") - cli.append(f"-g {grace_period}") - cli.append(f"-c {split_confidence}") - cli.append(f"-t {tie_threshold}") - cli.append(f"-a {page_hinckley_alpha}") - cli.append(f"-h {page_hinckley_threshold}") - cli.append(f"-f {alternate_tree_fading_factor}") - cli.append(f"-y {alternate_tree_t_min}") - cli.append(f"-u {alternate_tree_time}") - cli.append("-e") if regression_tree else None - cli.append(f"-l {learning_ratio}") - cli.append(f"-d {learning_ratio_decay_factor}") - cli.append("-p") if learning_ratio_const else None - - self.moa_learner = _MOA_FIMTDD() - - super().__init__( - schema=schema, - CLI=" ".join(cli), - random_seed=random_seed, - moa_learner=self.moa_learner, - ) - - -class ARFFIMTDD(MOARegressor): - """Modified Fast Incremental Model Tree with Drift Detection for basic - learner for ARF-Regas described by Ikonomovska et al.""" - - def __init__( - self, - schema: Schema, - subspace_size_size: int = 2, - split_criterion: Union[SplitCriterion, str] = "VarianceReductionSplitCriterion", - grace_period: int = 200, - split_confidence: float = 1.0e-7, - tie_threshold: float = 0.05, - page_hinckley_alpha: float = 0.005, - page_hinckley_threshold: int = 50, - alternate_tree_fading_factor: float = 0.995, - alternate_tree_t_min: int = 150, - alternate_tree_time: int = 1500, - learning_ratio: float = 0.02, - learning_ratio_decay_factor: float = 0.001, - learning_ratio_const: bool = False, - random_seed: Optional[int] = None, - ) -> None: - """ - Construct ARFFIMTDD. - - :param subspace_size_size: Number of features per subset for each node split. Negative values = #features - k - :param split_criterion: Split criterion to use. - :param grace_period: Number of instances a leaf should observe between split attempts. - :param split_confidence: Allowed error in split decision, values close to 0 will take long to decide. - :param tie_threshold: Threshold below which a split will be forced to break ties. - :param page_hinckley_alpha: Alpha value to use in the Page Hinckley change detection tests. - :param page_hinckley_threshold: Threshold value used in the Page Hinckley change detection tests. - :param alternate_tree_fading_factor: Fading factor used to decide if an alternate tree should replace an original. - :param alternate_tree_t_min: Tmin value used to decide if an alternate tree should replace an original. - :param alternate_tree_time: The number of instances used to decide if an alternate tree should be discarded. - :param learning_ratio: Learning ratio to used for training the Perceptrons in the leaves. - :param learning_ratio_decay_factor: Learning rate decay factor (not used when learning rate is constant). - :param learning_ratio_const: Keep learning rate constant instead of decaying. 
- """ - cli = [] - - cli.append(f"-k {subspace_size_size}") - cli.append(f"-s ({_split_criterion_to_cli_str(split_criterion)})") - cli.append(f"-g {grace_period}") - cli.append(f"-c {split_confidence}") - cli.append(f"-t {tie_threshold}") - cli.append(f"-a {page_hinckley_alpha}") - cli.append(f"-h {page_hinckley_threshold}") - cli.append(f"-f {alternate_tree_fading_factor}") - cli.append(f"-y {alternate_tree_t_min}") - cli.append(f"-u {alternate_tree_time}") - cli.append(f"-l {learning_ratio}") - cli.append(f"-d {learning_ratio_decay_factor}") - cli.append("-p") if learning_ratio_const else None - - self.moa_learner = _MOA_ARFFIMTDD() - - super().__init__( - schema=schema, - CLI=" ".join(cli), - random_seed=random_seed, - moa_learner=self.moa_learner, - ) - - -class ORTO(MOARegressor): - """Implementation of the ORTO tree as described by Ikonomovska et al.""" - - def __init__( - self, - schema: Schema, - max_trees: int = 10, - max_option_level: int = 10, - option_decay_factor: float = 0.9, - option_fading_factor: float = 0.9995, - split_criterion: Union[SplitCriterion, str] = "VarianceReductionSplitCriterion", - grace_period: int = 200, - split_confidence: float = 1.0e-7, - tie_threshold: float = 0.05, - page_hinckley_alpha: float = 0.005, - page_hinckley_threshold: int = 50, - alternate_tree_fading_factor: float = 0.995, - alternate_tree_t_min: int = 150, - alternate_tree_time: int = 1500, - regression_tree: bool = False, - learning_ratio: float = 0.02, - learning_ratio_decay_factor: float = 0.001, - learning_ratio_const: bool = False, - random_seed: Optional[int] = None, - ) -> None: - """ - Construct ORTO. - - :param max_trees: The maximum number of trees contained in the option tree. - :param max_option_level: The maximal depth at which option nodes can be created. - :param option_decay_factor: The option decay factor that determines how many options can be selected at a given level. - :param option_fading_factor: The fading factor used for comparing subtrees of an option node. - :param split_criterion: Split criterion to use. - :param grace_period: Number of instances a leaf should observe between split attempts. - :param split_confidence: Allowed error in split decision, values close to 0 will take long to decide. - :param tie_threshold: Threshold below which a split will be forced to break ties. - :param page_hinckley_alpha: Alpha value to use in the Page Hinckley change detection tests. - :param page_hinckley_threshold: Threshold value used in the Page Hinckley change detection tests. - :param alternate_tree_fading_factor: Fading factor used to decide if an alternate tree should replace an original. - :param alternate_tree_t_min: Tmin value used to decide if an alternate tree should replace an original. - :param alternate_tree_time: The number of instances used to decide if an alternate tree should be discarded. - :param regression_tree: Build a regression tree instead of a model tree. - :param learning_ratio: Learning ratio to used for training the Perceptrons in the leaves. - :param learning_ratio_decay_factor: Learning rate decay factor (not used when learning rate is constant). - :param learning_ratio_const: Keep learning rate constant instead of decaying. 
- """ - cli = [] - - cli.append(f"-m {max_trees}") - cli.append(f"-x {max_option_level}") - cli.append(f"-z {option_decay_factor}") - cli.append(f"-q {option_fading_factor}") - cli.append(f"-s ({_split_criterion_to_cli_str(split_criterion)})") - cli.append(f"-g {grace_period}") - cli.append(f"-c {split_confidence}") - cli.append(f"-t {tie_threshold}") - cli.append(f"-a {page_hinckley_alpha}") - cli.append(f"-h {page_hinckley_threshold}") - cli.append(f"-f {alternate_tree_fading_factor}") - cli.append(f"-y {alternate_tree_t_min}") - cli.append(f"-u {alternate_tree_time}") - cli.append("-e") if regression_tree else None - cli.append(f"-l {learning_ratio}") - cli.append(f"-d {learning_ratio_decay_factor}") - cli.append("-p") if learning_ratio_const else None - - self.moa_learner = _MOA_ORTO() - - super().__init__( - schema=schema, - CLI=" ".join(cli), - random_seed=random_seed, - moa_learner=self.moa_learner, - ) - - -class SOKNLBT(MOARegressor): - """Implementation of the FIMT-DD tree as described by Ikonomovska et al.""" - - def __init__( - self, - schema: Schema, - subspace_size_size: int = 2, - split_criterion: Union[SplitCriterion, str] = "VarianceReductionSplitCriterion", - grace_period: int = 200, - split_confidence: float = 1.0e-7, - tie_threshold: float = 0.05, - page_hinckley_alpha: float = 0.005, - page_hinckley_threshold: int = 50, - alternate_tree_fading_factor: float = 0.995, - alternate_tree_t_min: int = 150, - alternate_tree_time: int = 1500, - learning_ratio: float = 0.02, - learning_ratio_decay_factor: float = 0.001, - learning_ratio_const: bool = False, - random_seed: Optional[int] = None, - ) -> None: - """ - Construct SelfOptimisingBaseTree. - - :param subspace_size_size: Number of features per subset for each node split. Negative values = #features - k - :param split_criterion: Split criterion to use. - :param grace_period: Number of instances a leaf should observe between split attempts. - :param split_confidence: Allowed error in split decision, values close to 0 will take long to decide. - :param tie_threshold: Threshold below which a split will be forced to break ties. - :param page_hinckley_alpha: Alpha value to use in the Page Hinckley change detection tests. - :param page_hinckley_threshold: Threshold value used in the Page Hinckley change detection tests. - :param alternate_tree_fading_factor: Fading factor used to decide if an alternate tree should replace an original. - :param alternate_tree_t_min: Tmin value used to decide if an alternate tree should replace an original. - :param alternate_tree_time: The number of instances used to decide if an alternate tree should be discarded. - :param learning_ratio: Learning ratio to used for training the Perceptrons in the leaves. - :param learning_ratio_decay_factor: Learning rate decay factor (not used when learning rate is constant). - :param learning_ratio_const: Keep learning rate constant instead of decaying. 
- """ - cli = [] - - cli.append(f"-k {subspace_size_size}") - cli.append(f"-s ({_split_criterion_to_cli_str(split_criterion)})") - cli.append(f"-g {grace_period}") - cli.append(f"-c {split_confidence}") - cli.append(f"-t {tie_threshold}") - cli.append(f"-a {page_hinckley_alpha}") - cli.append(f"-h {page_hinckley_threshold}") - cli.append(f"-f {alternate_tree_fading_factor}") - cli.append(f"-y {alternate_tree_t_min}") - cli.append(f"-u {alternate_tree_time}") - cli.append(f"-l {learning_ratio}") - cli.append(f"-d {learning_ratio_decay_factor}") - cli.append("-p") if learning_ratio_const else None - - self.moa_learner = _MOA_SelfOptimisingBaseTree() - - super().__init__( - schema=schema, - CLI=" ".join(cli), - random_seed=random_seed, - moa_learner=self.moa_learner, - ) - - -######################## -######### LAZY ######### -######################## - - -class KNNRegressor(MOARegressor): - """ - The default number of neighbors (k) is set to 3 instead of 10 (as in MOA) - """ - - def __init__( - self, schema=None, CLI=None, random_seed=1, k=3, median=False, window_size=1000 - ): - # Important, should create the MOA object before invoking the super class __init__ - self.moa_learner = MOA_kNN() - super().__init__( - schema=schema, - CLI=CLI, - random_seed=random_seed, - moa_learner=self.moa_learner, - ) - - # Initialize instance attributes with default values, CLI was not set. - if self.CLI is None: - self.k = k - self.median = median - self.window_size = window_size - self.moa_learner.getOptions().setViaCLIString( - f"-k {self.k} {'-m' if self.median else ''} -w \ - {self.window_size}" - ) - self.moa_learner.prepareForUse() - self.moa_learner.resetLearning() - - def __str__(self): - # Overrides the default class name from MOA - return "kNNRegressor" - - -######################## -####### ENSEMBLES ###### -######################## - - -# TODO: replace the m_features_mode logic such that we can infer from m_features_per_tree_size, e.g. if value is double between 0.0 and 1.0 = percentage -class AdaptiveRandomForestRegressor(MOARegressor): - def __init__( - self, - schema=None, - CLI=None, - random_seed=1, - tree_learner=None, - ensemble_size=100, - max_features=0.6, - lambda_param=6.0, # m_features_mode=None, m_features_per_tree_size=60, - drift_detection_method=None, - warning_detection_method=None, - disable_drift_detection=False, - disable_background_learner=False, - ): - # Important: must create the MOA object before invoking the super class __init__ - self.moa_learner = MOA_AdaptiveRandomForestRegressor() - super().__init__( - schema=schema, - CLI=CLI, - random_seed=random_seed, - moa_learner=self.moa_learner, - ) - - # Initialize instance attributes with default values, CLI was not set. 
- if self.CLI is None: - self.tree_learner = ( - ARFFIMTDD(schema, grace_period=50, split_confidence=0.01) - if tree_learner is None - else tree_learner - ) - self.ensemble_size = ensemble_size - - self.max_features = max_features - if isinstance(self.max_features, float) and 0.0 <= self.max_features <= 1.0: - self.m_features_mode = "(Percentage (M * (m / 100)))" - self.m_features_per_tree_size = int(self.max_features * 100) - elif isinstance(self.max_features, int): - self.m_features_mode = "(Specified m (integer value))" - self.m_features_per_tree_size = max_features - elif self.max_features in ["sqrt"]: - self.m_features_mode = "(sqrt(M)+1)" - self.m_features_per_tree_size = -1 # or leave it unchanged - elif self.max_features is None: - self.m_features_mode = "(Percentage (M * (m / 100)))" - self.m_features_per_tree_size = 60 - else: - # Handle other cases or raise an exception if needed - raise ValueError("Invalid value for max_features") - - # self.m_features_mode = "(Percentage (M * (m / 100)))" if m_features_mode is None else m_features_mode - # self.m_features_per_tree_size = m_features_per_tree_size - self.lambda_param = lambda_param - self.drift_detection_method = ( - "(ADWINChangeDetector -a 1.0E-3)" - if drift_detection_method is None - else drift_detection_method - ) - self.warning_detection_method = ( - "(ADWINChangeDetector -a 1.0E-2)" - if warning_detection_method is None - else warning_detection_method - ) - self.disable_drift_detection = disable_drift_detection - self.disable_background_learner = disable_background_learner - - self.moa_learner.getOptions().setViaCLIString( - f"-l {self.tree_learner} -s {self.ensemble_size} -o {self.m_features_mode} -m \ - {self.m_features_per_tree_size} -a {self.lambda_param} -x {self.drift_detection_method} -p \ - {self.warning_detection_method} {'-u' if self.disable_drift_detection else ''} {'-q' if self.disable_background_learner else ''}" - ) - self.moa_learner.prepareForUse() - self.moa_learner.resetLearning() - - -class SOKNL(MOARegressor): - def __init__( - self, - schema=None, - CLI=None, - random_seed=1, - tree_learner=None, - ensemble_size=100, - max_features=0.6, - lambda_param=6.0, # m_features_mode=None, m_features_per_tree_size=60, - drift_detection_method=None, - warning_detection_method=None, - disable_drift_detection=False, - disable_background_learner=False, - self_optimising=True, - k_value=10, - ): - # Important: must create the MOA object before invoking the super class __init__ - self.moa_learner = MOA_SOKNL() - super().__init__( - schema=schema, - CLI=CLI, - random_seed=random_seed, - moa_learner=self.moa_learner, - ) - - # Initialize instance attributes with default values, CLI was not set. 
- if self.CLI is None: - self.tree_learner = ( - # "(SelfOptimisingBaseTree -s VarianceReductionSplitCriterion -g 50 -c 0.01)" - SOKNLBT(schema, grace_period=50, split_confidence=0.01) - if tree_learner is None - else tree_learner - ) - self.ensemble_size = ensemble_size - - self.max_features = max_features - if isinstance(self.max_features, float) and 0.0 <= self.max_features <= 1.0: - self.m_features_mode = "(Percentage (M * (m / 100)))" - self.m_features_per_tree_size = int(self.max_features * 100) - elif isinstance(self.max_features, int): - self.m_features_mode = "(Specified m (integer value))" - self.m_features_per_tree_size = max_features - elif self.max_features in ["sqrt"]: - self.m_features_mode = "(sqrt(M)+1)" - self.m_features_per_tree_size = -1 # or leave it unchanged - elif self.max_features is None: - self.m_features_mode = "(Percentage (M * (m / 100)))" - self.m_features_per_tree_size = 60 - else: - # Handle other cases or raise an exception if needed - raise ValueError("Invalid value for max_features") - - # self.m_features_mode = "(Percentage (M * (m / 100)))" if m_features_mode is None else m_features_mode - # self.m_features_per_tree_size = m_features_per_tree_size - self.lambda_param = lambda_param - self.drift_detection_method = ( - "(ADWINChangeDetector -a 1.0E-3)" - if drift_detection_method is None - else drift_detection_method - ) - self.warning_detection_method = ( - "(ADWINChangeDetector -a 1.0E-2)" - if warning_detection_method is None - else warning_detection_method - ) - self.disable_drift_detection = disable_drift_detection - self.disable_background_learner = disable_background_learner - - self.self_optimising = self_optimising - self.k_value = k_value - - self.moa_learner.getOptions().setViaCLIString( - f"-l {self.tree_learner} -s {self.ensemble_size} {'-f' if self.self_optimising else ''} -k {self.k_value} -o {self.m_features_mode} -m \ - {self.m_features_per_tree_size} -a {self.lambda_param} -x {self.drift_detection_method} -p \ - {self.warning_detection_method} {'-u' if self.disable_drift_detection else ''} {'-q' if self.disable_background_learner else ''}" - ) - self.moa_learner.prepareForUse() - self.moa_learner.resetLearning() diff --git a/src/capymoa/learner/ssl/classifier/__init__.py b/src/capymoa/learner/ssl/classifier/__init__.py deleted file mode 100644 index 42d00b44..00000000 --- a/src/capymoa/learner/ssl/classifier/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .CPSSDS import CPSSDS -from .OSNN import OSNN -from .batch import BatchClassifierSSL - -__all__ = ["BatchClassifierSSL", "CPSSDS", "OSNN"] diff --git a/src/capymoa/prepare_jpype.py b/src/capymoa/prepare_jpype.py index dccfddf4..af257429 100644 --- a/src/capymoa/prepare_jpype.py +++ b/src/capymoa/prepare_jpype.py @@ -1,9 +1,7 @@ # Python imports -import subprocess import configparser import jpype import jpype.imports -from jpype.types import * import os from pathlib import Path diff --git a/src/capymoa/regressor/__init__.py b/src/capymoa/regressor/__init__.py new file mode 100644 index 00000000..8ea9dd43 --- /dev/null +++ b/src/capymoa/regressor/__init__.py @@ -0,0 +1,16 @@ +from ._soknl import SOKNL, SOKNLBT +from ._orto import ORTO +from ._knn import KNNRegressor +from ._fimtdd import FIMTDD +from ._arffimtdd import ARFFIMTDD +from ._adaptive_random_forrest import AdaptiveRandomForestRegressor + +__all__ = [ + "SOKNLBT", + "SOKNL", + "ORTO", + "KNNRegressor", + "FIMTDD", + "ARFFIMTDD", + "AdaptiveRandomForestRegressor", +] diff --git a/src/capymoa/regressor/_adaptive_random_forrest.py 
b/src/capymoa/regressor/_adaptive_random_forrest.py new file mode 100644 index 00000000..294c6020 --- /dev/null +++ b/src/capymoa/regressor/_adaptive_random_forrest.py @@ -0,0 +1,84 @@ +# Library imports + +from capymoa.base import MOARegressor +from ._arffimtdd import ARFFIMTDD + +from moa.classifiers.meta import ( + AdaptiveRandomForestRegressor as MOA_AdaptiveRandomForestRegressor, +) + + +# TODO: replace the m_features_mode logic such that we can infer from m_features_per_tree_size, e.g. if value is double between 0.0 and 1.0 = percentage +class AdaptiveRandomForestRegressor(MOARegressor): + def __init__( + self, + schema=None, + CLI=None, + random_seed=1, + tree_learner=None, + ensemble_size=100, + max_features=0.6, + lambda_param=6.0, # m_features_mode=None, m_features_per_tree_size=60, + drift_detection_method=None, + warning_detection_method=None, + disable_drift_detection=False, + disable_background_learner=False, + ): + # Important: must create the MOA object before invoking the super class __init__ + self.moa_learner = MOA_AdaptiveRandomForestRegressor() + super().__init__( + schema=schema, + CLI=CLI, + random_seed=random_seed, + moa_learner=self.moa_learner, + ) + + # Initialize instance attributes with default values, CLI was not set. + if self.CLI is None: + self.tree_learner = ( + ARFFIMTDD(schema, grace_period=50, split_confidence=0.01) + if tree_learner is None + else tree_learner + ) + self.ensemble_size = ensemble_size + + self.max_features = max_features + if isinstance(self.max_features, float) and 0.0 <= self.max_features <= 1.0: + self.m_features_mode = "(Percentage (M * (m / 100)))" + self.m_features_per_tree_size = int(self.max_features * 100) + elif isinstance(self.max_features, int): + self.m_features_mode = "(Specified m (integer value))" + self.m_features_per_tree_size = max_features + elif self.max_features in ["sqrt"]: + self.m_features_mode = "(sqrt(M)+1)" + self.m_features_per_tree_size = -1 # or leave it unchanged + elif self.max_features is None: + self.m_features_mode = "(Percentage (M * (m / 100)))" + self.m_features_per_tree_size = 60 + else: + # Handle other cases or raise an exception if needed + raise ValueError("Invalid value for max_features") + + # self.m_features_mode = "(Percentage (M * (m / 100)))" if m_features_mode is None else m_features_mode + # self.m_features_per_tree_size = m_features_per_tree_size + self.lambda_param = lambda_param + self.drift_detection_method = ( + "(ADWINChangeDetector -a 1.0E-3)" + if drift_detection_method is None + else drift_detection_method + ) + self.warning_detection_method = ( + "(ADWINChangeDetector -a 1.0E-2)" + if warning_detection_method is None + else warning_detection_method + ) + self.disable_drift_detection = disable_drift_detection + self.disable_background_learner = disable_background_learner + + self.moa_learner.getOptions().setViaCLIString( + f"-l {self.tree_learner} -s {self.ensemble_size} -o {self.m_features_mode} -m \ + {self.m_features_per_tree_size} -a {self.lambda_param} -x {self.drift_detection_method} -p \ + {self.warning_detection_method} {'-u' if self.disable_drift_detection else ''} {'-q' if self.disable_background_learner else ''}" + ) + self.moa_learner.prepareForUse() + self.moa_learner.resetLearning() diff --git a/src/capymoa/regressor/_arffimtdd.py b/src/capymoa/regressor/_arffimtdd.py new file mode 100644 index 00000000..c228f69e --- /dev/null +++ b/src/capymoa/regressor/_arffimtdd.py @@ -0,0 +1,73 @@ +# Library imports +from typing import Optional, Union + +from capymoa.base 
import MOARegressor + +from capymoa.splitcriteria import SplitCriterion, _split_criterion_to_cli_str +from capymoa.stream._stream import Schema +from moa.classifiers.trees import ARFFIMTDD as _MOA_ARFFIMTDD + + +class ARFFIMTDD(MOARegressor): + """Modified Fast Incremental Model Tree with Drift Detection, the base + learner for ARF-Reg, as described by Ikonomovska et al.""" + + def __init__( + self, + schema: Schema, + subspace_size_size: int = 2, + split_criterion: Union[SplitCriterion, str] = "VarianceReductionSplitCriterion", + grace_period: int = 200, + split_confidence: float = 1.0e-7, + tie_threshold: float = 0.05, + page_hinckley_alpha: float = 0.005, + page_hinckley_threshold: int = 50, + alternate_tree_fading_factor: float = 0.995, + alternate_tree_t_min: int = 150, + alternate_tree_time: int = 1500, + learning_ratio: float = 0.02, + learning_ratio_decay_factor: float = 0.001, + learning_ratio_const: bool = False, + random_seed: Optional[int] = None, + ) -> None: + """ + Construct ARFFIMTDD. + + :param subspace_size_size: Number of features per subset for each node split. Negative values = #features - k + :param split_criterion: Split criterion to use. + :param grace_period: Number of instances a leaf should observe between split attempts. + :param split_confidence: Allowed error in split decision; values close to 0 will take longer to decide. + :param tie_threshold: Threshold below which a split will be forced to break ties. + :param page_hinckley_alpha: Alpha value to use in the Page Hinckley change detection tests. + :param page_hinckley_threshold: Threshold value used in the Page Hinckley change detection tests. + :param alternate_tree_fading_factor: Fading factor used to decide if an alternate tree should replace an original. + :param alternate_tree_t_min: Tmin value used to decide if an alternate tree should replace an original. + :param alternate_tree_time: The number of instances used to decide if an alternate tree should be discarded. + :param learning_ratio: Learning ratio used for training the perceptrons in the leaves. + :param learning_ratio_decay_factor: Learning rate decay factor (not used when learning rate is constant). + :param learning_ratio_const: Keep learning rate constant instead of decaying.
+ """ + cli = [] + + cli.append(f"-k {subspace_size_size}") + cli.append(f"-s ({_split_criterion_to_cli_str(split_criterion)})") + cli.append(f"-g {grace_period}") + cli.append(f"-c {split_confidence}") + cli.append(f"-t {tie_threshold}") + cli.append(f"-a {page_hinckley_alpha}") + cli.append(f"-h {page_hinckley_threshold}") + cli.append(f"-f {alternate_tree_fading_factor}") + cli.append(f"-y {alternate_tree_t_min}") + cli.append(f"-u {alternate_tree_time}") + cli.append(f"-l {learning_ratio}") + cli.append(f"-d {learning_ratio_decay_factor}") + cli.append("-p") if learning_ratio_const else None + + self.moa_learner = _MOA_ARFFIMTDD() + + super().__init__( + schema=schema, + CLI=" ".join(cli), + random_seed=random_seed, + moa_learner=self.moa_learner, + ) diff --git a/src/capymoa/regressor/_fimtdd.py b/src/capymoa/regressor/_fimtdd.py new file mode 100644 index 00000000..172b48c6 --- /dev/null +++ b/src/capymoa/regressor/_fimtdd.py @@ -0,0 +1,71 @@ +from typing import Optional, Union + +from capymoa.base import MOARegressor + +from capymoa.splitcriteria import SplitCriterion, _split_criterion_to_cli_str +from capymoa.stream._stream import Schema +from moa.classifiers.trees import FIMTDD as _MOA_FIMTDD + + +class FIMTDD(MOARegressor): + """Implementation of the FIMT-DD tree as described by Ikonomovska et al.""" + + def __init__( + self, + schema: Schema, + split_criterion: Union[SplitCriterion, str] = "VarianceReductionSplitCriterion", + grace_period: int = 200, + split_confidence: float = 1.0e-7, + tie_threshold: float = 0.05, + page_hinckley_alpha: float = 0.005, + page_hinckley_threshold: int = 50, + alternate_tree_fading_factor: float = 0.995, + alternate_tree_t_min: int = 150, + alternate_tree_time: int = 1500, + regression_tree: bool = False, + learning_ratio: float = 0.02, + learning_ratio_decay_factor: float = 0.001, + learning_ratio_const: bool = False, + random_seed: Optional[int] = None, + ) -> None: + """ + Construct FIMTDD. + + :param split_criterion: Split criterion to use. + :param grace_period: Number of instances a leaf should observe between split attempts. + :param split_confidence: Allowed error in split decision, values close to 0 will take long to decide. + :param tie_threshold: Threshold below which a split will be forced to break ties. + :param page_hinckley_alpha: Alpha value to use in the Page Hinckley change detection tests. + :param page_hinckley_threshold: Threshold value used in the Page Hinckley change detection tests. + :param alternate_tree_fading_factor: Fading factor used to decide if an alternate tree should replace an original. + :param alternate_tree_t_min: Tmin value used to decide if an alternate tree should replace an original. + :param alternate_tree_time: The number of instances used to decide if an alternate tree should be discarded. + :param regression_tree: Build a regression tree instead of a model tree. + :param learning_ratio: Learning ratio to used for training the Perceptrons in the leaves. + :param learning_ratio_decay_factor: Learning rate decay factor (not used when learning rate is constant). + :param learning_ratio_const: Keep learning rate constant instead of decaying. 
+ """ + cli = [] + + cli.append(f"-s ({_split_criterion_to_cli_str(split_criterion)})") + cli.append(f"-g {grace_period}") + cli.append(f"-c {split_confidence}") + cli.append(f"-t {tie_threshold}") + cli.append(f"-a {page_hinckley_alpha}") + cli.append(f"-h {page_hinckley_threshold}") + cli.append(f"-f {alternate_tree_fading_factor}") + cli.append(f"-y {alternate_tree_t_min}") + cli.append(f"-u {alternate_tree_time}") + cli.append("-e") if regression_tree else None + cli.append(f"-l {learning_ratio}") + cli.append(f"-d {learning_ratio_decay_factor}") + cli.append("-p") if learning_ratio_const else None + + self.moa_learner = _MOA_FIMTDD() + + super().__init__( + schema=schema, + CLI=" ".join(cli), + random_seed=random_seed, + moa_learner=self.moa_learner, + ) diff --git a/src/capymoa/regressor/_knn.py b/src/capymoa/regressor/_knn.py new file mode 100644 index 00000000..b641cf76 --- /dev/null +++ b/src/capymoa/regressor/_knn.py @@ -0,0 +1,36 @@ +from capymoa.base import MOARegressor +from moa.classifiers.lazy import kNN as _moa_kNN + + +class KNNRegressor(MOARegressor): + """ + The default number of neighbors (k) is set to 3 instead of 10 (as in MOA) + """ + + def __init__( + self, schema=None, CLI=None, random_seed=1, k=3, median=False, window_size=1000 + ): + # Important, should create the MOA object before invoking the super class __init__ + self.moa_learner = _moa_kNN() + super().__init__( + schema=schema, + CLI=CLI, + random_seed=random_seed, + moa_learner=self.moa_learner, + ) + + # Initialize instance attributes with default values, CLI was not set. + if self.CLI is None: + self.k = k + self.median = median + self.window_size = window_size + self.moa_learner.getOptions().setViaCLIString( + f"-k {self.k} {'-m' if self.median else ''} -w \ + {self.window_size}" + ) + self.moa_learner.prepareForUse() + self.moa_learner.resetLearning() + + def __str__(self): + # Overrides the default class name from MOA + return "kNNRegressor" diff --git a/src/capymoa/regressor/_orto.py b/src/capymoa/regressor/_orto.py new file mode 100644 index 00000000..172bf38b --- /dev/null +++ b/src/capymoa/regressor/_orto.py @@ -0,0 +1,83 @@ +from typing import Optional, Union + +from capymoa.stream import Schema +from capymoa.base import MOARegressor +from capymoa.splitcriteria import SplitCriterion, _split_criterion_to_cli_str + +from moa.classifiers.trees import ORTO as _MOA_ORTO + + +class ORTO(MOARegressor): + """Implementation of the ORTO tree as described by Ikonomovska et al.""" + + def __init__( + self, + schema: Schema, + max_trees: int = 10, + max_option_level: int = 10, + option_decay_factor: float = 0.9, + option_fading_factor: float = 0.9995, + split_criterion: Union[SplitCriterion, str] = "VarianceReductionSplitCriterion", + grace_period: int = 200, + split_confidence: float = 1.0e-7, + tie_threshold: float = 0.05, + page_hinckley_alpha: float = 0.005, + page_hinckley_threshold: int = 50, + alternate_tree_fading_factor: float = 0.995, + alternate_tree_t_min: int = 150, + alternate_tree_time: int = 1500, + regression_tree: bool = False, + learning_ratio: float = 0.02, + learning_ratio_decay_factor: float = 0.001, + learning_ratio_const: bool = False, + random_seed: Optional[int] = None, + ) -> None: + """ + Construct ORTO. + + :param max_trees: The maximum number of trees contained in the option tree. + :param max_option_level: The maximal depth at which option nodes can be created. + :param option_decay_factor: The option decay factor that determines how many options can be selected at a given level. 
+ :param option_fading_factor: The fading factor used for comparing subtrees of an option node. + :param split_criterion: Split criterion to use. + :param grace_period: Number of instances a leaf should observe between split attempts. + :param split_confidence: Allowed error in split decision; values close to 0 will take longer to decide. + :param tie_threshold: Threshold below which a split will be forced to break ties. + :param page_hinckley_alpha: Alpha value to use in the Page Hinckley change detection tests. + :param page_hinckley_threshold: Threshold value used in the Page Hinckley change detection tests. + :param alternate_tree_fading_factor: Fading factor used to decide if an alternate tree should replace an original. + :param alternate_tree_t_min: Tmin value used to decide if an alternate tree should replace an original. + :param alternate_tree_time: The number of instances used to decide if an alternate tree should be discarded. + :param regression_tree: Build a regression tree instead of a model tree. + :param learning_ratio: Learning ratio used for training the perceptrons in the leaves. + :param learning_ratio_decay_factor: Learning rate decay factor (not used when learning rate is constant). + :param learning_ratio_const: Keep learning rate constant instead of decaying. + """ + cli = [] + + cli.append(f"-m {max_trees}") + cli.append(f"-x {max_option_level}") + cli.append(f"-z {option_decay_factor}") + cli.append(f"-q {option_fading_factor}") + cli.append(f"-s ({_split_criterion_to_cli_str(split_criterion)})") + cli.append(f"-g {grace_period}") + cli.append(f"-c {split_confidence}") + cli.append(f"-t {tie_threshold}") + cli.append(f"-a {page_hinckley_alpha}") + cli.append(f"-h {page_hinckley_threshold}") + cli.append(f"-f {alternate_tree_fading_factor}") + cli.append(f"-y {alternate_tree_t_min}") + cli.append(f"-u {alternate_tree_time}") + if regression_tree: + cli.append("-e") + cli.append(f"-l {learning_ratio}") + cli.append(f"-d {learning_ratio_decay_factor}") + if learning_ratio_const: + cli.append("-p") + + self.moa_learner = _MOA_ORTO() + + super().__init__( + schema=schema, + CLI=" ".join(cli), + random_seed=random_seed, + moa_learner=self.moa_learner, + ) diff --git a/src/capymoa/regressor/_soknl.py b/src/capymoa/regressor/_soknl.py new file mode 100644 index 00000000..dcac87ee --- /dev/null +++ b/src/capymoa/regressor/_soknl.py @@ -0,0 +1,156 @@ +# Library imports +from typing import Optional, Union + +from capymoa.base import ( + MOARegressor, +) + +from capymoa.splitcriteria import SplitCriterion, _split_criterion_to_cli_str +from capymoa.stream._stream import Schema +from moa.classifiers.meta import SelfOptimisingKNearestLeaves as _MOA_SOKNL +from moa.classifiers.trees import SelfOptimisingBaseTree as _MOA_SelfOptimisingBaseTree + + +class SOKNLBT(MOARegressor): + """The SelfOptimisingBaseTree, the base tree learner for SOKNL, adapted from the FIMT-DD tree described by Ikonomovska et al.""" + + def __init__( + self, + schema: Schema, + subspace_size_size: int = 2, + split_criterion: Union[SplitCriterion, str] = "VarianceReductionSplitCriterion", + grace_period: int = 200, + split_confidence: float = 1.0e-7, + tie_threshold: float = 0.05, + page_hinckley_alpha: float = 0.005, + page_hinckley_threshold: int = 50, + alternate_tree_fading_factor: float = 0.995, + alternate_tree_t_min: int = 150, + alternate_tree_time: int = 1500, + learning_ratio: float = 0.02, + learning_ratio_decay_factor: float = 0.001, + learning_ratio_const: bool = False, + random_seed: Optional[int] = None, + ) -> None: + """ +
Construct SelfOptimisingBaseTree. + + :param subspace_size_size: Number of features per subset for each node split. Negative values = #features - k + :param split_criterion: Split criterion to use. + :param grace_period: Number of instances a leaf should observe between split attempts. + :param split_confidence: Allowed error in split decision; values close to 0 will take longer to decide. + :param tie_threshold: Threshold below which a split will be forced to break ties. + :param page_hinckley_alpha: Alpha value to use in the Page Hinckley change detection tests. + :param page_hinckley_threshold: Threshold value used in the Page Hinckley change detection tests. + :param alternate_tree_fading_factor: Fading factor used to decide if an alternate tree should replace an original. + :param alternate_tree_t_min: Tmin value used to decide if an alternate tree should replace an original. + :param alternate_tree_time: The number of instances used to decide if an alternate tree should be discarded. + :param learning_ratio: Learning ratio used for training the perceptrons in the leaves. + :param learning_ratio_decay_factor: Learning rate decay factor (not used when learning rate is constant). + :param learning_ratio_const: Keep learning rate constant instead of decaying. + """ + cli = [] + + cli.append(f"-k {subspace_size_size}") + cli.append(f"-s ({_split_criterion_to_cli_str(split_criterion)})") + cli.append(f"-g {grace_period}") + cli.append(f"-c {split_confidence}") + cli.append(f"-t {tie_threshold}") + cli.append(f"-a {page_hinckley_alpha}") + cli.append(f"-h {page_hinckley_threshold}") + cli.append(f"-f {alternate_tree_fading_factor}") + cli.append(f"-y {alternate_tree_t_min}") + cli.append(f"-u {alternate_tree_time}") + cli.append(f"-l {learning_ratio}") + cli.append(f"-d {learning_ratio_decay_factor}") + if learning_ratio_const: + cli.append("-p") + + self.moa_learner = _MOA_SelfOptimisingBaseTree() + + super().__init__( + schema=schema, + CLI=" ".join(cli), + random_seed=random_seed, + moa_learner=self.moa_learner, + ) + + +class SOKNL(MOARegressor): + def __init__( + self, + schema=None, + CLI=None, + random_seed=1, + tree_learner=None, + ensemble_size=100, + max_features=0.6, + lambda_param=6.0, # m_features_mode=None, m_features_per_tree_size=60, + drift_detection_method=None, + warning_detection_method=None, + disable_drift_detection=False, + disable_background_learner=False, + self_optimising=True, + k_value=10, + ): + # Important: must create the MOA object before invoking the super class __init__ + self.moa_learner = _MOA_SOKNL() + super().__init__( + schema=schema, + CLI=CLI, + random_seed=random_seed, + moa_learner=self.moa_learner, + ) + + # Initialize instance attributes with default values if CLI was not set.
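+ # The max_features argument selects MOA's feature-subset mode below: a float + # in [0, 1] is treated as a percentage of the features, an int as a fixed + # count, "sqrt" as sqrt(M)+1, and None falls back to 60% of the features.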
+ if self.CLI is None: + self.tree_learner = ( + # "(SelfOptimisingBaseTree -s VarianceReductionSplitCriterion -g 50 -c 0.01)" + SOKNLBT(schema, grace_period=50, split_confidence=0.01) + if tree_learner is None + else tree_learner + ) + self.ensemble_size = ensemble_size + + self.max_features = max_features + if isinstance(self.max_features, float) and 0.0 <= self.max_features <= 1.0: + self.m_features_mode = "(Percentage (M * (m / 100)))" + self.m_features_per_tree_size = int(self.max_features * 100) + elif isinstance(self.max_features, int): + self.m_features_mode = "(Specified m (integer value))" + self.m_features_per_tree_size = max_features + elif self.max_features in ["sqrt"]: + self.m_features_mode = "(sqrt(M)+1)" + self.m_features_per_tree_size = -1 # or leave it unchanged + elif self.max_features is None: + self.m_features_mode = "(Percentage (M * (m / 100)))" + self.m_features_per_tree_size = 60 + else: + # Handle other cases or raise an exception if needed + raise ValueError("Invalid value for max_features") + + # self.m_features_mode = "(Percentage (M * (m / 100)))" if m_features_mode is None else m_features_mode + # self.m_features_per_tree_size = m_features_per_tree_size + self.lambda_param = lambda_param + self.drift_detection_method = ( + "(ADWINChangeDetector -a 1.0E-3)" + if drift_detection_method is None + else drift_detection_method + ) + self.warning_detection_method = ( + "(ADWINChangeDetector -a 1.0E-2)" + if warning_detection_method is None + else warning_detection_method + ) + self.disable_drift_detection = disable_drift_detection + self.disable_background_learner = disable_background_learner + + self.self_optimising = self_optimising + self.k_value = k_value + + self.moa_learner.getOptions().setViaCLIString( + f"-l {self.tree_learner} -s {self.ensemble_size} {'-f' if self.self_optimising else ''} -k {self.k_value} -o {self.m_features_mode} -m \ + {self.m_features_per_tree_size} -a {self.lambda_param} -x {self.drift_detection_method} -p \ + {self.warning_detection_method} {'-u' if self.disable_drift_detection else ''} {'-q' if self.disable_background_learner else ''}" + ) + self.moa_learner.prepareForUse() + self.moa_learner.resetLearning() diff --git a/src/capymoa/learner/splitcriteria.py b/src/capymoa/splitcriteria.py similarity index 94% rename from src/capymoa/learner/splitcriteria.py rename to src/capymoa/splitcriteria.py index b4894179..e50c3311 100644 --- a/src/capymoa/learner/splitcriteria.py +++ b/src/capymoa/splitcriteria.py @@ -4,8 +4,9 @@ import moa.classifiers.core.splitcriteria as moa_split -class SplitCriterion(): +class SplitCriterion: """Split criterions are used to evaluate the quality of a split in a decision tree.""" + _java_object: Optional[moa_split.SplitCriterion] = None def java_object(self) -> moa_split.SplitCriterion: @@ -65,4 +66,6 @@ def _split_criterion_to_cli_str(split_criterion: Union[str, SplitCriterion]) -> elif isinstance(split_criterion, str): return split_criterion.strip().strip("() ") else: - raise TypeError(f"Expected a string or SplitCriterion, got {type(split_criterion)}") + raise TypeError( + f"Expected a string or SplitCriterion, got {type(split_criterion)}" + ) diff --git a/src/capymoa/learner/ssl/classifier/CPSSDS.py b/src/capymoa/ssl/classifier/_CPSSDS.py similarity index 98% rename from src/capymoa/learner/ssl/classifier/CPSSDS.py rename to src/capymoa/ssl/classifier/_CPSSDS.py index 8f55fb30..45303ee8 100644 --- a/src/capymoa/learner/ssl/classifier/CPSSDS.py +++ b/src/capymoa/ssl/classifier/_CPSSDS.py @@ -6,9 +6,9 @@ 
from river.naive_bayes import GaussianNB from river.tree import HoeffdingTreeClassifier -from capymoa.learner.ssl.classifier.batch import BatchClassifierSSL +from capymoa.ssl.classifier._batch import BatchClassifierSSL from capymoa.stream import Schema -from capymoa.stream.instance import Instance +from capymoa.instance import Instance def shuffle_split( diff --git a/src/capymoa/ssl/classifier/__init__.py b/src/capymoa/ssl/classifier/__init__.py new file mode 100644 index 00000000..573fe5e3 --- /dev/null +++ b/src/capymoa/ssl/classifier/__init__.py @@ -0,0 +1,5 @@ +from ._cpssds import CPSSDS +from ._osnn import OSNN +from ._batch import BatchClassifierSSL + +__all__ = ["BatchClassifierSSL", "CPSSDS", "OSNN"] diff --git a/src/capymoa/learner/ssl/classifier/batch.py b/src/capymoa/ssl/classifier/_batch.py similarity index 92% rename from src/capymoa/learner/ssl/classifier/batch.py rename to src/capymoa/ssl/classifier/_batch.py index 37a0c785..09ec351e 100644 --- a/src/capymoa/learner/ssl/classifier/batch.py +++ b/src/capymoa/ssl/classifier/_batch.py @@ -1,12 +1,11 @@ from abc import ABC, abstractmethod -from typing import Any import numpy as np from numpy.typing import NDArray -from capymoa.learner import ClassifierSSL -from capymoa.stream.instance import Instance, LabeledInstance -from capymoa.stream.stream import Schema +from capymoa.base import ClassifierSSL +from capymoa.instance import Instance, LabeledInstance +from capymoa.stream._stream import Schema from capymoa.type_alias import FeatureVector diff --git a/src/capymoa/ssl/classifier/_cpssds.py b/src/capymoa/ssl/classifier/_cpssds.py new file mode 100644 index 00000000..2d3b7e35 --- /dev/null +++ b/src/capymoa/ssl/classifier/_cpssds.py @@ -0,0 +1,296 @@ +import typing as t +from typing import Dict, Literal + +import numpy as np +from river.base import Classifier +from river.naive_bayes import GaussianNB +from river.tree import HoeffdingTreeClassifier + +from ._batch import BatchClassifierSSL +from capymoa.stream import Schema +from capymoa.instance import Instance + + +def shuffle_split( + split_proportion: float, x: np.ndarray, y: np.ndarray +) -> t.Tuple[t.Tuple[np.ndarray, np.ndarray], t.Tuple[np.ndarray, np.ndarray]]: + """Shuffle and split the data into two parts. + + :param split_proportion: The proportion of the dataset to be included in + the first part. + :param x: The instances to split. + :param y: The labels to split. + :raises LengthMismatchError: The length of x and y must be the same. + :return: Two tuples containing the instances and labels of the two parts. + """ + assert len(x) == len(y), "x and y must have the same length" + indices = np.arange(len(x)) + np.random.shuffle(indices) + split_index = int(len(x) * split_proportion) + idx_a = indices[:split_index] + idx_b = indices[split_index:] + return (x[idx_a], y[idx_a]), (x[idx_b], y[idx_b]) + + +def split_by_label_presence( + x: np.ndarray, y: np.ndarray +) -> t.Tuple[t.Tuple[np.ndarray, np.ndarray], np.ndarray]: + """Split the data into labeled and unlabeled instances. + + :param x: A batch of instances. + :param y: A batch of labels where -1 means that the instance is unlabeled. + :raises LengthMismatchError: The length of x and y must be the same. + :return: + - A tuple containing the labeled instances and labels. + - A numpy array containing the unlabeled instances. 
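+ + For example (an illustrative sketch): with y = np.array([0, -1, 1, -1]), + rows 0 and 2 form the labeled pair and rows 1 and 3 the unlabeled array.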
+ """ + assert len(x) == len(y), "x and y must have the same length" + labeled_mask = y != -1 + return (x[labeled_mask], y[labeled_mask]), x[~labeled_mask] + + +def Unlabeling_data(X_train, Y_train, Percentage, chunk_size, class_count): + labeled_count = round(Percentage * chunk_size) + TLabeled = X_train[0 : labeled_count - 1] + Y_TLabeled = Y_train[0 : labeled_count - 1] + X_Unlabeled = X_train[labeled_count : Y_train.shape[0] - 1] + + cal_count = round(0.3 * TLabeled.shape[0]) + X_cal = TLabeled[0 : cal_count - 1] + Y_cal = Y_TLabeled[0 : cal_count - 1] + X_L = TLabeled[cal_count : TLabeled.shape[0] - 1] + Y_L = Y_TLabeled[cal_count : TLabeled.shape[0] - 1] + + return X_Unlabeled, X_L, Y_L, X_cal, Y_cal + + +def Prediction_by_CP(num, classifier, X, Y, X_Unlabeled, class_count, sl): + row = X_Unlabeled.shape[0] + col = class_count + p_values = np.zeros([row, col]) + labels = np.ones((row, col), dtype=bool) + alphas = NCM(num, classifier, X, Y, 1, class_count) + for elem in range(row): + c = [] + for o in range(class_count): + a_test = NCM( + num, classifier, np.array([X_Unlabeled[elem, :]]), o, 2, class_count + ) + idx = np.argwhere(Y == o).flatten() + temp = alphas[idx] + p = len(temp[temp >= a_test]) + if idx.shape[0] == 0: + s = 0 + else: + s = p / idx.shape[0] + c.append(s) + if s < sl: + labels[elem, int(o)] = False + p_values[elem, :] = np.array(c) + return p_values, labels + + +def NCM(num, classifier, X, Y, t, class_count): + if num == 1: + if t == 1: + p = np.zeros([X.shape[0], 1]) + alpha = np.zeros([X.shape[0], 1]) + for g in range(X.shape[0]): + dic_vote = classifier.predict_proba_one(np_to_dict(X[g, :])) + vote = np.fromiter(dic_vote.values(), dtype=float) + vote_keys = np.fromiter(dic_vote.keys(), dtype=int) + Sum = np.sum(vote) + keys = np.argwhere(vote_keys == int(Y[g])).flatten() + if keys.size == 0: + p[g] = (1) / (Sum + class_count) + else: + for key, val in dic_vote.items(): + if key == float(Y[g]): + p[g] = (val + 1) / (Sum + class_count) + alpha[g] = 1 - p[g] + + else: + dic_vote = classifier.predict_proba_one(np_to_dict(X[0, :])) + vote = np.fromiter(dic_vote.values(), dtype=float) + vote_keys = np.fromiter(dic_vote.keys(), dtype=int) + Sum = np.sum(vote) + keys = np.argwhere(vote_keys == int(Y)).flatten() + if keys.size == 0: + p = (1) / (Sum + class_count) + else: + for key, val in dic_vote.items(): + if key == float(Y): + p = (val + 1) / (Sum + class_count) + alpha = 1 - p + + else: + if t == 1: + prediction = predict_many(classifier, X) + P = np.max(prediction, axis=1) + alpha = 1 - P + elif t == 2: + prediction = predict_many(classifier, X) + # TODO: This is a hacky patch because river tries to be smart and + # infer the number of classes from the data. This is silly because + # CPSSDS assumes that the number of classes is known. Future work + # will replace river with MOA. 
+ if prediction.shape[1] <= Y: + P = 0 + else: + P = prediction[0, int(Y)] + alpha = 1 - P + return alpha + + +def Informatives_selection(X_Unlabeled, p_values, labels, class_count): + row = X_Unlabeled.shape[0] + X = np.empty([1, X_Unlabeled.shape[1]]) + Y = np.empty([1]) + for elem in range(row): + l = np.argwhere(labels[elem, :] == True).flatten() + if len(l) == 1: + pp = p_values[elem, l] + X = np.append(X, [X_Unlabeled[elem, :]], axis=0) + Y = np.append(Y, [l[0]], axis=0) + Informatives = X[1 : X.shape[0], :] + Y_Informatives = Y[1 : Y.shape[0]] + return Informatives, Y_Informatives + + +def Appending_informative_to_nextchunk( + X_Currentchunk_Labeled, Y_Currentchunk_Labeled, Informatives, Y_Informatives +): + X = np.append(X_Currentchunk_Labeled, Informatives, axis=0) + Y = np.append(Y_Currentchunk_Labeled, Y_Informatives, axis=0) + return X, Y + + +def np_to_dict(x): + return dict(enumerate(x)) + + +def predict_many(classifier: Classifier, x: np.ndarray) -> np.ndarray: + """Predict the labels of a batch of instances. + + :param classifier: The classifier to use. + :param x: A batch of instances. + :return: A numpy array containing the predicted labels. + """ + if len(x) == 0: + return np.array([]) + results = [] + for x_i in x: + y_hat = classifier.predict_proba_one(np_to_dict(x_i)) + y_hat_skmf = np.array(list(y_hat.values())) + results.append(y_hat_skmf) + return np.stack(results) + + +class CPSSDS(BatchClassifierSSL): + """Conformal prediction for semi-supervised classification on data streams. + + Tanha, J., Samadi, N., Abdi, Y., & Razzaghi-Asl, N. (2021). CPSSDS: + Conformal prediction for semi-supervised classification on data streams. + Information Sciences, 584, 212–234. https://doi.org/10.1016/j.ins.2021.10.068 + """ + + def __init__( + self, + base_model: Literal["NaiveBayes", "HoeffdingTree"], + batch_size: int, + schema: Schema, + significance_level: float = 0.98, + calibration_split: float = 0.3, + random_seed=1, + ) -> None: + """Constructor for CPSSDS. + + :param base_model: An underlying model which is augmented with + self-labeled data from conformal prediction. + :param batch_size: The number of instances to train on at a time. + :param schema: The schema of the data stream. + :param significance_level: Controls the required confidence level for + unlabeled instances to be labeled. Must be between 0 and 1. defaults to 0.98 + :param calibration_split: The proportion of the labeled data to be used + for calibration. defaults to 0.3 + :param random_seed: The random seed to use for reproducibility. 
+ :raises ValueError: `base_model` must be either NaiveBayes or HoeffdingTree + """ + super().__init__(batch_size, schema, random_seed) + self.significance_level: float = significance_level + self.chunk_id = 0 + self.class_count = schema.get_num_classes() + self.calibration_split = calibration_split + + # TODO: These classifiers should be replaced with MOA classifiers + if base_model == "NaiveBayes": + self.classifier = GaussianNB() + self._num = 2 + elif base_model == "HoeffdingTree": + self.classifier = HoeffdingTreeClassifier() + self._num = 1 + else: + raise ValueError("`base_model` must be either NaiveBayes or HoeffdingTree") + + # Self-labeled data, initialized as empty + self.self_labeled_x: np.array = None + self.self_labeled_y: np.array = None + + # Set seed for reproducibility + np.random.seed(random_seed) + + def train_on_batch(self, x_batch, y_indices): + (x_label, y_label), x_unlabeled = split_by_label_presence(x_batch, y_indices) + (x_cal, y_cal), (x_train, y_train) = shuffle_split( + self.calibration_split, x_label, y_label + ) + + # Add self-labeled data to training data + if self.self_labeled_x is not None and self.self_labeled_y is not None: + x_train = np.concatenate((x_train, self.self_labeled_x)) + y_train = np.concatenate((y_train, self.self_labeled_y)) + + for x_one, y_one in zip(x_train, y_train): + self.classifier.learn_one(dict(enumerate(x_one)), y_one) + + assert x_cal.shape[0] > 0, "Calibration data must not be empty" + assert x_unlabeled.shape[0] > 0, "Unlabeled data must not be empty" + """Issues arise when not enough labeled data is available for calibration. + This can be solved by increasing the calibration split or increasing the + batch size. + """ + + # Use conformal prediction to label some unlabeled data + p_values, labels = Prediction_by_CP( + self._num, + self.classifier, + x_cal, + y_cal, + x_unlabeled, + self.class_count, + self.significance_level, + ) + + # Add newly labeled data to self-labeled data + self.self_labeled_x, self.self_labeled_y = Informatives_selection( + x_unlabeled, p_values, labels, self.class_count + ) + + def instance_to_dict(self, instance: Instance) -> Dict[str, float]: + """Convert an instance to a dictionary with the feature names as keys.""" + return dict(enumerate(instance.x)) + + def skmf_to_river(self, x): + return dict(enumerate(x)) + + def predict(self, instance: Instance): + class_index = self.classifier.predict_one(self.instance_to_dict(instance)) + if class_index is None: + return None + return class_index + + def predict_proba(self, instance): + raise NotImplementedError() + + def __str__(self): + return f"CPSSDS(significance_level={self.significance_level})" diff --git a/src/capymoa/learner/ssl/classifier/OSNN.py b/src/capymoa/ssl/classifier/_osnn.py similarity index 65% rename from src/capymoa/learner/ssl/classifier/OSNN.py rename to src/capymoa/ssl/classifier/_osnn.py index 8b673138..9ced0e30 100644 --- a/src/capymoa/learner/ssl/classifier/OSNN.py +++ b/src/capymoa/ssl/classifier/_osnn.py @@ -6,28 +6,30 @@ CapyMOA implementation by Botao, Anton """ + import numpy as np import random import torch.nn as nn import torch from scipy.spatial.distance import cdist -from capymoa.learner import ClassifierSSL +from capymoa.base import ClassifierSSL def kernel_fun(a, b, sigma): A = torch.sum((a - b) ** 2, dim=1) - B = A / (2 * sigma ** 2) + B = A / (2 * sigma**2) C = torch.exp(-B) return C -def Euclidean_Distances(a,b): - dis = torch.sqrt(torch.sum((a-b)**2, dim=1)) + +def Euclidean_Distances(a, b): + dis = 
torch.sqrt(torch.sum((a - b) ** 2, dim=1)) return dis -class OSNeuralNetwork(nn.Module): - def __init__(self, num_center, n_out, window_size, beta=1, gamma = 1): +class OSNeuralNetwork(nn.Module): + def __init__(self, num_center, n_out, window_size, beta=1, gamma=1): super(OSNeuralNetwork, self).__init__() self.n_out = n_out self.num_centers = num_center @@ -66,37 +68,52 @@ def initialize_weights(self): m.bias.data.zero_() def update_sigma(self): - #The width of basis function is set to a proportion β of the mean of the Euclidean distances to the other centers. + # The width of basis function is set to a proportion β of the mean of the Euclidean distances to the other centers. self.sigma = torch.ones(1, self.num_centers) for i in range(self.num_centers): dis = Euclidean_Distances(self.centers[i], self.centers) - dis = torch.sum(dis)/(self.num_centers) - self.sigma[0][i] = dis*self.beta + dis = torch.sum(dis) / (self.num_centers) + self.sigma[0][i] = dis * self.beta def window_update(self, data, label): - #The window is updated according to random sampling, and the first-in-first-out principle is adopted. + # The window is updated according to random sampling, and the first-in-first-out principle is adopted. if self.i == 0: - self.data_window = torch.zeros([self.window_size, data.size(1)], dtype=torch.float32) - self.label_window = torch.zeros([self.window_size, self.n_out], dtype=torch.float32) + self.data_window = torch.zeros( + [self.window_size, data.size(1)], dtype=torch.float32 + ) + self.label_window = torch.zeros( + [self.window_size, self.n_out], dtype=torch.float32 + ) self.label_index = torch.zeros((self.window_size, 1), dtype=torch.float32) - self.linear = nn.Sequential(nn.Linear(self.num_centers + data.size(1), self.n_out, bias=True) - , nn.Sigmoid()) + self.linear = nn.Sequential( + nn.Linear(self.num_centers + data.size(1), self.n_out, bias=True), + nn.Sigmoid(), + ) for i in range(data.size(0)): - - self.data_window = torch.cat([self.data_window[1:, :], data[i:i+1, :]], dim=0) - self.label_window = torch.cat([self.label_window[1:, :], label[i:i+1, :]], dim=0) + self.data_window = torch.cat( + [self.data_window[1:, :], data[i : i + 1, :]], dim=0 + ) + self.label_window = torch.cat( + [self.label_window[1:, :], label[i : i + 1, :]], dim=0 + ) if label[i] != -1: - self.label_index = torch.cat([self.label_index[1:, :], torch.ones(1, 1)], dim=0) + self.label_index = torch.cat( + [self.label_index[1:, :], torch.ones(1, 1)], dim=0 + ) else: - self.label_index = torch.cat([self.label_index[1:, :], torch.zeros(1, 1)], dim=0) + self.label_index = torch.cat( + [self.label_index[1:, :], torch.zeros(1, 1)], dim=0 + ) self.i = self.i + 1 if self.i == self.window_size: - index = torch.LongTensor(random.sample(range(self.data_window.size(0)), self.num_centers)) + index = torch.LongTensor( + random.sample(range(self.data_window.size(0)), self.num_centers) + ) self.centers = torch.index_select(self.data_window, 0, index) self.initialize_weights() @@ -110,17 +127,25 @@ def window_update(self, data, label): return update def center_adjustment(self): - #The samples are assigned to the nearest RBF centers, and then each center is updated according to the assigned samples. - distances = np.linalg.norm(self.data_window[:, np.newaxis] - self.centers, axis=2) + # The samples are assigned to the nearest RBF centers, and then each center is updated according to the assigned samples. 
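+ # Broadcasting sketch: data_window[:, np.newaxis] has shape (window, 1, d) and + # centers has shape (num_centers, d), so the norm over axis 2 yields a + # (window, num_centers) distance matrix.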
+ distances = np.linalg.norm( + self.data_window[:, np.newaxis] - self.centers, axis=2 + ) nearest_centers = np.argmin(distances, axis=1) - assigned_samples = [self.data_window[nearest_centers == i] for i in range(len(self.centers))] - assigned_labels = [self.label_window[nearest_centers == i] for i in range(len(self.centers))] - assigned_label_index = [self.label_index[nearest_centers == i] for i in range(len(self.centers))] + assigned_samples = [ + self.data_window[nearest_centers == i] for i in range(len(self.centers)) + ] + assigned_labels = [ + self.label_window[nearest_centers == i] for i in range(len(self.centers)) + ] + assigned_label_index = [ + self.label_index[nearest_centers == i] for i in range(len(self.centers)) + ] for i in range(self.num_centers): if len(assigned_samples) > 0: - unlabel_index = torch.squeeze(assigned_label_index[i] == 0., 1) - label_index = torch.squeeze(assigned_label_index[i] == 1., 1) + unlabel_index = torch.squeeze(assigned_label_index[i] == 0.0, 1) + label_index = torch.squeeze(assigned_label_index[i] == 1.0, 1) unlabel_sample = assigned_samples[i][unlabel_index] label_sample = assigned_samples[i][label_index] @@ -133,30 +158,42 @@ def center_adjustment(self): majorit_class = unique[np.argmax(counts)] minorit_class = unique[np.argmin(counts)] if majorit_class == minorit_class: - self.centers[i] = (torch.mean(unlabel_sample, axis=0) + torch.mean(label_sample, axis=0))/2 + self.centers[i] = ( + torch.mean(unlabel_sample, axis=0) + + torch.mean(label_sample, axis=0) + ) / 2 else: majorit_sample = label_sample[labels.flatten() == majorit_class] minorit_sample = label_sample[labels.flatten() == minorit_class] - a = (majorit_sample.sum(dim=0) + minorit_sample.sum(dim=0))/len(label_sample) + a = ( + majorit_sample.sum(dim=0) + minorit_sample.sum(dim=0) + ) / len(label_sample) b = torch.mean(unlabel_sample, axis=0) - c = ((len(majorit_sample) - len(minorit_sample))/len(label_sample)) + 1 - self.centers[i] = (a + b)/c + c = ( + (len(majorit_sample) - len(minorit_sample)) + / len(label_sample) + ) + 1 + self.centers[i] = (a + b) / c elif len(label_sample) > 0 and len(unlabel_sample == 0): unique, counts = np.unique(labels, return_counts=True) majorit_class = unique[np.argmax(counts)] minorit_class = unique[np.argmin(counts)] majorit_sample = label_sample[labels.flatten() == majorit_class] minorit_sample = label_sample[labels.flatten() == minorit_class] - a = (majorit_sample.sum(dim=0) + minorit_sample.sum(dim=0)) / len(label_sample) - c = ((len(majorit_sample) - len(minorit_sample)) / len(label_sample)) + a = (majorit_sample.sum(dim=0) + minorit_sample.sum(dim=0)) / len( + label_sample + ) + c = (len(majorit_sample) - len(minorit_sample)) / len(label_sample) self.centers[i] = a / c else: - self.centers[i] = self.data_window[torch.randint(self.data_window.shape[0], size=(1,))][0] + self.centers[i] = self.data_window[ + torch.randint(self.data_window.shape[0], size=(1,)) + ][0] self.update_sigma() def pseudo_label(self): - #Pseudo-labels for unlabeled samples are calculated based on the true labels of labeled samples and the output of the network on unlabeled samples. + # Pseudo-labels for unlabeled samples are calculated based on the true labels of labeled samples and the output of the network on unlabeled samples. 
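+ # A label-propagation style update: the Gaussian similarity S weights each + # neighbour, labels are averaged with those weights, and entries with true + # labels are kept unchanged.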
V = torch.cat([self.data_window, self.centers], dim=0) label = np.vstack((self.label_window, np.zeros((self.num_centers, 1)))) label_index = np.vstack((self.label_index, np.zeros((self.num_centers, 1)))) @@ -167,18 +204,19 @@ def pseudo_label(self): nearest_distances = np.sort(distances, axis=1)[:, 1] nearest_distances = self.gamma * nearest_distances.reshape(-1, 1) - S = np.exp(-1 * np.square(distances)/(nearest_distances+1e-8)) + S = np.exp(-1 * np.square(distances) / (nearest_distances + 1e-8)) y = np.where(label_index, label, pre.detach().numpy()) - U = np.dot(S, y)/np.sum(S, axis=1).reshape(-1, 1) + U = np.dot(S, y) / np.sum(S, axis=1).reshape(-1, 1) U = np.where(label_index, label, U) - self.plabel_window = torch.from_numpy(U[:len(U)-self.num_centers]) + self.plabel_window = torch.from_numpy(U[: len(U) - self.num_centers]) def return_window(self): - #Returns the samples, pseudo-labels and true labels within the windows. + # Returns the samples, pseudo-labels and true labels within the windows. return self.data_window, self.plabel_window, self.label_index + class def_loss(nn.Module): def __init__(self, model, lam=0.3, alpha=0.2): super().__init__() @@ -190,14 +228,14 @@ def L2loss(self): # l2 regularization on the network weights. l2_loss = torch.tensor(0.0, requires_grad=True) for name, parma in self.model.named_parameters(): - if 'bias' not in name: + if "bias" not in name: l2_loss = l2_loss + (0.5 * torch.sum(torch.pow(parma, 2))) return l2_loss def forward(self, y_pred, y_true, label_index): - #Computes supervised loss for labeled samples and unsupervised loss for unlabeled samples. - labeled = torch.squeeze(label_index == 1., 1) - unlabeled = torch.squeeze(label_index == 0., 1) + # Computes supervised loss for labeled samples and unsupervised loss for unlabeled samples. 
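+ # label_index == 1 marks rows with true labels; both terms below are binary + # cross-entropy, with the pseudo-label term weighted by lam and the L2 + # penalty by alpha.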
+ labeled = torch.squeeze(label_index == 1.0, 1) + unlabeled = torch.squeeze(label_index == 0.0, 1) y_pred_labeled = y_pred[labeled] y_true_label = y_true[labeled] @@ -205,17 +243,24 @@ def forward(self, y_pred, y_true, label_index): y_pred_unlabeled = y_pred[unlabeled] y_sudo_unlabeled = y_true[unlabeled] - first_item = -torch.mean(y_true_label * torch.log(y_pred_labeled + 1e-8) + (1 - y_true_label) * torch.log(1 - y_pred_labeled + 1e-8)) - second_item = -torch.mean(y_sudo_unlabeled * torch.log(y_pred_unlabeled + 1e-8) + (1 - y_sudo_unlabeled) * torch.log(1 - y_pred_unlabeled + 1e-8)) + first_item = -torch.mean( + y_true_label * torch.log(y_pred_labeled + 1e-8) + + (1 - y_true_label) * torch.log(1 - y_pred_labeled + 1e-8) + ) + second_item = -torch.mean( + y_sudo_unlabeled * torch.log(y_pred_unlabeled + 1e-8) + + (1 - y_sudo_unlabeled) * torch.log(1 - y_pred_unlabeled + 1e-8) + ) l2_loss = self.L2loss() / len(y_pred) - loss = first_item + self.lam*second_item + self.alpha*l2_loss + loss = first_item + self.lam * second_item + self.alpha * l2_loss return loss class OSNN(ClassifierSSL): def __init__( self, + schema=None, num_center=10, n_out=1, window_size=200, diff --git a/src/capymoa/stream/PytorchStream.py b/src/capymoa/stream/PytorchStream.py index 3d664c4f..cf94a494 100644 --- a/src/capymoa/stream/PytorchStream.py +++ b/src/capymoa/stream/PytorchStream.py @@ -1,11 +1,8 @@ -from jpype import JObject - -import numpy as np import torch from capymoa.stream import Stream, Schema -from capymoa.stream.stream import _init_moa_stream_and_create_moa_header -from capymoa.stream.instance import ( +from capymoa.stream._stream import _init_moa_stream_and_create_moa_header +from capymoa.instance import ( LabeledInstance, RegressionInstance, ) diff --git a/src/capymoa/stream/__init__.py b/src/capymoa/stream/__init__.py index 2e5c4cb0..dc19e74e 100644 --- a/src/capymoa/stream/__init__.py +++ b/src/capymoa/stream/__init__.py @@ -1,11 +1,4 @@ -from .stream import ( - Stream, - Schema, - ARFFStream, - stream_from_file, - CSVStream -) -from .generator import RandomTreeGenerator +from ._stream import Stream, Schema, ARFFStream, stream_from_file, CSVStream from .PytorchStream import PytorchStream __all__ = [ @@ -13,7 +6,6 @@ "Schema", "stream_from_file", "ARFFStream", - "RandomTreeGenerator", "PytorchStream", - "CSVStream" + "CSVStream", ] diff --git a/src/capymoa/stream/stream.py b/src/capymoa/stream/_stream.py similarity index 86% rename from src/capymoa/stream/stream.py rename to src/capymoa/stream/_stream.py index c8900b5d..15027262 100644 --- a/src/capymoa/stream/stream.py +++ b/src/capymoa/stream/_stream.py @@ -15,7 +15,7 @@ # MOA/Java imports -from capymoa.stream.instance import ( +from capymoa.instance import ( Instance, LabeledInstance, RegressionInstance, @@ -35,9 +35,9 @@ class Schema: """ def __init__(self, moa_header: InstancesHeader): - """Construct a schema by wrapping a :class:`InstancesHeader`. + """Construct a schema by wrapping a ``InstancesHeader``. - To create a schema without an :class:`InstancesHeader` use + To create a schema without an ``InstancesHeader`` use :meth:`from_custom` method. :param moa_header: A Java MOA header object. @@ -218,7 +218,7 @@ def __init__( ): """Construct a Stream from a MOA stream object. - Usually, you will want to construct a Stream using the :func:`stream_from_file` + Usually, you will want to construct a Stream using the :func:`capymoa.stream.stream_from_file` function. :param moa_stream: The MOA stream object to read instances from. 
Is None @@ -429,7 +429,10 @@ def stream_from_file( targets = targets.astype(int) x_features = x_features[:, :-1] return NumpyStream( - x_features, targets, dataset_name=dataset_name, enforce_regression=enforce_regression + x_features, + targets, + dataset_name=dataset_name, + enforce_regression=enforce_regression, ) @@ -571,18 +574,20 @@ def _add_instances_to_moa_stream(moa_stream, moa_header, X, y): moa_stream.add(instance) -class CSVStream(Stream): - def __init__(self, - csv_file_path, - dtypes: list = None, # [('column1', np.float64), ('column2', np.int32), ('column3', np.float64), ('column3', str)] reads nomonal attributes as str - values_for_nominal_features={}, # {i: [1,2,3], k: [Aa, BB]}. Key is integer. Values are turned into strings - class_index: int = -1, - values_for_class_label: list = None, - target_attribute_name=None, - enforce_regression=False, - skip_header: bool = False, - delimiter=','): +class CSVStream(Stream): + def __init__( + self, + csv_file_path, + dtypes: list = None, # [('column1', np.float64), ('column2', np.int32), ('column3', np.float64), ('column3', str)] reads nomonal attributes as str + values_for_nominal_features={}, # {i: [1,2,3], k: [Aa, BB]}. Key is integer. Values are turned into strings + class_index: int = -1, + values_for_class_label: list = None, + target_attribute_name=None, + enforce_regression=False, + skip_header: bool = False, + delimiter=",", + ): self.csv_file_path = csv_file_path self.values_for_nominal_features = values_for_nominal_features self.class_index = class_index @@ -592,56 +597,96 @@ def __init__(self, self.skip_header = skip_header self.delimiter = delimiter - self.dtypes = [] # [('column1', np.float64), ('column2', np.int32), ('column3', np.float64), ('column3', str)] reads nomonal attributes as str - if dtypes is None or len(dtypes) == 0: # data definition for each column not provided - if len(self.values_for_nominal_features) == 0: # data definition for nominal features are given + self.dtypes = ( + [] + ) # [('column1', np.float64), ('column2', np.int32), ('column3', np.float64), ('column3', str)] reads nomonal attributes as str + if ( + dtypes is None or len(dtypes) == 0 + ): # data definition for each column not provided + if ( + len(self.values_for_nominal_features) == 0 + ): # data definition for nominal features are given # need to infer number of columns, then generate full data definition using nominal information # LOADS FIRST TWO ROWS INTO THE MEMORY - data = np.genfromtxt(self.csv_file_path, delimiter=self.delimiter, dtype=None, names=True, - skip_header=0, max_rows=2) + data = np.genfromtxt( + self.csv_file_path, + delimiter=self.delimiter, + dtype=None, + names=True, + skip_header=0, + max_rows=2, + ) if not self.enforce_regression and self.values_for_class_label is None: # LOADS THE FULL FILE INTO THE MEMORY - data = np.genfromtxt(self.csv_file_path, delimiter=self.delimiter, dtype=None, names=True, - skip_header=1 if skip_header else 0) + data = np.genfromtxt( + self.csv_file_path, + delimiter=self.delimiter, + dtype=None, + names=True, + skip_header=1 if skip_header else 0, + ) y = data[data.dtype.names[self.class_index]] self.values_for_class_label = [str(value) for value in np.unique(y)] for i, data_info in enumerate(data.dtype.descr): column_name, data_type = data_info - if self.values_for_nominal_features.get(i) is not None: # i is in nominal feature keys - self.dtypes.append((column_name, 'str')) + if ( + self.values_for_nominal_features.get(i) is not None + ): # i is in nominal feature keys + 
self.dtypes.append((column_name, "str")) else: self.dtypes.append((column_name, data_type)) - else: # need to infer data definitions + else: # need to infer data definitions # LOADS THE FULL FILE INTO THE MEMORY - data = np.genfromtxt(self.csv_file_path, delimiter=self.delimiter, dtype=None, names=True, - skip_header=1 if skip_header else 0) + data = np.genfromtxt( + self.csv_file_path, + delimiter=self.delimiter, + dtype=None, + names=True, + skip_header=1 if skip_header else 0, + ) self.dtypes = data.dtype if not self.enforce_regression and self.values_for_class_label is None: y = data[data.dtype.names[self.class_index]] self.values_for_class_label = [str(value) for value in np.unique(y)] - else: # data definition for each column are provided + else: # data definition for each column are provided self.dtypes = dtypes self.total_number_of_lines = 0 if self.skip_header: self.n_lines_to_skip = 1 else: - row1_data = np.genfromtxt(self.csv_file_path, delimiter=self.delimiter, dtype=None, names=True, skip_header=0,max_rows=1) - row2_data = np.genfromtxt(self.csv_file_path, delimiter=self.delimiter, dtype=None, names=True, skip_header=1, max_rows=1) + row1_data = np.genfromtxt( + self.csv_file_path, + delimiter=self.delimiter, + dtype=None, + names=True, + skip_header=0, + max_rows=1, + ) + row2_data = np.genfromtxt( + self.csv_file_path, + delimiter=self.delimiter, + dtype=None, + names=True, + skip_header=1, + max_rows=1, + ) if row1_data.dtype.names != row2_data.dtype.names: self.n_lines_to_skip = 1 else: self.n_lines_to_skip = 0 - self.__moa_stream_with_only_header, self.moa_header = _init_moa_stream_and_create_moa_header( - number_of_instances=1, # we only need this to initialize the MOA header - feature_names = [data_info[0] for data_info in self.dtypes], - values_for_nominal_features = self.values_for_nominal_features, - values_for_class_label = self.values_for_class_label, + self.__moa_stream_with_only_header, self.moa_header = ( + _init_moa_stream_and_create_moa_header( + number_of_instances=1, # we only need this to initialize the MOA header + feature_names=[data_info[0] for data_info in self.dtypes], + values_for_nominal_features=self.values_for_nominal_features, + values_for_class_label=self.values_for_class_label, dataset_name="CSVDataset", - target_attribute_name = self.target_attribute_name, - enforce_regression = self.enforce_regression, + target_attribute_name=self.target_attribute_name, + enforce_regression=self.enforce_regression, ) + ) self.schema = Schema(moa_header=self.moa_header) super().__init__(schema=self.schema, CLI=None, moa_stream=None) @@ -660,15 +705,32 @@ def next_instance(self): if not self.has_more_instances(): return None # skip header - data = np.genfromtxt(self.csv_file_path, delimiter=self.delimiter, dtype=self.dtypes, names=None, skip_header=self.n_lines_to_skip, max_rows=1) + data = np.genfromtxt( + self.csv_file_path, + delimiter=self.delimiter, + dtype=self.dtypes, + names=None, + skip_header=self.n_lines_to_skip, + max_rows=1, + ) # np.genfromtxt() returns a structured https://numpy.org/doc/stable/user/basics.rec.html#structured-arrays self.n_lines_to_skip += 1 # data = np.expand_dims(data, axis=0) # y = data[[data.dtype.names[self.class_index]]].view('i4') - y = rfn.structured_to_unstructured(data[[data.dtype.names[self.class_index]]])[0] + y = rfn.structured_to_unstructured(data[[data.dtype.names[self.class_index]]])[ + 0 + ] # X = data[[item for item in data.dtype.names if item != data.dtype.names[self.class_index]]].view('f4') - X = 
rfn.structured_to_unstructured(data[[item for item in data.dtype.names if item != data.dtype.names[self.class_index]]]) + X = rfn.structured_to_unstructured( + data[ + [ + item + for item in data.dtype.names + if item != data.dtype.names[self.class_index] + ] + ] + ) if self.schema.is_classification(): return LabeledInstance.from_array(self.schema, X, y) @@ -688,4 +750,4 @@ def get_moa_stream(self): def restart(self): self.total_number_of_lines = 0 - self.n_lines_to_skip = 1 if self.skip_header else 0 \ No newline at end of file + self.n_lines_to_skip = 1 if self.skip_header else 0 diff --git a/src/capymoa/stream/drift.py b/src/capymoa/stream/drift.py index f60a77c7..81451b0c 100644 --- a/src/capymoa/stream/drift.py +++ b/src/capymoa/stream/drift.py @@ -2,7 +2,7 @@ import re -from capymoa.stream.stream import Stream +from capymoa.stream._stream import Stream from capymoa._utils import _get_moa_creation_CLI from moa.streams import ConceptDriftStream as MOA_ConceptDriftStream diff --git a/tasks.py b/tasks.py index fc4e71fe..7c3a9d65 100644 --- a/tasks.py +++ b/tasks.py @@ -34,13 +34,14 @@ def all_exist(files: List[str] = None, directories: List[str] = None) -> bool: def docs_build(ctx: Context, ignore_warnings: bool = False): """Build the documentation using Sphinx.""" warn = "-W" if not ignore_warnings else "" + nitpicky = "-n" if not ignore_warnings else "" doc_dir = Path("docs/_build") doc_dir.mkdir(exist_ok=True, parents=True) cpu = cpu_count() // 2 print("Building documentation...") - ctx.run(f"python -m sphinx build {warn} --color -E -b html docs {doc_dir}") + ctx.run(f"python -m sphinx build {warn} {nitpicky} --color -E -b html docs {doc_dir}") print("-" * 80) print("Documentation is built and available at:") diff --git a/test_utility/ssl_helpers.py b/test_utility/ssl_helpers.py deleted file mode 100644 index 560ae67a..00000000 --- a/test_utility/ssl_helpers.py +++ /dev/null @@ -1,23 +0,0 @@ -import pytest -from capymoa.evaluation.evaluation import prequential_SSL_evaluation -from capymoa.learner import ClassifierSSL -from capymoa.stream import Stream - -def assert_ssl_evaluation( - learner: ClassifierSSL, - stream: Stream, - expectation: float, - label_probability: float = 0.01, - max_instances: int = 1000, -): - results = prequential_SSL_evaluation( - stream=stream, - learner=learner, - label_probability=label_probability, - window_size=10, - max_instances=max_instances, - ) - - assert results["cumulative"].accuracy() == pytest.approx(expectation), \ - f"Expected accuracy of {expectation} but got {results['cumulative'].accuracy()}" + \ - f" for learner {learner} on stream {stream}" diff --git a/tests/test_CPSSDS.py b/tests/test_CPSSDS.py deleted file mode 100644 index 81cd87cc..00000000 --- a/tests/test_CPSSDS.py +++ /dev/null @@ -1,28 +0,0 @@ -from capymoa.datasets.datasets import ElectricityTiny, CovtypeTiny -from capymoa.learner.ssl.classifier.CPSSDS import CPSSDS -from test_utility.ssl_helpers import assert_ssl_evaluation -import pytest - - -@pytest.mark.parametrize( - "learner, stream, expectation", - [ - ("NaiveBayes", ElectricityTiny(), 76.6), - ("HoeffdingTree", ElectricityTiny(), 66.2), - ("NaiveBayes", CovtypeTiny(), 55.7), - ("HoeffdingTree", CovtypeTiny(), 53.3), - ], - ids=[ - "ElectricityTiny-NaiveBayes", - "ElectricityTiny-HoeffdingTree", - "CovtypeTiny-NaiveBayes", - "CovtypeTiny-HoeffdingTree", - ], -) -def test_CPSSDS(learner, stream, expectation): - assert_ssl_evaluation( - CPSSDS(learner, 100, schema=stream.schema), - stream, - expectation, - 
label_probability=0.5, - ) diff --git a/tests/test_OSNN.py b/tests/test_OSNN.py deleted file mode 100644 index a75c0556..00000000 --- a/tests/test_OSNN.py +++ /dev/null @@ -1,23 +0,0 @@ -from capymoa.datasets.datasets import ElectricityTiny, CovtypeTiny -from test_utility.ssl_helpers import assert_ssl_evaluation -import pytest -import importlib - -@pytest.mark.parametrize( - "stream, expectation", - [ - (ElectricityTiny(), 46.1), - (CovtypeTiny(), 26.3), - ], - ids=["ElectricityTiny", "CovtypeTiny"], -) -def test_OSNN(stream, expectation): - pytest.importorskip("torch.nn", reason="PyTorch not installed. Skipping test.") - OSNN = importlib.import_module("capymoa.learner.ssl.classifier.OSNN").OSNN - # The optimizer steps are set to 10 to speed up the test - learner = OSNN(optim_steps=10) - assert_ssl_evaluation( - learner, - stream, - expectation, - ) diff --git a/tests/test_batch.py b/tests/test_batch.py index ad72451c..b2fc7206 100644 --- a/tests/test_batch.py +++ b/tests/test_batch.py @@ -1,6 +1,6 @@ -from capymoa.datasets.datasets import ElectricityTiny -from capymoa.learner.ssl.classifier.batch import BatchClassifierSSL -from capymoa.stream.stream import Schema, NumpyStream +from capymoa.datasets._datasets import ElectricityTiny +from capymoa.ssl.classifier._batch import BatchClassifierSSL +from capymoa.stream._stream import Schema, NumpyStream from capymoa.evaluation.evaluation import prequential_SSL_evaluation import numpy as np diff --git a/tests/test_classifiers.py b/tests/test_classifiers.py index ac8599a3..b619d227 100644 --- a/tests/test_classifiers.py +++ b/tests/test_classifiers.py @@ -1,22 +1,23 @@ from capymoa.evaluation import ClassificationEvaluator, ClassificationWindowedEvaluator -from capymoa.learner.classifier import ( +from capymoa.classifier import ( EFDT, HoeffdingTree, AdaptiveRandomForest, OnlineBagging, NaiveBayes, ) -from capymoa.learner import Classifier, MOAClassifier +from capymoa.base import Classifier +from capymoa.base import MOAClassifier from capymoa.datasets import ElectricityTiny import pytest from functools import partial from typing import Callable, Optional -from capymoa.learner.learners import _extract_moa_learner_CLI -from capymoa.learner.splitcriteria import InfoGainSplitCriterion +from capymoa.base import _extract_moa_learner_CLI +from capymoa.splitcriteria import InfoGainSplitCriterion -from capymoa.stream.stream import Schema +from capymoa.stream._stream import Schema -from capymoa.learner.classifier.sklearn import PassiveAggressiveClassifier +from capymoa.classifier import PassiveAggressiveClassifier @pytest.mark.parametrize( @@ -34,8 +35,14 @@ ), (partial(NaiveBayes), 84.0, 91.0, None), ], - ids=["OnlineBagging", "AdaptiveRandomForest", "HoeffdingTree", "EFDT", "EFDT_gini", "NaiveBayes"], - + ids=[ + "OnlineBagging", + "AdaptiveRandomForest", + "HoeffdingTree", + "EFDT", + "EFDT_gini", + "NaiveBayes", + ], ) def test_classifiers( learner_constructor: Callable[[Schema], Classifier], @@ -62,7 +69,6 @@ def test_classifiers( ) learner: Classifier = learner_constructor(schema=stream.get_schema()) - # learner = learner_constructor(schema=stream.get_schema()) while stream.has_more_instances(): instance = stream.next_instance() @@ -71,6 +77,7 @@ def test_classifiers( win_evaluator.update(instance.y_index, prediction) learner.train(instance) + # Check if the accuracy matches the expected value for both evaluator types actual_acc = evaluator.accuracy() actual_win_acc = win_evaluator.accuracy() assert actual_acc == pytest.approx( @@ -80,11 +87,7 @@ def 
test_classifiers(
         win_accuracy, abs=0.1
     ), f"Windowed Eval: Expected accuracy of {win_accuracy:0.1f} got {actual_win_acc:0.1f}"
 
+    # Optionally check the CLI string if it was provided
     if isinstance(learner, MOAClassifier) and cli_string is not None:
         cli_str = _extract_moa_learner_CLI(learner).strip("()")
-        assert (
-            cli_str == cli_string
-        ), "CLI does not match expected value"
-
-        # assert evaluator.accuracy() == pytest.approx(accuracy, abs=0.1)
-        # assert win_evaluator.accuracy() == pytest.approx(win_accuracy, abs=0.1)
+        assert cli_str == cli_string, "CLI does not match expected value"
diff --git a/tests/test_regressors.py b/tests/test_regressors.py
index de601c10..14ba21ca 100644
--- a/tests/test_regressors.py
+++ b/tests/test_regressors.py
@@ -1,6 +1,6 @@
 from capymoa.evaluation import RegressionEvaluator, RegressionWindowedEvaluator
 from capymoa.datasets import Fried
-from capymoa.learner.regressor import (
+from capymoa.regressor import (
     KNNRegressor,
     AdaptiveRandomForestRegressor,
     FIMTDD,
@@ -12,7 +12,7 @@
 import pytest
 from functools import partial
 
-from capymoa.learner import Regressor
+from capymoa.base import Regressor
 
 
 @pytest.mark.parametrize(
diff --git a/tests/test_ssl_classifiers.py b/tests/test_ssl_classifiers.py
new file mode 100644
index 00000000..79cec583
--- /dev/null
+++ b/tests/test_ssl_classifiers.py
@@ -0,0 +1,63 @@
+from capymoa.datasets._datasets import ElectricityTiny, CovtypeTiny
+from capymoa.ssl.classifier import OSNN, CPSSDS
+import pytest
+from capymoa.evaluation.evaluation import prequential_SSL_evaluation
+from capymoa.base import ClassifierSSL
+from capymoa.stream import Stream
+from functools import partial
+
+
+def assert_ssl_evaluation(
+    learner: ClassifierSSL,
+    stream: Stream,
+    expectation: float,
+    label_probability: float = 0.01,
+    max_instances: int = 1000,
+):
+    results = prequential_SSL_evaluation(
+        stream=stream,
+        learner=learner,
+        label_probability=label_probability,
+        window_size=10,
+        max_instances=max_instances,
+    )
+
+    assert results["cumulative"].accuracy() == pytest.approx(expectation), (
+        f"Expected accuracy of {expectation} but got {results['cumulative'].accuracy()}"
+        + f" for learner {learner} on stream {stream}"
+    )
+
+
+@pytest.mark.parametrize(
+    "learner_constructor, stream_constructor, expectation, label_probability",
+    [
+        (partial(OSNN, optim_steps=10), ElectricityTiny, 46.1, None),
+        (partial(OSNN, optim_steps=10), CovtypeTiny, 26.3, None),
+        (partial(CPSSDS, batch_size=100, base_model="NaiveBayes"), ElectricityTiny, 76.6, 0.5),
+        (partial(CPSSDS, batch_size=100, base_model="HoeffdingTree"), ElectricityTiny, 66.2, 0.5),
+        (partial(CPSSDS, batch_size=100, base_model="NaiveBayes"), CovtypeTiny, 55.7, 0.5),
+        (partial(CPSSDS, batch_size=100, base_model="HoeffdingTree"), CovtypeTiny, 53.3, 0.5),
+    ],
+    ids=[
+        "OSNN_ElectricityTiny",
+        "OSNN_CovtypeTiny",
+        "CPSSDS_ElectricityTiny-NaiveBayes",
+        "CPSSDS_ElectricityTiny-HoeffdingTree",
+        "CPSSDS_CovtypeTiny-NaiveBayes",
+        "CPSSDS_CovtypeTiny-HoeffdingTree",
+    ],
+)
+def test_ssl_classifiers(learner_constructor, stream_constructor, expectation, label_probability):
+    # OSNN's optim_steps is set to 10 in the parametrization above to speed up the test
+    stream = stream_constructor()
+    learner = learner_constructor(schema=stream.get_schema())
+
+    if label_probability is None:
+        label_probability = 0.01
+
+    assert_ssl_evaluation(
+        learner,
+        stream,
+        expectation,
+        label_probability=label_probability,
+    )
diff --git a/tests/test_stream.py b/tests/test_stream.py
index 4801bd90..eb5356d6 100644
--- a/tests/test_stream.py
+++ b/tests/test_stream.py
@@ -1,5 +1,6 @@
 """This module is for testing the speeds of different stream implementations.
 """
+
 import time
 from capymoa.stream import stream_from_file
 from cProfile import Profile
@@ -7,33 +8,41 @@
 import numpy as np
 
 from capymoa.stream import Stream
-from capymoa.stream.instance import Instance
-from capymoa.stream.stream import CSVStream
+from capymoa.instance import Instance
+from capymoa.stream._stream import CSVStream
 import csv
 
+
 def _get_streams() -> List[Stream]:
     return [
         stream_from_file("data/electricity_tiny.csv"),
         stream_from_file("data/electricity_tiny.arff"),
-        CSVStream("data/electricity_tiny.csv")
+        CSVStream("data/electricity_tiny.csv"),
     ]
 
+
def test_stream_consistency():
     streams = _get_streams()
 
     def _has_more_instance():
         return [stream.has_more_instances() for stream in streams]
-    
+
     def _next_instance():
         return [stream.next_instance() for stream in streams]
-    
+
     i = 0
     while any(_has_more_instance()):
-        assert all(_has_more_instance()), "Not all streams have the same number of instances"
+        assert all(
+            _has_more_instance()
+        ), "Not all streams have the same number of instances"
         i += 1
         instances = _next_instance()
         prototype = instances.pop()
         for instance in instances:
-            assert np.allclose(prototype.x, instance.x), f"Streams are not consistent at instance {i}"
-            assert prototype.y_index == instance.y_index, f"Streams are not consistent at instance {i}"
+            assert np.allclose(
+                prototype.x, instance.x
+            ), f"Streams are not consistent at instance {i}"
+            assert (
+                prototype.y_index == instance.y_index
+            ), f"Streams are not consistent at instance {i}"
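
For readers unfamiliar with the structured-array pattern that the reformatted CSVStream.next_instance code above relies on, here is a minimal, self-contained sketch (not part of the patch) of the same technique in isolation: read one CSV row with np.genfromtxt and split it into a feature vector X and a target y using numpy.lib.recfunctions.structured_to_unstructured. The file name, header layout, and class_index below are hypothetical; only these two standard NumPy APIs are assumed.

import numpy as np
from numpy.lib import recfunctions as rfn

# Read a single data row as a structured array; names=True takes the field
# names from the header line. "example.csv" is a hypothetical file with a
# header such as "nswprice,vicprice,class".
row = np.genfromtxt(
    "example.csv",  # hypothetical path
    delimiter=",",
    dtype=None,
    names=True,
    max_rows=1,
)

class_index = -1  # assume the target is the last column
target_name = row.dtype.names[class_index]

# Select only the target field and flatten it to a scalar, mirroring the
# y = rfn.structured_to_unstructured(...)[0] line in next_instance above.
y = rfn.structured_to_unstructured(row[[target_name]])[0]

# Select every other field to build the plain (unstructured) feature vector.
X = rfn.structured_to_unstructured(
    row[[name for name in row.dtype.names if name != target_name]]
)

Multi-field indexing (row[[...]]) keeps the result structured, which is why the explicit structured_to_unstructured conversion is needed before the values can be handed to LabeledInstance.from_array as flat numeric arrays.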