Skip to content

Commit

Permalink
[MAINT] Update Ctypes in line with scikit-learn: main (#141)
Browse files Browse the repository at this point in the history
* Update submodule and c-types in tree/

---------

Signed-off-by: Adam Li <adam2392@gmail.com>
  • Loading branch information
adam2392 authored Oct 12, 2023
1 parent eb946d4 commit 64b8044
Show file tree
Hide file tree
Showing 31 changed files with 921 additions and 927 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
# - [macos-12, macosx_*, arm64]
- [windows-2019, win, AMD64]

python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11.0-alpha - 3.11.0"]]
python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"]]
# python[0] is used to specify the python versions made by cibuildwheel

env:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ jobs:
- name: Setup build and install scikit-tree
run: |
./spin build -j 2 --forcesubmodule
./spin build -j 2
- name: Ccache performance
shell: bash -l {0}
Expand Down
2 changes: 1 addition & 1 deletion .spin/cmds.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def setup_submodule(forcesubmodule=False):
This will update the submodule, which then must be committed so that
git knows the submodule needs to be at a certain commit hash.
"""
commit_fpath = "./sktree/_lib/sklearn_fork/commit.txt"
commit_fpath = "./sktree/_lib/commit.txt"
submodule = "./sktree/_lib/sklearn_fork"
commit = ""
current_hash = ""
Expand Down
4 changes: 2 additions & 2 deletions build_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
meson
meson-python
cython>=3.0
cython==0.29.36
ninja
numpy
scikit-learn>=1.3
scikit-learn>=1.3.1
click
rich-click
doit
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ include = [
dependencies = [
'numpy',
'scipy>=1.5.0',
'scikit-learn>=1.3'
'scikit-learn>=1.3.1'
]


Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
numpy>=1.25
scipy
scikit-learn>=1.3
scikit-learn>=1.3.1
1 change: 1 addition & 0 deletions sktree/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
)
from .ensemble._honest_forest import HonestForestClassifier
except ImportError as e:
print(e.msg)
msg = """Error importing scikit-tree: you cannot import scikit-tree while
being in scikit-tree source directory; please exit the scikit-tree source
tree first and relaunch your Python interpreter."""
Expand Down
34 changes: 33 additions & 1 deletion sktree/_lib/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,36 @@ foreach py_source: python_sources
'./sklearn/ensemble/' + py_source,
subdir: 'sktree/_lib/sklearn/ensemble'
)
endforeach
endforeach

# TODO: Can remove if included in scikit-learn eventually
# install neighbors/ submodule
extensions = [
'_quad_tree',
]

foreach ext: extensions
py3.extension_module(ext,
cython_gen_cpp.process('./sklearn/neighbors/' + ext + '.pyx'),
c_args: cython_c_args,
include_directories: [incdir_numpy,],
install: true,
subdir: 'sktree/_lib/sklearn/neighbors/',
)
endforeach

# install utils/ submodule
extensions = [
'_typedefs',
'_random',
]

foreach ext: extensions
py3.extension_module(ext,
cython_gen_cpp.process('./sklearn/utils/' + ext + '.pyx'),
c_args: cython_c_args,
include_directories: [incdir_numpy,],
install: true,
subdir: 'sktree/_lib/sklearn/utils/',
)
endforeach
2 changes: 1 addition & 1 deletion sktree/_lib/sklearn_fork
Submodule sklearn_fork updated 106 files
11 changes: 4 additions & 7 deletions sktree/tree/_marginal.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,16 @@ import numpy as np

cimport numpy as cnp

from .._lib.sklearn.tree._tree cimport DOUBLE_t # Type of y, sample_weight
from .._lib.sklearn.tree._tree cimport DTYPE_t # Type of X
from .._lib.sklearn.tree._tree cimport INT32_t # Signed 32 bit integer
from .._lib.sklearn.tree._tree cimport SIZE_t # Type for indices and counters
from .._lib.sklearn.tree._tree cimport UINT32_t # Unsigned 32 bit integer
from .._lib.sklearn.tree._tree cimport BaseTree, Node
from .._lib.sklearn.tree._utils cimport UINT32_t
from .._lib.sklearn.utils._typedefs cimport float32_t, float64_t, intp_t


cpdef apply_marginal_tree(
BaseTree tree,
object X,
const SIZE_t[:] marginal_indices,
int traversal_method,
const intp_t[:] marginal_indices,
intp_t traversal_method,
unsigned char use_sample_weight,
object random_state
)
56 changes: 28 additions & 28 deletions sktree/tree/_marginal.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ from numpy import float32 as DTYPE

TREE_LEAF = -1
TREE_UNDEFINED = -2
cdef SIZE_t _TREE_LEAF = TREE_LEAF
cdef SIZE_t _TREE_UNDEFINED = TREE_UNDEFINED
cdef intp_t _TREE_LEAF = TREE_LEAF
cdef intp_t _TREE_UNDEFINED = TREE_UNDEFINED


cpdef apply_marginal_tree(
BaseTree tree,
object X,
const SIZE_t[:] marginal_indices,
int traversal_method,
const intp_t[:] marginal_indices,
intp_t traversal_method,
unsigned char use_sample_weight,
object random_state
):
Expand All @@ -41,7 +41,7 @@ cpdef apply_marginal_tree(
marginal_indices : ndarray of shape (n_marginals,)
The indices of the features to marginalize, which
are columns in ``X``.
traversal_method : int
traversal_method : intp_t
The traversal method to use. 0 for 'random', 1 for
'weighted'.
use_sample_weight : unsigned char
Expand All @@ -62,13 +62,13 @@ cpdef apply_marginal_tree(
if X.dtype != DTYPE:
raise ValueError("X.dtype should be np.float32, got %s" % X.dtype)

cdef SIZE_t n_marginals = marginal_indices.shape[0]
cdef intp_t n_marginals = marginal_indices.shape[0]

# sklearn_rand_r random number state
cdef UINT32_t rand_r_state = random_state.randint(0, RAND_R_MAX)

# define a set of all marginal indices
cdef unordered_set[SIZE_t] marginal_indices_map
cdef unordered_set[intp_t] marginal_indices_map

# check all marginal indices are valid, and also convert to an unordered set
for i in range(n_marginals):
Expand All @@ -94,19 +94,19 @@ cpdef apply_marginal_tree(
cdef void _resample_split_node(
BaseTree tree,
Node* node,
unordered_set[SIZE_t] marginal_indices_map,
const DTYPE_t[:, :] X,
const DOUBLE_t[:, ::1] y,
const DOUBLE_t[:] sample_weight,
unordered_set[intp_t] marginal_indices_map,
const float32_t[:, :] X,
const float64_t[:, ::1] y,
const float64_t[:] sample_weight,
) noexcept nogil:
pass


cdef inline cnp.ndarray _apply_dense_marginal(
BaseTree tree,
const DTYPE_t[:, :] X,
unordered_set[SIZE_t] marginal_indices_map,
int traversal_method,
const float32_t[:, :] X,
unordered_set[intp_t] marginal_indices_map,
intp_t traversal_method,
unsigned char use_sample_weight,
UINT32_t* rand_r_state
):
Expand All @@ -122,10 +122,10 @@ cdef inline cnp.ndarray _apply_dense_marginal(
The tree to apply.
X : const ndarray of shape (n_samples, n_features)
The data matrix.
marginal_indices_map : unordered_set[SIZE_t]
marginal_indices_map : unordered_set[intp_t]
The indices of the features to marginalize, which
are columns in ``X``.
traversal_method : int
traversal_method : intp_t
The traversal method to use. 0 for 'random', 1 for
'weighted'.
use_sample_weight : unsigned char
Expand All @@ -135,20 +135,20 @@ cdef inline cnp.ndarray _apply_dense_marginal(
The random number state.
"""
# Extract input
cdef const DTYPE_t[:, :] X_ndarray = X
cdef SIZE_t n_samples = X.shape[0]
cdef DTYPE_t X_i_node_feature
cdef const float32_t[:, :] X_ndarray = X
cdef intp_t n_samples = X.shape[0]
cdef float32_t X_i_node_feature

cdef DTYPE_t n_node_samples, n_right_samples, n_left_samples
cdef double p_left
cdef int is_left
cdef float32_t n_node_samples, n_right_samples, n_left_samples
cdef float64_t p_left
cdef intp_t is_left

# Initialize output
cdef SIZE_t[:] out = np.zeros(n_samples, dtype=np.intp)
cdef intp_t[:] out = np.zeros(n_samples, dtype=np.intp)

# Initialize auxiliary data-structure
cdef Node* node = NULL
cdef SIZE_t i = 0
cdef intp_t i = 0

with nogil:
for i in prange(n_samples):
Expand All @@ -172,7 +172,7 @@ cdef inline cnp.ndarray _apply_dense_marginal(
n_right_samples = tree.nodes[node.right_child].n_node_samples

# compute the probabilities for going left and right
p_left = (<double>n_left_samples / n_node_samples)
p_left = (<float64_t>n_left_samples / n_node_samples)

# randomly sample a direction
is_left = rand_weighted_binary(p_left, rand_r_state)
Expand Down Expand Up @@ -202,14 +202,14 @@ cdef inline cnp.ndarray _apply_dense_marginal(
else:
node = &tree.nodes[node.right_child]

out[i] = <SIZE_t>(node - tree.nodes) # node offset
out[i] = <intp_t>(node - tree.nodes) # node offset

return np.asarray(out)


cdef inline int is_element_present(unordered_set[SIZE_t]& my_set, SIZE_t element) noexcept nogil:
cdef inline intp_t is_element_present(unordered_set[intp_t]& my_set, intp_t element) noexcept nogil:
"""Helper function to check presence of element in set."""
cdef unordered_set[SIZE_t].iterator it = my_set.find(element)
cdef unordered_set[intp_t].iterator it = my_set.find(element)

if it != my_set.end():
return 1
Expand Down
Loading

0 comments on commit 64b8044

Please sign in to comment.