diff --git a/azure-pipelines.yml b/azure-pipelines.yml index d750fc01..5def3d5d 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -86,7 +86,7 @@ jobs: solution: 'build/*.sln' msbuildArchitecture: 'x64' msbuildArguments: '/p:Configuration=Release /m /nodeReuse:false' - displayName: 'Building XGBoost...' + displayName: 'Building Treelite...' - script: | call $(Agent.BuildDirectory)\CONDA\Scripts\activate cd python @@ -225,3 +225,47 @@ jobs: displayName: 'Submitting code coverage data to CodeCov...' env: CODECOV_TOKEN: afe9868c-2c27-4853-89fa-4bc5d3d2b255 + +- job: win_python_test + dependsOn: win_build + pool: + vmImage: 'vs2017-win2016' + steps: + - checkout: self + submodules: recursive + - powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts" + displayName: 'Add conda to PATH' + - script: | + call activate + conda install --yes --quiet numpy scipy scikit-learn pandas + displayName: 'Setting up Python environment...' + - task: DownloadPipelineArtifact@0 + inputs: + artifactName: 'python_win_whl' + targetPath: $(System.DefaultWorkingDirectory) + displayName: 'Downloading Treelite Python wheel for Windows...' + - powershell: | + Dir *.whl | Rename-Item -newname { $_.name -replace ".whl", ".zip" } + Expand-Archive *.zip -DestinationPath .\whl_content + New-Item .\lib -ItemType Directory -ea 0 + New-Item .\runtime\native\lib -ItemType Directory -ea 0 + New-Item .\build -ItemType Directory -ea 0 + Move-Item -Path .\whl_content\treelite-*.data\data\treelite\treelite.dll -Destination .\lib + Move-Item -Path .\whl_content\treelite-*.data\data\treelite\treelite_runtime.dll -Destination .\runtime\native\lib + Remove-Item .\whl_content -Force -Recurse + Set-Location -Path .\build + cmake .. -G"Visual Studio 15 2017 Win64" + displayName: 'Installing Treelite into Python environment...' 
+ - script: | + call activate + python -m pip install wheel setuptools xgboost lightgbm pytest pytest-cov + python -m pytest -v --fulltrace tests\python --cov=./ + displayName: 'Running Python tests...' + env: + PYTHONPATH: .\python + - script: | + choco install codecov + codecov + displayName: 'Submitting code coverage data to CodeCov...' + env: + CODECOV_TOKEN: afe9868c-2c27-4853-89fa-4bc5d3d2b255 diff --git a/include/treelite/common.h b/include/treelite/common.h index 9b4019c1..23bea926 100644 --- a/include/treelite/common.h +++ b/include/treelite/common.h @@ -448,7 +448,9 @@ inline bool CompareWithOp(treelite::tl_float lhs, treelite::Operator op, case treelite::Operator::kLE: return lhs <= rhs; case treelite::Operator::kGT: return lhs > rhs; case treelite::Operator::kGE: return lhs >= rhs; - default: LOG(FATAL) << "operator undefined"; + default: + LOG(FATAL) << "operator undefined"; + return false; } } diff --git a/python/treelite/common/util.py b/python/treelite/common/util.py index 72aa87ca..9831b605 100644 --- a/python/treelite/common/util.py +++ b/python/treelite/common/util.py @@ -6,11 +6,10 @@ import ctypes import inspect import time -import shutil import os import sys import site -from .compat import py_str, PY3 +from .compat import py_str class TreeliteVersionNotFound(Exception): """Error thrown by when version file is not found""" @@ -50,22 +49,6 @@ def _load_ver(): class TreeliteError(Exception): """Error thrown by treelite""" -if PY3: - # pylint: disable=W0611 - from tempfile import TemporaryDirectory -else: - import tempfile - class TemporaryDirectory(): - """Context manager for tempfile.mkdtemp()""" - # pylint: disable=R0903 - - def __enter__(self): - self.name = tempfile.mkdtemp() # pylint: disable=W0201 - return self.name - - def __exit__(self, exc_type, exc_value, traceback): - shutil.rmtree(self.name) - def lineno(): """Returns line number""" return inspect.currentframe().f_back.f_lineno diff --git a/python/treelite/contrib/__init__.py 
def expand_windows_path(dirpath):
    """
    Expand a short (8.3-style) path to its full long form (Windows only)

    On every platform other than Windows the input is returned untouched.
    On Windows the Win32 call ``GetLongPathNameW`` is used. If that call
    fails, the input is returned unchanged so that the caller's own
    existence check reports the path the user actually supplied.

    Parameters
    ----------
    dirpath : :py:class:`str`
        Path to expand

    Returns
    -------
    fullpath : :py:class:`str`
        Expanded path, or ``dirpath`` itself when no expansion is possible
    """
    if sys.platform != 'win32':
        return dirpath

    get_long_path_name = ctypes.windll.kernel32.GetLongPathNameW
    get_long_path_name.argtypes = [ctypes.c_wchar_p, ctypes.c_wchar_p, ctypes.c_uint]
    get_long_path_name.restype = ctypes.c_uint

    buffer_size = 500  # initial guess; grown below if the path is longer
    while True:
        buffer = ctypes.create_unicode_buffer(buffer_size)
        result = get_long_path_name(dirpath, buffer, buffer_size)
        if result == 0:
            # The call failed (e.g. the path does not exist). Returning an
            # empty buffer would hide the offending path from error messages.
            return dirpath
        if result < buffer_size:
            # Success: result is the number of characters written,
            # excluding the terminating null.
            return buffer.value
        # Buffer too small: result is the required size including the
        # terminating null. Retry with a buffer that is certainly big enough.
        buffer_size = result + 1
The @@ -158,6 +183,7 @@ def create_shared(toolchain, dirpath, nthread=None, verbose=False, options=None) if nthread is not None and nthread <= 0: raise TreeliteError('nthread must be positive integer') + dirpath = expand_windows_path(dirpath) if not os.path.isdir(dirpath): raise TreeliteError('Directory {} does not exist'.format(dirpath)) try: diff --git a/python/treelite/contrib/msvc.py b/python/treelite/contrib/msvc.py index c5edcd5b..d15151d2 100644 --- a/python/treelite/contrib/msvc.py +++ b/python/treelite/contrib/msvc.py @@ -5,7 +5,9 @@ from __future__ import absolute_import as _abs import os -from ..common.compat import PY3 +import glob +import re +from distutils.version import StrictVersion from .util import _create_shared_base, _libext LIBEXT = _libext() @@ -14,25 +16,6 @@ def _is_64bit_windows(): return 'PROGRAMFILES(X86)' in os.environ def _varsall_bat_path(): - if PY3: - import winreg # pylint: disable=E0401 - else: - import _winreg as winreg # pylint: disable=E0401 - if _is_64bit_windows(): - key_name = 'SOFTWARE\\Wow6432Node\\Microsoft\\VisualStudio\\SxS\\VS7' - else: - key_name = 'SOFTWARE\\Microsoft\\VisualStudio\\SxS\\VC7' - key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, key_name) - i = 0 - vs_installs = [] # list of all Visual Studio installations - while True: - try: - version, location, _ = winreg.EnumValue(key, i) - vs_installs.append((version, location)) - except WindowsError: # pylint: disable=E0602 - break - i += 1 - # if a custom location is given, try that first if 'TREELITE_VCVARSALL' in os.environ: candidate = os.environ['TREELITE_VCVARSALL'] @@ -44,14 +27,52 @@ def _varsall_bat_path(): raise OSError('Environment variable TREELITE_VCVARSALL does not refer '+\ 'to existing vcvarsall.bat') - # scan all detected Visual Studio installations, with most recent first - for version, vcroot in sorted(vs_installs, key=lambda x: x[0], reverse=True): - if version == '15.0': # Visual Studio 2017 revamped directory structure - candidate = 
os.path.join(vcroot, 'VC\\Auxiliary\\Build\\vcvarsall.bat') + ## Bunch of heuristics to locate vcvarsall.bat + candidate_paths = [] # List of possible paths to vcvarsall.bat + try: + import winreg # pylint: disable=E0401 + if _is_64bit_windows(): + key_name = 'SOFTWARE\\Wow6432Node\\Microsoft\\VisualStudio\\SxS\\VS7' else: - candidate = os.path.join(vcroot, 'VC\\vcvarsall.bat') + key_name = 'SOFTWARE\\Microsoft\\VisualStudio\\SxS\\VC7' + key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, key_name) + i = 0 + while True: + try: + version, vcroot, _ = winreg.EnumValue(key, i) + if StrictVersion(version) >= StrictVersion('15.0'): + # Visual Studio 2017 revamped directory structure + candidate_paths.append(os.path.join(vcroot, 'VC\\Auxiliary\\Build\\vcvarsall.bat')) + else: + candidate_paths.append(os.path.join(vcroot, 'VC\\vcvarsall.bat')) + except WindowsError: # pylint: disable=E0602 + break + i += 1 + except FileNotFoundError: + pass # No registry key found + except ImportError: + pass # No winreg module + + for candidate in candidate_paths: if os.path.isfile(candidate): return candidate + + # If registry method fails, try a bunch of pre-defined paths + + # Visual Studio 2017 and higher + for vcroot in glob.glob('C:\\Program Files (x86)\\Microsoft Visual Studio\\*') + \ + glob.glob('C:\\Program Files\\Microsoft Visual Studio\\*'): + if re.fullmatch(r'[0-9]+', os.path.basename(vcroot)): + for candidate in glob.glob(vcroot + '\\*\\VC\\Auxiliary\\Build\\vcvarsall.bat'): + if os.path.isfile(candidate): + return candidate + # Previous versions of Visual Studio + pattern = '\\Microsoft Visual Studio*\\VC\\vcvarsall.bat' + for candidate in glob.glob('C:\\Program Files (x86)' + pattern) + \ + glob.glob('C:\\Program Files' + pattern): + if os.path.isfile(candidate): + return candidate + raise OSError('vcvarsall.bat not found; please specify its full path in '+\ 'the environment variable TREELITE_VCVARSALL') diff --git a/python/treelite/frontend.py b/python/treelite/frontend.py 
index d20671a9..cb9a1870 100644 --- a/python/treelite/frontend.py +++ b/python/treelite/frontend.py @@ -5,8 +5,9 @@ import collections import shutil import os +from tempfile import TemporaryDirectory from .common.compat import STRING_TYPES -from .common.util import c_str, TreeliteError, TemporaryDirectory +from .common.util import c_str, TreeliteError from .core import _LIB, c_array, _check_call from .contrib import create_shared, generate_makefile, _toolchain_exist_check @@ -124,7 +125,7 @@ def export_lib(self, toolchain, libpath, params=None, compiler='ast_native', shutil.move('/temporary/directory/mymodel.dll', './mymodel.dll') """ _toolchain_exist_check(toolchain) - with TemporaryDirectory() as temp_dir: + with TemporaryDirectory(dir=os.path.dirname(libpath)) as temp_dir: self.compile(temp_dir, params, compiler, verbose) temp_libpath = create_shared(toolchain, temp_dir, nthread, verbose, options) diff --git a/runtime/native/CMakeLists.txt b/runtime/native/CMakeLists.txt index c4a2eb9f..5e90f028 100644 --- a/runtime/native/CMakeLists.txt +++ b/runtime/native/CMakeLists.txt @@ -96,5 +96,5 @@ else() LIBRARY DESTINATION lib) endif() -install(DIRECTORY include/treelite DESTINATION include +install(DIRECTORY include/treelite DESTINATION runtime/native/include FILES_MATCHING PATTERN "*.h") diff --git a/src/compiler/ast_native.cc b/src/compiler/ast_native.cc index 5045234a..d96b89d9 100644 --- a/src/compiler/ast_native.cc +++ b/src/compiler/ast_native.cc @@ -381,15 +381,32 @@ class ASTNativeCompiler : public Compiler { } array_th_len = formatter.str(); } - PrependToBuffer(dest, - fmt::format(native::qnode_template, - "array_threshold"_a = array_threshold, - "array_th_begin"_a = array_th_begin, - "array_th_len"_a = array_th_len, - "total_num_threshold"_a = total_num_threshold), 0); - AppendToBuffer(dest, - fmt::format(native::quantize_loop_template, - "num_feature"_a = num_feature_), indent); + if (!array_threshold.empty() && !array_th_begin.empty() && 
!array_th_len.empty()) { + PrependToBuffer(dest, + fmt::format(native::qnode_template, + "total_num_threshold"_a = total_num_threshold), 0); + AppendToBuffer(dest, + fmt::format(native::quantize_loop_template, + "num_feature"_a = num_feature_), indent); + } + if (!array_threshold.empty()) { + PrependToBuffer(dest, + fmt::format("static const double threshold[] = {{\n" + "{array_threshold}\n" + "}};\n", "array_threshold"_a = array_threshold), 0); + } + if (!array_th_begin.empty()) { + PrependToBuffer(dest, + fmt::format("static const int th_begin[] = {{\n" + "{array_th_begin}\n" + "}};\n", "array_th_begin"_a = array_th_begin), 0); + } + if (!array_th_len.empty()) { + PrependToBuffer(dest, + fmt::format("static const int th_len[] = {{\n" + "{array_th_len}\n" + "}};\n", "array_th_len"_a = array_th_len), 0); + } CHECK_EQ(node->children.size(), 1); WalkAST(node->children[0], dest, indent); } @@ -424,20 +441,42 @@ class ASTNativeCompiler : public Compiler { [this](const OutputNode* node) { return RenderOutputStatement(node); }, &array_nodes, &array_cat_bitmap, &array_cat_begin, &output_switch_statement, &common_comp_op); + if (!array_nodes.empty()) { + AppendToBuffer("header.h", + fmt::format("extern const struct Node {node_array_name}[];\n", + "node_array_name"_a = node_array_name), 0); + AppendToBuffer("arrays.c", + fmt::format("const struct Node {node_array_name}[] = {{\n" + "{array_nodes}\n" + "}};\n", + "node_array_name"_a = node_array_name, + "array_nodes"_a = array_nodes), 0); + } + + if (!array_cat_bitmap.empty()) { + AppendToBuffer("header.h", + fmt::format("extern const uint64_t {cat_bitmap_name}[];\n", + "cat_bitmap_name"_a = cat_bitmap_name), 0); + AppendToBuffer("arrays.c", + fmt::format("const uint64_t {cat_bitmap_name}[] = {{\n" + "{array_cat_bitmap}\n" + "}};\n", + "cat_bitmap_name"_a = cat_bitmap_name, + "array_cat_bitmap"_a = array_cat_bitmap), 0); + } + + if (!array_cat_begin.empty()) { + AppendToBuffer("header.h", + fmt::format("extern const size_t 
{cat_begin_name}[];\n", + "cat_begin_name"_a = cat_begin_name), 0); + AppendToBuffer("arrays.c", + fmt::format("const size_t {cat_begin_name}[] = {{\n" + "{array_cat_begin}\n" + "}};\n", + "cat_begin_name"_a = cat_begin_name, + "array_cat_begin"_a = array_cat_begin), 0); + } - AppendToBuffer("header.h", - fmt::format(native::code_folder_arrays_declaration_template, - "node_array_name"_a = node_array_name, - "cat_bitmap_name"_a = cat_bitmap_name, - "cat_begin_name"_a = cat_begin_name), 0); - AppendToBuffer("arrays.c", - fmt::format(native::code_folder_arrays_template, - "node_array_name"_a = node_array_name, - "array_nodes"_a = array_nodes, - "cat_bitmap_name"_a = cat_bitmap_name, - "array_cat_bitmap"_a = array_cat_bitmap, - "cat_begin_name"_a = cat_begin_name, - "array_cat_begin"_a = array_cat_begin), 0); if (array_nodes.empty()) { /* folded code consists of a single leaf node */ AppendToBuffer(dest, @@ -445,7 +484,7 @@ class ASTNativeCompiler : public Compiler { "{output_switch_statement}\n", "output_switch_statement"_a = output_switch_statement), indent); - } else { + } else if (!array_cat_bitmap.empty() && !array_cat_begin.empty()) { AppendToBuffer(dest, fmt::format(native::eval_loop_template, "node_array_name"_a = node_array_name, @@ -455,6 +494,14 @@ class ASTNativeCompiler : public Compiler { "comp_op"_a = OpName(common_comp_op), "output_switch_statement"_a = output_switch_statement), indent); + } else { + AppendToBuffer(dest, + fmt::format(native::eval_loop_template_without_categorical_feature, + "node_array_name"_a = node_array_name, + "data_field"_a = (param.quantize > 0 ? 
"qvalue" : "fvalue"), + "comp_op"_a = OpName(common_comp_op), + "output_switch_statement"_a + = output_switch_statement), indent); } } diff --git a/src/compiler/native/code_folder_template.h b/src/compiler/native/code_folder_template.h index 419b580e..3e7393a8 100644 --- a/src/compiler/native/code_folder_template.h +++ b/src/compiler/native/code_folder_template.h @@ -31,26 +31,20 @@ while (nid >= 0) {{ /* negative nid implies leaf */ {output_switch_statement} )TREELITETEMPLATE"; -const char* code_folder_arrays_template = +const char* eval_loop_template_without_categorical_feature = R"TREELITETEMPLATE( -const struct Node {node_array_name}[] = {{ -{array_nodes} -}}; - -const uint64_t {cat_bitmap_name}[] = {{ -{array_cat_bitmap} -}}; - -const size_t {cat_begin_name}[] = {{ -{array_cat_begin} -}}; -)TREELITETEMPLATE"; +nid = 0; +while (nid >= 0) {{ /* negative nid implies leaf */ + fid = {node_array_name}[nid].split_index; + if (data[fid].missing == -1) {{ + cond = {node_array_name}[nid].default_left; + }} else {{ + cond = (data[fid].{data_field} {comp_op} {node_array_name}[nid].threshold); + }} + nid = cond ? 
{node_array_name}[nid].left_child : {node_array_name}[nid].right_child; +}} -const char* code_folder_arrays_declaration_template = -R"TREELITETEMPLATE( -extern const struct Node {node_array_name}[]; -extern const uint64_t {cat_bitmap_name}[]; -extern const size_t {cat_begin_name}[]; +{output_switch_statement} )TREELITETEMPLATE"; } // namespace native diff --git a/src/compiler/native/qnode_template.h b/src/compiler/native/qnode_template.h index 0ea549e2..047d5ffc 100644 --- a/src/compiler/native/qnode_template.h +++ b/src/compiler/native/qnode_template.h @@ -14,16 +14,6 @@ namespace native { const char* qnode_template = R"TREELITETEMPLATE( -static const double threshold[] = {{ -{array_threshold} -}}; -static const int th_begin[] = {{ -{array_th_begin} -}}; -static const int th_len[] = {{ -{array_th_len} -}}; - #include /* diff --git a/tests/python/test_basic.py b/tests/python/test_basic.py index ae49d2dd..4eb4e3a6 100644 --- a/tests/python/test_basic.py +++ b/tests/python/test_basic.py @@ -72,13 +72,14 @@ def test_basic(self): model = treelite.Model.load(model_path, model_format='xgboost') make_annotation(model=model, dtrain_path='letor/mq2008.train', annotation_path='./annotation.json') - run_pipeline_test(model=model, dtest_path='letor/mq2008.test', - libname_fmt='./mq2008{}', - expected_prob_path=None, - expected_margin_path='letor/mq2008.test.pred', - multiclass=False, use_annotation='./annotation.json', - use_quantize=1, use_parallel_comp=700, - use_toolchains=['gcc']) + if os_platform() != 'windows': + run_pipeline_test(model=model, dtest_path='letor/mq2008.test', + libname_fmt='./mq2008{}', + expected_prob_path=None, + expected_margin_path='letor/mq2008.test.pred', + multiclass=False, use_annotation='./annotation.json', + use_quantize=1, use_parallel_comp=700, + use_toolchains=['msvc' if os_platform() == 'windows' else 'gcc']) run_pipeline_test(model=model, dtest_path='letor/mq2008.test', libname_fmt='./mq2008{}', expected_prob_path=None, @@ -86,6 +87,7 @@ 
def test_basic(self): multiclass=False, use_elf=is_linux, use_compiler='failsafe') + @pytest.mark.skipif(os_platform() == 'windows', reason='Make unavailable on Windows') def test_srcpkg(self): """Test feature to export a source tarball""" model_path = os.path.join(dpath, 'mushroom/mushroom.model') diff --git a/tests/python/test_lightgbm_integration.py b/tests/python/test_lightgbm_integration.py index 3c7c8f6d..38b2744f 100644 --- a/tests/python/test_lightgbm_integration.py +++ b/tests/python/test_lightgbm_integration.py @@ -10,7 +10,7 @@ from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split from util import os_compatible_toolchains, libname, assert_almost_equal,\ - run_pipeline_test + run_pipeline_test, os_platform dpath = os.path.abspath(os.path.join(os.getcwd(), 'tests/examples/')) @@ -121,6 +121,7 @@ def test_categorical_data(self): use_quantize=use_quantize, use_parallel_comp=use_parallel_comp) + @pytest.mark.skipif(os_platform() == 'windows', reason='MSVC cannot handle long if conditional') def test_sparse_categorical_model(self): """ LightGBM is able to produce categorical splits directly, so that diff --git a/tests/python/test_model_builder.py b/tests/python/test_model_builder.py index f39e8064..1d23960a 100644 --- a/tests/python/test_model_builder.py +++ b/tests/python/test_model_builder.py @@ -8,7 +8,7 @@ from sklearn.datasets import load_iris import treelite import treelite.runtime -from util import run_pipeline_test, make_annotation, \ +from util import run_pipeline_test, make_annotation, os_platform, \ libname, os_compatible_toolchains, assert_almost_equal dpath = os.path.abspath(os.path.join(os.getcwd(), 'tests/examples/')) @@ -1424,6 +1424,7 @@ def process_tree(sklearn_tree): batch = treelite.runtime.Batch.from_npy2d(X) out_prob = predictor.predict(batch) assert_almost_equal(out_prob, expected_prob) + del predictor # Test round-trip with Protobuf model.export_protobuf('./my.buffer') @@ -1435,6 +1436,7 @@ def 
process_tree(sklearn_tree): batch = treelite.runtime.Batch.from_npy2d(X) out_prob = predictor.predict(batch) assert_almost_equal(out_prob, expected_prob) + del predictor def test_node_insert_delete(self): """Test ability to add and remove nodes""" @@ -1460,7 +1462,8 @@ def test_node_insert_delete(self): model = builder.commit() libpath = libname('./libtest{}') - model.export_lib(toolchain='gcc', libpath=libpath, verbose=True) + toolchain = 'msvc' if os_platform() == 'windows' else 'gcc' + model.export_lib(toolchain=toolchain, libpath=libpath, verbose=True) predictor = treelite.runtime.Predictor(libpath=libpath) for f0 in [-0.5, 0.5, 1.5, np.nan]: for f1 in [0, 1, 2, 3, 4, np.nan]: