Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update duckdb, apply patches, move to extension ci tools #127

Merged
merged 2 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions .clang-tidy
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
Checks: '-*,clang-diagnostic-*,bugprone-*,performance-*,google-explicit-constructor,google-build-using-namespace,google-runtime-int,misc-definitions-in-headers,modernize-use-nullptr,modernize-use-override,-bugprone-macro-parentheses,readability-braces-around-statements,-bugprone-branch-clone,readability-identifier-naming,hicpp-exception-baseclass,misc-throw-by-value-catch-by-reference,-bugprone-signed-char-misuse,-bugprone-misplaced-widening-cast,-bugprone-sizeof-expression,-bugprone-easily-swappable-parameters,google-global-names-in-headers,llvm-header-guard,misc-definitions-in-headers,modernize-use-emplace,modernize-use-bool-literals,-performance-inefficient-string-concatenation,-performance-no-int-to-ptr,readability-container-size-empty,cppcoreguidelines-pro-type-cstyle-cast,-llvm-header-guard,-performance-enum-size,cppcoreguidelines-pro-type-const-cast,cppcoreguidelines-avoid-non-const-global-variables,cppcoreguidelines-interfaces-global-init,cppcoreguidelines-slicing,cppcoreguidelines-rvalue-reference-param-not-moved,cppcoreguidelines-virtual-class-destructor,-readability-identifier-naming,-bugprone-exception-escape,-bugprone-unused-local-non-trivial-variable,-bugprone-empty-catch'
WarningsAsErrors: '*'
HeaderFilterRegex: 'src/include/duckdb/.*'
FormatStyle: none
CheckOptions:
- key: readability-identifier-naming.ClassCase
value: CamelCase
- key: readability-identifier-naming.EnumCase
value: CamelCase
- key: readability-identifier-naming.TypedefCase
value: lower_case
- key: readability-identifier-naming.TypedefSuffix
value: _t
- key: readability-identifier-naming.FunctionCase
value: CamelCase
- key: readability-identifier-naming.MemberCase
value: lower_case
- key: readability-identifier-naming.ParameterCase
value: lower_case
- key: readability-identifier-naming.ConstantCase
value: aNy_CasE
- key: readability-identifier-naming.ConstantParameterCase
value: lower_case
- key: readability-identifier-naming.NamespaceCase
value: lower_case
- key: readability-identifier-naming.MacroDefinitionCase
value: UPPER_CASE
- key: readability-identifier-naming.StaticConstantCase
value: UPPER_CASE
- key: readability-identifier-naming.ConstantMemberCase
value: aNy_CasE
- key: readability-identifier-naming.StaticVariableCase
value: UPPER_CASE
- key: readability-identifier-naming.ClassConstantCase
value: UPPER_CASE
- key: readability-identifier-naming.EnumConstantCase
value: UPPER_CASE
- key: readability-identifier-naming.ConstexprVariableCase
value: aNy_CasE
- key: readability-identifier-naming.StaticConstantCase
value: UPPER_CASE
- key: readability-identifier-naming.TemplateTemplateParameterCase
value: UPPER_CASE
- key: readability-identifier-naming.TypeTemplateParameterCase
value: UPPER_CASE
- key: readability-identifier-naming.VariableCase
value: lower_case
- key: modernize-use-emplace.SmartPointers
value: '::duckdb::shared_ptr;::duckdb::unique_ptr;::std::auto_ptr;::duckdb::weak_ptr'
- key: cppcoreguidelines-rvalue-reference-param-not-moved.IgnoreUnnamedParams
value: true

6 changes: 4 additions & 2 deletions .github/workflows/MainDistributionPipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,20 @@ concurrency:
jobs:
duckdb-stable-build:
name: Build extension binaries
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.1.0
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
with:
duckdb_version: main
extension_name: sqlite_scanner
ci_tools_version: main

duckdb-stable-deploy:
name: Deploy extension binaries
needs: duckdb-stable-build
uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@v1.1.0
uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@main
secrets: inherit
with:
duckdb_version: main
extension_name: sqlite_scanner
ci_tools_version: main
deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
deploy_versioned: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@
path = duckdb
url = https://github.com/duckdb/duckdb
branch = main
[submodule "extension-ci-tools"]
path = extension-ci-tools
url = https://github.com/duckdb/extension-ci-tools.git
121 changes: 13 additions & 108 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,120 +1,25 @@
.PHONY: all clean format debug release duckdb_debug duckdb_release pull update wasm_mvp wasm_eh wasm_threads

all: release

MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
PROJ_DIR := $(dir $(MKFILE_PATH))

TEST_PATH="/test/unittest"
DUCKDB_PATH="/duckdb"

DUCKDB_SRCDIR ?= "./duckdb/"

# For non-MinGW windows the path is slightly different
ifeq ($(OS),Windows_NT)
ifneq ($(CXX),g++)
TEST_PATH="/test/Release/unittest.exe"
DUCKDB_PATH="/Release/duckdb.exe"
endif
endif

#### OSX config
OSX_BUILD_FLAG=
ifneq (${OSX_BUILD_ARCH}, "")
OSX_BUILD_FLAG=-DOSX_BUILD_ARCH=${OSX_BUILD_ARCH}
endif

#### VCPKG config
VCPKG_TOOLCHAIN_PATH?=
ifneq ("${VCPKG_TOOLCHAIN_PATH}", "")
TOOLCHAIN_FLAGS:=${TOOLCHAIN_FLAGS} -DVCPKG_MANIFEST_DIR='${PROJ_DIR}' -DVCPKG_BUILD=1 -DCMAKE_TOOLCHAIN_FILE='${VCPKG_TOOLCHAIN_PATH}'
endif
ifneq ("${VCPKG_TARGET_TRIPLET}", "")
TOOLCHAIN_FLAGS:=${TOOLCHAIN_FLAGS} -DVCPKG_TARGET_TRIPLET='${VCPKG_TARGET_TRIPLET}'
endif

#### Enable Ninja as generator
ifeq ($(GEN),ninja)
GENERATOR=-G "Ninja" -DFORCE_COLORED_OUTPUT=1
endif
PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))

# Configuration of extension
EXT_NAME=sqlite_scanner
EXT_CONFIG=${PROJ_DIR}extension_config.cmake

#### Configuration for this extension
EXTENSION_NAME=SQLITE_SCANNER
EXTENSION_FLAGS=\
-DDUCKDB_EXTENSION_NAMES="${EXT_NAME}" \
-DDUCKDB_EXTENSION_${EXTENSION_NAME}_PATH="$(PROJ_DIR)" \
-DDUCKDB_EXTENSION_${EXTENSION_NAME}_SHOULD_LINK=0 \
-DDUCKDB_EXTENSION_${EXTENSION_NAME}_LOAD_TESTS=1 \
-DDUCKDB_EXTENSION_${EXTENSION_NAME}_INCLUDE_PATH="$(PROJ_DIR)src/include" \
-DDUCKDB_EXTENSION_${EXTENSION_NAME}_TEST_PATH=$(PROJ_DIR)test \

BUILD_FLAGS=-DEXTENSION_STATIC_BUILD=1 -DBUILD_EXTENSIONS="tpch" ${OSX_BUILD_FLAG} -DDUCKDB_EXPLICIT_PLATFORM='${DUCKDB_PLATFORM}'

ifeq ($(DUCKDB_PLATFORM_RTOOLS),1)
BUILD_FLAGS:=${BUILD_FLAGS} -DCMAKE_CXX_FLAGS="-DDUCKDB_PLATFORM_RTOOLS=1"
endif
# Include the Makefile from extension-ci-tools
include extension-ci-tools/makefiles/duckdb_extension.Makefile

CLIENT_FLAGS :=
pull:
git submodule init
git submodule update --recursive --remote

clean:
rm -rf build
cd duckdb && make clean

# Main build
debug:
mkdir -p build/debug && \
cmake $(GENERATOR) $(FORCE_COLOR) $(EXTENSION_FLAGS) ${CLIENT_FLAGS} -DEXTENSION_STATIC_BUILD=1 -DCMAKE_BUILD_TYPE=Debug ${BUILD_FLAGS} -S ./duckdb/ -B build/debug && \
cmake --build build/debug --config Debug

release:
mkdir -p build/release && \
cmake $(GENERATOR) $(FORCE_COLOR) $(EXTENSION_FLAGS) ${CLIENT_FLAGS} -DEXTENSION_STATIC_BUILD=1 -DCMAKE_BUILD_TYPE=Release ${BUILD_FLAGS} -S ./duckdb/ -B build/release && \
cmake --build build/release --config Release

# Setup the sqlite3 tpch database
data/db/tpch.db: release
command -v sqlite3 || (command -v brew && brew install sqlite) || (command -v choco && choco install sqlite -y) || (command -v apt-get && apt-get install -y sqlite3) || echo "no sqlite3"
command -v sqlite3 || (command -v brew && brew install sqlite) || (command -v choco && choco install sqlite -y) || (command -v apt-get && apt-get install -y sqlite3) || (command -v apk && apk add sqlite) || echo "no sqlite3"
./build/release/$(DUCKDB_PATH) < data/sql/tpch-export.duckdb || tree ./build/release || echo "neither tree not duck"
sqlite3 data/db/tpch.db < data/sql/tpch-create.sqlite

# Main tests
test: test_release
test_release: release data/db/tpch.db
# Override the test target implementations from the duckdb_extension.Makefile
test_release_internal: data/db/tpch.db
SQLITE_TPCH_GENERATED=1 ./build/release/$(TEST_PATH) "$(PROJ_DIR)test/*"
test_debug: debug data/db/tpch.db
SQLITE_TPCH_GENERATED=1 ./build/debug/$(TEST_PATH) "$(PROJ_DIR)test/*"

format:
cp duckdb/.clang-format .
find src/ -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i
cmake-format -i CMakeLists.txt
rm .clang-format

update:
git submodule update --remote --merge

VCPKG_EMSDK_FLAGS=-DVCPKG_CHAINLOAD_TOOLCHAIN_FILE=$(EMSDK)/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake
WASM_COMPILE_TIME_COMMON_FLAGS=-DWASM_LOADABLE_EXTENSIONS=1 -DBUILD_EXTENSIONS_ONLY=1 -DSKIP_EXTENSIONS="parquet;json" $(VCPKG_EMSDK_FLAGS)
WASM_CXX_MVP_FLAGS=
WASM_CXX_EH_FLAGS=$(WASM_CXX_MVP_FLAGS) -fwasm-exceptions -DWEBDB_FAST_EXCEPTIONS=1
WASM_CXX_THREADS_FLAGS=$(WASM_COMPILE_TIME_EH_FLAGS) -DWITH_WASM_THREADS=1 -DWITH_WASM_SIMD=1 -DWITH_WASM_BULK_MEMORY=1 -pthread
WASM_LINK_TIME_FLAGS=-O3 -sSIDE_MODULE=2 -sEXPORTED_FUNCTIONS="_${EXT_NAME}_version,_${EXT_NAME}_init"

wasm_mvp:
mkdir -p build/wasm_mvp
emcmake cmake $(GENERATOR) $(EXTENSION_FLAGS) $(WASM_COMPILE_TIME_COMMON_FLAGS) -Bbuild/wasm_mvp -DCMAKE_CXX_FLAGS="$(WASM_CXX_MVP_FLAGS)" -S $(DUCKDB_SRCDIR) -DDUCKDB_EXPLICIT_PLATFORM=wasm_mvp -DDUCKDB_CUSTOM_PLATFORM=wasm_mvp
emmake make -j8 -Cbuild/wasm_mvp

wasm_eh:
mkdir -p build/wasm_eh
emcmake cmake $(GENERATOR) $(EXTENSION_FLAGS) $(WASM_COMPILE_TIME_COMMON_FLAGS) -Bbuild/wasm_eh -DCMAKE_CXX_FLAGS="$(WASM_CXX_EH_FLAGS)" -S $(DUCKDB_SRCDIR) -DDUCKDB_EXPLICIT_PLATFORM=wasm_eh -DDUCKDB_CUSTOM_PLATFORM=wasm_eh
emmake make -j8 -Cbuild/wasm_eh
test_debug_internal: data/db/tpch.db
SQLITE_TPCH_GENERATED=1 ./build/debug/$(TEST_PATH) "$(PROJ_DIR)test/*"

wasm_threads:
mkdir -p ./build/wasm_threads
emcmake cmake $(GENERATOR) $(EXTENSION_FLAGS) $(WASM_COMPILE_TIME_COMMON_FLAGS) -Bbuild/wasm_threads -DCMAKE_CXX_FLAGS="$(WASM_CXX_THREADS_FLAGS)" -S $(DUCKDB_SRCDIR) -DDUCKDB_EXPLICIT_PLATFORM=wasm_threads -DDUCKDB_CUSTOM_PLATFORM=wasm_threads
emmake make -j8 -Cbuild/wasm_threads
test_reldebug_internal: data/db/tpch.db
SQLITE_TPCH_GENERATED=1 ./build/reldebug/$(TEST_PATH) "$(PROJ_DIR)test/*"
2 changes: 1 addition & 1 deletion duckdb
Submodule duckdb updated 3705 files
1 change: 1 addition & 0 deletions extension-ci-tools
Submodule extension-ci-tools added at 4317e3
11 changes: 11 additions & 0 deletions extension_config.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# This file is included by DuckDB's build system. It specifies which extensions to load

# Extension from this repo
duckdb_extension_load(sqlite_scanner
SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}
LOAD_TESTS
)

# Any extra extensions that should be built
# e.g.: duckdb_extension_load(json)
duckdb_extension_load(tpch)
3 changes: 3 additions & 0 deletions src/include/sqlite_scanner.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

namespace duckdb {
class SQLiteDB;
class TableCatalogEntry;

struct SqliteBindData : public TableFunctionData {
string file_name;
Expand All @@ -26,6 +27,8 @@ struct SqliteBindData : public TableFunctionData {

optional_idx rows_per_group = 122880;
SQLiteDB *global_db;

optional_ptr<TableCatalogEntry> table;
};

class SqliteScanFunction : public TableFunction {
Expand Down
3 changes: 2 additions & 1 deletion src/sqlite_db.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ SQLiteDB SQLiteDB::Open(const string &path, const SQLiteOpenOptions &options, bo
// default busy time-out of 5 seconds
if (options.busy_timeout > 0) {
if (options.busy_timeout > NumericLimits<int>::Maximum()) {
throw std::runtime_error("busy_timeout out of range - must be within valid range for type int");
throw std::runtime_error("busy_timeout out of range - must be within "
"valid range for type int");
}
rc = sqlite3_busy_timeout(result.db, int(options.busy_timeout));
if (rc != SQLITE_OK) {
Expand Down
36 changes: 24 additions & 12 deletions src/sqlite_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,19 +274,20 @@ static void SqliteScan(ClientContext &context, TableFunctionInput &data, DataChu
FlatVector::GetData<date_t>(out_vec)[out_idx] =
Date::FromCString((const char *)sqlite3_value_text(val), sqlite3_value_bytes(val));
} else {
throw NotImplementedException(
"Unimplemented SQLite type for column of type DATE\n* SET sqlite_all_varchar=true to "
"load all columns as VARCHAR and skip type conversions");
throw NotImplementedException("Unimplemented SQLite type for column of type DATE\n* SET "
"sqlite_all_varchar=true to "
"load all columns as VARCHAR and skip type conversions");
}
break;
case LogicalTypeId::TIMESTAMP:
// SQLite does not have a timestamp type - but it has "conventions"
// See https://www.sqlite.org/lang_datefunc.html
// The conventions are:
// A text string that is an ISO 8601 date/time value
// The number of days including fractional days since -4713-11-24 12:00:00
// The number of seconds including fractional seconds since 1970-01-01 00:00:00
// for now we only support ISO-8601 and unix timestamps
// The number of days including fractional days since -4713-11-24 12:00:00
// The number of seconds including fractional seconds since 1970-01-01 00:00:00
// For now we only support ISO-8601 and unix timestamps
if (sqlite_column_type == SQLITE_INTEGER) {
// unix timestamp
FlatVector::GetData<timestamp_t>(out_vec)[out_idx] = ConvertTimestampInteger(val);
Expand All @@ -297,9 +298,9 @@ static void SqliteScan(ClientContext &context, TableFunctionInput &data, DataChu
FlatVector::GetData<timestamp_t>(out_vec)[out_idx] =
Timestamp::FromCString((const char *)sqlite3_value_text(val), sqlite3_value_bytes(val));
} else {
throw NotImplementedException(
"Unimplemented SQLite type for column of type TIMESTAMP\n* SET sqlite_all_varchar=true to "
"load all columns as VARCHAR and skip type conversions");
throw NotImplementedException("Unimplemented SQLite type for column of type TIMESTAMP\n* SET "
"sqlite_all_varchar=true to "
"load all columns as VARCHAR and skip type conversions");
}
break;
case LogicalTypeId::BLOB:
Expand All @@ -315,10 +316,20 @@ static void SqliteScan(ClientContext &context, TableFunctionInput &data, DataChu
}
}

static string SqliteToString(const FunctionData *bind_data_p) {
D_ASSERT(bind_data_p);
// Builds the key/value description of a SQLite scan from its bind data.
// Returns an InsertionOrderPreservingMap<string> with two entries, added in
// this order: "Table" (bind_data.table_name) and "File" (bind_data.file_name).
// input.bind_data must be non-null and must be a SqliteBindData.
static InsertionOrderPreservingMap<string> SqliteToString(TableFunctionToStringInput &input) {
// the bind data is required - everything reported below is read from it
D_ASSERT(input.bind_data);
InsertionOrderPreservingMap<string> result;
auto &bind_data = input.bind_data->Cast<SqliteBindData>();
result["Table"] = bind_data.table_name;
result["File"] = bind_data.file_name;
return result;
}

BindInfo SqliteBindInfo(const optional_ptr<FunctionData> bind_data_p) {
BindInfo info(ScanType::EXTERNAL);
auto &bind_data = bind_data_p->Cast<SqliteBindData>();
return StringUtil::Format("%s:%s", bind_data.file_name, bind_data.table_name);
info.table = bind_data.table;
return info;
}

/*
Expand All @@ -338,6 +349,7 @@ SqliteScanFunction::SqliteScanFunction()
SqliteInitGlobalState, SqliteInitLocalState) {
cardinality = SqliteCardinality;
to_string = SqliteToString;
get_bind_info = SqliteBindInfo;
projection_pushdown = true;
}

Expand Down
6 changes: 4 additions & 2 deletions src/sqlite_stmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ void SQLiteStatement::CheckTypeMatches(const SqliteBindData &bind_data, sqlite3_
auto message = "Invalid type in column \"" + column_name + "\": column was declared as " +
SQLiteUtils::TypeToString(expected_type) + ", found \"" + value_as_text + "\" of type \"" +
SQLiteUtils::TypeToString(sqlite_column_type) + "\" instead.";
message += "\n* SET sqlite_all_varchar=true to load all columns as VARCHAR and skip type conversions";
message += "\n* SET sqlite_all_varchar=true to load all columns as VARCHAR "
"and skip type conversions";
throw Exception(ExceptionType::MISMATCH_TYPE, message);
}
}
Expand All @@ -80,7 +81,8 @@ void SQLiteStatement::CheckTypeIsFloatOrInteger(sqlite3_value *val, int sqlite_c
auto value_as_text = string((const char *)sqlite3_value_text(val));
auto message = "Invalid type in column \"" + column_name + "\": expected float or integer, found \"" +
value_as_text + "\" of type \"" + SQLiteUtils::TypeToString(sqlite_column_type) + "\" instead.";
message += "\n* SET sqlite_all_varchar=true to load all columns as VARCHAR and skip type conversions";
message += "\n* SET sqlite_all_varchar=true to load all columns as VARCHAR "
"and skip type conversions";
throw Exception(ExceptionType::MISMATCH_TYPE, message);
}
}
Expand Down
1 change: 1 addition & 0 deletions src/storage/sqlite_table_entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ TableFunction SQLiteTableEntry::GetScanFunction(ClientContext &context, unique_p
result->global_db = &db;
result->rows_per_group = optional_idx();
}
result->table = this;

bind_data = std::move(result);
return static_cast<TableFunction>(SqliteScanFunction());
Expand Down
3 changes: 3 additions & 0 deletions vcpkg.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"dependencies": []
}
Loading