diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 0000000..c8468d3 --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,52 @@ +Checks: '-*,clang-diagnostic-*,bugprone-*,performance-*,google-explicit-constructor,google-build-using-namespace,google-runtime-int,misc-definitions-in-headers,modernize-use-nullptr,modernize-use-override,-bugprone-macro-parentheses,readability-braces-around-statements,-bugprone-branch-clone,readability-identifier-naming,hicpp-exception-baseclass,misc-throw-by-value-catch-by-reference,-bugprone-signed-char-misuse,-bugprone-misplaced-widening-cast,-bugprone-sizeof-expression,-bugprone-easily-swappable-parameters,google-global-names-in-headers,llvm-header-guard,misc-definitions-in-headers,modernize-use-emplace,modernize-use-bool-literals,-performance-inefficient-string-concatenation,-performance-no-int-to-ptr,readability-container-size-empty,cppcoreguidelines-pro-type-cstyle-cast,-llvm-header-guard,-performance-enum-size,cppcoreguidelines-pro-type-const-cast,cppcoreguidelines-avoid-non-const-global-variables,cppcoreguidelines-interfaces-global-init,cppcoreguidelines-slicing,cppcoreguidelines-rvalue-reference-param-not-moved,cppcoreguidelines-virtual-class-destructor,-readability-identifier-naming,-bugprone-exception-escape,-bugprone-unused-local-non-trivial-variable,-bugprone-empty-catch' +WarningsAsErrors: '*' +HeaderFilterRegex: 'src/include/duckdb/.*' +FormatStyle: none +CheckOptions: + - key: readability-identifier-naming.ClassCase + value: CamelCase + - key: readability-identifier-naming.EnumCase + value: CamelCase + - key: readability-identifier-naming.TypedefCase + value: lower_case + - key: readability-identifier-naming.TypedefSuffix + value: _t + - key: readability-identifier-naming.FunctionCase + value: CamelCase + - key: readability-identifier-naming.MemberCase + value: lower_case + - key: readability-identifier-naming.ParameterCase + value: lower_case + - key: readability-identifier-naming.ConstantCase + value: aNy_CasE + - key: readability-identifier-naming.ConstantParameterCase + value: lower_case + - key: readability-identifier-naming.NamespaceCase + value: lower_case + - key: readability-identifier-naming.MacroDefinitionCase + value: UPPER_CASE + - key: readability-identifier-naming.StaticConstantCase + value: UPPER_CASE + - key: readability-identifier-naming.ConstantMemberCase + value: aNy_CasE + - key: readability-identifier-naming.StaticVariableCase + value: UPPER_CASE + - key: readability-identifier-naming.ClassConstantCase + value: UPPER_CASE + - key: readability-identifier-naming.EnumConstantCase + value: UPPER_CASE + - key: readability-identifier-naming.ConstexprVariableCase + value: aNy_CasE + - key: readability-identifier-naming.StaticConstantCase + value: UPPER_CASE + - key: readability-identifier-naming.TemplateTemplateParameterCase + value: UPPER_CASE + - key: readability-identifier-naming.TypeTemplateParameterCase + value: UPPER_CASE + - key: readability-identifier-naming.VariableCase + value: lower_case + - key: modernize-use-emplace.SmartPointers + value: '::duckdb::shared_ptr;::duckdb::unique_ptr;::std::auto_ptr;::duckdb::weak_ptr' + - key: cppcoreguidelines-rvalue-reference-param-not-moved.IgnoreUnnamedParams + value: true + diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index b65d2f6..5e7ae1b 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -14,18 +14,20 @@ concurrency: jobs: duckdb-stable-build: name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.1.0 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main with: duckdb_version: main extension_name: sqlite_scanner + ci_tools_version: main duckdb-stable-deploy: name: Deploy extension binaries needs: duckdb-stable-build - uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@v1.1.0 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@main secrets: inherit with: duckdb_version: main extension_name: sqlite_scanner + ci_tools_version: main deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} deploy_versioned: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} diff --git a/.gitmodules b/.gitmodules index bb8e599..dd490ea 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,3 +2,6 @@ path = duckdb url = https://github.com/duckdb/duckdb branch = main +[submodule "extension-ci-tools"] + path = extension-ci-tools + url = https://github.com/duckdb/extension-ci-tools.git diff --git a/Makefile b/Makefile index af9ae32..e85d93b 100644 --- a/Makefile +++ b/Makefile @@ -1,120 +1,25 @@ -.PHONY: all clean format debug release duckdb_debug duckdb_release pull update wasm_mvp wasm_eh wasm_threads - -all: release - -MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) -PROJ_DIR := $(dir $(MKFILE_PATH)) - -TEST_PATH="/test/unittest" -DUCKDB_PATH="/duckdb" - -DUCKDB_SRCDIR ?= "./duckdb/" - -# For non-MinGW windows the path is slightly different -ifeq ($(OS),Windows_NT) -ifneq ($(CXX),g++) - TEST_PATH="/test/Release/unittest.exe" - DUCKDB_PATH="/Release/duckdb.exe" -endif -endif - -#### OSX config -OSX_BUILD_FLAG= -ifneq (${OSX_BUILD_ARCH}, "") - OSX_BUILD_FLAG=-DOSX_BUILD_ARCH=${OSX_BUILD_ARCH} -endif - -#### VCPKG config -VCPKG_TOOLCHAIN_PATH?= -ifneq ("${VCPKG_TOOLCHAIN_PATH}", "") - TOOLCHAIN_FLAGS:=${TOOLCHAIN_FLAGS} -DVCPKG_MANIFEST_DIR='${PROJ_DIR}' -DVCPKG_BUILD=1 -DCMAKE_TOOLCHAIN_FILE='${VCPKG_TOOLCHAIN_PATH}' -endif -ifneq ("${VCPKG_TARGET_TRIPLET}", "") - TOOLCHAIN_FLAGS:=${TOOLCHAIN_FLAGS} -DVCPKG_TARGET_TRIPLET='${VCPKG_TARGET_TRIPLET}' -endif - -#### Enable Ninja as generator -ifeq ($(GEN),ninja) - GENERATOR=-G "Ninja" -DFORCE_COLORED_OUTPUT=1 -endif +PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) +# Configuration of extension EXT_NAME=sqlite_scanner +EXT_CONFIG=${PROJ_DIR}extension_config.cmake -#### Configuration for this extension -EXTENSION_NAME=SQLITE_SCANNER -EXTENSION_FLAGS=\ --DDUCKDB_EXTENSION_NAMES="${EXT_NAME}" \ --DDUCKDB_EXTENSION_${EXTENSION_NAME}_PATH="$(PROJ_DIR)" \ --DDUCKDB_EXTENSION_${EXTENSION_NAME}_SHOULD_LINK=0 \ --DDUCKDB_EXTENSION_${EXTENSION_NAME}_LOAD_TESTS=1 \ --DDUCKDB_EXTENSION_${EXTENSION_NAME}_INCLUDE_PATH="$(PROJ_DIR)src/include" \ --DDUCKDB_EXTENSION_${EXTENSION_NAME}_TEST_PATH=$(PROJ_DIR)test \ - -BUILD_FLAGS=-DEXTENSION_STATIC_BUILD=1 -DBUILD_EXTENSIONS="tpch" ${OSX_BUILD_FLAG} -DDUCKDB_EXPLICIT_PLATFORM='${DUCKDB_PLATFORM}' - -ifeq ($(DUCKDB_PLATFORM_RTOOLS),1) - BUILD_FLAGS:=${BUILD_FLAGS} -DCMAKE_CXX_FLAGS="-DDUCKDB_PLATFORM_RTOOLS=1" -endif +# Include the Makefile from extension-ci-tools +include extension-ci-tools/makefiles/duckdb_extension.Makefile -CLIENT_FLAGS := -pull: - git submodule init - git submodule update --recursive --remote - -clean: - rm -rf build - cd duckdb && make clean - -# Main build -debug: - mkdir -p build/debug && \ - cmake $(GENERATOR) $(FORCE_COLOR) $(EXTENSION_FLAGS) ${CLIENT_FLAGS} -DEXTENSION_STATIC_BUILD=1 -DCMAKE_BUILD_TYPE=Debug ${BUILD_FLAGS} -S ./duckdb/ -B build/debug && \ - cmake --build build/debug --config Debug - -release: - mkdir -p build/release && \ - cmake $(GENERATOR) $(FORCE_COLOR) $(EXTENSION_FLAGS) ${CLIENT_FLAGS} -DEXTENSION_STATIC_BUILD=1 -DCMAKE_BUILD_TYPE=Release ${BUILD_FLAGS} -S ./duckdb/ -B build/release && \ - cmake --build build/release --config Release +# Setup the sqlite3 tpch database data/db/tpch.db: release - command -v sqlite3 || (command -v brew && brew install sqlite) || (command -v choco && choco install sqlite -y) || (command -v apt-get && apt-get install -y sqlite3) || echo "no sqlite3" + command -v sqlite3 || (command -v brew && brew install sqlite) || (command -v choco && choco install sqlite -y) || (command -v apt-get && apt-get install -y sqlite3) || (command -v apk && apk add sqlite) || echo "no sqlite3" ./build/release/$(DUCKDB_PATH) < data/sql/tpch-export.duckdb || tree ./build/release || echo "neither tree not duck" sqlite3 data/db/tpch.db < data/sql/tpch-create.sqlite -# Main tests -test: test_release -test_release: release data/db/tpch.db +# Override the test target implementations from the duckdb_extension.Makefile +test_release_internal: data/db/tpch.db SQLITE_TPCH_GENERATED=1 ./build/release/$(TEST_PATH) "$(PROJ_DIR)test/*" -test_debug: debug data/db/tpch.db - SQLITE_TPCH_GENERATED=1 ./build/debug/$(TEST_PATH) "$(PROJ_DIR)test/*" - -format: - cp duckdb/.clang-format . - find src/ -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i - cmake-format -i CMakeLists.txt - rm .clang-format -update: - git submodule update --remote --merge - -VCPKG_EMSDK_FLAGS=-DVCPKG_CHAINLOAD_TOOLCHAIN_FILE=$(EMSDK)/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake -WASM_COMPILE_TIME_COMMON_FLAGS=-DWASM_LOADABLE_EXTENSIONS=1 -DBUILD_EXTENSIONS_ONLY=1 -DSKIP_EXTENSIONS="parquet;json" $(VCPKG_EMSDK_FLAGS) -WASM_CXX_MVP_FLAGS= -WASM_CXX_EH_FLAGS=$(WASM_CXX_MVP_FLAGS) -fwasm-exceptions -DWEBDB_FAST_EXCEPTIONS=1 -WASM_CXX_THREADS_FLAGS=$(WASM_COMPILE_TIME_EH_FLAGS) -DWITH_WASM_THREADS=1 -DWITH_WASM_SIMD=1 -DWITH_WASM_BULK_MEMORY=1 -pthread -WASM_LINK_TIME_FLAGS=-O3 -sSIDE_MODULE=2 -sEXPORTED_FUNCTIONS="_${EXT_NAME}_version,_${EXT_NAME}_init" - -wasm_mvp: - mkdir -p build/wasm_mvp - emcmake cmake $(GENERATOR) $(EXTENSION_FLAGS) $(WASM_COMPILE_TIME_COMMON_FLAGS) -Bbuild/wasm_mvp -DCMAKE_CXX_FLAGS="$(WASM_CXX_MVP_FLAGS)" -S $(DUCKDB_SRCDIR) -DDUCKDB_EXPLICIT_PLATFORM=wasm_mvp -DDUCKDB_CUSTOM_PLATFORM=wasm_mvp - emmake make -j8 -Cbuild/wasm_mvp - -wasm_eh: - mkdir -p build/wasm_eh - emcmake cmake $(GENERATOR) $(EXTENSION_FLAGS) $(WASM_COMPILE_TIME_COMMON_FLAGS) -Bbuild/wasm_eh -DCMAKE_CXX_FLAGS="$(WASM_CXX_EH_FLAGS)" -S $(DUCKDB_SRCDIR) -DDUCKDB_EXPLICIT_PLATFORM=wasm_eh -DDUCKDB_CUSTOM_PLATFORM=wasm_eh - emmake make -j8 -Cbuild/wasm_eh +test_debug_internal: data/db/tpch.db + SQLITE_TPCH_GENERATED=1 ./build/debug/$(TEST_PATH) "$(PROJ_DIR)test/*" -wasm_threads: - mkdir -p ./build/wasm_threads - emcmake cmake $(GENERATOR) $(EXTENSION_FLAGS) $(WASM_COMPILE_TIME_COMMON_FLAGS) -Bbuild/wasm_threads -DCMAKE_CXX_FLAGS="$(WASM_CXX_THREADS_FLAGS)" -S $(DUCKDB_SRCDIR) -DDUCKDB_EXPLICIT_PLATFORM=wasm_threads -DDUCKDB_CUSTOM_PLATFORM=wasm_threads - emmake make -j8 -Cbuild/wasm_threads +test_reldebug_internal: data/db/tpch.db + SQLITE_TPCH_GENERATED=1 ./build/reldebug/$(TEST_PATH) "$(PROJ_DIR)test/*" diff --git a/duckdb b/duckdb index 367aa8d..d707b44 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 367aa8db1cc622c46661d762f9cafdd88263040e +Subproject commit d707b4432b74b51f8a176c533b98e24d48f4d165 diff --git a/extension-ci-tools b/extension-ci-tools new file mode 160000 index 0000000..4317e39 --- /dev/null +++ b/extension-ci-tools @@ -0,0 +1 @@ +Subproject commit 4317e39099f4b71d614f00d044aaec651bec6fc9 diff --git a/extension_config.cmake b/extension_config.cmake new file mode 100644 index 0000000..c0c251d --- /dev/null +++ b/extension_config.cmake @@ -0,0 +1,11 @@ +# This file is included by DuckDB's build system. It specifies which extension to load + +# Extension from this repo +duckdb_extension_load(sqlite_scanner + SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR} + LOAD_TESTS +) + +# Any extra extensions that should be built +# e.g.: duckdb_extension_load(json) +duckdb_extension_load(tpch) \ No newline at end of file diff --git a/src/include/sqlite_scanner.hpp b/src/include/sqlite_scanner.hpp index c6230a5..f79006e 100644 --- a/src/include/sqlite_scanner.hpp +++ b/src/include/sqlite_scanner.hpp @@ -13,6 +13,7 @@ namespace duckdb { class SQLiteDB; +class TableCatalogEntry; struct SqliteBindData : public TableFunctionData { string file_name; @@ -26,6 +27,8 @@ struct SqliteBindData : public TableFunctionData { optional_idx rows_per_group = 122880; SQLiteDB *global_db; + + optional_ptr table; }; class SqliteScanFunction : public TableFunction { diff --git a/src/sqlite_db.cpp b/src/sqlite_db.cpp index 433ae67..8beb3ff 100644 --- a/src/sqlite_db.cpp +++ b/src/sqlite_db.cpp @@ -49,7 +49,8 @@ SQLiteDB SQLiteDB::Open(const string &path, const SQLiteOpenOptions &options, bo // default busy time-out of 5 seconds if (options.busy_timeout > 0) { if (options.busy_timeout > NumericLimits::Maximum()) { - throw std::runtime_error("busy_timeout out of range - must be within valid range for type int"); + throw std::runtime_error("busy_timeout out of range - must be within " + "valid range for type int"); } rc = sqlite3_busy_timeout(result.db, int(options.busy_timeout)); if (rc != SQLITE_OK) { diff --git a/src/sqlite_scanner.cpp b/src/sqlite_scanner.cpp index e5b50c3..109ba4a 100644 --- a/src/sqlite_scanner.cpp +++ b/src/sqlite_scanner.cpp @@ -274,9 +274,9 @@ static void SqliteScan(ClientContext &context, TableFunctionInput &data, DataChu FlatVector::GetData(out_vec)[out_idx] = Date::FromCString((const char *)sqlite3_value_text(val), sqlite3_value_bytes(val)); } else { - throw NotImplementedException( - "Unimplemented SQLite type for column of type DATE\n* SET sqlite_all_varchar=true to " - "load all columns as VARCHAR and skip type conversions"); + throw NotImplementedException("Unimplemented SQLite type for column of type DATE\n* SET " + "sqlite_all_varchar=true to " + "load all columns as VARCHAR and skip type conversions"); } break; case LogicalTypeId::TIMESTAMP: @@ -284,9 +284,10 @@ static void SqliteScan(ClientContext &context, TableFunctionInput &data, DataChu // See https://www.sqlite.org/lang_datefunc.html // The conventions are: // A text string that is an ISO 8601 date/time value - // The number of days including fractional days since -4713-11-24 12:00:00 - // The number of seconds including fractional seconds since 1970-01-01 00:00:00 - // for now we only support ISO-8601 and unix timestamps + // The number of days including fractional days since -4713-11-24 + // 12:00:00 The number of seconds including fractional seconds since + // 1970-01-01 00:00:00 for now we only support ISO-8601 and unix + // timestamps if (sqlite_column_type == SQLITE_INTEGER) { // unix timestamp FlatVector::GetData(out_vec)[out_idx] = ConvertTimestampInteger(val); @@ -297,9 +298,9 @@ static void SqliteScan(ClientContext &context, TableFunctionInput &data, DataChu FlatVector::GetData(out_vec)[out_idx] = Timestamp::FromCString((const char *)sqlite3_value_text(val), sqlite3_value_bytes(val)); } else { - throw NotImplementedException( - "Unimplemented SQLite type for column of type TIMESTAMP\n* SET sqlite_all_varchar=true to " - "load all columns as VARCHAR and skip type conversions"); + throw NotImplementedException("Unimplemented SQLite type for column of type TIMESTAMP\n* SET " + "sqlite_all_varchar=true to " + "load all columns as VARCHAR and skip type conversions"); } break; case LogicalTypeId::BLOB: @@ -315,10 +316,20 @@ static void SqliteScan(ClientContext &context, TableFunctionInput &data, DataChu } } -static string SqliteToString(const FunctionData *bind_data_p) { - D_ASSERT(bind_data_p); +static InsertionOrderPreservingMap SqliteToString(TableFunctionToStringInput &input) { + D_ASSERT(input.bind_data); + InsertionOrderPreservingMap result; + auto &bind_data = input.bind_data->Cast(); + result["Table"] = bind_data.table_name; + result["File"] = bind_data.file_name; + return result; +} + +BindInfo SqliteBindInfo(const optional_ptr bind_data_p) { + BindInfo info(ScanType::EXTERNAL); auto &bind_data = bind_data_p->Cast(); - return StringUtil::Format("%s:%s", bind_data.file_name, bind_data.table_name); + info.table = bind_data.table; + return info; } /* @@ -338,6 +349,7 @@ SqliteScanFunction::SqliteScanFunction() SqliteInitGlobalState, SqliteInitLocalState) { cardinality = SqliteCardinality; to_string = SqliteToString; + get_bind_info = SqliteBindInfo; projection_pushdown = true; } diff --git a/src/sqlite_stmt.cpp b/src/sqlite_stmt.cpp index 6d76171..06086b3 100644 --- a/src/sqlite_stmt.cpp +++ b/src/sqlite_stmt.cpp @@ -69,7 +69,8 @@ void SQLiteStatement::CheckTypeMatches(const SqliteBindData &bind_data, sqlite3_ auto message = "Invalid type in column \"" + column_name + "\": column was declared as " + SQLiteUtils::TypeToString(expected_type) + ", found \"" + value_as_text + "\" of type \"" + SQLiteUtils::TypeToString(sqlite_column_type) + "\" instead."; - message += "\n* SET sqlite_all_varchar=true to load all columns as VARCHAR and skip type conversions"; + message += "\n* SET sqlite_all_varchar=true to load all columns as VARCHAR " + "and skip type conversions"; throw Exception(ExceptionType::MISMATCH_TYPE, message); } } @@ -80,7 +81,8 @@ void SQLiteStatement::CheckTypeIsFloatOrInteger(sqlite3_value *val, int sqlite_c auto value_as_text = string((const char *)sqlite3_value_text(val)); auto message = "Invalid type in column \"" + column_name + "\": expected float or integer, found \"" + value_as_text + "\" of type \"" + SQLiteUtils::TypeToString(sqlite_column_type) + "\" instead."; - message += "\n* SET sqlite_all_varchar=true to load all columns as VARCHAR and skip type conversions"; + message += "\n* SET sqlite_all_varchar=true to load all columns as VARCHAR " + "and skip type conversions"; throw Exception(ExceptionType::MISMATCH_TYPE, message); } } diff --git a/src/storage/sqlite_table_entry.cpp b/src/storage/sqlite_table_entry.cpp index 0a9f850..fd17c2d 100644 --- a/src/storage/sqlite_table_entry.cpp +++ b/src/storage/sqlite_table_entry.cpp @@ -44,6 +44,7 @@ TableFunction SQLiteTableEntry::GetScanFunction(ClientContext &context, unique_p result->global_db = &db; result->rows_per_group = optional_idx(); } + result->table = this; bind_data = std::move(result); return static_cast(SqliteScanFunction()); diff --git a/vcpkg.json b/vcpkg.json new file mode 100644 index 0000000..0b8fa67 --- /dev/null +++ b/vcpkg.json @@ -0,0 +1,3 @@ +{ + "dependencies": [] +} \ No newline at end of file