diff --git a/.circleci/config.yml b/.circleci/config.yml index b393f995b..15b97f042 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -29,7 +29,7 @@ jobs: - deps- - run: name: resolve deps - command: clojure -P + command: clojure -A:dev:test:build -P - save_cache: key: deps-{{ checksum "deps.edn" }} paths: @@ -59,19 +59,64 @@ jobs: at: /home/circleci - run: name: clean - command: clojure -Sthreads 1 -T:build clean + command: clojure -T:build clean - run: name: compile - command: clojure -Sthreads 1 -T:build compile + command: clojure -T:build compile - run: name: jar - command: clojure -Sthreads 1 -T:build jar + command: clojure -T:build jar - persist_to_workspace: root: /home/circleci/ paths: - .m2 - .npm - replikativ + native-image: + machine: + image: ubuntu-2004:202010-01 + resource_class: large + steps: + - attach_workspace: + at: /home/circleci + - run: + name: install graalvm + command: | + cd /home/circleci + wget https://github.com/graalvm/graalvm-ce-builds/releases/download/vm-22.2.0/graalvm-ce-java17-linux-amd64-22.2.0.tar.gz + tar -xzf graalvm-ce-java17-linux-amd64-22.2.0.tar.gz + export PATH=/home/circleci/graalvm-ce-java17-22.2.0/bin:$PATH + export JAVA_HOME=/home/circleci/graalvm-ce-java17-22.2.0/ + gu install native-image + - run: + name: install clojure + command: | + cd /home/circleci + curl -O https://download.clojure.org/install/linux-install-1.11.1.1165.sh + chmod +x linux-install-1.11.1.1165.sh + ./linux-install-1.11.1.1165.sh --prefix /home/circleci/clojure + - run: + name: build native-image + command: | + cd /home/circleci/replikativ + export PATH=/home/circleci/graalvm-ce-java17-22.2.0/bin:/home/circleci/clojure/bin:$PATH + export JAVA_HOME=/home/circleci/graalvm-ce-java17-22.2.0/ + ./bin/build-native-image + - persist_to_workspace: + root: /home/circleci/ + paths: + - replikativ/dhi + native-image-test: + executor: tools/clojurecli + steps: + - attach_workspace: + at: /home/circleci + - run: + name: Run native-image test + command: | + cd /home/circleci/replikativ + ./bin/run-native-image-tests + no_output_timeout: 5m backward-compatibility-test: executor: tools/clojurecli steps: @@ -96,7 +141,7 @@ jobs: at: /home/circleci - run: name: deploy - command: clojure -Sthreads 1 -T:build deploy + command: clojure -T:build deploy workflows: build-test-and-deploy: @@ -108,6 +153,10 @@ workflows: context: dockerhub-deploy requires: - setup + - native-image: + context: dockerhub-deploy + requires: + - setup - tools/format: context: dockerhub-deploy requires: @@ -116,14 +165,22 @@ workflows: context: dockerhub-deploy requires: - build + - native-image + - native-image-test: + context: dockerhub-deploy + requires: + - build + - native-image - backward-compatibility-test: context: dockerhub-deploy requires: - build + - native-image - tools/integrationtest: context: dockerhub-deploy requires: - build + - native-image - deploy: context: - dockerhub-deploy @@ -136,6 +193,7 @@ workflows: - tools/format - tools/unittest - backward-compatibility-test + - native-image-test - tools/integrationtest - tools/release: context: @@ -145,4 +203,4 @@ workflows: branches: only: main requires: - - deploy \ No newline at end of file + - deploy diff --git a/.dir-locals.el b/.dir-locals.el index bb4dbce3a..54ea570e7 100644 --- a/.dir-locals.el +++ b/.dir-locals.el @@ -1,2 +1,2 @@ ((clojure-mode . ((cider-preferred-build-tool . clojure-cli) - (cider-clojure-cli-aliases . ":dev:test")))) + (cider-clojure-cli-aliases . 
":dev:test:libdatahike")))) diff --git a/.gitignore b/.gitignore index 94edcf2c1..1861e6d68 100644 --- a/.gitignore +++ b/.gitignore @@ -28,4 +28,7 @@ out/ /.settings .classpath /.clj-kondo +/datahike +datahike.build_artifacts.txt tests.user.edn +/libdatahike/target \ No newline at end of file diff --git a/bin/build-native-image b/bin/build-native-image new file mode 100755 index 000000000..df3c57f84 --- /dev/null +++ b/bin/build-native-image @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +if ! $(builtin type -P native-image &> /dev/null); then + echo "PATH does not contain native-image. Make sure to add your GraalVM to it." + exit 1 +else + clojure -T:build compile + clojure -M:native-cli --no-fallback "-J-Xmx4g" +fi diff --git a/bin/run-native-image-tests b/bin/run-native-image-tests new file mode 100755 index 000000000..f6d74c267 --- /dev/null +++ b/bin/run-native-image-tests @@ -0,0 +1,41 @@ +#!/bin/bash + +TMPSTORE=/tmp/dh-test-store + +trap "rm -rf $TMPSTORE" EXIT + +./dhi delete-database bin/testconfig.edn +./dhi create-database bin/testconfig.edn + +./dhi database-exists bin/testconfig.edn + +./dhi benchmark conn:bin/testconfig.edn 0 100000 10000 +./dhi transact conn:bin/testconfig.edn '[[:db/add -1 :name "Judea"]]' +QUERY_OUT=`./dhi query '[:find (count ?e) . :where [?e :name _]]' db:bin/testconfig.edn` + +if [ $QUERY_OUT -eq 100001 ] +then + echo "Test successful." +else + echo "Exception: Query did not return correct value." +fi + +# test history input parsing +./dhi query '[:find (count ?e) . :where [?e :name _]]' history:bin/testconfig.edn +./dhi query '[:find (count ?e) . :where [?e :name _]]' since:0:bin/testconfig.edn +./dhi query '[:find (count ?e) . :where [?e :name _]]' asof:0:bin/testconfig.edn + +# other calls +./dhi pull db:bin/testconfig.edn "[:db/id, :name]" "1" +./dhi pull-many db:bin/testconfig.edn "[:db/id, :name]" "[1]" +./dhi entity db:bin/testconfig.edn "1" +./dhi datoms db:bin/testconfig.edn "{:index :eavt :components [1]}" +./dhi schema db:bin/testconfig.edn +./dhi reverse-schema db:bin/testconfig.edn +./dhi metrics db:bin/testconfig.edn + +# test serialization +./dhi query '[:find ?e . :where [?e :name ?n]]' db:myconfig.edn --format cbor >> /tmp/test +./dhi query '[:find ?i :in $ ?i . :where [?e :name ?n]]' db:myconfig.edn cbor:/tmp/test # => 1 + +./dhi delete-database bin/testconfig.edn diff --git a/bin/testconfig.edn b/bin/testconfig.edn new file mode 100644 index 000000000..b316134b9 --- /dev/null +++ b/bin/testconfig.edn @@ -0,0 +1,4 @@ +{:store {:backend :file + :path "/tmp/dh-test-store" } + :keep-history? 
true + :schema-flexibility :read} diff --git a/build.clj b/build.clj index 0b07b1d57..b764044b8 100644 --- a/build.clj +++ b/build.clj @@ -11,6 +11,8 @@ (def class-dir "target/classes") (def basis (b/create-basis {:project "deps.edn"})) (def jar-file (format "target/%s-%s.jar" (name lib) version)) +(def native-shared-library-file (format "target/%s-%s-native-shared-library.jar" + (name lib) version)) (defn clean [_] @@ -21,7 +23,8 @@ (b/javac {:src-dirs ["java"] :class-dir class-dir :basis basis - :javac-opts ["-source" "8" "-target" "8"]})) + :javac-opts ["-source" "8" "-target" "8"]}) + (spit "resources/DATAHIKE_VERSION" version)) (defn jar [_] @@ -37,6 +40,25 @@ (b/jar {:class-dir class-dir :jar-file jar-file})) +(defn native-shared-library [_] + (clean nil) + (b/delete {:path "libdatahike/target"}) + (compile nil) + (b/copy-dir {:src-dirs ["src" "resources" "libdatahike/src"] + :target-dir class-dir}) + (let [basis (b/create-basis {:project "deps.edn" + :aliases [:libdatahike]})] + (b/compile-clj {:basis basis + :src-dirs ["src"] + :class-dir class-dir}) + (b/compile-clj {:basis basis + :src-dirs ["libdatahike/src"] + :class-dir class-dir}) + (b/uber {:class-dir class-dir + :uber-file native-shared-library-file + :basis basis + :main 'datahike.cli}))) + (defn deploy "Don't forget to set CLOJARS_USERNAME and CLOJARS_PASSWORD env vars." [_] diff --git a/deps.edn b/deps.edn index 37178f158..80c974c0e 100644 --- a/deps.edn +++ b/deps.edn @@ -73,12 +73,24 @@ :ffix {:extra-deps {cljfmt/cljfmt {:mvn/version "0.8.0"}} :main-opts ["-m" "cljfmt.main" "fix"]} - :build {:deps {io.github.clojure/tools.build {:git/tag "v0.8.1" :git/sha "7d40500"} slipset/deps-deploy {:mvn/version "0.2.0"} borkdude/gh-release-artifact {:git/url "https://github.com/borkdude/gh-release-artifact" :sha "a83ee8da47d56a80b6380cbb6b4b9274048067bd"} babashka/babashka.curl {:mvn/version "0.1.2"} babashka/fs {:mvn/version "0.1.4"} - cheshire/cheshire {:mvn/version "5.10.2"}} - :ns-default build}}} + cheshire/cheshire {:mvn/version "5.11.0"}} + :ns-default build} + :libdatahike {:main-opts ["-e" "(set! *warn-on-reflection* true)"] + :extra-paths ["libdatahike/src"] + :extra-deps {cheshire/cheshire {:mvn/version "5.11.0"}}} + :native-cli {:main-opts ["-e" "(set! *warn-on-reflection* true)" + "-m" "clj.native-image" "datahike.cli" + "--initialize-at-build-time" + "-H:Name=dhi"] + :jvm-opts ["-Dclojure.compiler.direct-linking=true"] + :extra-deps + {cheshire/cheshire {:mvn/version "5.11.0"} + clj.native-image/clj.native-image + {:git/url "https://github.com/taylorwood/clj.native-image.git" + :sha "7708e7fd4572459c81f6a6b8e44c96f41cdd92d4"}}}}} diff --git a/doc/cli.md b/doc/cli.md new file mode 100644 index 000000000..fcf301c16 --- /dev/null +++ b/doc/cli.md @@ -0,0 +1,128 @@ +# Command line interface + +*This is work in progress and subject to change.* + +We provide the `datahike` native executable to access Datahike databases from +the command line. + + +# Example usage + +First you need to download the precompiled binary, or build it yourself, and put +it on your executable path. + +To access a database you need to provide the usual configuration for Datahike. +Put this into a file `myconfig.edn`. + +```clojure +{:store {:backend :file + :path "/home/USERNAME/dh-shared-db" + :config {:in-place? true}} + :keep-history? true + :schema-flexibility :read} +``` + +Now you can invoke some of our core API functions on the database. 
Let us add a +fact to the database (be careful to use single ' if you do not want your shell +to substitute parts of your Datalog ;) ): + +```bash +$ datahike transact db:myconfig.edn '[[:db/add -1 :name "Linus"]]' + ``` + +And retrieve it: + +```bash +$ datahike query '[:find ?n . :where [?e :name ?n]]' db:myconfig.edn +"Linus" # prints the name +``` + +By prefixing the path with `db:` to the query engine you can pass multiple db +configuration files and join over arbitrary many databases. Everything else is +read in as `edn` and passed to the query engine as well. + + +Provided the filestore is configured with `{:in-place? true}` you can even write +to the same database without a dedicated daemon from different shells, + + +```bash +$ datahike benchmark db:myconfig.edn 0 50000 100 +"Elapsed time: 116335.589411 msecs" +``` + +Here we use a provided benchmark helper which transacts facts of the form `[eid +:name (random-team-member)]` for `eid=0,...,50000` into the store. `100` denotes +the batch size for each transaction, so here we chunk the 50000 facts into 500 +transactions. + +In a second shell you can now simultaneously add facts in a different range + +```bash +$ datahike benchmark db:myconfig.edn 50000 100000 100 +``` + + +To check that everything has been added and no write operations have overwritten +each other. + + +```bash +$ datahike query '[:find (count ?e) . :in $ :where [?e :name ?n]]' db:myconfig.edn +100000 # check :) +``` + +# Memory model + +The persistent semantics of Datahike work more like `git` and less like similar +mutable databases such as SQLite or Datalevin. In particular you can always read +and retain snapshots (copies) of the database for free, no matter what else is +happening in the system. The current version is tested with memory and file +storage, but hopefully many other backends will also work with the +`native-image`. + +In principle this shared memory access should even work while having a JVM +server, e.g. datahike-server, serving the same database. Note that all reads can +happen in parallel, only the writers experience congestion around exclusive file +locks here. This access pattern does not provide highest throughput, but is +extremely flexible and easy to start with. + +## Forking and pulling + +Forking is easy, it is enough to copy the folder of the store (even if the +database is currently being written to). The only thing you need to take care of +is to copy the DB root first and place it into the target directory last, it is +the file `0594e3b6-9635-5c99-8142-412accf3023b.ksv`. Then you can use e.g. +`rsync` (or `git`) to copy all other (immutable) files into your new folder. In +the end you copy the root file in there as well, making sure that all files it +is referencing are reachable. Note that this will ensure that you only copy new +data each time. + +## Merging + +Now here comes the cool part. You do not need anything more for merging than +Datalog itself. You can use a query like this to extract all new facts that are +in `db1` but not in `db2` like this: + +```bash +datahike query '[:find ?e ?a ?v ?t :in $ $2 :where [$ ?e ?a ?v ?t] (not [$2 ?e ?a ?v ?t])]' db:config1.edn db:config2.edn +``` + +Since we cannot update transaction metadata, we should filter out +`:db/txInstant`s. We can also use a trick to add `:db/add` to each element in +the results, yielding valid transactions that we can then feed into `db2`. 
+ + +```bash +datahike query '[:find ?db-add ?e ?a ?v ?t :in $ $2 ?db-add :where [$ ?e ?a ?v ?t] [(not= :db/txInstant ?a)] (not [$2 ?e ?a ?v ?t])]' db:config1.edn db:config2.edn ":db/add" | transact db:config2.edn +``` + +Note that this very simple strategy assumes that the entity ids that have been +added to `db1` do not overlap with potentially new ones added to `db2`. You can +encode conflict resolution strategies and id mappings with Datalog as well and +we are exploring several such strategies at the moment. This strategy is fairly +universal, as [CRDTs can be expressed in pure +Datalog](https://speakerdeck.com/ept/data-structures-as-queries-expressing-crdts-using-datalog). +While it is not the most efficient way to merge, we plan to provide fast paths +for common patterns in Datalog. Feel free to contact us if you are interested in +complex merging strategies or have related cool ideas. diff --git a/java/src/datahike/java/Datahike.java b/java/src/datahike/java/Datahike.java index b21fa1df6..dd1283819 100644 --- a/java/src/datahike/java/Datahike.java +++ b/java/src/datahike/java/Datahike.java @@ -3,15 +3,18 @@ import clojure.java.api.Clojure; import clojure.lang.IFn; import clojure.lang.Keyword; -import clojure.lang.PersistentVector; +import clojure.lang.APersistentVector; +import clojure.lang.APersistentMap; import clojure.lang.RT; import java.util.*; -import static datahike.java.Util.deref; +import static datahike.java.Util.derefFn; /** * Defines methods for interacting with a Datahike database. + * + * The documentation for the configuration can be constructed either with {@code datahike.java.Util.ednFromString} or piecewise with the {@code map} and {@code kwd} (keyword) constructors in the Util class, e.g. as {@code map(kwd(":store"), map(kwd(":backend"), kwd(":mem")), kwd(":name"), "test-empty-db-java")} */ public class Datahike { /** @@ -34,6 +37,7 @@ public class Datahike { private static final IFn pullFn = Clojure.var("datahike.api", "pull"); private static final IFn releaseFn = Clojure.var("datahike.api", "release"); private static final IFn pullManyFn = Clojure.var("datahike.api", "pull-many"); + private static final IFn datomsFn = Clojure.var("datahike.api", "datoms"); private static final IFn seekDatomsFn = Clojure.var("datahike.api", "seek-datoms"); private static final IFn tempIdFn = Clojure.var("datahike.api", "tempid"); private static final IFn entityFn = Clojure.var("datahike.api", "entity"); @@ -43,6 +47,9 @@ public class Datahike { private static final IFn withFn = Clojure.var("datahike.api", "with"); private static final IFn dbWithFn = Clojure.var("datahike.api", "db-with"); private static final IFn databaseExistsFn = Clojure.var("datahike.api", "database-exists?"); + private static final IFn schemaFn = Clojure.var("datahike.api", "schema"); + private static final IFn reverseSchemaFn = Clojure.var("datahike.api", "reverse-schema"); + private static final IFn metricsFn = Clojure.var("datahike.api", "metrics"); /** * Forbids instances creation. @@ -55,81 +62,47 @@ private Datahike() {} * @param conn a connection to the database. * @return a dereferenced version of the database. */ - public static Object dConn(Object conn) { - return deref.invoke(conn); + public static Object deref(Object conn) { + return derefFn.invoke(conn); }; /** - * Returns true if a database exists for the given map configuration or the given uri. + * Returns true if a database exists. * - * @param uri_or_map an uri or a configuration map. + * @param config a database configuration map. 
* @return true if database exists */ - public static boolean databaseExists(Object uri_or_map) { - return (boolean) databaseExistsFn.invoke(uri_or_map); + public static boolean databaseExists(APersistentMap config) { + return (boolean) databaseExistsFn.invoke(config); }; /** * Deletes a database. * - * @param uri the database uri. + * @param config a database configuration map. */ - public static void deleteDatabase(String uri) { - deleteDatabaseFn.invoke(uri); + public static void deleteDatabase(APersistentMap config) { + deleteDatabaseFn.invoke(config); } /** - * Creates a database at the given 'uri'. + * Creates a database. * - * @param uri the database uri. + * @param config a database config. */ - public static void createDatabase(String uri) { - createDatabaseFn.invoke(uri); + public static void createDatabase(APersistentMap config) { + createDatabaseFn.invoke(config); } /** - * Creates a database at the given 'uri' and parameterizes its creation through argument 'args'. + * Creates connection to the Datahike database. + * To access the current database value use {@link #deref(Object) deref}. * - * @param uri - * the location of the database creation - * - * @param args - * If providing initial data to transact pass the keyword 'k(":initial-tx")' as the first argument followed by the data. Data could for instance be a schema as shown in the following example: - * Datahike.createDatabase(uri, k(":initial-tx"), schema); - * - * Use keyword 'k(":schema-on-read")' to define whether the database uses a schema or not. By default, the database requires a schema. Example: - * Datahike.createDatabase(uri, k(":schema-on-read"), false); - * - * With keyword k(':temporal-index') at true you can query the data from any point in time. To not use this feature, write: - * Datahike.createDatabase(uri, k(":temporal-index"), false); - * - */ - public static void createDatabase(String uri, Object... args) { - List argsCopy = new ArrayList(Arrays.asList(args)); - argsCopy.add(0, uri); - createDatabaseFn.applyTo(RT.seq(argsCopy)); - } - - /** - * Connects to the Datahike database given by 'uri'. - * - * Example: {@code conn = Datahike.connect("datahike:file:///Users/paul/temp/datahike-db");} - * - * @param uri the database uri + * @param config the database configuration * @return a connection to the database. */ - public static Object connect(String uri) { - return connectFn.invoke(uri); - } - - /** - * Returns the current state of the database. - * - * @param conn the connection to the database - * @return the latest state of the database - */ - public static Object db(Object conn) { - return dbFn.invoke(conn); + public static Object connect(APersistentMap config) { + return connectFn.invoke(config); } /** @@ -139,10 +112,10 @@ public static Object db(Object conn) { * @param inputs the arguments to the query, such as the database(s) and/or additional inputs. * @return the result of the query */ - public static Set q(String query, Object... inputs) { + public static Object q(String query, Object... inputs) { List argsCopy = new ArrayList(Arrays.asList(inputs)); argsCopy.add(0, Clojure.read(query)); - return (Set)qFn.applyTo(RT.seq(argsCopy)); + return qFn.applyTo(RT.seq(argsCopy)); } /** @@ -152,77 +125,64 @@ public static Set q(String query, Object... inputs) { * @param txData the data to be transacted. * @return a transaction report. 
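+     * The returned report map typically contains the keys {@code :db-before}, {@code :db-after}, {@code :tx-data}, {@code :tempids} and {@code :tx-meta}.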
*/ - public static Object transact(Object conn, PersistentVector txData) { - return transactFn.invoke(conn, txData); + public static Map transact(Object conn, Iterable txData) { + return (Map)transactFn.invoke(conn, txData); } /** * Returns the full historical state of a database. * - * @param dConn a database + * @param db a database * @return the full history of the database */ - public static Object history(Object dConn) { return historyFn.invoke(dConn); }; + public static Object history(Object db) { return historyFn.invoke(db); }; /** * Returns the database state at the given date. * - * @param dConn the database + * @param db the database * @param date either java.util.Date or Epoch Time as long * @return the database state at the given date */ - public static Object asOf(Object dConn, Date date) { - return asOfFn.invoke(dConn, date); + public static Object asOf(Object db, Date date) { + return asOfFn.invoke(db, date); } /** * Returns the state of the database since the given date. * Be aware: the database contains only the datoms that were added since the date. * - * @param dConn the database + * @param db the database * @param date either java.util.Date or Epoch Time as long * @return the state of the database since the given date. */ - public static Object since(Object dConn, Date date) { - return sinceFn.invoke(dConn, date); + public static Object since(Object db, Date date) { + return sinceFn.invoke(db, date); } /** * Fetches data from database using recursive declarative description. * @see docs.datomic.com/on-prem/pull.html * - * @param dConn the database - * @param selector the criteria for the pull query - * @param eid the entity id + * @param db the database + * @param selector the scoping for the pull query + * @param eid the entity id to pull * @return the result of the query as map */ - public static Map pull(Object dConn, String selector, int eid) { - return (Map) pullFn.invoke(dConn, Clojure.read(selector), eid); - } - - /** - * Fetches data from database using recursive declarative description. - * @see docs.datomic.com/on-prem/pull.html - * - * @param dConn the database - * @param selector the criteria for the pull query - * @param eid an entity id - * @return the result of the query as a map - */ - public static Map pull(Object dConn, String selector, PersistentVector eid) { - return (Map) pullFn.invoke(dConn, Clojure.read(selector), eid); + public static APersistentMap pull(Object db, String selector, long eid) { + return (APersistentMap)pullFn.invoke(db, Clojure.read(selector), eid); } /** * Same as pull but takes multiple entities as input. * - * @param dConn the database + * @param db the database * @param selector the criteria for the pull query - * @param eids a vector of entity ids + * @param eids an iterable of entity ids * @return the result of the query as a list of maps */ - public static List pullMany(Object dConn, String selector, PersistentVector eids) { - return (List) pullManyFn.invoke(dConn, Clojure.read(selector), eids); + public static List pullMany(Object db, String selector, Iterable eids) { + return (List)pullManyFn.invoke(db, Clojure.read(selector), eids); } /** @@ -234,58 +194,69 @@ public static void release(Object conn) { releaseFn.invoke(conn); } + /** + * Returns datoms of the given index. + * + * @param db the database + * @param index a keyword describing the type of index. E.g. 
{@code kwd(":eavt")} + * @return the list of datoms + */ + public static List datoms(Object db, Keyword index) { + return (List)datomsFn.invoke(db, index); + } + /** * Returns datoms starting from specified components criteria and including rest of the database until the end of the index. * - * @param dConn the database - * @param index a keyword describing the type of index. E.g. {@code k(":eavt")} + * @param db the database + * @param index a keyword describing the type of index. E.g. {@code kwd(":eavt")} * @param c1 the first element of a datom used as the criteria for filtering * @return the list of datoms */ - public static List seekdatoms(Object dConn, Keyword index, Object c1) { - return (List)seekDatomsFn.invoke(dConn, index, c1); + public static List seekdatoms(Object db, Keyword index, Object c1) { + return (List)seekDatomsFn.invoke(db, index, c1); } /** * Returns datoms starting from specified components criteria and including rest of the database until the end of the index. * - * @param dConn the database - * @param index a keyword describing the type of index. E.g. {@code k(":eavt")} + * @param db the database + * @param index a keyword describing the type of index. E.g. {@code kwd(":eavt")} * @param c1 the first element of a datom used as criteria for filtering * @param c2 the second element of a datom used as criteria for filtering * @return the list of datoms */ - public static List seekdatoms(Object dConn, Keyword index, Object c1, Object c2) { - return (List)seekDatomsFn.invoke(dConn, index, c1, c2); + public static List seekdatoms(Object db, Keyword index, Object c1, Object c2) { + return (List)seekDatomsFn.invoke(db, index, c1, c2); } /** * Returns datoms starting from specified components criteria and including rest of the database until the end of the index. * - * @param dConn the database - * @param index a keyword describing the type of index. E.g. {@code k(":eavt")} + * @param db the database + * @param index a keyword describing the type of index. E.g. {@code kwd(":eavt")} * @param c1 the first element of a datom used as criteria for filtering * @param c2 the second element of a datom used as criteria for filtering * @param c3 the third element of a datom used as criteria for filtering * @return the list of datoms */ - public static List seekdatoms(Object dConn, Keyword index, Object c1, Object c2, Object c3) { - return (List)seekDatomsFn.invoke(dConn, index, c1, c2, c3); + public static List seekdatoms(Object db, Keyword index, Object c1, Object c2, Object c3) { + return (List)seekDatomsFn.invoke(db, index, c1, c2, c3); } /** * Returns datoms starting from specified components criteria and including rest of the database until the end of the index. * - * @param dConn the database - * @param index a keyword describing the type of index. E.g. {@code k(":eavt")} + * @param db the database + * @param index a keyword describing the type of index. E.g. 
{@code kwd(":eavt")} * @param c1 the first element of a datom used as criteria for filtering * @param c2 the second element of a datom used as criteria for filtering * @param c3 the third element of a datom used as criteria for filtering * @param c4 the fourth element of a datom used as criteria for filtering * @return the list of datoms */ - public static List seekdatoms(Object dConn, Keyword index, Object c1, Object c2, Object c3, Object c4) { - return (List)seekDatomsFn.invoke(dConn, index, c1, c2, c3, c4); + public static List seekdatoms(Object db, Keyword index, Object c1, Object c2, Object c3, Object c4) { + return (List)seekDatomsFn.invoke(db, index, c1, c2, c3, c4); } @@ -316,12 +287,12 @@ public static Long tempId(Keyword k, Long i) { /** * Returns the entity corresponding to the entity id 'eid'. * - * @param dConn the database + * @param db the database * @param eid an entity id * @return an entity */ - public static IEntity entity(Object dConn, int eid) { - return (IEntity)entityFn.invoke(dConn, eid); + public static IEntity entity(Object db, long eid) { + return (IEntity)entityFn.invoke(db, eid); } /** @@ -337,50 +308,61 @@ public static Object entityDb(IEntity entity) { /** * Returns true if the database is filtered using 'filter'. * - * @param dConn a database + * @param db a database * @return true if database is filtered, false otherwise. */ - public static boolean isFiltered(Object dConn) { - return (Boolean)isFilteredFn.invoke(dConn); + public static boolean isFiltered(Object db) { + return (Boolean)isFilteredFn.invoke(db); } /** * Filters the database given a predicate. * - * Example: {@code Object filteredDB = Datahike.filter(dConn(conn), Clojure.read("(fn [_ datom] (not= :age (:a datom)))"));} + * Example: {@code Object filteredDB = Datahike.filter(deref(conn), Clojure.read("(fn [_ datom] (not= :age (:a datom)))"));} * - * @param dConn a database - * @param pred the predicate used for filtering + * @param db a database + * @param pred the predicate Clojure function used for filtering * @return a filtered version of the database */ - public static Object filter(Object dConn, Object pred) { - return filterFn.invoke(dConn, pred); + public static Object filter(Object db, Object pred) { + return filterFn.invoke(db, pred); } - - // TODO: Not fully usable right now. Needs the missing definition of an ITxReport and implementation of - // dbAfter, dbBefore, etc... on the Clojure side /** - * Applies transaction to an immutable db value, returning a transaction report. + * Applies transaction to an immutable db value, returning new immutable db value. * - * @param dConn a database + * @param db a database * @param txData the transaction data - * @return + * @return a new immutable db value with the transaction applied. */ -/* - public static Object with(Object dConn, Object txData) { - return withFn.invoke(dConn, txData); + public static Object dbWith(Object db, Object txData) { + return dbWithFn.invoke(db, txData); } -*/ /** - * Applies transaction to an immutable db value, returning new immutable db value. + * Return the schema of the database * - * @param dConn a database - * @param txData the transaction data - * @return a new immutable db value with the transaction applied. 
+ * @param db a database + */ + public static APersistentMap schema(Object db) { + return (APersistentMap)schemaFn.invoke(db); + } + + /** + * Return the reverse schema of the database + * + * @param db a database + */ + public static APersistentMap reverseSchema(Object db) { + return (APersistentMap)reverseSchemaFn.invoke(db); + } + + /** + * Return the metrics of the database + * + * @param db a database */ - public static Object dbWith(Object dConn, Object txData) { - return dbWithFn.invoke(dConn, txData); + public static APersistentMap metrics(Object db) { + return (APersistentMap)metricsFn.invoke(db); } } diff --git a/java/src/datahike/java/DatahikeTest.java b/java/src/datahike/java/DatahikeTest.java index 1d938218b..daaadb513 100644 --- a/java/src/datahike/java/DatahikeTest.java +++ b/java/src/datahike/java/DatahikeTest.java @@ -2,13 +2,15 @@ import clojure.java.api.Clojure; import clojure.lang.PersistentHashSet; +import clojure.lang.APersistentMap; +import clojure.lang.APersistentVector; import clojure.lang.PersistentVector; import org.junit.Test; import java.util.*; import java.util.stream.Stream; -import static datahike.java.Datahike.dConn; +import static datahike.java.Datahike.deref; import static datahike.java.Util.*; import static org.junit.Assert.*; @@ -20,7 +22,6 @@ public class DatahikeTest { - private String uri = "datahike:mem://test-empty-db-java"; private Object conn; private Object schema = Clojure.read(" [{:db/ident :name\n" + @@ -31,49 +32,55 @@ public class DatahikeTest { " {:db/ident :age\n" + " :db/valueType :db.type/long\n" + " :db/cardinality :db.cardinality/one}]"); + + private APersistentMap config = map(kwd(":store"), map(kwd(":backend"), kwd(":mem")), + kwd(":name"), "test-empty-db-java", + kwd(":initial-tx"), this.schema); + private Date firstDate; private String query; @org.junit.Before public void setUp() throws Exception { - Datahike.deleteDatabase(uri); + Datahike.deleteDatabase(config); } @org.junit.After public void tearDown() throws Exception { - Datahike.deleteDatabase(uri); + Datahike.deleteDatabase(config); } private void transactOnce() { - Datahike.createDatabase(uri, k(":initial-tx"), schema); + Datahike.createDatabase(config); - conn = Datahike.connect(uri); + conn = Datahike.connect(config); query = "[:find ?n ?a :where [?e :name ?n] [?e :age ?a]]"; Datahike.transact(conn, vec( - map(k(":name"), "Alice", k(":age"), 25L), - map(k(":name"), "Bob", k(":age"), 30L))); + map(kwd(":name"), "Alice", kwd(":age"), 25L), + map(kwd(":name"), "Bob", kwd(":age"), 30L))); } - @org.junit.Test - public void db() { - Datahike.createDatabase(uri, k(":initial-tx"), schema); - conn = Datahike.connect(uri); - assertEquals(Datahike.db(conn), dConn(conn)); - } + // @org.junit.Test + /* public void db() { + Datahike.createDatabase(config); + conn = Datahike.connect(config); + assertEquals(Datahike.db(conn), deref(conn)); + } */ @org.junit.Test public void databaseExists() { - Datahike.createDatabase(uri, k(":initial-tx"), schema); - assertTrue(Datahike.databaseExists(uri)); - assertFalse(Datahike.databaseExists("datahike:mem://not-exists")); + Datahike.createDatabase(config); + assertTrue(Datahike.databaseExists(config)); + APersistentMap configNotExisting = (APersistentMap)ednFromString("{:store {:backend :mem :id \"it-does-not-exist\"}}"); + assertFalse(Datahike.databaseExists(configNotExisting)); } @org.junit.Test public void queryWithDBAndInput() { transactOnce(); query = "[:find ?n ?a :in $ [?n] :where [?e :name ?n] [?e :age ?a]]"; - Set res = Datahike.q(query, 
dConn(conn), Clojure.read("[\"Alice\"]")); + Set res = (Set)Datahike.q(query, deref(conn), Clojure.read("[\"Alice\"]")); Object[] names = res.stream().map(vec -> vec.get(0)).toArray(); assertTrue(names[0].equals("Alice")); } @@ -82,7 +89,7 @@ public void queryWithDBAndInput() { public void queryWithJavaArrayInput() { transactOnce(); query = "[:find ?n ?a :in $ [?n] :where [?e :name ?n] [?e :age ?a]]"; - Set res = Datahike.q(query, dConn(conn), new String[] {"Alice"}); + Set res = (Set)Datahike.q(query, deref(conn), new String[] {"Alice"}); Object[] names = res.stream().map(vec -> vec.get(0)).toArray(); assertTrue(names[0].equals("Alice")); } @@ -90,29 +97,29 @@ public void queryWithJavaArrayInput() { @org.junit.Test public void queryWithLocalInputDB() { Object input = Clojure.read("[[1 :name 'Ivan'] [1 :age 19] [1 :aka \"dragon_killer_94\"] [1 :aka '-=autobot=-']]"); - Set res = Datahike.q("[:find ?n ?a :where [?e :aka \"dragon_killer_94\"] [?e :name ?n] [?e :age ?a]]", input); + Set res = (Set)Datahike.q("[:find ?n ?a :where [?e :aka \"dragon_killer_94\"] [?e :name ?n] [?e :age ?a]]", input); assertTrue(res.size() == 1); } @org.junit.Test public void queryWithDB() { - Datahike.createDatabase(uri); - conn = Datahike.connect(uri); + Datahike.createDatabase(config); + conn = Datahike.connect(config); // Transacting new schema - Datahike.transact(conn, vec(map(k(":db/ident"), k(":name"), - k(":db/valueType"), k(":db.type/string"), - k(":db/cardinality"), k(":db.cardinality/one")))); + Datahike.transact(conn, vec(map(kwd(":db/ident"), kwd(":name"), + kwd(":db/valueType"), kwd(":db.type/string"), + kwd(":db/cardinality"), kwd(":db.cardinality/one")))); // Transacting with schema presence - Datahike.transact(conn, vec(map(k(":name"), "Alice"))); + Datahike.transact(conn, vec(map(kwd(":name"), "Alice"))); - Object dConn = deref.invoke(conn); + Object deref = Datahike.deref(conn); - Set res = Datahike.q("[:find ?e :where [?e :name]]", dConn); + Set res = (Set)Datahike.q("[:find ?e :where [?e :name]]", deref); assertTrue(res.size() == 1); - res = Datahike.q("[:find ?v :where [_ :name ?v]]", dConn); + res = (Set)Datahike.q("[:find ?v :where [_ :name ?v]]", deref); assertEquals(PersistentHashSet.create(Arrays.asList(PersistentVector.create("Alice"))), res); } @@ -120,7 +127,7 @@ public void queryWithDB() { public void history() { transactOnce(); - Set res = Datahike.q((String) query, Datahike.history(dConn(conn))); + Set res = (Set)Datahike.q((String) query, Datahike.history(deref(conn))); Object[] names = res.stream().map(pv -> pv.get(0)).toArray(); assertEquals(new String[] {"Alice", "Bob"}, names); } @@ -129,15 +136,21 @@ public void history() { public void asOfAndSince() { transactOnce(); + // Make sure transaction has older timestamp than firstDate + try { + Thread.sleep(10); + } catch (InterruptedException e) { + } + firstDate = new Date(); Datahike.transact(conn, vec(map( - k(":db/id"), vec(k(":name"), "Alice"), - k(":age"), 30L))); + kwd(":db/id"), vec(kwd(":name"), "Alice"), + kwd(":age"), 30L))); - Set res = Datahike.q(query, Datahike.asOf(dConn(conn), firstDate)); + Set res = (Set)Datahike.q(query, Datahike.asOf(deref(conn), firstDate)); assertEquals(2, res.size()); - res = Datahike.q(query, Datahike.since(dConn(conn), firstDate)); + res = (Set)Datahike.q(query, Datahike.since(deref(conn), firstDate)); // 0, because :name was transacted before the first date assertEquals(0, res.size()); } @@ -146,40 +159,36 @@ public void asOfAndSince() { public void pullAndPullMany() { transactOnce(); - 
Datahike.transact(conn, vec(map(k(":db/id"), 10, - k(":name"), "Joe", - k(":age"), 50L))); - Map res = Datahike.pull(dConn(conn), "[*]", 10); - assertEquals("Joe", res.get(k(":name"))); - - res = Datahike.pull(dConn(conn), "[*]", vec(k(":name"), "Alice")); - assertEquals("Alice", res.get(k(":name"))); - + Datahike.transact(conn, vec(map(kwd(":db/id"), 10, + kwd(":name"), "Joe", + kwd(":age"), 50L))); + Map res = Datahike.pull(deref(conn), "[*]", 10); + assertEquals("Joe", res.get(kwd(":name"))); - Datahike.transact(conn, vec(map(k(":db/id"), 20, - k(":name"), "Jane", - k(":age"), 25L))); - List list = Datahike.pullMany(dConn(conn), "[*]", vec(10, 20)); + Datahike.transact(conn, vec(map(kwd(":db/id"), 20, + kwd(":name"), "Jane", + kwd(":age"), 25L))); + List list = Datahike.pullMany(deref(conn), "[*]", vec(10, 20)); assertEquals(2, list.size()); } @Test public void release() { - Datahike.createDatabase(uri, k(":initial-tx"), schema); - conn = Datahike.connect(uri); + Datahike.createDatabase(config); + conn = Datahike.connect(config); Datahike.release(conn); } @Test public void seekDatoms() { - Datahike.createDatabase(uri, k(":initial-tx"), schema); - conn = Datahike.connect(uri); + Datahike.createDatabase(config); + conn = Datahike.connect(config); - Datahike.transact(conn, (PersistentVector)Clojure.read("[{:db/id 10 :name \"Petr\" :age 44} {:db/id 20 :name \"Ivan\" :age 25} {:db/id 30 :name \"Sergey\" :age 11}]")); - List res = Datahike.seekdatoms(dConn( conn), k(":eavt"), 10); + Datahike.transact(conn, (APersistentVector)Clojure.read("[{:db/id 10 :name \"Petr\" :age 44} {:db/id 20 :name \"Ivan\" :age 25} {:db/id 30 :name \"Sergey\" :age 11}]")); + List res = Datahike.seekdatoms(deref( conn), kwd(":eavt"), 10); res.stream().map(vec -> {assertTrue((int)vec.get(0) >= 10); return null;}); - res = Datahike.seekdatoms(dConn( conn), k(":eavt"), 10, k(":name")); + res = Datahike.seekdatoms(deref( conn), kwd(":eavt"), 10, kwd(":name")); res.stream().map(vec -> { int entityId = (int)vec.get(0); assertTrue(entityId == 10 && vec.get(1).equals(":name") || @@ -187,7 +196,7 @@ public void seekDatoms() { return null; }); - res = Datahike.seekdatoms(dConn( conn), k(":eavt"), 30, k(":name"), "Sergey"); + res = Datahike.seekdatoms(deref( conn), kwd(":eavt"), 30, kwd(":name"), "Sergey"); res.stream().map(vec -> { int entityId = (int)vec.get(0); assertTrue(entityId == 30 && vec.get(1).equals(":name") && vec.get(2).equals("Sergey") || @@ -198,31 +207,31 @@ public void seekDatoms() { @Test public void tempId() { - Long id = Datahike.tempId(k(":db.part/user")); + Long id = Datahike.tempId(kwd(":db.part/user")); assertTrue(id < 0); - id = Datahike.tempId(k(":db.part/user"), -10000L); + id = Datahike.tempId(kwd(":db.part/user"), -10000L); assertEquals(-10000L, (long)id); } @Test public void entity() { transactOnce(); - Datahike.transact(conn, vec(map(k(":db/id"), 10, - k(":name"), "Joe", - k(":age"), 50L))); + Datahike.transact(conn, vec(map(kwd(":db/id"), 10, + kwd(":name"), "Joe", + kwd(":age"), 50L))); - IEntity entity = Datahike.entity(dConn(conn), 10); - Object res = entity.valAt(k(":name")); + IEntity entity = Datahike.entity(deref(conn), 10); + Object res = entity.valAt(kwd(":name")); assertEquals("Joe", res); } @Test public void entityDb() { transactOnce(); - Datahike.transact(conn, vec(map(k(":db/id"), 10, - k(":name"), "Joe", - k(":age"), 50L))); - IEntity entity = Datahike.entity(dConn(conn), 10); + Datahike.transact(conn, vec(map(kwd(":db/id"), 10, + kwd(":name"), "Joe", + kwd(":age"), 50L))); + 
IEntity entity = Datahike.entity(deref(conn), 10); Object db = Datahike.entityDb(entity); assertNotNull(db); @@ -230,34 +239,23 @@ public void entityDb() { @Test public void filterAndIsFiltered() { - Datahike.createDatabase(uri, k(":initial-tx"), schema); - conn = Datahike.connect(uri); - assertFalse(Datahike.isFiltered(dConn(conn))); + Datahike.createDatabase(config); + conn = Datahike.connect(config); + assertFalse(Datahike.isFiltered(deref(conn))); - Datahike.transact(conn, (PersistentVector)Clojure.read("[{:db/id 10 :name \"Petr\" :age 44} {:db/id 20 :name \"Ivan\" :age 25} {:db/id 30 :name \"Sergey\" :age 11}]")); - Object filteredDB = Datahike.filter(dConn(conn), Clojure.read("(fn [_ datom] (not= :age (:a datom)))")); + Datahike.transact(conn, (APersistentVector)Clojure.read("[{:db/id 10 :name \"Petr\" :age 44} {:db/id 20 :name \"Ivan\" :age 25} {:db/id 30 :name \"Sergey\" :age 11}]")); + Object filteredDB = Datahike.filter(deref(conn), Clojure.read("(fn [_ datom] (not= :age (:a datom)))")); assertTrue(Datahike.isFiltered(filteredDB)); } - // TODO: restore when 'with' implementation is ready. - /*@Test - public void with() { - Datahike.createDatabase(uri, k(":initial-tx"), schema); - conn = Datahike.connect(uri); - - PersistentVector txData = (PersistentVector)Clojure.read("[{:db/id 10 :name \"Petr\" :age 44} {:db/id 20 :name \"Ivan\" :age 25} {:db/id 30 :name \"Sergey\" :age 11}]"); - Object res = Datahike.with(dConn(conn), txData); - // TODO: assert. But needs to define an ITxReport first. - }*/ - @Test public void dbWith() { - Datahike.createDatabase(uri, k(":initial-tx"), schema); - conn = Datahike.connect(uri); - PersistentVector txData = (PersistentVector)Clojure.read("[{:db/id 10 :name \"Petr\" :age 44} {:db/id 20 :name \"Ivan\" :age 25} {:db/id 30 :name \"Sergey\" :age 11}]"); - Object dbAfter = Datahike.dbWith(dConn(conn), txData); + Datahike.createDatabase(config); + conn = Datahike.connect(config); + APersistentVector txData = (APersistentVector)Clojure.read("[{:db/id 10 :name \"Petr\" :age 44} {:db/id 20 :name \"Ivan\" :age 25} {:db/id 30 :name \"Sergey\" :age 11}]"); + Object dbAfter = Datahike.dbWith(deref(conn), txData); query = "[:find ?a :in $ :where [?e :age ?a]]"; - Set res = Datahike.q(query, dbAfter); + Set res = (Set)Datahike.q(query, dbAfter); assertTrue(res.size() == 3); } diff --git a/java/src/datahike/java/Util.java b/java/src/datahike/java/Util.java index 13597c809..d88467d49 100644 --- a/java/src/datahike/java/Util.java +++ b/java/src/datahike/java/Util.java @@ -2,31 +2,39 @@ import clojure.java.api.Clojure; import clojure.lang.IFn; -import clojure.lang.PersistentHashMap; -import clojure.lang.PersistentVector; +import clojure.lang.APersistentMap; +import clojure.lang.APersistentVector; import clojure.lang.Keyword; import clojure.lang.RT; public class Util { - public static final IFn deref = Clojure.var("clojure.core", "deref"); + public static final IFn derefFn = Clojure.var("clojure.core", "deref"); private static final IFn hashMapFn = Clojure.var("clojure.core", "hash-map"); private static final IFn vectorFn = Clojure.var("clojure.core", "vec"); + private static final IFn readStringFn = Clojure.var("clojure.edn", "read-string"); /** Converts a keyword given as a string into a clojure keyword */ - public static Keyword k(String str) { + public static Keyword kwd(String str) { return (Keyword)Clojure.read(str); } /** Creates a new clojure HashMap from the arguments of the form: key1, val1, key2, val2, ... 
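+     * Example: {@code map(kwd(":name"), "Alice", kwd(":age"), 25L)} builds the Clojure map {:name "Alice" :age 25}.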
*/ // TODO: parse each arg and if it starts with : then convert it into a clj keyword - public static PersistentHashMap map(Object... keyVals) { - return (PersistentHashMap) hashMapFn.applyTo(RT.seq(keyVals)); + public static APersistentMap map(Object... keyVals) { + return (APersistentMap) hashMapFn.applyTo(RT.seq(keyVals)); } /** Creates a new clojure vector */ - public static PersistentVector vec(Object... items) { + public static APersistentVector vec(Object... items) { // No need to use 'applyTo here, as 'vectorFn taking a collection as an argument will produce a clj vector // containing *each* of the collection elements. - return (PersistentVector) vectorFn.invoke(items); + return (APersistentVector) vectorFn.invoke(items); + } + + + + /** Read string into edn data structure. */ + public static Object ednFromString(String str) { + return readStringFn.invoke(str); } } diff --git a/libdatahike/compile-cpp b/libdatahike/compile-cpp new file mode 100755 index 000000000..e7d175fc4 --- /dev/null +++ b/libdatahike/compile-cpp @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +set -eo pipefail + +g++ libdatahike/src/test_cpp.cpp -L libdatahike/target -I libdatahike/target -ldatahike -o libdatahike/target/test_cpp + +echo "Created libdatahike/target/test_cpp." diff --git a/libdatahike/compile-libdatahike b/libdatahike/compile-libdatahike new file mode 100755 index 000000000..59671d3b0 --- /dev/null +++ b/libdatahike/compile-libdatahike @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +set -eo pipefail + +if [ -z "$GRAALVM_HOME" ]; then + echo "GRAALVM_HOME not set" + exit 1 +fi + +echo "Building uber jar for native library." +clj -T:build native-shared-library +DATAHIKE_VERSION=$(cat resources/DATAHIKE_VERSION) +DATAHIKE_JAR=target/datahike-$DATAHIKE_VERSION-native-shared-library.jar + +SVM_JAR=$(find "$GRAALVM_HOME" | grep svm.jar) + +echo "Compiling LibDatahike native bindings Java class." +$GRAALVM_HOME/bin/javac -cp "$DATAHIKE_JAR:$SVM_JAR" libdatahike/src/datahike/impl/LibDatahike.java + +echo "Compiling shared library through native image." 
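+# Notes on the flags below: --shared makes native-image emit a shared library
+# (libdatahike.so) together with the C header files instead of an executable,
+# -H:Name sets the base name of those artifacts, and --initialize-at-build-time
+# initializes the AOT-compiled Clojure classes at image build time.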
+$GRAALVM_HOME/bin/native-image \ + -jar $DATAHIKE_JAR \ + -cp libdatahike/src \ + -H:Name=libdatahike \ + --shared \ + -H:+ReportExceptionStackTraces \ + -J-Dclojure.spec.skip-macros=true \ + -J-Dclojure.compiler.direct-linking=true \ + "-H:IncludeResources=DATAHIKE_VERSION" \ + --initialize-at-build-time \ + -H:Log=registerResource: \ + --verbose \ + --no-fallback \ + --no-server \ + "-J-Xmx5g" + +rm -rf libdatahike/target +mkdir -p libdatahike/target + +mv graal_isolate_dynamic.h libdatahike.h graal_isolate.h libdatahike.so libdatahike_dynamic.h libdatahike/target diff --git a/libdatahike/src/datahike/impl/LibDatahike.java b/libdatahike/src/datahike/impl/LibDatahike.java new file mode 100644 index 000000000..175f76e52 --- /dev/null +++ b/libdatahike/src/datahike/impl/LibDatahike.java @@ -0,0 +1,285 @@ +package datahike.impl; + +import org.graalvm.nativeimage.c.function.CEntryPoint; +import org.graalvm.nativeimage.c.function.InvokeCFunctionPointer; +import org.graalvm.nativeimage.c.function.CFunctionPointer; +import org.graalvm.nativeimage.c.type.CCharPointer; +import org.graalvm.nativeimage.c.type.CCharPointerPointer; +import org.graalvm.nativeimage.c.type.CTypeConversion; +import org.graalvm.word.PointerBase; +import com.oracle.svm.core.c.CConst; +import datahike.java.Datahike; +import datahike.java.Util; +import datahike.impl.libdatahike; +import clojure.lang.IPersistentVector; +import clojure.lang.APersistentMap; +import clojure.lang.Keyword; +import java.io.StringWriter; +import java.io.PrintWriter; +import java.util.Date; +import java.util.List; +import java.util.Arrays; +import java.util.Base64; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.nio.charset.StandardCharsets; + +public final class LibDatahike { + + // C representations + + private static @CConst CCharPointer toException(Exception e) { + StringWriter sw = new StringWriter(); + e.printStackTrace(new PrintWriter(sw)); + String st = sw.toString(); + return toCCharPointer("exception:".concat(e.toString()).concat("\nStacktrace:\n").concat(st)); + } + + public static APersistentMap readConfig(CCharPointer db_config) { + return (APersistentMap)Util.ednFromString(CTypeConversion.toJavaString(db_config)); + } + + public static @CConst CCharPointer toCCharPointer(String result) { + CTypeConversion.CCharPointerHolder holder = CTypeConversion.toCBytes(result.getBytes(StandardCharsets.UTF_8)); + CCharPointer value = holder.get(); + return value; + } + + public static @CConst CCharPointer toCCharPointer(byte[] result) { + CTypeConversion.CCharPointerHolder holder = CTypeConversion.toCBytes(Base64.getEncoder().encode(result)); + CCharPointer value = holder.get(); + return value; + } + + private static Object loadInput(@CConst CCharPointer input_format, + @CConst CCharPointer raw_input) { + String format = CTypeConversion.toJavaString(input_format); + String formats[] = format.split(":"); + switch (formats[0]) { + case "db": return Datahike.deref(Datahike.connect(readConfig(raw_input))); + case "history": return Datahike.history(Datahike.deref(Datahike.connect(readConfig(raw_input)))); + case "since": return Datahike.since(Datahike.deref(Datahike.connect(readConfig(raw_input))), + new Date(Long.parseLong(formats[1]))); + case "asof": return Datahike.asOf(Datahike.deref(Datahike.connect(readConfig(raw_input))), + new Date(Long.parseLong(formats[1]))); + case "json": return libdatahike.parseJSON(CTypeConversion.toJavaString(raw_input)); + case "edn": return 
libdatahike.parseEdn(CTypeConversion.toJavaString(raw_input)); + case "cbor": return libdatahike.parseCBOR(CTypeConversion.toJavaString(raw_input).getBytes()); + default: throw new IllegalArgumentException("Input format not supported: " + format); + } + } + + private static Object[] loadInputs(@CConst long num_inputs, + @CConst CCharPointerPointer input_formats, + @CConst CCharPointerPointer raw_inputs) { + Object[] inputs = new Object[(int)num_inputs]; + for (int i=0; i % schema :db/valueType valtypes) (keys schema)))) + +(def ^:private filter-kw-attrs + (partial filter-value-type-attrs #{:db.type/keyword :db.type/value :db.type/cardinality :db.type/unique})) + +(def keyword-valued-schema-attrs (filter-kw-attrs s/implicit-schema-spec)) + +(defn- int-obj-to-long [i] + (if (some? i) + (long i) + (throw (ex-info "Cannot store nil as a value")))) + +(defn- xf-val [f v] + (if (vector? v) (map f v) (f v))) + +(declare handle-id-or-av-pair) + +(defn- xf-ref-val [v valtype-attrs-map db] + (if (vector? v) + (walk/prewalk #(handle-id-or-av-pair % valtype-attrs-map db) v) + v)) + +(defn keywordize-string [s] + (if (string? s) (keyword s) s)) + +(defn ident-for [db a] + (if (and (number? a) (some? db)) (.-ident-for db a) a)) + +(defn cond-xf-val + [a-ident v {:keys [ref-attrs long-attrs keyword-attrs symbol-attrs] :as valtype-attrs-map} db] + (cond + (contains? ref-attrs a-ident) (xf-ref-val v valtype-attrs-map db) + (contains? long-attrs a-ident) (xf-val int-obj-to-long v) + (contains? keyword-attrs a-ident) (xf-val keyword v) + (contains? symbol-attrs a-ident) (xf-val symbol v) + :else v)) + +(defn handle-id-or-av-pair + ([v valtype-attrs-map] + (handle-id-or-av-pair v valtype-attrs-map nil)) + ([v valtype-attrs-map db] + (if (and (vector? v) (= (count v) 2)) + (let [a (keywordize-string (first v))] + [a (cond-xf-val (ident-for db a) (nth v 1) valtype-attrs-map db)]) + v))) + +(defn- xf-tx-data-map [m valtype-attrs-map db] + (into {} + (map (fn [[a v]] + [a (if (= :db/id a) + (handle-id-or-av-pair [a v] valtype-attrs-map db) + (cond-xf-val a v valtype-attrs-map db))]) + m))) + +(defn- xf-tx-data-vec [tx-vec valtype-attrs-map db] + (let [op (first tx-vec) + [e a v] (rest tx-vec) + a (keywordize-string a)] + (vec (filter some? (list (keyword op) + (handle-id-or-av-pair e valtype-attrs-map db) + a + (cond-xf-val (ident-for db a) v valtype-attrs-map db)))))) + +(defn get-valtype-attrs-map [schema] + (let [ref-valued-attrs (filter-value-type-attrs #{:db.type/ref} schema) + long-valued-attrs (filter-value-type-attrs #{:db.type/long} schema) + kw-valued-attrs (clojure.set/union keyword-valued-schema-attrs (filter-kw-attrs schema)) + sym-valued-attrs (filter-value-type-attrs #{:db.type/symbol} schema)] + {:ref-attrs ref-valued-attrs + :long-attrs long-valued-attrs + :keyword-attrs kw-valued-attrs + :symbol-attrs sym-valued-attrs})) + +(defn xf-data-for-tx [tx-data db] + (let [valtype-attrs-map (get-valtype-attrs-map (:schema db))] + (map #(let [xf-fn (cond (map? %) xf-tx-data-map + (vector? %) xf-tx-data-vec + ; Q: Is this error appropriate? 
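+                          ; anything that is neither a map nor a vector cannot be coerced into tx data, so it is rejected here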
+ :else (throw (ex-info "Only maps and vectors allowed in :tx-data and :tx-meta" + {:event :handlers/transact :data tx-data})))] + (xf-fn % valtype-attrs-map db)) + tx-data))) + +;; ============= exported helpers ============= + +(defn -parseJSON [s] + (ch/parse-string s keyword)) + +(defn -parseEdn [s] + (edn/read-string s)) + +(defn -parseCBOR [^bytes b] + (cbor/decode b)) + +(defn -toJSONString [obj] + (ch/generate-string obj)) + +(defn -toEdnString [obj] + (pr-str obj)) + +(defn ^bytes -toCBOR [edn] + (cbor/encode edn)) + +(defn -JSONAsTxData [tx-data db] + (xf-data-for-tx (ch/parse-string tx-data keyword) db)) + +(defn -datomsToVecs [datoms] + (mapv #(vec (seq ^Datom %)) datoms)) + +(defn -intoMap [edn] + (into {} edn)) diff --git a/libdatahike/src/test_cpp.cpp b/libdatahike/src/test_cpp.cpp new file mode 100644 index 000000000..b2d65ef50 --- /dev/null +++ b/libdatahike/src/test_cpp.cpp @@ -0,0 +1,59 @@ +#include +#include +#include + +void default_reader(char* edn) { + std::cout << "result: " << edn << std::endl; +} + +void database_exists_reader(char* database_exists_result_edn) { + std::cout << "database exists result: " << database_exists_result_edn << std::endl; + std::string true_str = "true"; + assert(true_str.compare(database_exists_result_edn) == 0); +} + +void transact_reader(char* transact_result_edn) { + std::cout << "transact result: " << transact_result_edn << std::endl; +} + +void query_reader(char* query_result_edn) { + std::cout << "query result: " << query_result_edn << std::endl; + std::string expected_q_result = "1"; + assert(expected_q_result.compare(query_result_edn) == 0); +} + + +int main(int argc, char* argv[]) { + graal_isolate_t *isolate = NULL; + graal_isolatethread_t *thread = NULL; + + if (graal_create_isolate(NULL, &isolate, &thread) != 0) { + fprintf(stderr, "Initialization error.\n"); + return 1; + } + + const char *config_str = &argv[1][0]; + + void (*default_reader_pointer)(char*); + default_reader_pointer = &default_reader; + create_database((long)thread, config_str, (const void*)default_reader_pointer); + + void (*database_exists_reader_pointer)(char*); + database_exists_reader_pointer = &database_exists_reader; + database_exists((long)thread, config_str, (const void*)database_exists_reader_pointer); + + char *json_str = &argv[2][0]; + void (*transact_reader_pointer)(char*); + transact_reader_pointer = &transact_reader; + transact((long)thread, config_str, "json", json_str, "edn", (const void*)transact_reader); + + char *query_str = &argv[3][0]; + long num_inputs = 1; + const char *input_format = "db"; + const char *output_format = "edn"; + void (*query_reader_pointer)(char*); + query_reader_pointer = &query_reader; + query((long)thread, query_str, num_inputs, &input_format, &config_str, + output_format, (const void*)query_reader_pointer); + return 0; +} diff --git a/resources/.touch b/resources/.touch new file mode 100644 index 000000000..e69de29bb diff --git a/src/datahike/cli.clj b/src/datahike/cli.clj new file mode 100644 index 000000000..affc64f68 --- /dev/null +++ b/src/datahike/cli.clj @@ -0,0 +1,248 @@ +(ns datahike.cli + (:gen-class) + (:require [clojure.data.json :as json] + [clojure.java.io :as io] + [clojure.pprint :refer [pprint]] + [clojure.string :as str] + [clojure.tools.cli :refer [parse-opts]] + [datahike.api :as d] + [clojure.edn :as edn] + [cheshire.core :as ch] + [clj-cbor.core :as cbor] + [taoensso.timbre :as log]) + (:import [java.util Date])) + +;; This file is following https://github.com/clojure/tools.cli + +(defn usage 
[options-summary] + (->> ["This is the Datahike command line interface." + "" + "The commands mostly reflect the datahike.api Clojure API. You can find its documentation under api at https://cljdoc.org/d/io.replikativ/datahike/. To instantiate a specific database, you can use db:config_file to access the current database value, conn:config_file to create a mutable connection for manipulation, history:config_file for the historical database over all transactions, since:unix_time_in_ms:config_file to create a database with all facts since the time provided and asof:unix_time_in_ms:config_file to create an snapshot as-of the time provided. To pass in edn data use edn:edn_file and for JSON use json:json_file." + "" + "Usage: dhi [options] action arguments" + "" + "Options:" + options-summary + "" + "Actions:" + " create-database Create database for a provided configuration file, e.g. create-database config_file" + " delete-database Delete database for a provided configuration file, e.g. delete-database config_file" + " database-exists Check whether database exists for a provided configuration file, e.g. database-exists config_file" + " transact Transact transactions, optionally from a file with --tx-file or from STDIN. Exampe: transact conn:config_file \"[{:name \"Peter\" :age 42}]\"" + " query Query the database, e.g. query '[:find (count ?e) . :where [?e :name _]]' db:mydb.edn. You can pass an arbitrary number of data sources to the query." + " benchmark Benchmarks write performance. The arguments are starting eid, ending eid and the batch partitioning of the added synthetic Datoms. The Datoms have the form [eid :name ?randomly-sampled-name]" + " pull Pull data in a map syntax for a specific entity: pull db:mydb.edn \"[:db/id, :name]\" \"1\"." + " pull-many Pull data in a map syntax for a list of entities: pull db:mydb.edn \"[:db/id, :name]\" \"[1,2]\"" + " entity Fetch entity: entity db:mydb.edn \"1\"" + " datoms Fetch all datoms from an index: datoms db:mydb.edn \"{:index :eavt :components [1]}\" " + " schema Fetch schema for a db." + " reverse-schema Fetch reverse schema for a db." + " metrics Fetch metrics for a db." + "" + "Please refer to the manual page for more information."] + (str/join \newline))) + +(defn error-msg [errors] + (str "The following errors occurred while parsing your command:\n\n" + (str/join \newline errors))) + +(def actions #{"create-database" "delete-database" "database-exists" "transact" "query" "benchmark" + "pull" "pull-many" "entity" "datoms" "schema" "reverse-schema" "metrics"}) + +(def cli-options + ;; An option with a required argument + (let [formats #{:json :edn :pretty-json :pprint :cbor}] + [["-f" "--format FORMAT" "Output format for the result." + :default :edn + :parse-fn keyword + :validate [formats (str "Must be one of: " (str/join ", " formats))]] + ["-if" "--input-format FORMAT" "Input format for the transaction." + :default :edn + :parse-fn keyword + :validate [formats (str "Must be one of: " (str/join ", " formats))]] + [nil "--tx-file PATH" "Use this input file for transactions instead of command line or STDIN." + :default nil + :validate [#(.exists (io/file %)) "Transaction file does not exist."]] + ;; A non-idempotent option (:default is applied first) + ["-v" nil "Verbosity level" + :id :verbosity + :default 0 + :update-fn inc] + ;; A boolean option defaulting to nil + ["-h" "--help"]])) + +(defn validate-args + "Validate command line arguments. 
Either return a map indicating the program + should exit (with a error message, and optional ok status), or a map + indicating the action the program should take and the options provided." + [args] + (let [{:keys [options arguments errors summary]} (parse-opts args cli-options)] + (cond + (:help options) ; help => exit OK with usage summary + {:exit-message (usage summary) :ok? true :options options} + + errors ; errors => exit with description of errors + {:exit-message (error-msg errors) :options options} + + (and (not= "transact" (first arguments)) + (:tx-file options)) + {:exit-message "The option --tx-file is only applicable to the transact action." + :options options} + + (actions (first arguments)) + {:action (keyword (first arguments)) :options options + :arguments (rest arguments)} + + (not (actions (first arguments))) + {:exit-message (str "Unknown action, must be one of: " + (str/join ", " actions)) + :options options} + + :else ; failed custom validation => exit with usage summary + {:exit-message (usage summary) + :options options}))) + +(defn exit [status msg] + (println msg) + (System/exit status)) + +;; format: optional first argument Unix time in ms for history, last file for db +(def input->db + {#"conn:(.+)" #(d/connect (edn/read-string (slurp %))) + #"db:(.+)" #(deref (d/connect (edn/read-string (slurp %)))) + #"history:(.+)" #(d/history @(d/connect (edn/read-string (slurp %)))) + #"since:(.+):(.+)" #(d/since @(d/connect (edn/read-string (slurp %2))) + (Date. ^Long (edn/read-string %1))) + #"asof:(.+):(.+)" #(d/as-of @(d/connect (edn/read-string (slurp %2))) + (Date. ^Long (edn/read-string %1))) + #"cbor:(.+)" #(cbor/decode (io/input-stream %)) + #"edn:(.+)" (comp edn/read-string slurp) + #"json:(.+)" (comp #(ch/parse-string % keyword) slurp)}) + +(defn load-input [s] + (if-let [res + (reduce (fn [_ [p f]] + (let [m (re-matches p s)] + (when (first m) + (reduced (apply f (rest m)))))) + nil + input->db)] + res + (throw (ex-info "Input format not know." {:type :input-format-not-known + :input s})))) + +(defn report [format out] + (case format + :json (println (json/json-str out)) + :pretty-json (json/pprint out) + :edn (println (pr-str out)) + :pprint (pprint out) + :cbor (.write System/out ^bytes (cbor/encode out)))) + +(defn -main [& args] + (let [{:keys [action options arguments exit-message ok?]} + (validate-args args)] + (case (int (:verbosity options)) + 0 ;; default + (log/set-level! :warn) + 1 + (log/set-level! :info) + 2 + (log/set-level! :debug) + 3 + (log/set-level! :trace) + (exit 1 (str "Verbosity level not supported: " (:verbosity options)))) + + (if exit-message + (exit (if ok? 0 1) exit-message) + (case action + :create-database + (report (:format options) + (d/create-database (read-string (slurp (first arguments))))) + + :delete-database + (report (:format options) + (d/delete-database (read-string (slurp (first arguments))))) + + :database-exists + (report (:format options) + (d/database-exists? (read-string (slurp (first arguments))))) + + :transact + (report (:format options) + (:tx-meta + (d/transact (load-input (first arguments)) + (vec ;; TODO support set inputs for transact + (if-let [tf (:tx-file options)] + (load-input tf) + (if-let [s (second arguments)] + (case (:input-format options) + :edn (edn/read-string s) + :pprint (edn/read-string s) + :json (ch/parse-string s keyword) + :pretty-json (ch/parse-string s keyword) + :cbor (cbor/decode s) ;; does this really make sense? 
+ ) + (case (:input-format options) + :edn (edn/read) + :pprint (edn/read) + :json (ch/decode-stream *in* keyword) + :pretty-json :json (ch/decode-stream *in*) + :cbor (cbor/decode *in*)))))))) + + :benchmark + (let [conn (load-input (first arguments)) + args (rest arguments) + tx-data (vec (for [i (range (read-string (first args)) + (read-string (second args)))] + [:db/add (inc i) + :name (rand-nth ["Chrislain" "Christian" + "Jiayi" "Judith" + "Konrad" "Pablo" + "Timo" "Wade"])]))] + (doseq [txs (partition (read-string (nth args 2)) tx-data)] + (time + (d/transact conn txs)))) + + :query + (let [q-args (mapv #(load-input %) (rest arguments)) + out (apply d/q (read-string (first arguments)) + q-args)] + (report (:format options) out)) + + :pull + (let [out (into {} (d/pull (load-input (first arguments)) + (read-string (second arguments)) + (read-string (nth arguments 2))))] + (report (:format options) out)) + + :pull-many + (let [out (mapv #(into {} %) + (d/pull-many (load-input (first arguments)) + (read-string (second arguments)) + (read-string (nth arguments 2))))] + (report (:format options) out)) + + :entity + (let [out (into {} (d/entity (load-input (first arguments)) + (read-string (second arguments))))] + (report (:format options) out)) + + :datoms + (let [out (d/datoms (load-input (first arguments)) + (read-string (second arguments)))] + (report (:format options) out)) + + :schema + (let [out (d/schema (load-input (first arguments)))] + (report (:format options) out)) + + :reverse-schema + (let [out (d/reverse-schema (load-input (first arguments)))] + (report (:format options) out)) + + :metrics + (let [out (d/metrics (load-input (first arguments)))] + (report (:format options) out)))))) + + diff --git a/src/datahike/connector.cljc b/src/datahike/connector.cljc index 3d3e1cdc8..fe06955d9 100644 --- a/src/datahike/connector.cljc +++ b/src/datahike/connector.cljc @@ -8,7 +8,6 @@ [datahike.transactor :as t] [konserve.core :as k] [hasch.core :refer [uuid]] - [superv.async :refer [> (di/-seq (.-eavt db)) - (reduce (fn [m datom] + (reduce (fn [m ^Datom datom] (-> m (update-count-in [:per-attr-counts (dbi/-ident-for db (.-a datom))]) (update-count-in [:per-entity-counts (.-e datom)]))) @@ -856,7 +856,7 @@ (dbi/-keep-history? db) (merge {:temporal-count (di/-count (.-temporal-eavt db)) :temporal-avet-count (->> (di/-seq (.-temporal-eavt db)) - (reduce (fn [m datom] (update-count-in m [(dbi/-ident-for db (.-a datom))])) + (reduce (fn [m ^Datom datom] (update-count-in m [(dbi/-ident-for db (.-a datom))])) {}) sum-indexed-attr-counts)})))) diff --git a/src/datahike/db/utils.cljc b/src/datahike/db/utils.cljc index 78a0c9c13..e59c2c61f 100644 --- a/src/datahike/db/utils.cljc +++ b/src/datahike/db/utils.cljc @@ -194,7 +194,7 @@ (di/-slice temporal-index from to index-type)))) (defn filter-txInstant [datoms pred db] - (let [txInstant (if (:attribute-refs? (.-config db)) + (let [txInstant (if (:attribute-refs? 
(dbi/-config db)) (dbi/-ref-for db :db/txInstant) :db/txInstant)] (into #{} diff --git a/src/datahike/index/persistent_set.cljc b/src/datahike/index/persistent_set.cljc index 07de19e8d..33f887bd1 100644 --- a/src/datahike/index/persistent_set.cljc +++ b/src/datahike/index/persistent_set.cljc @@ -13,7 +13,7 @@ #?(:clj (:import [datahike.datom Datom] [org.fressian.handlers WriteHandler ReadHandler] [me.tonsky.persistent_sorted_set PersistentSortedSet IStorage Leaf Branch ANode] - [java.util UUID]))) + [java.util UUID List]))) (defn index-type->cmp ([index-type] (index-type->cmp index-type true)) @@ -182,7 +182,7 @@ (defmethod di/empty-index :datahike.index/persistent-set [_index-name store index-type _] (psset/set-branching-factor! BRANCHING_FACTOR) - (let [pset (psset/sorted-set-by (index-type->cmp-quick index-type false))] + (let [^PersistentSortedSet pset (psset/sorted-set-by (index-type->cmp-quick index-type false))] (set! (.-_storage pset) (:storage store)) (with-meta pset {:index-type index-type}))) @@ -197,7 +197,7 @@ (not (arrays/array? datoms)) (arrays/into-array))) _ (arrays/asort arr (index-type->cmp-quick index-type false)) - pset (psset/from-sorted-array (index-type->cmp-quick index-type false) arr)] + ^PersistentSortedSet pset (psset/from-sorted-array (index-type->cmp-quick index-type false) arr)] (set! (.-_storage pset) (:storage store)) (with-meta pset {:index-type index-type}))) @@ -226,7 +226,7 @@ (reify ReadHandler (read [_ reader _tag _component-count] (let [{:keys [keys level addresses]} (.readObject reader)] - (Branch. (int level) keys (seq addresses))))) + (Branch. (int level) ^List keys ^List (seq addresses))))) "datahike.datom.Datom" (reify ReadHandler (read [_ reader _tag _component-count] diff --git a/src/datahike/index/utils.cljc b/src/datahike/index/utils.cljc index ce7f695d2..6d6c43fdb 100644 --- a/src/datahike/index/utils.cljc +++ b/src/datahike/index/utils.cljc @@ -4,12 +4,12 @@ #?(:clj (:import [datahike.datom Datom]))) (defn datom-to-vec [^Datom datom index-type start?] - (let [e (fn [datom] (when-not (or (and start? (= e0 (.-e datom))) - (and (not start?) (= emax (.-e datom)))) - (.-e datom))) - tx (fn [datom] (when-not (or (and start? (= tx0 (.-tx datom))) - (and (not start?) (= txmax (.-tx datom)))) - (.-tx datom))) + (let [e (fn [^Datom datom] (when-not (or (and start? (= e0 (.-e datom))) + (and (not start?) (= emax (.-e datom)))) + (.-e datom))) + tx (fn [^Datom datom] (when-not (or (and start? (= tx0 (.-tx datom))) + (and (not start?) (= txmax (.-tx datom)))) + (.-tx datom))) datom-seq (case index-type :aevt (list (.-a datom) (e datom) (.-v datom) (tx datom)) :avet (list (.-a datom) (.-v datom) (e datom) (tx datom)) diff --git a/src/datahike/store.cljc b/src/datahike/store.cljc index 9893387ad..216d0f6f9 100644 --- a/src/datahike/store.cljc +++ b/src/datahike/store.cljc @@ -95,7 +95,7 @@ ;; file -(defmethod empty-store :file [{:keys [path config]}] +(defmethod empty-store :file [{:keys [path]}] (fs/connect-fs-store path :opts {:sync? true})) (defmethod delete-store :file [{:keys [path]}] diff --git a/src/datahike/tools.cljc b/src/datahike/tools.cljc index 6e6e7f40a..be530706a 100644 --- a/src/datahike/tools.cljc +++ b/src/datahike/tools.cljc @@ -23,7 +23,7 @@ (defmacro case-tree [qs vs] (-case-tree qs vs))) -(defn get-time [] +(defn ^:dynamic get-time [] #?(:clj (java.util.Date.) 
:cljs (js/Date.))) diff --git a/src/datahike/transactor.cljc b/src/datahike/transactor.cljc index 62551cae7..159139805 100644 --- a/src/datahike/transactor.cljc +++ b/src/datahike/transactor.cljc @@ -1,7 +1,7 @@ (ns ^:no-doc datahike.transactor (:require [superv.async :refer [