diff --git a/src/datahike/array.cljc b/src/datahike/array.cljc index 061ab4f0..33d679dd 100644 --- a/src/datahike/array.cljc +++ b/src/datahike/array.cljc @@ -48,6 +48,25 @@ (n/-order-on-edn-types b)))) (raw-array-compare a b)))) +#?(:clj (defn string-from-bytes + "Represents a byte array as a string. Two byte arrays are said to be equal iff their corresponding values after applying this function are equal. That way, we rely on the equality and hash code implementations of the String class to compare byte arrays." + [x] + (let [n (alength x) + dst (char-array n)] + (dotimes [i n] + (aset dst i (char (aget x i)))) + (String. dst)))) + +(defrecord WrappedBytes [string-repr]) + +(defn wrap-comparable + "This function is such that `(a= x y)` is equivalent to `(= (wrap-comparable x) (wrap-comparable y))`. This lets us also use these semantics in hash-sets or as keys in maps." + [x] + (if (bytes? x) + (WrappedBytes. #?(:clj (string-from-bytes x) + :cljs x)) + x)) + (defn a= "Extension of Clojure's equality to things we also want to treat like values, e.g. certain array types." diff --git a/src/datahike/db.cljc b/src/datahike/db.cljc index d033677a..50ff2067 100644 --- a/src/datahike/db.cljc +++ b/src/datahike/db.cljc @@ -204,6 +204,10 @@ (-> ((contextual-search-fn context) db pattern) (post-process-datoms db context))) +(defn contextual-batch-search [db pattern-mask batch-fn context] + (-> ((contextual-search-fn context) db pattern-mask batch-fn) + (post-process-datoms db context))) + (defn contextual-datoms [db index-type cs context] (-> (case (dbi/context-temporal?
context) true (dbu/temporal-datoms db index-type cs) @@ -316,6 +320,8 @@ (-search-context [db] dbi/base-context) (-search [db pattern context] (contextual-search db pattern context)) + (-batch-search [db pattern-mask batch-fn context] + (contextual-batch-search db pattern-mask batch-fn context)) dbi/IIndexAccess (-datoms [db index-type cs context] @@ -395,6 +401,8 @@ (filter (.-pred db)))) (-search [db pattern context] (dbi/-search unfiltered-db pattern context)) + (-batch-search [db pattern-mask batch-fn context] + (dbi/-batch-search unfiltered-db pattern-mask batch-fn context)) dbi/IIndexAccess (-datoms [db index cs context] @@ -467,6 +475,8 @@ dbi/context-with-history)) (-search [db pattern context] (dbi/-search origin-db pattern context)) + (-batch-search [db pattern-mask batch-fn context] + (dbi/-batch-search origin-db pattern-mask batch-fn context)) dbi/IIndexAccess (-datoms [db index-type cs context] (dbi/-datoms origin-db index-type cs context)) @@ -530,6 +540,8 @@ (as-of-pred time-point))) (-search [db pattern context] (dbi/-search origin-db pattern context)) + (-batch-search [db pattern batch-fn context] + (dbi/-batch-search origin-db pattern batch-fn context)) dbi/IIndexAccess (-datoms [db index-type cs context] @@ -595,6 +607,8 @@ (since-pred time-point))) (-search [db pattern context] (dbi/-search origin-db pattern context)) + (-batch-search [db pattern batch-fn context] + (dbi/-batch-search origin-db pattern batch-fn context)) dbi/IIndexAccess (dbi/-datoms [db index-type cs context] diff --git a/src/datahike/db/interface.cljc b/src/datahike/db/interface.cljc index 00da20c6..f5eb5161 100644 --- a/src/datahike/db/interface.cljc +++ b/src/datahike/db/interface.cljc @@ -70,11 +70,19 @@ (defprotocol ISearch (-search-context [data]) - (-search [data pattern context])) + (-search [data pattern context]) + (-batch-search [data pattern-mask batch-fn context])) (defn search [data pattern] (-search data pattern (-search-context data))) +(defn batch-search [data 
pattern-mask batch-fn final-xform] + (-batch-search data + pattern-mask + batch-fn + (context-with-xform-after (-search-context data) + final-xform))) + (defprotocol IIndexAccess (-datoms [db index components context]) (-seek-datoms [db index components context]) diff --git a/src/datahike/db/search.cljc b/src/datahike/db/search.cljc index 849b15be..870c6da1 100644 --- a/src/datahike/db/search.cljc +++ b/src/datahike/db/search.cljc @@ -3,14 +3,12 @@ [clojure.core.cache.wrapped :as cw] [datahike.array :refer [a=]] [datahike.constants :refer [e0 tx0 emax txmax]] - [datahike.datom :refer [datom datom-tx datom-added]] + [datahike.datom :refer [datom datom-tx datom-added type-hint-datom]] [datahike.db.utils :as dbu] [datahike.index :as di] [datahike.lru :refer [lru-datom-cache-factory]] - [datahike.tools :refer [case-tree raise]] + [datahike.tools :refer [raise match-vector]] [environ.core :refer [env]]) - #?(:cljs (:require-macros [datahike.datom :refer [datom]] - [datahike.tools :refer [case-tree raise]])) #?(:clj (:import [datahike.datom Datom]))) (def db-caches (cw/lru-cache-factory {} :threshold (:datahike-max-db-caches env 5))) @@ -26,104 +24,225 @@ (defn validate-pattern "Checks if database pattern is valid" - [pattern] - (let [[e a v tx added?] pattern] + [pattern can-have-vars] + (let [bound-var? (if can-have-vars symbol? (fn [_] false)) + [e a v tx added?] pattern] (when-not (or (number? e) (nil? e) + (bound-var? e) (and (vector? e) (= 2 (count e)))) (raise "Bad format for entity-id in pattern, must be a number, nil or vector of two elements." {:error :search/pattern :e e :pattern pattern})) (when-not (or (number? a) (keyword? a) + (bound-var? a) (nil? a)) (raise "Bad format for attribute in pattern, must be a number, nil or a keyword." {:error :search/pattern :a a :pattern pattern})) (when-not (or (not (vector? v)) (nil? v) + (bound-var? v) (and (vector? 
v) (= 2 (count v)))) (raise "Bad format for value in pattern, must be a scalar, nil or a vector of two elements." {:error :search/pattern :v v :pattern pattern})) (when-not (or (nil? tx) + (bound-var? tx) (number? tx)) (raise "Bad format for transaction ID in pattern, must be a number or nil." {:error :search/pattern :tx tx :pattern pattern})) (when-not (or (nil? added?) - (boolean? added?)) + (boolean? added?) + (bound-var? added?)) (raise "Bad format for added? in pattern, must be a boolean value or nil." {:error :search/pattern :added? added? :pattern pattern})))) -(defn- search-indices - "Assumes correct pattern form, i.e. refs for ref-database" - [eavt aevt avet pattern indexed? temporal-db?] - (validate-pattern pattern) +(defn short-hand->strat-symbol [x] + (case x + 1 :substitute + f :filter + _ nil + :substitute :substitute + :filter :filter + nil nil)) + +(defn datom-expr [[esym asym vsym tsym] + [e-strat a-strat v-strat t-strat] + e-bound + tx-bound] + (let [subst (fn [expr strategy bound] + (case strategy + :substitute expr + bound))] + `(datom ~(subst esym e-strat e-bound) + ~(subst asym a-strat nil) + ~(subst vsym v-strat nil) + ~(subst tsym t-strat tx-bound)))) + +(defn lookup-strategy-sub [index-key eavt-symbols eavt-strats] + (let [[_ _ v-strat t-strat] eavt-strats + [_ _ v-sym t-sym] eavt-symbols + strat-set (set eavt-strats) + has-substitution (contains? strat-set :substitute) + index-expr (symbol index-key) + + lower-datom (datom-expr eavt-symbols eavt-strats 'e0 'tx0) + upper-datom (datom-expr eavt-symbols eavt-strats 'emax 'txmax) + + ;; Either get all datoms or a subset where some values in the + ;; datom are fixed. + lookup-expr (if has-substitution + `(di/-slice ~index-expr + ~lower-datom + ~upper-datom + ~index-key) + `(di/-all ~index-expr)) + + ;; Symbol type-hinted as Datom. + dexpr (type-hint-datom (gensym)) + + ;; Equalities used for filtering (in conjunction) + equalities (remove nil? 
[(when (= :filter v-strat) + `(a= ~v-sym (.-v ~dexpr))) + (when (= :filter t-strat) + `(= ~t-sym (datom-tx ~dexpr)))]) + added (gensym)] + `{:index-key ~index-key + :strategy-vec ~(vec eavt-strats) + :lookup-fn (fn [~index-expr ~eavt-symbols] + ~(if (seq equalities) + `(filter (fn [~dexpr] (and ~@equalities)) ~lookup-expr) + lookup-expr)) + :backend-fn (fn [~index-expr] + (fn [~@eavt-symbols ~added] + ~lookup-expr))})) + +(defmacro lookup-strategy [index-key & eavt-strats] + {:pre [(keyword? index-key)]} + (let [pattern-symbols '[e a v tx]] + (lookup-strategy-sub index-key + pattern-symbols + (map short-hand->strat-symbol eavt-strats)))) + +(defn- get-search-strategy-impl [e a v t i] + (match-vector [e a v t i] + [e a v t *] (lookup-strategy :eavt 1 1 1 1) + [e a v _ *] (lookup-strategy :eavt 1 1 1 _) + [e a _ t *] (lookup-strategy :eavt 1 1 _ f) + [e a _ _ *] (lookup-strategy :eavt 1 1 _ _) + [e _ v t *] (lookup-strategy :eavt 1 _ f f) + [e _ v _ *] (lookup-strategy :eavt 1 _ f _) + [e _ _ t *] (lookup-strategy :eavt 1 _ _ f) + [e _ _ _ *] (lookup-strategy :eavt 1 _ _ _) + [_ a v t i] (lookup-strategy :avet _ 1 1 f) + [_ a v t _] (lookup-strategy :aevt _ 1 f f) + [_ a v _ i] (lookup-strategy :avet _ 1 1 _) + [_ a v _ _] (lookup-strategy :aevt _ 1 f _) + [_ a _ t *] (lookup-strategy :aevt _ 1 _ f) + [_ a _ _ *] (lookup-strategy :aevt _ 1 _ _) + [_ _ v t *] (lookup-strategy :eavt _ _ f f) + [_ _ v _ *] (lookup-strategy :eavt _ _ f _) + [_ _ _ t *] (lookup-strategy :eavt _ _ _ f) + [_ _ _ _ *] (lookup-strategy :eavt _ _ _ _))) + +(defn empty-lookup-fn + ([_db-index [_e _a _v _tx]] + []) + ([_db-index [_e _a _v _tx] _batch-fn] + [])) + +(defn- get-search-strategy [pattern indexed? temporal-db?] + (validate-pattern pattern true) (let [[e a v tx added?] pattern] - (if (and (not temporal-db?) (false? added?)) - '() - (case-tree [e a (some? 
v) tx] - [(di/-slice eavt (datom e a v tx) (datom e a v tx) :eavt) ;; e a v tx - (di/-slice eavt (datom e a v tx0) (datom e a v txmax) :eavt) ;; e a v _ - (->> (di/-slice eavt (datom e a nil tx0) (datom e a nil txmax) :eavt) ;; e a _ tx - (filter (fn [^Datom d] (= tx (datom-tx d))))) - (di/-slice eavt (datom e a nil tx0) (datom e a nil txmax) :eavt) ;; e a _ _ - (->> (di/-slice eavt (datom e nil nil tx0) (datom e nil nil txmax) :eavt) ;; e _ v tx - (filter (fn [^Datom d] (and (a= v (.-v d)) - (= tx (datom-tx d)))))) - (->> (di/-slice eavt (datom e nil nil tx0) (datom e nil nil txmax) :eavt) ;; e _ v _ - (filter (fn [^Datom d] (a= v (.-v d))))) - (->> (di/-slice eavt (datom e nil nil tx0) (datom e nil nil txmax) :eavt) ;; e _ _ tx - (filter (fn [^Datom d] (= tx (datom-tx d))))) - (di/-slice eavt (datom e nil nil tx0) (datom e nil nil txmax) :eavt) ;; e _ _ _ - (if indexed? ;; _ a v tx - (->> (di/-slice avet (datom e0 a v tx0) (datom emax a v txmax) :avet) - (filter (fn [^Datom d] (= tx (datom-tx d))))) - (->> (di/-slice aevt (datom e0 a nil tx0) (datom emax a nil txmax) :aevt) - (filter (fn [^Datom d] (and (a= v (.-v d)) - (= tx (datom-tx d))))))) - (if indexed? 
;; _ a v _ - (di/-slice avet (datom e0 a v tx0) (datom emax a v txmax) :avet) - (->> (di/-slice aevt (datom e0 a nil tx0) (datom emax a nil txmax) :aevt) - (filter (fn [^Datom d] (a= v (.-v d)))))) - (->> (di/-slice aevt (datom e0 a nil tx0) (datom emax a nil txmax) :aevt) ;; _ a _ tx - (filter (fn [^Datom d] (= tx (datom-tx d))))) - (di/-slice aevt (datom e0 a nil tx0) (datom emax a nil txmax) :aevt) ;; _ a _ _ - (filter (fn [^Datom d] (and (a= v (.-v d)) (= tx (datom-tx d)))) (di/-all eavt)) ;; _ _ v tx - (filter (fn [^Datom d] (a= v (.-v d))) (di/-all eavt)) ;; _ _ v _ - (filter (fn [^Datom d] (= tx (datom-tx d))) (di/-all eavt)) ;; _ _ _ tx - (di/-all eavt)])))) - -(defn search-current-indices [db pattern] - (memoize-for db [:search pattern] - #(let [[_ a _ _] pattern] - (search-indices (:eavt db) - (:aevt db) - (:avet db) - pattern - (dbu/indexing? db a) - false)))) - -(defn search-temporal-indices [db pattern] - (memoize-for db [:temporal-search pattern] - #(let [[_ a _ _ added] pattern - result (search-indices (:temporal-eavt db) - (:temporal-aevt db) - (:temporal-avet db) - pattern + (when-not (and (not temporal-db?) (false? added?)) + (get-search-strategy-impl + (boolean e) + (boolean a) + (some? v) + (boolean tx) + (boolean indexed?))))) + +(defn current-search-strategy [db pattern] + (let [[_ a _ _] pattern] + (when-let [strategy (get-search-strategy pattern (dbu/indexing? db a) - true)] - (case added - true (filter datom-added result) - false (remove datom-added result) - nil result)))) + false)] + (update strategy :index-key #(get db %))))) + +(defn temporal-search-strategy [db pattern] + (let [[_ a _ _ _] pattern] + (when-let [strategy (get-search-strategy + pattern + (dbu/indexing? 
db a) + true)] + (update strategy :index-key #(case % + :eavt (:temporal-eavt db) + :aevt (:temporal-aevt db) + :avet (:temporal-avet db) + nil))))) + +(defn search-current-indices + ([db pattern] + (memoize-for + db [:search pattern] + #(if-let [{:keys [index-key lookup-fn]} (current-search-strategy db pattern)] + (lookup-fn index-key pattern) + []))) + + ;; For batches + ([db pattern batch-fn] + (if-let [{:keys [index-key strategy-vec backend-fn]} + (current-search-strategy db pattern)] + (batch-fn strategy-vec (backend-fn index-key) identity) + []))) -(defn temporal-search [db pattern] - (dbu/distinct-datoms db - (search-current-indices db pattern) - (search-temporal-indices db pattern))) +(defn added? [[_ _ _ _ added]] + added) + +(defn filter-by-added + ([pattern] + (case (added? pattern) + true (filter datom-added) + false (remove datom-added) + identity)) + ([pattern result] + (case (added? pattern) + true (filter datom-added result) + false (remove datom-added result) + result))) + +(defn search-temporal-indices + ([db pattern] + (validate-pattern pattern false) + (memoize-for db [:temporal-search pattern] + #(if-let [{:keys [index-key lookup-fn]} + (temporal-search-strategy db pattern)] + (let [result (lookup-fn index-key pattern)] + (filter-by-added pattern result)) + []))) + ([db pattern batch-fn] + (validate-pattern pattern true) + (if-let [{:keys [index-key strategy-vec backend-fn]} + (temporal-search-strategy db pattern)] + (batch-fn strategy-vec (backend-fn index-key) + (filter-by-added pattern)) + []))) + +(defn temporal-search + ([db pattern] + (validate-pattern pattern false) + (dbu/distinct-datoms db + (search-current-indices db pattern) + (search-temporal-indices db pattern))) + ([db pattern batch-fn] + (validate-pattern pattern true) + (dbu/distinct-datoms db + (search-current-indices db pattern batch-fn) + (search-temporal-indices db pattern batch-fn)))) (defn temporal-seek-datoms [db index-type cs] (let [index (get db index-type) @@ -155,3 
+274,5 @@ (dbu/distinct-datoms db (di/-slice (:avet db) from to :avet) (di/-slice (:temporal-avet db) from to :avet)))) + + diff --git a/src/datahike/query.cljc b/src/datahike/query.cljc index 74344f14..73a3527c 100644 --- a/src/datahike/query.cljc +++ b/src/datahike/query.cljc @@ -6,13 +6,14 @@ [clojure.walk :as walk] [datahike.db.interface :as dbi] [datahike.db.utils :as dbu] + [datahike.array :refer [wrap-comparable]] [datahike.impl.entity :as de] [datahike.lru] [datahike.middleware.query] [datahike.pull-api :as dpa] [datahike.query-stats :as dqs] - [datahike.tools :as dt] [datahike.middleware.utils :as middleware-utils] + [datahike.tools :as dt] [datalog.parser :refer [parse]] [datalog.parser.impl :as dpi] [datalog.parser.impl.proto :as dpip] @@ -29,7 +30,9 @@ FindColl FindRel FindScalar FindTuple PlainSymbol Pull RulesVar SrcVar Variable] [java.lang.reflect Method] - [java.util Date Map]))) + [java.util Date Map HashSet HashSet]))) + +(set! *warn-on-reflection* true) ;; ---------------------------------------------------------------------------- @@ -90,14 +93,11 @@ (defn distinct-tuples "Remove duplicates just like `distinct` but with the difference that it only works on values on which `vec` can be applied and two different objects are considered equal if and only if their results after `vec` has been applied are equal. This means that two different Java arrays are considered equal if and only if their elements are equal." - [tuples] - (first (reduce (fn [[dst seen] tuple] - (let [key (vec tuple)] - (if (seen key) - [dst seen] - [(conj dst tuple) (conj seen key)]))) - [[] #{}] - tuples))) + ([tuples] + (into [] (distinct-tuples) tuples)) + ([] + (let [step ((distinct) (fn [_ _] true))] + (filter #(step false (vec %)))))) (defn seqable? #?@(:clj [^Boolean [x]] @@ -141,7 +141,7 @@ (= (count form) (count pattern)) (every? (fn [[pattern-el form-el]] (looks-like? pattern-el form-el)) (map vector pattern form)))) - :else ;; (predicate? 
pattern) + :else ;; (predicate? pattern) (pattern form))) (defn source? [sym] @@ -156,7 +156,10 @@ (or (keyword? form) (string? form))) (defn lookup-ref? [form] - (looks-like? [attr? '_] form)) + ;; Using looks-like? here is quite inefficient. + (and (vector? form) + (= 2 (count form)) + (attr? (first form)))) (defn entid? [x] ;; See `dbu/entid for all forms that are accepted (or (attr? x) @@ -530,6 +533,7 @@ :clj (to-array (map #(% tuple) getters)))))))) (defn hash-attrs [key-fn tuples] + ;; Equivalent to group-by except that it uses a list instead of a vector. (loop [tuples tuples hash-table (transient {})] (if-some [tuple (first tuples)] @@ -597,32 +601,47 @@ (filter (fn [[s _]] (free-var? s))) (into {}))) -(defn lookup-pattern-db [context db pattern orig-pattern] - ;; TODO optimize with bound attrs min/max values here - (let [attr->prop (var-mapping orig-pattern ["e" "a" "v" "tx" "added"]) +(defn map-consts [context orig-pattern datoms] + (let [;; Create a map from free var to index + ;; for the positions in the pattern attr->idx (var-mapping orig-pattern (range)) - search-pattern (mapv #(if (symbol? %) nil %) pattern) - datoms (if (first search-pattern) - (if-let [eid (dbu/entid db (first search-pattern))] - (dbi/search db (assoc search-pattern 0 eid)) - []) - (dbi/search db search-pattern)) idx->const (reduce-kv (fn [m k v] (if-let [c (k (:consts context))] (if (= c (get (first datoms) v)) ;; All datoms have the same format and the same value at position v - m ;; -> avoid unnecessary translations + m ;; -> avoid unnecessary translations (assoc m v c)) m)) {} attr->idx)] - (if (empty? idx->const) - (Relation. attr->prop datoms) + (when (seq idx->const) (Relation. attr->idx (map #(reduce (fn [datom [k v]] (assoc datom k v)) (vec (seq %)) idx->const) datoms))))) +(defn replace-symbols-by-nil [pattern] + (mapv #(if (symbol? %) nil %) pattern)) + +(defn resolve-pattern-eid [db search-pattern] + (let [first-p (first search-pattern)] + (if (and (some? 
first-p) + (not (symbol? first-p))) + (when-let [eid (dbu/entid db first-p)] + (assoc search-pattern 0 eid)) + search-pattern))) + +(defn relation-from-datoms-xform [] + (comp (map (fn [[e a v tx added?]] + [e a v tx added?])) + (distinct-tuples))) + +(defn relation-from-datoms [context orig-pattern datoms] + (or (map-consts context orig-pattern datoms) + (Relation. (var-mapping orig-pattern + (range)) + datoms))) + (defn matches-pattern? [pattern tuple] (loop [tuple tuple pattern pattern] @@ -639,13 +658,6 @@ data (filter #(matches-pattern? pattern %) coll)] (Relation. attr->idx (mapv to-array data)))) ;; FIXME to-array -(defn lookup-pattern [context source pattern orig-pattern] - (cond - (dbu/db? source) - (lookup-pattern-db context source pattern orig-pattern) - :else - (lookup-pattern-coll source pattern orig-pattern))) - (defn collapse-rels [rels new-rel] (loop [rels rels new-rel new-rel @@ -946,8 +958,10 @@ (defn resolve-pattern-lookup-entity-id [source e error-code] (cond + (dbu/numeric-entid? e) e (or (lookup-ref? e) (attr? e)) (dbu/entid-strict source e error-code) - (entid? e) e + ;(entid? e) e + (keyword? e) e (symbol? e) e :else (or error-code (dt/raise "Invalid entid" {:error :entity-id/syntax :entity-id e})))) @@ -970,38 +984,43 @@ added added) pattern))) +(defn good-lookup-refs? [pattern] + (if (coll? pattern) + (not-any? #(= % ::error) pattern) + (not= ::error pattern))) + (defn resolve-pattern-lookup-refs-or-nil "This function works just like `resolve-pattern-lookup-refs` but if there is an error it returns `nil` instead of throwing an exception. This is used to reject patterns with variables substituted for invalid values. -For instance, take the query + For instance, take the query -(d/q '[:find ?e + (d/q '[:find ?e :in $ [?e ...] :where [?e :friend 3]] db [1 2 3 \"A\"]) -in the test `datahike.test.lookup-refs-test/test-lookup-refs-query`. + in the test `datahike.test.lookup-refs-test/test-lookup-refs-query`. 
-According to this query, the variable `?e` can be either `1`, `2`, `3` or `\"A\"` -but \"A\" is not a valid entity. + According to this query, the variable `?e` can be either `1`, `2`, `3` or `\"A\"` + but \"A\" is not a valid entity. -The query engine will evaluate the pattern `[?e :friend 3]`. For the strategies -`identity` and `select-simple`, no substitution will be performed in this pattern. -Instead, they will ask for all tuples from the database and then filter them, so -the fact that `?e` can be bound to an impossible entity id `\"A\"` is not a problem. + The query engine will evaluate the pattern `[?e :friend 3]`. For the strategies + `identity` and `select-simple`, no substitution will be performed in this pattern. + Instead, they will ask for all tuples from the database and then filter them, so + the fact that `?e` can be bound to an impossible entity id `\"A\"` is not a problem. -But with the strategy `select-all`, the substituted pattern will become + But with the strategy `select-all`, the substituted pattern will become -[\"A\" :friend 3] + [\"A\" :friend 3] -and consequently, the `result` below will take the value `[::error :friend 3]`. -The unit test is currently written to simply ignore illegal illegal entity ids -such as \"A\" and therefore, we handle that by letting this function return nil -in those cases. -" + and consequently, the `result` below will take the value `[::error :friend 3]`. + The unit test is currently written to simply ignore illegal entity ids + such as \"A\" and therefore, we handle that by letting this function return nil + in those cases. + " [source pattern] (let [result (resolve-pattern-lookup-refs source pattern ::error)] - (when (not-any? #(= % ::error) result) + (when (good-lookup-refs? result) result))) (defn dynamic-lookup-attrs [source pattern] @@ -1046,177 +1065,601 @@ in those cases.
(defn tuple-var-mapper [rel] (let [attrs (:attrs rel) - key-fn-pairs (into [] (map (juxt identity (partial getter-fn attrs))) (keys attrs))] + key-fn-pairs (into [] + (map (juxt identity (partial getter-fn attrs))) + (keys attrs))] (fn [tuple] (into {} (map (fn [[k f]] [k (f tuple)])) key-fn-pairs)))) -(defn resolve-pattern-vars-for-relation [source pattern rel] - (let [mapper (tuple-var-mapper rel)] - (keep #(resolve-pattern-lookup-refs-or-nil - source - (replace (mapper %) pattern)) - (:tuples rel)))) - (def rel-product-unit (Relation. {} [[]])) -(defn rel-data-key [rel-data] - {:pre [(contains? rel-data :vars)]} - (:vars rel-data)) - -(defn expansion-rel-data - "Given all the relations `rels` from a context and the `vars` found in a pattern, -return a sequence of maps where each map has a relation and the subset of `vars` -mentioned in that relation. Relations that don't mention any vars are omitted." - [rels vars] - (for [{:keys [attrs tuples] :as rel} rels - :let [mentioned-vars (filter attrs vars)] - :when (seq mentioned-vars)] - {:rel rel - :vars mentioned-vars - :tuple-count (count tuples)})) - -;; A *relprod* is a state in the process of -;; selecting what relations to use when expanding -;; the pattern. The word "relprod" is an abbreviation for -;; "relation product". -;; -;; A *strategy* is a function that takes a relprod -;; as input and returns a new relprod as output. -;; The simplest strategy is `identity` meaning that -;; no pattern expansion will happen. -(defn init-relprod [rel-data vars] - {:product rel-product-unit - :include [] - :exclude rel-data - :vars vars}) - -(defn relprod? [x] - (and (map? x) (contains? x :product))) - -(defn relprod-exclude-keys [{:keys [exclude]}] - (map rel-data-key exclude)) - -(defn relprod-vars [relprod & ks] - {:pre [(relprod? 
relprod)]} +(defn bound-symbol-map + "Given a sequential collection of relations, return a map where every key is a symbol of a variable and every value is a map with the keys `:relation-index` and `:tuple-element-index`. The key `:relation-index` is associated with the index of the relation where the variable occurs and the key `:tuple-element-index` is associated with the index of the location in the clause where the symbol occurs." + [rels] + (into {} (for [[rel-index rel] (map-indexed vector rels) + [sym tup-index] (:attrs rel)] + [sym {:relation-index rel-index + :tuple-element-index tup-index}]))) + +(defn normalize-pattern + "Takes a pattern and returns a new pattern with exactly five elements, filling in any missing ones with nil." + [[e a v tx added?]] + [e a v tx added?]) + +(defn replace-unbound-symbols-by-nil [bsm pattern] + (normalize-pattern + (mapv #(when-not (and (symbol? %) (not (contains? bsm %))) + %) + pattern))) + +(defn search-index-mapping + "Returns a sequence of maps with index-information for a subset of e, a, v, tx. The `strategy-vec` argument is a vector of four elements corresponding to e, a, v, tx respectively. Every such element can be either `:substitute`, `:filter` or `nil` depending on how the corresponding element in the pattern should be used. The `clean-pattern` argument is a vector with the elements corresponding to e, a, v, tx. The argument `selected-strategy-symbol` can be either `:substitute`, `:filter` or `nil` and is used to filter e, a, v, tx based on the value of `:strategy-vec`."
+ [{:keys [strategy-vec clean-pattern bsm]} + selected-strategy-symbol] + {:pre [(= 4 (count strategy-vec))]} + (let [pattern (normalize-pattern clean-pattern)] + (for [[pattern-element-index + pattern-var + strategy-symbol] (map vector (range) pattern strategy-vec) + :when (= selected-strategy-symbol strategy-symbol) + :let [m (bsm pattern-var)] + :when m] + (assoc m :pattern-element-index pattern-element-index)))) + +(defn substitution-relation-indices + "Returns the set of indices of relations that have symbols that are substituted for actual values in the pattern before index lookup." + [context] (into #{} - (comp (mapcat #(get relprod %)) - (mapcat :vars)) - ks)) - -(defn relprod-filter [{:keys [product include exclude vars]} predf] - {:pre [product include exclude]} - (let [picked (into [] (filter predf) exclude)] - {:product (reduce ((map :rel) hash-join) product picked) - :include (into include picked) - :exclude (remove predf exclude) - :vars vars})) - -(defn relprod-select-keys [relprod key-set] - {:pre [(relprod? relprod) - (set? key-set) - (every? (set (relprod-exclude-keys relprod)) key-set)]} - (relprod-filter relprod (comp key-set rel-data-key))) - -(defn select-all - "This is a relprod strategy that will result in all -possible combinations of relations substituted in the pattern. - It may be faster or slower than no strategy at all depending -on the data. - -This might be the strategy used by Datomic, -which can be seen if we request 'Query stats' from Datomic: - -https://docs.datomic.com/pro/api/query-stats.html" - [relprod] - {:pre [(relprod? relprod)]} - (relprod-filter relprod (constantly true))) - -(defn select-simple - "This is a relprod strategy that will perform at least as -well as no strategy at all because it will result in at most -one expanded pattern (or none) that is possibly more specific." - [relprod] - {:pre [(relprod? 
relprod)]} - (relprod-filter relprod #(<= (:tuple-count %) 1))) - -(defn expand-once - "This strategy first performs `relprod-select-simple` and -then does one more expansion with the smallest `:tuple-count`. Just -like `relprod-select-all`, it is not necessarily always faster -than doing no expansion at all." - [relprod] - {:pre [(relprod? relprod)]} - (let [relprod (select-simple relprod) - [r & _] (sort-by :tuple-count (:exclude relprod))] - (if r - (relprod-select-keys relprod #{(rel-data-key r)}) - relprod))) - -(defn expand-constrained-patterns [source context pattern] - (let [vars (collect-vars pattern) - rel-data (expansion-rel-data (:rels context) vars) - strategy (-> context :settings :relprod-strategy) - product (-> (init-relprod rel-data vars) - strategy - :product)] - (resolve-pattern-vars-for-relation source pattern product))) - -(defn lookup-and-sum-pattern-rels [context source patterns clause collect-stats] - (loop [rel (Relation. (var-mapping clause (range)) []) - patterns patterns - lookup-stats []] - (if (empty? patterns) - {:relation (simplify-rel rel) - :lookup-stats lookup-stats} - (let [pattern (first patterns) - added (lookup-pattern context source pattern clause)] - (recur (sum-rel rel added) - (rest patterns) - (when collect-stats - (conj lookup-stats {:pattern pattern :tuple-count (count (:tuples added))}))))))) - -(defn lookup-patterns [context - clause - pattern-before-expansion - patterns-after-expansion] - (let [source *implicit-source* - {:keys [relation lookup-stats]} - (lookup-and-sum-pattern-rels context - source - patterns-after-expansion - clause - (:stats context))] + (map :relation-index) + (search-index-mapping context :substitute))) + +(defn filtering-relation-indices + "Returns the set of indices of relations that have symbols that will be used for filtering the datoms returned from the index lookup."
+ [context subst-inds] + (into #{} + (comp (map :relation-index) + (remove subst-inds)) + (search-index-mapping context :filter))) + +(defn index-feature-extractor + "Given a set of indices referring to elements in a sequential container such as a datom or vector, construct a function that returns a value computed from such a sequential container such that two different values returned from that function are equal if and only if their corresponding values at those indices are equal. Optionally takes a function that can remap the selected elements." + ([inds include-empty?] + (index-feature-extractor inds include-empty? (fn [_ x] x))) + ([inds include-empty? replacer] + (let [first-index (first inds)] + (case (count inds) + 0 (when include-empty? + (fn + ([] [nil]) + ([_] nil))) + 1 (fn + ([] [first-index]) + ([x] (wrap-comparable (replacer first-index (nth x first-index))))) + (fn + ([] inds) + ([x] + (mapv #(wrap-comparable (replacer % (nth x %))) inds))))))) + +(defn extend-predicate1 [predicate feature-extractor ref-feature] + (if (nil? feature-extractor) + predicate + (if predicate + (fn [datom] + (let [feature (feature-extractor datom)] + (if (= ref-feature feature) + (predicate datom) + false))) + (fn [datom] + (= ref-feature (feature-extractor datom)))))) + +(defn predicate-from-set [s] + (case (count s) + 0 (fn [_] false) + 1 (let [y (first s)] + (fn [x] (= x y))) + (fn [x] (contains? s x)))) + +(defn extend-predicate [predicate feature-extractor features] + {:pre [(or (set? features) + (instance? HashSet features))]} + (let [this-pred (predicate-from-set features)] + (if (nil? 
feature-extractor) + predicate + (if predicate + (fn + ([] (conj (predicate) [(feature-extractor) features])) + ([datom] + (let [feature (feature-extractor datom)] + (if (this-pred feature) + (predicate datom) + false)))) + (fn + ([] [(feature-extractor) features]) + ([datom] + (this-pred (feature-extractor datom)))))))) + +(defn resolve-pattern-lookup-ref-at-index + [source clean-attribute pattern-index pattern-value error-code] + (let [a clean-attribute] + (case (int pattern-index) + 0 (resolve-pattern-lookup-entity-id source pattern-value error-code) + 1 (if (and (:attribute-refs? (dbi/-config source)) (keyword? pattern-value)) + (dbi/-ref-for source pattern-value) + pattern-value) + 2 (if (and pattern-value + (attr? a) + (dbu/ref? source a) + (or (lookup-ref? pattern-value) (attr? pattern-value))) + (dbu/entid-strict source pattern-value error-code) + pattern-value) + 3 (if (lookup-ref? pattern-value) + (dbu/entid-strict source pattern-value error-code) + pattern-value) + 4 pattern-value))) + +(defn lookup-ref-replacer + ([context] (lookup-ref-replacer context ::error)) + ([{:keys [source clean-pattern]} error-value] + (let [[_ attribute _ _] clean-pattern] + (if source + (if (dbu/db? source) + (fn [index pattern-value] + (resolve-pattern-lookup-ref-at-index source + attribute + index + pattern-value + error-value)) + (fn [_i x] x)) + (fn [_ x] x))))) + +(defn- generate-substitution-xform-code [pred-expr + datom-predicate-symbol + filter-feature-symbol + pmask + substituted-pattern-and-filter-feature-pairs] + (let [pattern-symbols (repeatedly 5 gensym) + substitution-value-vector (gensym "substitution-value-vector")] + `(fn [step#] + (fn + ([] (step#)) + ([dst-one#] (step# dst-one#)) + + ;; This is a higher-arity step function. + ([dst# ~@pattern-symbols ~datom-predicate-symbol] + + ;; This generates the code that substitutes some of the + ;; incomping values by values from the relation and calls + ;; the next step function in the transducer chain. 
+          (reduce
+           (fn [dst-inner# [~substitution-value-vector ~filter-feature-symbol]]
+             (step# dst-inner#
+                    ~@(map (fn [i sym]
+                             (if (nil? i)
+                               sym
+                               `(nth ~substitution-value-vector ~i)))
+                           pmask
+                           pattern-symbols)
+                    ~pred-expr))
+           dst#
+           ~substituted-pattern-and-filter-feature-pairs))))))
+
+(defmacro substitution-expansion [substitution-pattern-element-inds
+                                  filter-feature-extractor
+                                  substituted-pattern-and-filter-feature-pairs]
+  (let [datom-predicate-symbol (gensym)
+        filter-feature-symbol (gensym)]
+
+    ;; This code generates a tree of `if`-forms for all ordered subsets of
+    ;; the sequence `(range 5)` that `substitution-pattern-element-inds`
+    ;; can take. At each leaf of the tree, code is generated for that particular
+    ;; subset.
+    (dt/range-subset-tree
+     5
+     substitution-pattern-element-inds
+
+     ;; This function is called at each leaf of the tree.
+     ;; `pmask` is a mask sequence: for each of the 5 pattern positions,
+     ;; either nil or the index of the value to substitute at that position.
+     (fn [_pinds pmask]
+
+       ;; `branch-expr` is a function that generates the actual
+       ;; code given a predicate expression.
+       (let [branch-expr (fn [pred-expr]
+
+                           ;; This is the code for the transducer.
+                           (generate-substitution-xform-code
+                            pred-expr
+                            datom-predicate-symbol
+                            filter-feature-symbol
+                            pmask
+                            substituted-pattern-and-filter-feature-pairs))]
+
+         ;; Generate different code depending on whether or not there is a
+         ;; `filter-feature-extractor`, meaning that the resulting datoms have
+         ;; to be filtered.
+         `(if (nil?
~filter-feature-extractor)
+            ~(branch-expr datom-predicate-symbol)
+            ~(branch-expr `(extend-predicate1 ~datom-predicate-symbol
+                                              ~filter-feature-extractor
+                                              ~filter-feature-symbol)))))))))
+
+#_(instantiate-substitution-xform substitution-pattern-element-inds
+                                  filter-feature-extractor
+                                  substituted-pattern-and-filter-feature-pairs)
+
+(defn instantiate-substitution-xform [substitution-pattern-element-inds
+                                      filter-feature-extractor
+                                      substituted-pattern-and-filter-feature-pairs]
+
+  ;; Returns a transducer based on the indices in `substitution-pattern-element-inds`
+  (substitution-expansion substitution-pattern-element-inds
+                          filter-feature-extractor
+                          substituted-pattern-and-filter-feature-pairs))
+
+;; The performance improvement of using this macro has been measured,
+;; see comment in single-substitution-xform.
+(defmacro make-vec-lookup-ref-replacer [range-length]
+  (let [inds (gensym)
+        replacer (gensym)
+        tuple (gensym)]
+    `(fn tree-fn# [~replacer ~inds]
+       ~(dt/range-subset-tree
+         range-length inds
+         (fn replacer-fn# [pinds _mask]
+           `(fn [~tuple]
+              (try
+                ~(mapv (fn [index i] `(~replacer ~index (nth ~tuple ~i)))
+                       pinds
+                       (range))
+                (catch Exception e# nil))))))))
+
+(def vec-lookup-ref-replacer (make-vec-lookup-ref-replacer 5))
+
+;; The performance improvement of using this macro has been measured,
+;; see comment in single-substitution-xform.
+(defmacro basic-index-selector [max-length]
+  (let [inds (gensym)
+
+        obj (gensym)]
+    `(fn [~inds]
+       (case (count ~inds)
+         ~@(mapcat (fn [length]
+                     (let [index-symbols (vec (repeatedly length gensym))]
+                       [length `(let [~index-symbols ~inds]
+                                  (fn [~obj]
+                                    ~(mapv
+                                      (fn [sym] `(nth ~obj ~sym))
+                                      index-symbols)))]))
+                   (range (inc max-length)))))))
+
+(def make-basic-index-selector (basic-index-selector 5))
+
+(defn single-substitution-xform
+  "Returns a transducer that substitutes the symbols for a single relation."
+ [search-context + relation-index + substituted-vars-per-relation + filtered-vars-per-relation] + (let [;; This function maps the value at a pattern at a certain index to + ;; a new value where the lookup-ref has been replaced. If there is an + ;; error, it returns the `::error` value. + lrr (lookup-ref-replacer search-context) + + tuples (:tuples (nth (:rels search-context) relation-index)) + substituted-vars (substituted-vars-per-relation relation-index) + filtered-vars (filtered-vars-per-relation relation-index) + pattern-substitution-inds (map :tuple-element-index substituted-vars) + pattern-filter-inds (map :tuple-element-index filtered-vars) + + ;; This function returns a unique feature for the values at + ;; `pattern-filter-inds` given a pattern. + feature-extractor (index-feature-extractor pattern-filter-inds + true + lrr) + + ;; These are the indices of the locations in the pattern that will be substituted + ;; with values from the tuples in this relation. + substitution-pattern-element-inds (map :pattern-element-index substituted-vars) + + ;; This function maps the value at a pattern at a certain index to + ;; a new value where the lookup-ref has been replaced. If there is an error, + ;; an exception is thrown. + lrr-ex (lookup-ref-replacer search-context nil) + + ;; This constructs a new pattern given a tuple of values that will be inserted + ;; at the `substitution-pattern-element-inds`. + ;; + ;; Precomputing this function moves some work out of the loop + ;; and contributes to about 1½ seconds reduction in + ;; https://gitlab.com/arbetsformedlingen/taxonomy-dev/backend/experimental/datahike-benchmark/ + pattern-from-tuple (vec-lookup-ref-replacer lrr-ex substitution-pattern-element-inds) + + ;; This is a function that simply picks out a subset of the elements from a sequential + ;; collection, at the indices `pattern-subsitution-inds`. 
+ ;; + ;; Precomputing this function moves some work out of the loop + ;; and contributes to about 2 seconds reduction in + ;; https://gitlab.com/arbetsformedlingen/taxonomy-dev/backend/experimental/datahike-benchmark/ + select-pattern-substitution-inds (make-basic-index-selector pattern-substitution-inds) + + ;; This is a list of pairs such that: + ;; + ;; * The first element is a pattern where variables for this relation have been substituted. + ;; * The second element is a feature used for filtering the datoms after querying the backend + ;; + ;; Using a transducer here (with a transient vector under the hood) + ;; is about ½ second faster than a doseq-loop that accumulates to + ;; an ArrayList in the benchmark + ;; https://gitlab.com/arbetsformedlingen/taxonomy-dev/backend/experimental/datahike-benchmark/ + ;; In other words, there is no use writing imperative code here + ;; with Java mutable collections. + substituted-pattern-and-filter-feature-pairs + (into [] + (keep + (fn [tuple] + (let [feature (feature-extractor tuple)] + (when (good-lookup-refs? feature) + (when-let [k (-> tuple + select-pattern-substitution-inds + pattern-from-tuple)] + [k feature]))))) + tuples) + + filter-feature-extractor (index-feature-extractor + (map :pattern-element-index filtered-vars) + false + lrr)] + + ;; This expression will produce an `xform` that performs the substitutions for + ;; this relation. + (instantiate-substitution-xform substitution-pattern-element-inds + filter-feature-extractor + substituted-pattern-and-filter-feature-pairs))) + +(defn search-context? [x] + (assert (map? x)) + (let [{:keys [bsm clean-pattern rels strategy-vec]} x] + (assert bsm) + (assert clean-pattern) + (assert rels) + (assert strategy-vec)) + true) + +(defn compute-per-rel-map [search-context rel-inds strat-symbol] + {:pre [(search-context? 
search-context)]}
+  (->> strat-symbol
+       (search-index-mapping search-context)
+       (filter (comp rel-inds :relation-index))
+       (group-by :relation-index)))
+
+(defn clean-pattern-before-substitution [pattern subst-map]
+  (let [subst-pattern-positions (into #{}
+                                      (comp cat (map :pattern-element-index))
+                                      (vals subst-map))]
+    (into []
+          (map-indexed (fn [i x]
+                         (cond
+                           (subst-pattern-positions i) x
+                           (symbol? x) nil
+                           :else x)))
+          pattern)))
+
+(defn initialization-and-substitution-xform
+  "Returns a pair of an initial collection and a transducer that performs all substitutions possible given the relations with indices `substituted-relation-inds`."
+  [search-context substituted-relation-inds]
+  {:pre [(map? search-context)
+         (set? substituted-relation-inds)]}
+  (let [;; We refer to relations by their index in the vector in the context.
+        substituted-vars-per-relation (compute-per-rel-map search-context
+                                                           substituted-relation-inds
+                                                           :substitute)
+
+        filtered-vars-per-relation (compute-per-rel-map search-context
+                                                        substituted-relation-inds
+                                                        :filter)
+
+        all-substitutions-xform (apply comp
+                                       (map (fn [relation-index]
+                                              (single-substitution-xform
+                                               search-context
+                                               relation-index
+                                               substituted-vars-per-relation
+                                               filtered-vars-per-relation))
+                                            substituted-relation-inds))
+        init-coll [[;; This is the initial pattern
+                    (clean-pattern-before-substitution
+                     (:clean-pattern search-context)
+                     substituted-vars-per-relation)
+
+                    ;; This is the initial predicate (nil because there is no predicate)
+                    nil]]]
+    [init-coll all-substitutions-xform]))
+
+(defn datom-filter-predicate [filtered-relation-inds search-context]
+  (let [filtered-vars-per-relation (compute-per-rel-map search-context filtered-relation-inds :filter)
+        rels (:rels search-context)]
+    (reduce (fn [predicate [relation-index filtered-vars]]
+              (let [tuples (:tuples (nth rels relation-index))
+                    pos-inds (map :pattern-element-index filtered-vars)
+                    tup-inds (map :tuple-element-index filtered-vars)
+                    tuple-feature-extractor (index-feature-extractor tup-inds true)
+                    features
(into #{} + (map tuple-feature-extractor) + tuples) + datom-feature-extractor + (index-feature-extractor pos-inds false)] + (extend-predicate predicate + datom-feature-extractor + features))) + nil + filtered-vars-per-relation))) + +(defn filter-from-predicate [pred] + (if pred + (filter pred) + identity)) + +(defn backend-xform [backend-fn] + (fn [step] + (fn + ([] (step)) + ([dst] (step dst)) + ([dst e a v tx added? datom-predicate] + (let [inner-step (if datom-predicate + (fn [dst datom] + (if (datom-predicate datom) + (step dst datom) + dst)) + step) + datoms (try + (backend-fn e a v tx added?) + (catch Exception e + (throw e)))] + (reduce inner-step + dst + datoms)))))) + +(defn extend-predicate-for-pattern-constants + [predicate {:keys [strategy-vec clean-pattern] :as search-context}] + (let [inds (for [[i strategy pattern-value] (mapv vector (range) + strategy-vec + clean-pattern) + :when (= :filter strategy) + :when (and (some? pattern-value) + (not (symbol? pattern-value)))] + i) + extractor (index-feature-extractor + inds + false + (lookup-ref-replacer search-context))] + (if extractor + (extend-predicate predicate extractor #{(extractor clean-pattern)}) + predicate))) + +(defn unpack6 [step] + (fn + ([] (step)) + ([dst] (step dst)) + ([dst [[e a v tx added?] filt]] + (step dst e a v tx added? filt)))) + +(defn search-batch-fn + "This function constructs a \"strategy function\" that gets called by `dbi/-batch-search.`" + [search-context] + (fn [strategy-vec backend-fn datom-xform] + (let [search-context (merge search-context {:strategy-vec strategy-vec + :backend-fn backend-fn}) + + ;; Relations with indices `substituted-relation-inds` are used for substituting variables + ;; in the pattern. + substituted-relation-inds (substitution-relation-indices search-context) + + ;; Relations with indices `filtered-relation-inds` are used for filtering the datoms + ;; returned by the search backend. 
+ filtered-relation-inds (filtering-relation-indices search-context substituted-relation-inds) + + [init-coll substitution-xform] (initialization-and-substitution-xform + search-context + substituted-relation-inds) + + filter-xform (-> filtered-relation-inds + (datom-filter-predicate search-context) + (extend-predicate-for-pattern-constants search-context) + filter-from-predicate) + + ;; This transduction will take the initial pattern, + ;; perform all variable substitutions for all combinations + ;; of relations and then look up the datoms in the index. + ;; Finally, the datoms will be filtered for the variables + ;; that were not substituted. + result (into [] + + ;; From the output of `unpack6` + ;; to the input of `backend-xform` + ;; the transducers are higher-arity. That is, + ;; instead of calling `(step acc [[e a v tx added?] pred])`, + ;; they call `(step acc e a v tx added? pred)`. This avoids + ;; the allocation of short-lived vectors and speeds up the + ;; process by about 0.4 seconds in + ;; https://gitlab.com/arbetsformedlingen/taxonomy-dev/backend/experimental/datahike-benchmark/ + + (comp + + ;; Unpack the pattern as arguments to the next step function. + unpack6 + + ;; Substitute variables with values + ;; from tuples in the relations and accumulate the filter predicate. + substitution-xform + + ;; Perform the lookup in the search backend. + (backend-xform backend-fn) + + ;; Filter the datoms returned from the search backend. + filter-xform + + ;; Apply the provided datom-xform on the returned datoms + datom-xform) + init-coll)] + result))) + +(defn lookup-batch-search [source context orig-pattern pattern1] + (let [new-rel (if (dbu/db? 
source) + (let [rels (vec (:rels context)) + bsm (bound-symbol-map rels) + clean-pattern (->> pattern1 + (replace-unbound-symbols-by-nil bsm) + (resolve-pattern-eid source)) + search-context {:source source + :bsm bsm + :clean-pattern clean-pattern + :rels rels} + + datoms (if clean-pattern + + ;; Make the call to the search backend + (dbi/batch-search + source clean-pattern + (search-batch-fn search-context) + (relation-from-datoms-xform)) + + []) + + new-rel (relation-from-datoms + context orig-pattern datoms)] + new-rel) + (lookup-pattern-coll source pattern1 orig-pattern))] + + ;; This binding is needed for `collapse-rels` to work, and more specifically, + ;; `hash-join` to work, that in turn depends on `getter-fn`. (binding [*lookup-attrs* (if (satisfies? dbi/IDB source) - (dynamic-lookup-attrs source pattern-before-expansion) + (dynamic-lookup-attrs source pattern1) *lookup-attrs*)] - - (cond-> (update context :rels collapse-rels relation) - (:stats context) (assoc :tmp-stats {:type :lookup - :lookup-stats lookup-stats}))))) + (cond-> (update context :rels collapse-rels new-rel) + (:stats context) (assoc :tmp-stats {:type :lookup}))))) (defn -resolve-clause* ([context clause] (-resolve-clause* context clause clause)) ([context clause orig-clause] (condp looks-like? clause - [[symbol? '*]] ;; predicate [(pred ?a ?b ?c)] + [[symbol? '*]] ;; predicate [(pred ?a ?b ?c)] (do (check-all-bound context (identity (filter free-var? (first clause))) orig-clause) (filter-by-pred context clause)) - [[symbol? '*] '_] ;; function [(fn ?a ?b) ?res] + [[symbol? '*] '_] ;; function [(fn ?a ?b) ?res] (bind-by-fn context clause) - [source? '*] ;; source + anything + [source? '*] ;; source + anything (let [[source-sym & rest] clause] (binding [*implicit-source* (get (:sources context) source-sym)] (-resolve-clause context rest clause))) - '[or *] ;; (or ...) + '[or *] ;; (or ...) 
(let [[_ & branches] clause context' (assoc context :stats []) - contexts (map #(resolve-clause context' %) branches) + contexts (mapv #(resolve-clause context' %) branches) sum-rel (->> contexts (map #(reduce hash-join (:rels %))) (reduce sum-rel))] @@ -1224,13 +1667,13 @@ than doing no expansion at all." (:stats context) (assoc :tmp-stats {:type :or :branches (mapv :stats contexts)}))) - '[or-join [[*] *] *] ;; (or-join [[req-vars] vars] ...) + '[or-join [[*] *] *] ;; (or-join [[req-vars] vars] ...) (let [[_ [req-vars & vars] & branches] clause] (check-all-bound context req-vars orig-clause) (recur context (list* 'or-join (concat req-vars vars) branches) clause)) - '[or-join [*] *] ;; (or-join [vars] ...) - ;; TODO required vars + '[or-join [*] *] ;; (or-join [vars] ...) + ;; TODO required vars (let [[_ vars & branches] clause vars (set vars) join-context (-> context @@ -1247,7 +1690,7 @@ than doing no expansion at all." (:stats context) (assoc :tmp-stats {:type :or-join :branches (mapv #(-> % :stats first) contexts)}))) - '[and *] ;; (and ...) + '[and *] ;; (and ...) (let [[_ & clauses] clause] (if (:stats context) (let [and-context (-> context @@ -1259,7 +1702,7 @@ than doing no expansion at all." :stats (:stats context))) (resolve-context context clauses))) - '[not *] ;; (not ...) + '[not *] ;; (not ...) (let [[_ & clauses] clause negation-vars (collect-vars clauses) _ (check-some-bound context negation-vars orig-clause) @@ -1274,7 +1717,7 @@ than doing no expansion at all." (:stats context) (assoc :tmp-stats {:type :not :branches (:stats negation-context)}))) - '[not-join [*] *] ;; (not-join [vars] ...) + '[not-join [*] *] ;; (not-join [vars] ...) (let [[_ vars & clauses] clause _ (check-all-bound context vars orig-clause) join-rel (reduce hash-join (:rels context)) @@ -1290,20 +1733,19 @@ than doing no expansion at all." 
(:stats context) (assoc :tmp-stats {:type :not :branches (:stats negation-context)}))) - '[*] ;; pattern + '[*] ;; pattern (let [source *implicit-source* pattern0 (replace (:consts context) clause) - pattern1 (resolve-pattern-lookup-refs source pattern0) - constrained-patterns (expand-constrained-patterns source context pattern1) - context-constrained (lookup-patterns context clause pattern1 constrained-patterns)] - context-constrained)))) + pattern1 (resolve-pattern-lookup-refs source pattern0)] + (lookup-batch-search source context clause pattern1))))) (defn -resolve-clause ([context clause] (-resolve-clause context clause clause)) ([context clause orig-clause] (dqs/update-ctx-with-stats context orig-clause - (fn [context] (-resolve-clause* context clause orig-clause))))) + (fn [context] + (-resolve-clause* context clause orig-clause))))) (defn resolve-clause [context clause] (if (rule? context clause) @@ -1389,13 +1831,17 @@ than doing no expansion at all." (extend-protocol IPostProcess FindRel - (-post-process [_ tuples] (if (seq? tuples) (vec tuples) tuples)) + (-post-process [_ tuples] + (if (seq? tuples) (vec tuples) tuples)) FindColl - (-post-process [_ tuples] (into [] (map first) tuples)) + (-post-process [_ tuples] + (into [] (map first) tuples)) FindScalar - (-post-process [_ tuples] (ffirst tuples)) + (-post-process [_ tuples] + (ffirst tuples)) FindTuple - (-post-process [_ tuples] (first tuples))) + (-post-process [_ tuples] + (first tuples))) (defn- pull [find-elements context resultset] (let [resolved (for [find find-elements] @@ -1421,12 +1867,6 @@ than doing no expansion at all." (vswap! query-cache assoc q qp) qp))) -(defn paginate [offset limit resultset] - (let [subseq (drop (or offset 0) (distinct resultset))] - (if (or (nil? limit) (neg? 
limit)) - subseq - (take limit subseq)))) - (defn convert-to-return-maps [{:keys [mapping-type mapping-keys]} resultset] (let [mapping-keys (map #(get % :mapping-key) mapping-keys) convert-fn (fn [mkeys] @@ -1440,7 +1880,7 @@ than doing no expansion at all." (->> (-collect context symbols) (map vec))) -(def default-settings {:relprod-strategy expand-once}) +(def default-settings {}) (defn raw-q [{:keys [query args offset limit stats? settings] :as _query-map}] (let [settings (merge default-settings settings) @@ -1453,14 +1893,21 @@ than doing no expansion at all." (Context. [] {} {} {} settings)) (resolve-ins qin args)) ;; TODO utilize parser + all-vars (concat (dpi/find-vars qfind) (map :symbol qwith)) context-out (-q context-in (:where query)) resultset (collect context-out all-vars) find-elements (dpip/find-elements qfind) result-arity (count find-elements)] - (cond->> resultset - (or offset limit) (paginate offset limit) - true set + (cond->> (into #{} + (comp (distinct) + (if offset + (drop offset) + identity) + (if (or (nil? limit) (neg? limit)) + identity + (take limit))) + resultset) (:with query) (mapv #(subvec % 0 result-arity)) (some #(instance? Aggregate %) find-elements) (aggregate find-elements context-in) (some #(instance? 
Pull %) find-elements) (pull find-elements context-in) diff --git a/src/datahike/tools.cljc b/src/datahike/tools.cljc index b4a62ebc..dd642fdc 100644 --- a/src/datahike/tools.cljc +++ b/src/datahike/tools.cljc @@ -10,19 +10,43 @@ #?(:clj (clojure.lang.Util/hashCombine x y) :cljs (hash-combine x y))) -#?(:clj - (defn- -case-tree [queries variants] - (if queries - (let [v1 (take (/ (count variants) 2) variants) - v2 (drop (/ (count variants) 2) variants)] - (list 'if (first queries) - (-case-tree (next queries) v1) - (-case-tree (next queries) v2))) - (first variants)))) - -#?(:clj - (defmacro case-tree [qs vs] - (-case-tree qs vs))) +(defn -match-vector-class [x] + (case x + _ :negative + * :any + :positive)) + +(defn -match-vector [path pattern-pos pattern-size pattern-symbols pairs] + (cond + (< pattern-pos pattern-size) + (let [groups (group-by (comp -match-vector-class #(nth % pattern-pos) first) pairs) + sub (fn [p pairs] (-match-vector (conj path p) + (inc pattern-pos) + pattern-size + pattern-symbols + pairs))] + (if (= [:any] (keys groups)) + (sub '* (:any groups)) + `(if ~(nth pattern-symbols pattern-pos) + ~(sub 1 (mapcat groups [:positive :any])) + ~(sub '_ (mapcat groups [:negative :any]))))) + + (not= 1 (count pairs)) (throw (ex-info "There should be exactly one expression at leaf" + {:path path})) + :else (-> pairs first second))) + +(defmacro match-vector [input-vector & pattern-expr-pairs] + {:pre [(sequential? pattern-expr-pairs) + (even? (count pattern-expr-pairs))]} + (let [pairs (partition 2 pattern-expr-pairs) + patterns (map first pairs) + _ (assert (every? sequential? patterns)) + pattern-sizes (into #{} (map count) patterns) + _ (assert (= 1 (count pattern-sizes))) + pattern-size (first pattern-sizes) + symbols (repeatedly pattern-size gensym)] + `(let [[~@symbols] ~input-vector] + ~(-match-vector [] 0 pattern-size symbols pairs)))) (defn ^:dynamic get-date [] #?(:clj (Date.) 
@@ -176,6 +200,7 @@ ~nsym (count ~vsym) ~vars ~vsym] ~(generate [] pairs)))) + (defn- reduce-clauses [resolver context clauses] (loop [context context @@ -209,3 +234,42 @@ ([dst x] (let [k (f x)] (assoc! dst k (conj (get dst k []) x)))))) + +(defn range-subset-tree + "This function generates code for a decision tree that for an input expression `input` that has to represent a sequence of growing integers that is a subset of the integers in the sequence `(range length-length)`. Every leaf in the decision tree corresponds to one of the 2^range-length possible subsequences and the `branch-visitor-fn` is called at every leaf with the first argument being the subsequence and the second argument being a mask." + ([range-length input branch-visitor-fn] + (if (symbol? input) + (range-subset-tree range-length + input + branch-visitor-fn + 0 + [] + (vec (repeat range-length nil))) + (let [sym (gensym)] + `(let [~sym ~input] + ~(range-subset-tree range-length sym branch-visitor-fn))))) + ([range-length input-symbol branch-visitor-fn at acc-inds mask] + {:pre [(number? range-length) + (symbol? input-symbol) + (ifn? branch-visitor-fn) + (number? at) + (vector? acc-inds)]} + (if (= range-length at) + (branch-visitor-fn acc-inds mask) + `(if (empty? ~input-symbol) + ~(branch-visitor-fn acc-inds mask) + (if (= ~at (first ~input-symbol)) + (let [~input-symbol (rest ~input-symbol)] + ~(range-subset-tree range-length + input-symbol + branch-visitor-fn + (inc at) + (conj acc-inds at) + (assoc mask at (count acc-inds)))) + ~(range-subset-tree range-length + input-symbol + branch-visitor-fn + (inc at) + acc-inds + mask)))))) + diff --git a/test/datahike/test/api_test.cljc b/test/datahike/test/api_test.cljc index 3fb795d6..2cf99029 100644 --- a/test/datahike/test/api_test.cljc +++ b/test/datahike/test/api_test.cljc @@ -874,26 +874,3 @@ (deftest test-metrics-attr-refs (test-metrics (assoc metrics-base-cfg :attribute-refs? 
true))) - -(deftest test-strategies - ;; The main purpose of this test is to check that - ;; the `:relprod-strategy` parameter is not ignored - ;; and that the strategy is actually called. Furthermore, - ;; it checks that the result of a simple query is - ;; correct no matter the choice of strategy. - (doseq [inner-strategy [identity - dq/select-simple - dq/select-all - dq/expand-once]] - (let [strategy-was-called (atom false) - strategy (fn [relprod] - (reset! strategy-was-called true) - (inner-strategy relprod))] - (is (= #{["fries"] ["candy"] ["pie"] ["pizza"]} - (d/q {:query '[:find ?value :where [_ :likes ?value]] - :settings {:relprod-strategy strategy} - :args [#{[1 :likes "fries"] - [2 :likes "candy"] - [3 :likes "pie"] - [4 :likes "pizza"]}]}))) - (is (deref strategy-was-called))))) diff --git a/test/datahike/test/array_test.cljc b/test/datahike/test/array_test.cljc index fb6cc263..962d6126 100644 --- a/test/datahike/test/array_test.cljc +++ b/test/datahike/test/array_test.cljc @@ -3,7 +3,7 @@ #?(:cljs [cljs.test :as t :refer-macros [is deftest testing]] :clj [clojure.test :as t :refer [is deftest testing]]) [clojure.core :refer [byte-array]] - [datahike.array :refer [compare-arrays a=]])) + [datahike.array :refer [compare-arrays a= wrap-comparable]])) (deftest test-array-ordering (testing "Array value indexing support." @@ -22,10 +22,19 @@ (is (neg? (compare-arrays (byte-array [5 2 2 5]) (byte-array [5 2 3 1])))) (is (pos? (compare-arrays (byte-array [6 2 2 5]) (byte-array [5 2 3 5])))))) +(defn a2= [a b] + (= (wrap-comparable a) + (wrap-comparable b))) + (deftest test-extended-equality (testing "Testing extended equality with support for arrays." 
- ;; some Clojure semantics safety checks - (is (a= 0 0)) - (is (a= "foo" "foo")) - (is (not (a= "foo" "bar"))) - (is (a= [{:a 5} 4 "bar"] [{:a 5} 4 "bar"])))) + (doseq [cmp [a= a2=]] + ;; some Clojure semantics safety checks + (is (cmp 0 0)) + (is (cmp "foo" "foo")) + (is (not (cmp "foo" "bar"))) + (is (cmp [{:a 5} 4 "bar"] [{:a 5} 4 "bar"])) + (is (cmp (byte-array [5 2 3]) + (byte-array [5 2 3]))) + (is (not (cmp (byte-array [5 2 3]) + (byte-array [5 2 4]))))))) diff --git a/test/datahike/test/query_stats_test.cljc b/test/datahike/test/query_stats_test.cljc index b6525d77..dc5c6f10 100644 --- a/test/datahike/test/query_stats_test.cljc +++ b/test/datahike/test/query_stats_test.cljc @@ -25,20 +25,10 @@ (use-fixtures :once (partial with-db config (into test-schema test-data))) -(defn validate-lookup-stats [lookup-stats] - (cond - (not (vector? lookup-stats)) :not-a-vector - (empty? lookup-stats) :empty - (not (every? #(and (map? %) - (vector? (:pattern %)) - (number? (:tuple-count %))) lookup-stats)) :not-every-valid - :else :valid)) - (defn unify-stats [stats] (cw/postwalk #(cond-> % (and (map? %) (contains? % :t)) (assoc :t :measurement) - (and (map? %) (contains? % :lookup-stats)) (update :lookup-stats validate-lookup-stats) (and (symbol? 
%) (re-find #"__auto__" (name %))) (-> name (str/replace #"__auto__\d*" "_tmp") symbol)) stats)) @@ -52,13 +42,11 @@ :stats [{:clause '[?e :age ?a] :rels [{:bound #{'?a '?e} :rows 6}] :t :measurement - :type :lookup - :lookup-stats :valid} + :type :lookup} {:branches [{:clause '[?e :age 60] :rels [{:bound #{'?a '?e} :rows 1}] :t :measurement - :type :lookup - :lookup-stats :valid}] + :type :lookup}] :clause '(not [?e :age 60]) :rels [{:bound #{'?a '?e} :rows 5}] :t :measurement @@ -81,23 +69,19 @@ :stats [{:clause '[?e :name] :rels [{:bound #{'?e} :rows 6}] :t :measurement - :type :lookup - :lookup-stats :valid} + :type :lookup} {:clause '[?e :age ?a] :rels [{:bound #{'?a '?e} :rows 6}] :t :measurement - :type :lookup - :lookup-stats :valid} + :type :lookup} {:branches [{:clause '[?e :name "Oleg"] :rels [{:bound #{'?e} :rows 2}] :t :measurement - :type :lookup - :lookup-stats :valid} + :type :lookup} {:clause '[?e :age ?a] :rels [{:bound #{'?a '?e} :rows 2}] :t :measurement - :type :lookup - :lookup-stats :valid}] + :type :lookup}] :clause '(not-join [?e] [?e :name "Oleg"] [?e :age ?a]) @@ -124,18 +108,15 @@ :rels [{:bound #{'?a '?e} :rows 6}] :t :measurement - :type :lookup - :lookup-stats :valid} + :type :lookup} {:branches [[{:clause '[?e :name "Ivan"] :rels [{:bound #{'?a '?e} :rows 3}] :t :measurement - :type :lookup - :lookup-stats :valid}] + :type :lookup}] [{:clause '[?e :name "Oleg"] :rels [{:bound #{'?a '?e} :rows 2}] :t :measurement - :type :lookup - :lookup-stats :valid}]] + :type :lookup}]] :clause '(or [?e :name "Ivan"] [?e :name "Oleg"]) :rels [{:bound #{'?a '?e} :rows 5}] @@ -154,12 +135,11 @@ [3 :follow 4] [4 :follow 6] [5 :follow 3]]] - (is (= {:consts {}, :query '{:find [?y ?x], :in [$ %], :where [[_ _ ?x] (rule ?x ?y) [(even? 
?x)]]}, :ret #{[3 2] [4 2] [6 4]}, :rules {'rule '[[(rule ?a ?b) [?a :follow ?b]]]}, - :stats [{:clause '[_ _ ?x], :rels [{:bound #{'?x}, :rows 6}], :t :measurement, :type :lookup :lookup-stats :valid} + :stats [{:clause '[_ _ ?x], :rels [{:bound #{'?x}, :rows 6}], :t :measurement, :type :lookup} {:branches [{:branches [], :clause '(rule ?x ?y), :clauses (), @@ -169,8 +149,7 @@ {:branches [{:clause '[?x :follow ?y], :rels [{:bound #{'?x '?y}, :rows 6}], :t :measurement, - :type :lookup - :lookup-stats :valid}], + :type :lookup}], :clause '([?x :follow ?y]), :clauses '([?x :follow ?y]), :rels [{:bound #{'?x '?y}, :rows 6}], @@ -204,8 +183,7 @@ {:branches [{:clause '[?e1 :follow ?e2], :rels [{:bound #{'?e1 '?e2}, :rows 1}], :t :measurement, - :type :lookup - :lookup-stats :valid}], + :type :lookup}], :clause '([?e1 :follow ?e2]), :clauses '([?e1 :follow ?e2]), :rels [{:bound #{'?e1 '?e2}, :rows 1}], @@ -214,13 +192,11 @@ {:branches '[{:clause [?e1 :follow ?t_tmp], :rels [{:bound #{?e1 ?t_tmp}, :rows 1}], :t :measurement, - :type :lookup - :lookup-stats :valid} + :type :lookup} {:clause [?t_tmp :follow ?e2], :rels [{:bound #{?e1 ?e2 ?t_tmp}, :rows 2}], :t :measurement, - :type :lookup - :lookup-stats :valid}], + :type :lookup}], :clause '([?e1 :follow ?t_tmp] [?t_tmp :follow ?e2]), :clauses '([?e1 :follow ?t_tmp] [?t_tmp :follow ?e2]), :rels [{:bound #{'?e1 '?e2 '?t_tmp}, :rows 2}], diff --git a/test/datahike/test/query_test.cljc b/test/datahike/test/query_test.cljc index 85d95719..3fb4550b 100644 --- a/test/datahike/test/query_test.cljc +++ b/test/datahike/test/query_test.cljc @@ -5,7 +5,8 @@ [datahike.api :as d] [datahike.db :as db] [datahike.test.utils :as utils] - [datahike.query :as dq]) + [datahike.query :as dq] + [taoensso.timbre :as log]) #?(:clj (:import [clojure.lang ExceptionInfo]))) @@ -58,7 +59,8 @@ [:db/add 2 :aka "porosenok"] [:db/add 2 :aka "pi"]]))] (is (= (d/q '[:find ?n1 ?n2 - :where [?e1 :aka ?x] + :where + [?e1 :aka ?x] [?e2 :aka ?x] [?e1 :name 
?n1] [?e2 :name ?n2]] db) @@ -536,46 +538,447 @@ (long-array [3 4])]))) (is (= [[3 4] [9 7]] (dq/distinct-tuples [[3 4] [9 7] [3 4]])))) -(defn simple-rel - ([v values] (simple-rel v values {})) - ([v values extra] - (dq/->Relation (merge {v 0} extra) (map vector values)))) - -(deftest test-relprod - (let [x (simple-rel '?x [1 2 3 4]) - y (simple-rel '?y [90] {'?w 1}) - z (simple-rel '?z [10 11 12]) - rels [x y z] - xy-vars ['?x '?y] - rel-data (dq/expansion-rel-data rels xy-vars) - relprod (dq/init-relprod rel-data xy-vars) - relprod-x (dq/relprod-select-keys relprod #{['?x]}) - relprod-xy (dq/relprod-select-keys relprod #{['?x] ['?y]}) - relprod-xy2 (dq/select-all relprod) - relprod-y (dq/select-simple relprod) - prodks (comp set keys :attrs :product)] - (is (= #{} (dq/relprod-vars relprod-x))) - (is (= #{'?x} (dq/relprod-vars relprod-x :include))) - (is (= #{'?y} (dq/relprod-vars relprod-x :exclude))) - (is (= #{'?x '?y} (dq/relprod-vars relprod-x :include :exclude))) - (is (= 2 (count rel-data))) - (is (= [{:rel x - :tuple-count 4 - :vars ['?x]} - {:rel y - :tuple-count 1 - :vars ['?y]}] - rel-data)) - (is (sequential? 
(:exclude relprod))) - (is (= 2 (count (:exclude relprod)))) - (is (= 1 (count (:exclude relprod-x)))) - (is (= 0 (count (:exclude relprod-xy)))) - (is (= #{'?x} (prodks relprod-x))) - (is (= #{'?x '?y '?w} (prodks relprod-xy))) - (is (= #{'?x '?y '?w} (prodks relprod-xy2))) - (is (= #{'?y '?w} (prodks relprod-y))) - - (doseq [{:keys [include exclude vars]} [relprod relprod-x relprod-xy relprod-xy2 relprod-y]] - (is (= 2 (+ (count include) - (count exclude)))) - (is (= xy-vars vars))))) +;; A good one +(def ex1 '{:source {:max-tx 536926163}, + :pattern1 [?r1 79 ?oc], + :context + {:rels + [{:attrs {?oc 0}, + :tuples + [[5289] + [5294] + [5299] + [5304] + [5307] + [5310] + [5313] + [5317] + [5322] + [5325]], + :tuple-count 3654} + {:attrs {?__auto__1 0}, :tuples [], :tuple-count 0}], + :consts {?__auto__1 "narrow-match"}}, + :clause [?r1 :relation/concept-1 ?oc], + :constrained-patterns + [[?r1 79 5289] + [?r1 79 5294] + [?r1 79 5299] + [?r1 79 5304] + [?r1 79 5307] + [?r1 79 5310] + [?r1 79 5313] + [?r1 79 5317] + [?r1 79 5322] + [?r1 79 5325]], + :constrained-pattern-count 3654}) + +(deftest test-new-search-strategy + (let [;; pattern1 = [?r1 79 ?oc] + {:keys [context pattern1]} ex1 + rels (vec (:rels context)) + bsm (dq/bound-symbol-map rels) + + clean-pattern (dq/replace-unbound-symbols-by-nil bsm pattern1) + + strategy0 [nil :substitute :substitute nil] + strategy1 [nil :substitute :filter nil] + + subst-inds0 (dq/substitution-relation-indices + {:bsm bsm + :clean-pattern clean-pattern + :strategy-vec strategy0}) + subst-inds1 (dq/substitution-relation-indices + {:bsm bsm + :clean-pattern clean-pattern + :strategy-vec strategy1}) + filt-inds0 (dq/filtering-relation-indices + {:bsm bsm + :clean-pattern clean-pattern + :strategy-vec strategy0} + subst-inds0) + filt-inds1 (dq/filtering-relation-indices + {:bsm bsm + :clean-pattern clean-pattern + :strategy-vec strategy1} + subst-inds1)] + (is (seq rels)) + (is (= '{?oc {:relation-index 0, :tuple-element-index 0}, 
+ ?__auto__1 {:relation-index 1, :tuple-element-index 0}} + bsm)) + (is (= #{0} subst-inds0)) + (is (= #{} subst-inds1)) + (is (= #{} filt-inds0)) + (is (= #{0} filt-inds1)))) + +(defn pack6 [step] + (fn + ([] (step)) + ([dst] (step dst)) + ([dst e a v tx added? filt] + (step dst [[e a v tx added?] filt])))) + +(deftest test-substitution-plan + (let [-pattern1 '[?w ?x ?y] + context '{:rels [{:attrs {?x 0 + ?y 1} + :tuples [[1 2] + [3 4] + [3 5] + [5 6]]} + {:attrs {?z 0} + :tuples [[9] [10] [11]]}]} + rels (vec (:rels context)) + bsm (dq/bound-symbol-map rels) + clean-pattern (dq/replace-unbound-symbols-by-nil bsm -pattern1) + strategy [nil :substitute :filter nil] + subst-inds (dq/substitution-relation-indices + {:bsm bsm + :clean-pattern clean-pattern + :strategy-vec strategy + :rels rels}) + filt-inds (dq/filtering-relation-indices + {:bsm bsm + :clean-pattern clean-pattern + :strategy-vec strategy + :rels rels} + subst-inds) + [init-coll subst-xform] (dq/initialization-and-substitution-xform + {:bsm bsm + :clean-pattern clean-pattern + :strategy-vec strategy + :rels rels} + subst-inds) + + result (into [] + (comp dq/unpack6 + subst-xform + pack6) + init-coll) + [[_ p0] [_ p1] [_ p2] [_ p3]] result] + (is (= #{0} subst-inds)) + (is (= #{} filt-inds)) + (is (= {'?x {:relation-index 0 :tuple-element-index 0} + '?y {:relation-index 0 :tuple-element-index 1} + '?z {:relation-index 1 :tuple-element-index 0}} + bsm)) + (is (= [[nil 1 nil nil nil] + [nil 3 nil nil nil] + [nil 3 nil nil nil] + [nil 5 nil nil nil]] (map first result))) + (is (p0 [1 2 2])) + (is (not (p0 [1 2 3]))) + (is (p1 [1 2 4])) + (is (p2 [1 2 5])) + (is (not (p1 [1 2 6]))) + (is (p3 [1 2 6])) + (is (not (p3 [1 2 5]))))) + +(deftest test-index-feature-extractor + (let [e (dq/index-feature-extractor [1] true)] + (is (= 3 (e [119 3]))) + (is (= 4 (e [120 4 9 3])))) + (let [e (dq/index-feature-extractor [1 0] true)] + (is (= [3 119] (e [119 3]))) + (is (= [4 120] (e [120 4 9 3])))) + (let [e 
(dq/index-feature-extractor [] true)] + (is (nil? (e [119 3]))) + (is (nil? (e [120 4 9 3])))) + (is (nil? (dq/index-feature-extractor [] false)))) + +(deftest test-filtering-plan + (let [pattern1 '[?w ?x ?y] + context '{:rels [{:attrs {?x 0} + :tuples [[1] + [3] + [5]]} + {:attrs {?y 0} + :tuples [[2] [4] [6]]} + {:attrs {?z 0} + :tuples [[9] [10] [11]]}]} + + rels (vec (:rels context)) + bsm (dq/bound-symbol-map rels) + clean-pattern (dq/replace-unbound-symbols-by-nil bsm pattern1) + strategy [nil :substitute :filter nil] + subst-inds (dq/substitution-relation-indices + {:bsm bsm + :clean-pattern pattern1 + :strategy-vec strategy}) + filt-inds (dq/filtering-relation-indices + {:bsm bsm + :clean-pattern clean-pattern + :strategy-vec strategy} + subst-inds) + [init-coll subst-xform] (dq/initialization-and-substitution-xform + {:bsm bsm + :clean-pattern clean-pattern + :strategy-vec strategy + :rels rels} + subst-inds) + + subst-result (into [] + (comp dq/unpack6 + subst-xform + pack6) + init-coll) + [[_ p0]] subst-result] + (is (nil? p0)) + (is (= '[nil ?x ?y nil nil] clean-pattern)) + (is (= #{0} subst-inds)) + (is (= #{1} filt-inds)) + (is (= {'?x {:relation-index 0 :tuple-element-index 0} + '?y {:relation-index 1 :tuple-element-index 0} + '?z {:relation-index 2 :tuple-element-index 0}} + bsm)) + (is (= '([nil 1 nil nil nil] + [nil 3 nil nil nil] + [nil 5 nil nil nil]) + (map first subst-result))))) + +(defn pcmp [x y] + (or (nil? 
x) (= x y))) + +(defn mock-backend-fn [datoms] + (fn [e0 a0 v0 t0 added0] + (filter (fn [[e1 a1 v1 t1 added1]] + (and (pcmp e0 e1) + (pcmp a0 a1) + (pcmp v0 v1) + (pcmp t0 t1) + (pcmp added0 added1))) + datoms))) + +(deftest test-full-lookup-pipeline + (let [pattern1 '[?x ?w ?y] + context '{:rels [{:attrs {?x 0} + :tuples [[1] [3] [5]]} + {:attrs {?y 0} + :tuples [[4] [5] [6]]}]} + strategy-vec [:substitute nil :filter nil] + rels (vec (:rels context)) + bsm (dq/bound-symbol-map rels) + clean-pattern (dq/replace-unbound-symbols-by-nil bsm pattern1) + sfn (dq/search-batch-fn {:bsm bsm + :clean-pattern clean-pattern + :rels rels}) + result (sfn strategy-vec + (mock-backend-fn [[0 :abc 5] + [5 :xyz 6] + [1 :k 4] + [5 :p 7]]) + identity)] + (is (= #{[1 :k 4] [5 :xyz 6]} (set result))))) + +(defn concept-id [index] + (let [s (format "%010d" index)] + (str (subs s 0 4) "_" (subs s 4 7) "_" (subs s 7 10)))) + +(defn temp-id [x] + (str "tmp-" x)) + +(defn make-forest + "This function constructs tx-data for a forest of concepts in a terminology, + for example a labor market terminology of occupations. The edges in the + forest point toward the root and are labeled `:concept/broader` because the + closer to the root we get, the broader the concept is. For instance, we could + have a concept for the occupation name 'Software Engineer' and an edge from + that concept pointing at a broader concept 'Occuptions in Computer Science'. + + This function takes as input a concatenated list of pairs of `m` and `t` + on the form `[m1 t1 m2 t2 ... mN tN]` that specifies how many sub nodes + should be generated at each level from the root and the type. For example, + `[5 'ssyk-level-1' 3 'ssyk-level-2']` means that we will construct 5 trees + of root node type 'ssyk-level-1' and each one of them will have 3 children + of node type 'ssyk-level-2'." + ([tree-spec] (make-forest tree-spec 0)) + ([[root-count & tree-spec] init-counter] + {:pre [(number? 
root-count)]} + (loop [[tos & stack] (repeat root-count [nil tree-spec]) + tx-data [] + counter init-counter + concept-map {}] + (if (nil? tos) + {:tx-data tx-data + :concept-map concept-map} + (let [[parent-id [this-type child-count & remaining-pairs]] tos + concept-id (concept-id counter) + tid (temp-id concept-id) + parent-tid (temp-id parent-id)] + (assert (or (nil? parent-id) (string? parent-id))) + (recur (into stack + (when (seq remaining-pairs) + (repeat child-count [concept-id remaining-pairs]))) + (into tx-data + cat + [[[:db/add tid :concept/id concept-id] + [:db/add tid :concept/type this-type]] + (when parent-id + [[:db/add tid :concept/broader parent-tid]])]) + (inc counter) + (cond-> concept-map + true (update concept-id + merge {:parent-id parent-id + :type this-type + :id concept-id}) + parent-id (update-in [parent-id :child-ids] #(conj (or % []) concept-id))))))))) + +(def schema [#:db{:ident :concept/id, + :valueType :db.type/string, + :cardinality :db.cardinality/one, + :doc "Unique identifier for concepts", + :unique :db.unique/identity} + #:db{:ident :concept/type, + :valueType :db.type/string, + :cardinality :db.cardinality/one, + :doc "The concepts main type"} + #:db {:ident :concept/broader + :valueType :db.type/ref + :cardinality :db.cardinality/one + :doc "A broader concept. NOTE: This the JobTech Taxonomy, every relation between two concepts has an entity with attributes :relation/concept-1, :relation-concept-2 and :relation/type."}]) + +(defn initialize-test-db0 [] + (let [conn (utils/setup-db {:store {:backend :mem} + :schema-flexibility :write + :attribute-refs? false + :keep-history? 
true})] + (d/transact conn {:tx-data schema}) + conn)) + +(defn group-concepts-by-type [concept-map] + (let [groups (update-vals (group-by (comp :type val) + concept-map) + (fn [kv-pairs] + (mapv first kv-pairs)))] + (doseq [[k v] (sort-by val (update-vals groups count))] + (log/info k v)) + (log/info "Total count:" (count concept-map)) + groups)) + +(deftest synthetic-ssyk-tree-test + + "In this test we construct a labor market taxonomy of occupations. Given +some concept ids, we look up broader concepts. The queries in this test will +include clauses with up to two unknown variables. + +We perform two queries. In the first query, we only provide one input id and +look up concepts broader than that id. + +In the second query, we provide two input ids." + + (testing "Given some concepts, query concepts that are broader." + (let [conn (initialize-test-db0) + ssyk-data (make-forest + [3 "ssyk-level-1" + 5 "ssyk-level-2" + 30 "ssyk-level-3" + 5 "ssyk-level-4" + 2 "occupation-name"]) + ssyk-concept-map (:concept-map ssyk-data) + _ (d/transact conn {:tx-data (:tx-data ssyk-data)}) + concepts-per-type (group-concepts-by-type ssyk-concept-map) + ssyk-level-3-ids (concepts-per-type "ssyk-level-3") + expected-result-fn (fn [concept-ids] + (into #{} + (map (fn [cid] + {:from_id cid + :id (get-in ssyk-concept-map + [cid :parent-id])})) + concept-ids)) + + related-query '{:find [?from-id ?id], + :keys [from_id id], + :in [$ [?from-id ...]], + :where + [[?c :concept/id ?from-id] + [?c :concept/broader ?related-c] + [?related-c :concept/id ?id]]}] + (testing "Query for 1 input concept id." + (let [input-concept-id (first ssyk-level-3-ids) + _ (is (string? input-concept-id)) + result (d/q {:query related-query + :args [(d/db conn) #{input-concept-id}]})] + (is (= (expected-result-fn [input-concept-id]) + (set result))))) + (testing "Query for 2 input ids." 
+ (let [input-ids (set (take 2 ssyk-level-3-ids)) + result (d/q {:query related-query + :args [(d/db conn) input-ids]})] + (is (= (expected-result-fn input-ids) + (set result)))))))) + +(deftest synthetic-ssyk-tree-test2 + + "We construct a forest of four trees with each tree having 2000 subnodes each. We then pick the ids +of two of the root nodes of the trees and the ids from three of the children of one +trees. Then we we query for all (parent,child) pairs." + + (let [conn (initialize-test-db0) + ssyk-data (make-forest [4 "ssyk-level-1" 2000 "ssyk-level-2"]) + ssyk-concept-map (:concept-map ssyk-data) + _ (d/transact conn {:tx-data (:tx-data ssyk-data)}) + concepts-per-type (group-concepts-by-type ssyk-concept-map)] + (testing "Query (parent,child) pairs from a *small* set of possible combinations in a labour market taxonomy." + (let [parent-ids (take 2 (concepts-per-type "ssyk-level-1")) + parent-id (first parent-ids) + child-ids (take 3 (get-in ssyk-concept-map [parent-id :child-ids])) + _ (is (= 2 (count parent-ids))) + _ (is (= 3 (count child-ids))) + result (d/q {:query '{:find [?parent-id ?child-id] + :keys [parent_id child_id] + :in [$ + [?parent-id ...] + [?child-id ...]], + :where + [[?pc :concept/id ?parent-id] + [?cc :concept/id ?child-id] + [?cc :concept/broader ?pc]]} + :args [(d/db conn) + parent-ids + child-ids]}) + expected-result (into #{} + (map (fn [child-id] {:parent_id parent-id :child_id child-id})) + child-ids)] + (is (= 3 (count expected-result))) + (is (= (set result) + expected-result)))))) + +(deftest synthetic-ssyk-tree-test3 + "We construct a labor market taxonomy of 200 trees where each root node has one child. Then +we query all (parent, child) pairs." 
+ + (let [conn (initialize-test-db0) + ssyk-data (make-forest [200 "ssyk-level-1" 1 "ssyk-level-2"]) + ssyk-concept-map (:concept-map ssyk-data) + _ (d/transact conn {:tx-data (:tx-data ssyk-data)}) + + ;; Adding some extra data here also makes `expand-once` perform better than + ;; `identity` and `select-simple`. If we remove these two lines, then + ;; `expand-once`, `identity` and `select-simple` will perform roughly the same. + extra-data (make-forest [100 "skill-headline" 100 "skill"] (count ssyk-concept-map)) + _ (d/transact conn {:tx-data (:tx-data extra-data)}) + + _concepts-per-type (group-concepts-by-type ssyk-concept-map)] + (testing "Query (parent,child) pairs from a *large* set of possible combinations in a labour market taxonomy." + (let [result (d/q {:query '{:find [?parent-id ?child-id] + :keys [parent_id child_id] + :in [$ %], + :where + [[?pc :concept/type "ssyk-level-1"] + [?cc :concept/type "ssyk-level-2"] + [?cc :concept/broader ?pc] + [?pc :concept/id ?parent-id] + [?cc :concept/id ?child-id]]} + :args [(d/db conn)]}) + expected-result (into #{} + (keep (fn [[child-id {:keys [parent-id]}]] + (when parent-id + {:child_id child-id :parent_id parent-id}))) + ssyk-concept-map)] + (is (= 200 (count expected-result))) + (is (= expected-result + (set result))))))) + +(deftest basic-index-selector-test + (let [f (dq/basic-index-selector 5)] + (is (= [10 7] ((f [1 3]) [9 10 4 7 1234]))) + (is (= [7 10] ((f [3 1]) [9 10 4 7 1234]))))) diff --git a/test/datahike/test/strategy_test.clj b/test/datahike/test/strategy_test.clj deleted file mode 100644 index fa22abf4..00000000 --- a/test/datahike/test/strategy_test.clj +++ /dev/null @@ -1,314 +0,0 @@ -(ns datahike.test.strategy-test - (:require [clojure.test :refer [is deftest testing]] - [datahike.test.utils :as utils] - [datahike.api :as d] - [taoensso.timbre :as log] - [datahike.query :as dq])) - -(defn concept-id [index] - (let [s (format "%010d" index)] - (str (subs s 0 4) "_" (subs s 4 7) "_" (subs s 7 
10)))) - -(defn temp-id [x] - (str "tmp-" x)) - -(defn make-forest - "This function constructs tx-data for a forest of concepts in a terminology, - for example a labor market terminology of occupations. The edges in the - forest point toward the root and are labeled `:concept/broader` because the - closer to the root we get, the broader the concept is. For instance, we could - have a concept for the occupation name 'Software Engineer' and an edge from - that concept pointing at a broader concept 'Occuptions in Computer Science'. - - This function takes as input a concatenated list of pairs of `m` and `t` - on the form `[m1 t1 m2 t2 ... mN tN]` that specifies how many sub nodes - should be generated at each level from the root and the type. For example, - `[5 'ssyk-level-1' 3 'ssyk-level-2']` means that we will construct 5 trees - of root node type 'ssyk-level-1' and each one of them will have 3 children - of node type 'ssyk-level-2'." - ([tree-spec] (make-forest tree-spec 0)) - ([[root-count & tree-spec] init-counter] - {:pre [(number? root-count)]} - (loop [[tos & stack] (repeat root-count [nil tree-spec]) - tx-data [] - counter init-counter - concept-map {}] - (if (nil? tos) - {:tx-data tx-data - :concept-map concept-map} - (let [[parent-id [this-type child-count & remaining-pairs]] tos - concept-id (concept-id counter) - tid (temp-id concept-id) - parent-tid (temp-id parent-id)] - (assert (or (nil? parent-id) (string? 
parent-id))) - (recur (into stack - (when (seq remaining-pairs) - (repeat child-count [concept-id remaining-pairs]))) - (into tx-data - cat - [[[:db/add tid :concept/id concept-id] - [:db/add tid :concept/type this-type]] - (when parent-id - [[:db/add tid :concept/broader parent-tid]])]) - (inc counter) - (cond-> concept-map - true (update concept-id - merge {:parent-id parent-id - :type this-type - :id concept-id}) - parent-id (update-in [parent-id :child-ids] #(conj (or % []) concept-id))))))))) - -(def schema [#:db{:ident :concept/id, - :valueType :db.type/string, - :cardinality :db.cardinality/one, - :doc "Unique identifier for concepts", - :unique :db.unique/identity} - #:db{:ident :concept/type, - :valueType :db.type/string, - :cardinality :db.cardinality/one, - :doc "The concepts main type"} - #:db {:ident :concept/broader - :valueType :db.type/ref - :cardinality :db.cardinality/one - :doc "A broader concept. NOTE: This the JobTech Taxonomy, every relation between two concepts has an entity with attributes :relation/concept-1, :relation-concept-2 and :relation/type."}]) - -(defn initialize-test-db0 [] - (let [conn (utils/setup-db {:store {:backend :mem} - :schema-flexibility :write - :attribute-refs? false - :keep-history? true})] - (d/transact conn {:tx-data schema}) - conn)) - -(defn compute-lookup-cost [lookup] - ;; * The call to the db backend is 1 operation. - ;; * Processing each returned tuple is 1 operation. 
- (+ 1 (:tuple-count lookup))) - -(defn evaluate-strategy-times [f] - (into {} - (map (fn [[k strategy]] [k (let [start (System/nanoTime) - {:keys [ret stats]} (f strategy) - end (System/nanoTime)] - (assert ret) - (assert stats) - {:result ret - :operation-cost (transduce (comp (mapcat :lookup-stats) - (map compute-lookup-cost)) - + - stats) - :elapsed-ns (- end start)})])) - [[:identity identity] - [:select-simple dq/select-simple] - [:select-all dq/select-all] - [:expand-once dq/expand-once]])) - -(defn print-strategy-times [result-map] - (doseq [[k v] (sort-by (comp :elapsed-ns val) result-map)] - (log/info (format "%16s: %.6f" (name k) (* 1.0e-9 (:elapsed-ns v)))))) - -(defn faster-strategy? [strategy-times key-a key-b] - (< (get-in strategy-times [key-a :operation-cost]) - (get-in strategy-times [key-b :operation-cost]))) - -(defn group-concepts-by-type [concept-map] - (let [groups (update-vals (group-by (comp :type val) - concept-map) - (fn [kv-pairs] - (mapv first kv-pairs)))] - (doseq [[k v] (sort-by val (update-vals groups count))] - (log/info k v)) - (log/info "Total count:" (count concept-map)) - groups)) - -(deftest synthetic-ssyk-tree-test - - "In this test we construct a labor market taxonomy of occupations. Given -some concept ids, we look up broader concepts. The queries in this test will -include clauses with up to two unknown variables. - -We perform two queries. In the first query, we only provide one input id and -look up concepts broader than that id. Here, we expect all strategies except -identity to perform well because there will always be some substitutions to be made. - -In the second query, we provide two input ids. This means that the select-simple -strategy will not perform the substitution that it performed in the first case where -we only had one one input id. Therefore, it will perform about as bad as identity." - - (testing "Given some concepts, query concepts that are broader." 
- (let [conn (initialize-test-db0) - ssyk-data (make-forest - [3 "ssyk-level-1" - 5 "ssyk-level-2" - 30 "ssyk-level-3" - 5 "ssyk-level-4" - 2 "occupation-name"]) - ssyk-concept-map (:concept-map ssyk-data) - _ (d/transact conn {:tx-data (:tx-data ssyk-data)}) - concepts-per-type (group-concepts-by-type ssyk-concept-map) - ssyk-level-3-ids (concepts-per-type "ssyk-level-3") - expected-result-fn (fn [concept-ids] - (into #{} - (map (fn [cid] - {:from_id cid - :id (get-in ssyk-concept-map - [cid :parent-id])})) - concept-ids)) - - related-query '{:find [?from-id ?id], - :keys [from_id id], - :in [$ [?from-id ...]], - :where - [[?c :concept/id ?from-id] - [?c :concept/broader ?related-c] - [?related-c :concept/id ?id]]}] - (testing "Query for 1 input concept id." - (let [input-concept-id (first ssyk-level-3-ids) - _ (is (string? input-concept-id)) - result-map (evaluate-strategy-times - (fn [strategy] - (d/q {:query related-query - :settings {:relprod-strategy strategy} - :args [(d/db conn) #{input-concept-id}] - :stats? true})))] - (doseq [[_ {:keys [result]}] result-map] - (is (= (expected-result-fn [input-concept-id]) - (set result)))) - (log/info "Query related to 1 concept:") - (print-strategy-times result-map) - (is (faster-strategy? result-map :select-simple :identity)) - (is (faster-strategy? result-map :select-all :identity)) - (is (faster-strategy? result-map :expand-once :identity)))) - (testing "Query for 2 input ids." - (let [input-ids (set (take 2 ssyk-level-3-ids)) - result-map (evaluate-strategy-times - (fn [strategy] - (d/q {:query related-query - :settings {:relprod-strategy strategy} - :args [(d/db conn) input-ids] - :stats? true})))] - (doseq [[_ {:keys [result]}] result-map] - (is (= (expected-result-fn input-ids) - (set result)))) - (log/info "Query related to 2 concepts:") - (print-strategy-times result-map) - (is (faster-strategy? result-map :select-all :identity)) - (is (faster-strategy? 
result-map :expand-once :identity)) - - ;; Because we have more than one input id, select-simple no longer performs that well. - (is (faster-strategy? result-map :select-all :select-simple)) - (is (faster-strategy? result-map :expand-once :select-simple))))))) - -(deftest synthetic-ssyk-tree-test2 - - "This test is designed for the select-all strategy to perform well. We construct a -forest of four trees with each tree having 2000 subnodes each. We then pick the ids -of two of the root nodes of the trees and the ids from three of the children of one -trees. Then we we query for all (parent,child) pairs. - -The select-all strategy is going to expand the `[?cc :concept/broader ?pc]` to the -6 = 2*3 combinations of the possible parent child pairs and consequently make six -queries to the database backend. Each such query will return either one or zero rows. - -The expand-once strategy will only substitude the `?pc` variable in the -`[?cc :concept/broader ?pc]` pattern because `?pc` has the smallest number of -possible bindings, that is two. So it will make two database lookups. Each one of -those lookups will result in 2000 rows returned from the database backend and most -of those rows will be discarded. So it will be slower. - -Finally, the select-simple and identity strategies will both run the query -`[?cc :concept/broader ?pc]` without any substitutions which will return 8000 rows -to be filtered." - - (let [conn (initialize-test-db0) - ssyk-data (make-forest [4 "ssyk-level-1" 2000 "ssyk-level-2"]) - ssyk-concept-map (:concept-map ssyk-data) - _ (d/transact conn {:tx-data (:tx-data ssyk-data)}) - concepts-per-type (group-concepts-by-type ssyk-concept-map)] - (testing "Query (parent,child) pairs from a *small* set of possible combinations in a labour market taxonomy." 
- (let [parent-ids (take 2 (concepts-per-type "ssyk-level-1")) - parent-id (first parent-ids) - child-ids (take 3 (get-in ssyk-concept-map [parent-id :child-ids])) - _ (is (= 2 (count parent-ids))) - _ (is (= 3 (count child-ids))) - result-map (evaluate-strategy-times - (fn [strategy] - (d/q {:query '{:find [?parent-id ?child-id] - :keys [parent_id child_id] - :in [$ - [?parent-id ...] - [?child-id ...]], - :where - [[?pc :concept/id ?parent-id] - [?cc :concept/id ?child-id] - [?cc :concept/broader ?pc]]} - :settings {:relprod-strategy strategy} - :args [(d/db conn) - parent-ids - child-ids] - :stats? true}))) - expected-result (into #{} - (map (fn [child-id] {:parent_id parent-id :child_id child-id})) - child-ids)] - (is (= 3 (count expected-result))) - (log/info "Find all edges between a few small sets of concept types") - (print-strategy-times result-map) - (doseq [[_k v] result-map] - (is (-> v :result set (= expected-result)))) - (is (faster-strategy? result-map :select-all :expand-once)) - (is (faster-strategy? result-map :expand-once :select-simple)) - (is (faster-strategy? result-map :expand-once :identity)))))) - -(deftest synthetic-ssyk-tree-test3 - - "This test is designed to show a case where select-all performs bad. We construct -a labor market taxonomy of 200 trees where each root node has one child. Then -we query all (parent, child) pairs. - -When the select-all strategy encounters the `[?cc :concept/broader ?pc]` pattern, -it will perform 40000 = 200*200 substitutions for all possible combinations of -`?cc` and `?pc`. Out of those 40000 combinations, only 200 will be valid. That means -39800 database backend queries that return nothing. All the other strategies, even -identity, perform better than that." 
- - (let [conn (initialize-test-db0) - ssyk-data (make-forest [200 "ssyk-level-1" 1 "ssyk-level-2"]) - ssyk-concept-map (:concept-map ssyk-data) - _ (d/transact conn {:tx-data (:tx-data ssyk-data)}) - - ;; Adding some extra data here also makes `expand-once` perform better than - ;; `identity` and `select-simple`. If we remove these two lines, then - ;; `expand-once`, `identity` and `select-simple` will perform roughly the same. - extra-data (make-forest [100 "skill-headline" 100 "skill"] (count ssyk-concept-map)) - _ (d/transact conn {:tx-data (:tx-data extra-data)}) - - _concepts-per-type (group-concepts-by-type ssyk-concept-map)] - (testing "Query (parent,child) pairs from a *large* set of possible combinations in a labour market taxonomy." - (let [result-map (evaluate-strategy-times - (fn [strategy] - (d/q {:query '{:find [?parent-id ?child-id] - :keys [parent_id child_id] - :in [$ %], - :where - [[?pc :concept/type "ssyk-level-1"] - [?cc :concept/type "ssyk-level-2"] - [?cc :concept/broader ?pc] - [?pc :concept/id ?parent-id] - [?cc :concept/id ?child-id]]} - :settings {:relprod-strategy strategy} - :args [(d/db conn)] - :stats? true}))) - expected-result (into #{} - (keep (fn [[child-id {:keys [parent-id]}]] - (when parent-id - {:child_id child-id :parent_id parent-id}))) - ssyk-concept-map)] - (is (= 200 (count expected-result))) - (doseq [[_k v] result-map] - (is (-> v :result set (= expected-result)))) - (log/info "Find all edges between concept types:") - (print-strategy-times result-map) - (is (faster-strategy? result-map :expand-once :select-simple)) - (is (faster-strategy? result-map :expand-once :identity)) - (is (faster-strategy? result-map :select-simple :select-all)) - (is (faster-strategy? 
result-map :identity :select-all)))))) diff --git a/test/datahike/test/time_variance_test.cljc b/test/datahike/test/time_variance_test.cljc index 6a35c38d..61340e14 100644 --- a/test/datahike/test/time_variance_test.cljc +++ b/test/datahike/test/time_variance_test.cljc @@ -5,6 +5,7 @@ [datahike.api :as d] #?(:cljs [datahike.cljs :refer [Throwable]]) [datahike.constants :as const] + [datahike.test.utils :as du] [datahike.db.interface :as dbi] [datahike.test.utils :refer [setup-db sleep]]) (:import [java.util Date])) @@ -361,7 +362,34 @@ (map (comp vec seq)) (remove (fn [[e _ _ _]] (< const/tx0 e))) - set)))))) + set)))) + (testing "Datoms extracted like Wanderung does it" + (let [datoms (du/get-all-datoms @conn (map du/unmap-tx-timestamp))] + (is (= [[536870913 :db/txInstant :timestamp 536870913 true] + [1 :db/unique :db.unique/identity 536870913 true] + [1 :db/ident :name 536870913 true] + [1 :db/valueType :db.type/string 536870913 true] + [1 :db/index true 536870913 true] + [1 :db/cardinality :db.cardinality/one 536870913 true] + [2 :db/valueType :db.type/long 536870913 true] + [2 :db/cardinality :db.cardinality/one 536870913 true] + [2 :db/ident :age 536870913 true] + [3 :name "Alice" 536870913 true] + [3 :age 25 536870913 true] + [4 :age 35 536870913 true] + [4 :name "Bob" 536870913 true] + [536870914 :db/txInstant :timestamp 536870914 true] + [3 :age 25 536870914 false] + [3 :age 30 536870914 true] + [536870915 :db/txInstant :timestamp 536870915 true] + [3 :age 30 536870915 false] + [3 :age 35 536870915 true] + [536870916 :db/txInstant :timestamp 536870916 true] + [3 :age 35 536870916 false] + [3 :age 25 536870916 true] + [536870917 :db/txInstant :timestamp 536870917 true] + [3 :age 25 536870917 false]] + datoms)))))) (deftest test-no-duplicates-on-history-search (let [schema [{:db/ident :name diff --git a/test/datahike/test/tools_test.clj b/test/datahike/test/tools_test.clj index e11aebd9..8f4cecce 100644 --- a/test/datahike/test/tools_test.clj +++ 
b/test/datahike/test/tools_test.clj @@ -20,6 +20,7 @@ a (* a a) b (* b b) c (throw (ex-info "This element should not be evaluated" {})))))) + (defn add-resolver [context [result-var a b]] (when (and (contains? context a) (contains? context b)) @@ -48,3 +49,50 @@ (transduce (map #(+ 1000 %)) (dt/group-by-step even?) (range 10))))) + +(deftest test-match-vector + (is (= 0 (dt/match-vector [nil nil] + [_ _] 0 + [_ 1] 1 + [1 *] 2))) + (is (= 1 (dt/match-vector [nil 9] + [_ _] 0 + [_ 1] 1 + [1 *] 2))) + (is (= 2 (dt/match-vector [10 nil] + [_ _] 0 + [_ 1] 1 + [1 *] 2))) + (is (= 2 (dt/match-vector [10 :asdf] + [_ _] 0 + [_ 1] 1 + [1 *] 2))) + (is (= 3 (dt/match-vector [10 :asdf] + [_ _] 0 + [_ 1] 1 + [1 _] 2 + [1 1] 3))) + (is (= 2 (dt/match-vector [10 nil] + [_ _] 0 + [_ 1] 1 + [1 _] 2 + [1 1] 3)))) + +(defmacro wrap-range-tree [input-symbol] + (dt/range-subset-tree 3 input-symbol (fn [x y] [:inds x :mask y]))) + +(deftest range-subset-tree-test + (is (= (dt/range-subset-tree 1 'x (fn [inds _] [:inds inds])) + '(if + (clojure.core/empty? x) + [:inds []] + (if + (clojure.core/= 0 (clojure.core/first x)) + (clojure.core/let [x (clojure.core/rest x)] [:inds [0]]) + [:inds []])))) + (is (= [:inds [1 2] :mask [nil 0 1]] + (wrap-range-tree [1 2]))) + (is (= [:inds [1] :mask [nil 0 nil]] + (wrap-range-tree [1]))) + (is (= [:inds [0 2] :mask [0 nil 1]] + (wrap-range-tree [0 2])))) diff --git a/test/datahike/test/utils.cljc b/test/datahike/test/utils.cljc index 0175d8ab..09645072 100644 --- a/test/datahike/test/utils.cljc +++ b/test/datahike/test/utils.cljc @@ -3,6 +3,37 @@ [datahike.tools :as tools]) #?(:clj (:import [java.util UUID Date]))) +(defn get-all-datoms + "Based on Wanderung function `wanderung.datahike/extract-datahike-data`." 
+ ([db] (get-all-datoms db identity)) + ([db final-xform] + (let [txs (->> db + (d/q '[:find ?tx ?inst + :in $ + :where + [?tx :db/txInstant ?inst] + [(< #inst "1970-01-02" ?inst)]]) + (sort-by first)) + query {:query '[:find ?e ?a ?v ?t ?added + :in $ ?t + :where + [?e ?a ?v ?t ?added] + (not [?e :db/txInstant ?v ?t ?added])] + :args [(d/history db)]}] + (into [] + (comp (mapcat + (fn [[tid tinst]] + (->> (d/q (update-in query [:args] conj tid)) + (sort-by first) + (into [[tid :db/txInstant tinst tid true]])))) + final-xform) + txs)))) + +(defn unmap-tx-timestamp [[e a _ tx added :as datom]] + (if (= a :db/txInstant) + [e a :timestamp tx added] + datom)) + (defn cfg-template "Returning a config template with a random store-id" []