Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update type interface to use type hierarchy in tablecloth.api.util #76

Merged
merged 21 commits into from
Nov 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/prs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ on:
pull_request:
branches:
- master
- ethan/column-api-dev-branch-1

jobs:
run-tests:
Expand Down
61 changes: 60 additions & 1 deletion src/tablecloth/api/utils.clj
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,44 @@
:numerical #{:int8 :int16 :int32 :int64 :uint8 :uint16 :uint32 :uint64
:long :int :short :byte
:float32 :float64 :double :float}
:textual #{:text :string}})
:textual #{:text :string}
:logical #{:boolean}})

;; Reverse index of `type-sets`: maps every concrete datatype to the set
;; of general types it belongs to.
;;
;; The contents are written out by hand as an optimization, so nothing has
;; to be derived from `type-sets` when a lookup happens. The price is that
;; this table must be kept in sync with `type-sets` manually; as long as
;; `type-sets` is up to date, the table can be regenerated from it.
(def ^:private general-types-lookup
  (let [tag-all (fn [general-types datatypes]
                  ;; every datatype in the group shares the same tag set
                  (zipmap datatypes (repeat general-types)))]
    (merge
     (tag-all #{:integer :numerical}
              [:int8 :int16 :int32 :int64
               :uint8 :uint16 :uint32 :uint64
               :long :int :short :byte])
     (tag-all #{:float :numerical}
              [:float32 :float64 :double :float])
     (tag-all #{:datetime}
              [:local-date :local-time :local-date-time :zoned-date-time
               :instant :duration
               :packed-local-date :packed-local-time :packed-local-date-time
               :packed-zoned-date-time :packed-instant :packed-duration])
     (tag-all #{:textual} [:text :string])
     (tag-all #{:logical} [:boolean]))))

(defn type?
([general-type]
Expand All @@ -73,11 +110,33 @@
([general-type datatype]
((type-sets general-type) datatype)))

(defn ->general-types
ezmiller marked this conversation as resolved.
Show resolved Hide resolved
"Given a concrete `datatype` (e.g. `:int32`), returns the set of general
types it belongs to (e.g. `#{:integer :numerical}`). Returns nil when
`datatype` has no entry in `general-types-lookup`."
[datatype]
(general-types-lookup datatype))

(defn types
  "Returns the set of all concrete datatypes (e.g. `:int32`, `:float32`),
  i.e. the union of every set stored as a value in `type-sets`."
  []
  ;; `cat` flattens the per-general-type sets while pouring them into one set
  (into #{} cat (vals type-sets)))

(defn general-types
  "Returns the set of general types, e.g. `#{:integer :logical ...}`.

  General types are the keys of `type-sets`; the concrete datatypes grouped
  under each of them are the corresponding values (see `types`)."
  []
  ;; The general types are the map's keys. `vals` would instead yield the
  ;; sets of concrete datatypes, which is what `types` flattens.
  (set (keys type-sets)))

(defn concrete-type?
  "Returns true when `datatype` is one of the concrete datatypes listed by
  `types` (e.g. `:int32`), false otherwise."
  [datatype]
  (contains? (types) datatype))

(defn- prepare-datatype-set
  "Resolves `datatype-columns-selector` to a set of datatypes.

  The selector is reduced to an unqualified keyword (only its `name` part is
  kept). If that keyword is a key of `type-sets`, the associated set is
  returned; otherwise the result is a one-element set holding the keyword
  itself."
  [datatype-columns-selector]
  (let [selector-key (keyword (name datatype-columns-selector))
        fallback     #{selector-key}]
    (get type-sets selector-key fallback)))


(defn- filter-column-names
"Filter column names"
[ds columns-selector meta-field]
Expand Down
9 changes: 8 additions & 1 deletion src/tablecloth/column/api.clj
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,18 @@


(defn typeof
"Returns the datatype fo the elements within the column `col`."
"Returns the concrete type of the elements within the column `col`."
([col]
(tablecloth.column.api.column/typeof col)))


(defn typeof?
  "Checks whether the elements of the column `col` are of type `datatype`.
  `datatype` may be a concrete type (e.g. :int32) or a general type
  (e.g. :numerical). Delegates to `tablecloth.column.api.column/typeof?`."
  [col datatype]
  (tablecloth.column.api.column/typeof? col datatype))


(defn zeros
"Create a new column filled wth `n-zeros`."
([n-zeros]
Expand Down
4 changes: 2 additions & 2 deletions src/tablecloth/column/api/api_template.clj
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
column
column?
typeof
typeof?
zeros
ones
)
ones)

(comment
;; Use this to generate the column api
Expand Down
18 changes: 12 additions & 6 deletions src/tablecloth/column/api/column.clj
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
(ns tablecloth.column.api.column
(:require [tech.v3.dataset.column :as col]
[tech.v3.datatype :as dtype]))
[tech.v3.datatype :as dtype]
[tablecloth.api.utils :refer [->general-types concrete-type? type?]]))

(defn column
"Create a `column` from a vector or sequence. "
Expand All @@ -18,16 +19,19 @@
[item]
(col/is-column? item))

;; Alias for tech.v3.datatype.elemwise-datatype`
(defn typeof
"Returns the datatype fo the elements within the column `col`."
"Returns the concrete type of the elements within the column `col`."
[col]
(dtype/elemwise-datatype col))

(defn typeof?
"True|false the column's elements are of type `dtype`"
[col dtype]
(= (dtype/elemwise-datatype col) dtype))
"True|false the column's elements are of the provided type `datatype`. Can check
both concrete types (e.g. :int32) or general types (:numerical, :textual, etc)."
[col datatype]
(let [concrete-type-of-els (dtype/elemwise-datatype col)]
(if (concrete-type? datatype)
(= datatype concrete-type-of-els)
(not (nil? (type? datatype concrete-type-of-els))))))

(defn zeros
"Create a new column filled wth `n-zeros`."
Expand All @@ -38,3 +42,5 @@
"Creates a new column filled with `n-ones`"
[n-ones]
(column (dtype/const-reader 1 n-ones)))


4 changes: 4 additions & 0 deletions test/tablecloth/api/utils_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,7 @@
=> '(4 6 3 6 2 0 5 1 2 4 2))))


;; Spot-checks `->general-types` with one integer, one float and one textual
;; concrete datatype; each must map to exactly the listed set of general types.
(fact "->general-types describes the set of general types for a concrete datatype"
(sut/->general-types :int32) => #{:integer :numerical}
(sut/->general-types :float32) => #{:float :numerical}
(sut/->general-types :string) => #{:textual})
21 changes: 18 additions & 3 deletions test/tablecloth/column/api/column_test.clj
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
(ns tablecloth.column.api.column-test
(:require [tablecloth.column.api.column :refer [column zeros ones typeof?]]
(:require [tablecloth.column.api.column :refer [column zeros ones typeof? typeof]]
[midje.sweet :refer [fact =>]]))

(fact "`column` returns a column"
Expand All @@ -22,9 +22,24 @@
(column)
(tech.v3.datatype/elemwise-datatype)) => :object)

(fact "we can check the type of a column's elements with `typeof?`"
;; `typeof` must report the concrete element datatype of a column built from
;; integer, string and boolean literals (:int64, :string and :boolean here).
(fact "`typeof` returns the concrete type of the elements"
(typeof (column [1 2 3])) => :int64
(typeof (column ["a" "b" "c"])) => :string
(typeof (column [true false])) => :boolean)

(fact "`typeof?` can check the concerete type of column elements"
(typeof? (column [1 2 3]) :int64) => true
(typeof? (column [true false]) :boolean) => true)
(typeof? (column [1 2 3]) :int32) => false
(typeof? (column ["a" "b" "c"]) :string) => true)

;; Exercises `typeof?` with general types. Each column kind gets a matching
;; general type (expects true) and a non-matching one (expects false), so the
;; check must be a strict boolean in both directions.
(fact "`typeof?` can check the general type of column elements"
(typeof? (column [1 2 3]) :integer) => true
(typeof? (column [1 2 3]) :textual) => false
(typeof? (column [1.0 2.0 3.0]) :numerical) => true
(typeof? (column [1.0 2.0 3.0]) :logical) => false
(typeof? (column ["a" "b" "c"]) :textual) => true
(typeof? (column ["a" "b" "c"]) :numerical) => false
(typeof? (column [true false true]) :logical) => true)

;; A column created by `(zeros 3)` must compare equal to a vector of three zeros.
(fact "`zeros` returns a column filled with zeros"
(zeros 3) => [0 0 0])
Expand Down