Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add method for subsetting #96

Merged
79 changes: 77 additions & 2 deletions docs/column_exploration.clj
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@

^{:kind/hidden true}
(comment
(do (clay/show-doc! "docs/column_exploration.clj" {:hide-doc? true}))
(clay/show-doc! "docs/column_exploration.clj" {:hide-doc? true})
(clay/write-html! "docs/column_exploration.html")
,)

;; ## What is this exploration?
Expand Down Expand Up @@ -50,6 +51,79 @@
(let [string-column (column ["foo" "bar"])]
(col/typeof string-column))

;; ### Subsetting and accessing

;; You can access an element in a column in exactly the same ways you
;; would in Clojure.

(def myclm (column (range 5)))

myclm

(myclm 2)

(nth myclm 2)

(get myclm 2)

;; #### Selecting multiple elements

;; There are two ways to select multiple elements from a column:
;; * If you need a contiguous subset, use `slice`;
;; * If you need elements from arbitrary positions, use `select`.
;;

;; **Slice**

;; The `slice` method allows you to use indexes to specify a portion
;; of the column to extract.

(def myclm
(column (repeatedly 10 #(rand-int 10))))

myclm

(col/slice myclm 3 5)


;; It also supports negative indexing, making it possible to slice
;; from the end of the column:

(col/slice myclm -7 -5)

;; It's also possible to slice from a given index to the end of the
;; column, or from the start of the column up to a given index:

(col/slice myclm 7 :end)

(col/slice myclm -3 :end)

(col/slice myclm :start 7)

(col/slice myclm :start -3)

;; **Select**
;;
;; The `select` fn works by taking a list of index positions:

(col/select myclm [1 3 5 8])

;; We can combine this type of selection with the operations just
;; demonstrated to select certain values.


myclm

;; Let's see which positions are greater than 5.
(ops/> myclm 5)


;; We can use a column of boolean values like the one above with the `select` function as well. `select` will choose all the positions that are true. It's like supplying select a list of the index positions that hold true values.
(col/select myclm (ops/> myclm 5))




;; ### Basic Operations

;; Operations are right now in their own namespace
Expand All @@ -64,7 +138,7 @@

(ops/pow a 2)

(ops/* 10 (fun/sin a))
(ops/* 10 (ops/sin a))

(ops/< a 35)

Expand All @@ -74,3 +148,4 @@
(-> a
(ops/* b)
(ops/< 70))

71 changes: 4 additions & 67 deletions docs/column_exploration.html

Large diffs are not rendered by default.

42 changes: 39 additions & 3 deletions src/tablecloth/column/api.clj
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
;;Autogenerated from tablecloth.column.api.api-template-- DO NOT EDIT
"Tablecloth Column API"
(:require [tablecloth.column.api.api-template]
[tablecloth.column.api.column]))
[tablecloth.column.api.column]
[tech.v3.dataset.column]))

(defn column
"Create a `column` from a vector or sequence. "
Expand All @@ -26,15 +27,50 @@
(tablecloth.column.api.column/ones n-ones)))


(defn select
  "Build a new column holding only the positions of `col` picked out by
  `selection`. `selection` is either a list of index positions or a
  sequence of boolean values in which every true element marks a position
  to keep. When indices are supplied, a position may be listed more than
  once and is then selected once per occurrence."
  [col selection]
  (tech.v3.dataset.column/select col selection))


(defn slice
"Returns a subset of the column defined by the inclusive `from` and
`to` indexes. If `to` is not provided, slices to the end of the
column. If `from` is `nil`, slices from the beginning of the column.
If either `from` or `to` is a negative number, it is treated as an
index from the end of the column. The `:start` and `:end` keywords
can be used to represent the start and end of the column,
respectively. The optional `step` is passed through to
`tablecloth.column.api.column/slice`, which keeps every `step`-th
position of the requested range.

Examples:
(def column [1 2 3 4 5])
(slice column 1 3) ;=> [2 3 4]
(slice column 2) ;=> [3 4 5]
(slice column -3 -1) ;=> [3 4 5]
(slice column :start 2) ;=> [1 2 3]
(slice column 2 :end) ;=> [3 4 5]
(slice column -2 :end) ;=> [4 5]
(slice column 0 4 2) ;=> [1 3 5]"
([col from]
(tablecloth.column.api.column/slice col from))
([col from to]
(tablecloth.column.api.column/slice col from to))
([col from to step]
(tablecloth.column.api.column/slice col from to step)))


(defn typeof
  "Reports the concrete type of the elements held by the column `col`."
  [col]
  (tablecloth.column.api.column/typeof col))


(defn typeof?
"True|false the column's elements are of the provided type `datatype`.
Works with concrete types (e.g. :int32) or general types (e.g. :numerical)."
"True|false the column's elements are of the provided type `datatype`. Can check
both concrete types (e.g. :int32) or general types (:numerical, :textual, etc)."
([col datatype]
(tablecloth.column.api.column/typeof? col datatype)))

Expand Down
4 changes: 4 additions & 0 deletions src/tablecloth/column/api/api_template.clj
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@
column?
typeof
typeof?
slice
zeros
ones)

(exporter/export-symbols tech.v3.dataset.column
select)

(comment
;; Use this to generate the column api
(exporter/write-api! 'tablecloth.column.api.api-template
Expand Down
27 changes: 27 additions & 0 deletions src/tablecloth/column/api/column.clj
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,31 @@
[n-ones]
(column (dtype/const-reader 1 n-ones)))

(defn slice
  "Returns a subset of the column defined by the inclusive `from` and
  `to` indexes.

  * `from` - first index to include. `nil` or `:start` means the
    beginning of the column.
  * `to`   - last index to include. Omitted, `nil` or `:end` means the
    end of the column.
  * `step` - distance between selected positions (defaults to 1). Must
    not be zero.

  A negative `from` or `to` is treated as an index counted from the end
  of the column, so -1 is the last element.

  Throws `IllegalArgumentException` when `step` is zero.

  Examples:
    (def column [1 2 3 4 5])
    (slice column 1 3)       ;=> [2 3 4]
    (slice column 2)         ;=> [3 4 5]
    (slice column -3 -1)     ;=> [3 4 5]
    (slice column :start 2)  ;=> [1 2 3]
    (slice column 2 :end)    ;=> [3 4 5]
    (slice column -2 :end)   ;=> [4 5]
    (slice column 0 4 2)     ;=> [1 3 5]"
  ([col from]
   (slice col from :end))
  ([col from to]
   (slice col from to 1))
  ([col from to step]
   ;; `range` with a zero step is an infinite sequence, which would make
   ;; the selection below never terminate.
   (when (zero? step)
     (throw (IllegalArgumentException. "slice: step must not be zero")))
   (let [len (count col)
         ;; Map the sentinels (nil / :start / :end) onto concrete indexes.
         from (if (or (nil? from) (= from :start)) 0 from)
         to (if (or (nil? to) (= to :end)) (dec len) to)
         ;; Negative indexes count back from the end of the column.
         absolute (fn [idx] (if (neg? idx) (+ len idx) idx))]
     ;; `range` excludes its upper bound, so bump it to keep `to` inclusive.
     (col/select col (range (absolute from)
                            (inc (absolute to))
                            step)))))
28 changes: 26 additions & 2 deletions test/tablecloth/column/api/column_test.clj
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
(ns tablecloth.column.api.column-test
(:require [tablecloth.column.api.column :refer [column zeros ones typeof? typeof]]
[midje.sweet :refer [fact =>]]))
(:require [tablecloth.column.api.column :refer [column zeros ones typeof? typeof slice]]
[midje.sweet :refer [fact facts =>]]))

(fact "`column` returns a column"
(tech.v3.dataset.column/is-column? (column)) => true)
Expand Down Expand Up @@ -48,3 +48,27 @@

(fact "`ones` returns a column filled with ones"
(ones 3) => [1 1 1])

;; Exercises `slice` against a five-element column. Both bounds are
;; inclusive in every case below.
(facts "about `slice`"
  (let [c (column [1 2 3 4 5])]
    (fact "it return a subset of a column inclusively"
      (slice c 0 0) => [1]
      (slice c 0 4) => [1 2 3 4 5])
    (fact "it supports negative indexing inclusively"
      ;; -1 names the last element, so 0..-1 spans the whole column.
      (slice c 0 -1) => [1 2 3 4 5]
      (slice c -1 -1) => [5]
      (slice c -3 -1) => [3 4 5])
    (fact "it supports 0 within negative indexing"
      (slice c 0 -2) => [1 2 3 4])
    (fact "it supports stepped slicing"
      (slice c 0 4 2) => [1 3 5])
    (fact "it supports using nil to indicate slice from start or end"
      (slice c 2) => [3 4 5]
      (slice c -2) => [4 5]
      ;; An explicit nil `to` behaves like an omitted `to`.
      (slice c 2 nil) => [3 4 5]
      (slice c nil 2) => [1 2 3]
      (slice c nil -2) => [1 2 3 4])
    (fact "it supports special keywords for selecting from start or end"
      (slice c :start 2) => [1 2 3]
      (slice c 1 :end) => [2 3 4 5]
      (slice c -4 :end) => [2 3 4 5]
      (slice c :start -3) => [1 2 3])))