From 70fe33c40ebbb11b3eb0eb4e9d5b4b9e93f89512 Mon Sep 17 00:00:00 2001 From: Gem Newman Date: Thu, 3 May 2018 13:48:36 -0500 Subject: [PATCH] Improve docs for equality and DataFrames --- docs/src/index.md | 155 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 153 insertions(+), 2 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index e79f6733..0d5bf9ec 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -123,6 +123,30 @@ julia> ZonedDateTime(he) ### Comparisons +#### Equality + +Two `AbstractInterval`s are considered equal if they have identical left and right +endpoints (taking `Inclusivity` into account): + +```julia +julia> a = Interval(DateTime(2013, 2, 13), DateTime(2013, 2, 13, 1), true, false) +Interval{DateTime}(2013-02-13T00:00:00, 2013-02-13T01:00:00, Inclusivity(true, false)) + +julia> b = Interval(DateTime(2013, 2, 13), DateTime(2013, 2, 13, 1), false, true) +Interval{DateTime}(2013-02-13T00:00:00, 2013-02-13T01:00:00, Inclusivity(false, true)) + +julia> c = HourEnding(DateTime(2013, 2, 13, 1)) +HourEnding{DateTime}(2013-02-13T01:00:00, Inclusivity(false, true)) + +julia> a == b +false + +julia> b == c +true +``` + +#### Less Than + When determining whether one `AbstractInterval` is less than (or greater than) another, two sets of comparison operators are available: `<`/`>` and `≪`/`≫`. 
@@ -146,6 +170,133 @@ julia> 0..10 ≪ 11..20 true ``` +### `DataFrame` Considerations + +Even in `DataFrame`s, equality comparisons between `AbstractInterval`s perform as expected: + +```julia +julia> using DataFrames + +julia> he = HourEnding(DateTime(2016, 11, 16, 1)):HourEnding(DateTime(2016, 11, 16, 12)) +HourEnding{DateTime}(2016-11-16T01:00:00, Inclusivity(false, true)):1 hour:HourEnding{DateTime}(2016-11-16T12:00:00, Inclusivity(false, true)) + +julia> df1 = DataFrame(; time=he, data=1:12) +12×2 DataFrames.DataFrame +│ Row │ time │ data │ +├─────┼───────────────────┼──────┤ +│ 1 │ (2016-11-16 HE01] │ 1 │ +│ 2 │ (2016-11-16 HE02] │ 2 │ +│ 3 │ (2016-11-16 HE03] │ 3 │ +│ 4 │ (2016-11-16 HE04] │ 4 │ +│ 5 │ (2016-11-16 HE05] │ 5 │ +│ 6 │ (2016-11-16 HE06] │ 6 │ +│ 7 │ (2016-11-16 HE07] │ 7 │ +│ 8 │ (2016-11-16 HE08] │ 8 │ +│ 9 │ (2016-11-16 HE09] │ 9 │ +│ 10 │ (2016-11-16 HE10] │ 10 │ +│ 11 │ (2016-11-16 HE11] │ 11 │ +│ 12 │ (2016-11-16 HE12] │ 12 │ + +julia> df2 = DataFrame(; time=Interval.(he), data=1:12) +12×2 DataFrames.DataFrame +│ Row │ time │ data │ +├─────┼──────────────────────────────────────────────┼──────┤ +│ 1 │ (2016-11-16T00:00:00 .. 2016-11-16T01:00:00] │ 1 │ +│ 2 │ (2016-11-16T01:00:00 .. 2016-11-16T02:00:00] │ 2 │ +│ 3 │ (2016-11-16T02:00:00 .. 2016-11-16T03:00:00] │ 3 │ +│ 4 │ (2016-11-16T03:00:00 .. 2016-11-16T04:00:00] │ 4 │ +│ 5 │ (2016-11-16T04:00:00 .. 2016-11-16T05:00:00] │ 5 │ +│ 6 │ (2016-11-16T05:00:00 .. 2016-11-16T06:00:00] │ 6 │ +│ 7 │ (2016-11-16T06:00:00 .. 2016-11-16T07:00:00] │ 7 │ +│ 8 │ (2016-11-16T07:00:00 .. 2016-11-16T08:00:00] │ 8 │ +│ 9 │ (2016-11-16T08:00:00 .. 2016-11-16T09:00:00] │ 9 │ +│ 10 │ (2016-11-16T09:00:00 .. 2016-11-16T10:00:00] │ 10 │ +│ 11 │ (2016-11-16T10:00:00 .. 2016-11-16T11:00:00] │ 11 │ +│ 12 │ (2016-11-16T11:00:00 .. 
2016-11-16T12:00:00] │ 12 │ + +julia> df1 == df2 +true +``` + +However, `join` matches rows by hashing the key rather than by calling `==`, so equal intervals of different types (e.g., an `HourEnding` and the equivalent `Interval`) can fail to match: + +```julia +julia> df3 = DataFrame(; time=he, tag='a':'l') +12×2 DataFrames.DataFrame +│ Row │ time │ tag │ +├─────┼───────────────────┼─────┤ +│ 1 │ (2016-11-16 HE01] │ 'a' │ +│ 2 │ (2016-11-16 HE02] │ 'b' │ +│ 3 │ (2016-11-16 HE03] │ 'c' │ +│ 4 │ (2016-11-16 HE04] │ 'd' │ +│ 5 │ (2016-11-16 HE05] │ 'e' │ +│ 6 │ (2016-11-16 HE06] │ 'f' │ +│ 7 │ (2016-11-16 HE07] │ 'g' │ +│ 8 │ (2016-11-16 HE08] │ 'h' │ +│ 9 │ (2016-11-16 HE09] │ 'i' │ +│ 10 │ (2016-11-16 HE10] │ 'j' │ +│ 11 │ (2016-11-16 HE11] │ 'k' │ +│ 12 │ (2016-11-16 HE12] │ 'l' │ + +julia> join(df1, df3; on=:time) +12×3 DataFrames.DataFrame +│ Row │ time │ data │ tag │ +├─────┼───────────────────┼──────┼─────┤ +│ 1 │ (2016-11-16 HE01] │ 1 │ 'a' │ +│ 2 │ (2016-11-16 HE02] │ 2 │ 'b' │ +│ 3 │ (2016-11-16 HE03] │ 3 │ 'c' │ +│ 4 │ (2016-11-16 HE04] │ 4 │ 'd' │ +│ 5 │ (2016-11-16 HE05] │ 5 │ 'e' │ +│ 6 │ (2016-11-16 HE06] │ 6 │ 'f' │ +│ 7 │ (2016-11-16 HE07] │ 7 │ 'g' │ +│ 8 │ (2016-11-16 HE08] │ 8 │ 'h' │ +│ 9 │ (2016-11-16 HE09] │ 9 │ 'i' │ +│ 10 │ (2016-11-16 HE10] │ 10 │ 'j' │ +│ 11 │ (2016-11-16 HE11] │ 11 │ 'k' │ +│ 12 │ (2016-11-16 HE12] │ 12 │ 'l' │ + +julia> join(df2, df3; on=:time) +0×3 DataFrames.DataFrame + +``` + +When `join`ing two `DataFrame`s on a column that contains a mix of `AbstractInterval` +types, it is best to explicitly convert `AnchoredInterval`s to `Interval`s: + +```julia +julia> df3[:time] = Interval.(df3[:time]) +12-element Array{Intervals.Interval{DateTime},1}: + (2016-11-16T00:00:00 .. 2016-11-16T01:00:00] + (2016-11-16T01:00:00 .. 2016-11-16T02:00:00] + (2016-11-16T02:00:00 .. 2016-11-16T03:00:00] + (2016-11-16T03:00:00 .. 2016-11-16T04:00:00] + (2016-11-16T04:00:00 .. 2016-11-16T05:00:00] + (2016-11-16T05:00:00 .. 2016-11-16T06:00:00] + (2016-11-16T06:00:00 .. 2016-11-16T07:00:00] + (2016-11-16T07:00:00 .. 
2016-11-16T08:00:00] + (2016-11-16T08:00:00 .. 2016-11-16T09:00:00] + (2016-11-16T09:00:00 .. 2016-11-16T10:00:00] + (2016-11-16T10:00:00 .. 2016-11-16T11:00:00] + (2016-11-16T11:00:00 .. 2016-11-16T12:00:00] + +julia> join(df2, df3; on=:time) +12×3 DataFrames.DataFrame +│ Row │ time │ data │ tag │ +├─────┼──────────────────────────────────────────────┼──────┼─────┤ +│ 1 │ (2016-11-16T00:00:00 .. 2016-11-16T01:00:00] │ 1 │ 'a' │ +│ 2 │ (2016-11-16T01:00:00 .. 2016-11-16T02:00:00] │ 2 │ 'b' │ +│ 3 │ (2016-11-16T02:00:00 .. 2016-11-16T03:00:00] │ 3 │ 'c' │ +│ 4 │ (2016-11-16T03:00:00 .. 2016-11-16T04:00:00] │ 4 │ 'd' │ +│ 5 │ (2016-11-16T04:00:00 .. 2016-11-16T05:00:00] │ 5 │ 'e' │ +│ 6 │ (2016-11-16T05:00:00 .. 2016-11-16T06:00:00] │ 6 │ 'f' │ +│ 7 │ (2016-11-16T06:00:00 .. 2016-11-16T07:00:00] │ 7 │ 'g' │ +│ 8 │ (2016-11-16T07:00:00 .. 2016-11-16T08:00:00] │ 8 │ 'h' │ +│ 9 │ (2016-11-16T08:00:00 .. 2016-11-16T09:00:00] │ 9 │ 'i' │ +│ 10 │ (2016-11-16T09:00:00 .. 2016-11-16T10:00:00] │ 10 │ 'j' │ +│ 11 │ (2016-11-16T10:00:00 .. 2016-11-16T11:00:00] │ 11 │ 'k' │ +│ 12 │ (2016-11-16T11:00:00 .. 2016-11-16T12:00:00] │ 12 │ 'l' │ +``` + ## API ```@docs @@ -153,10 +304,10 @@ Inclusivity Inclusivity(::Integer) Interval AnchoredInterval -≪ -≫ HourEnding HourBeginning HE HB +≪ +≫ ```