diff --git a/src/Microsoft.ML.Api/ApiUtils.cs b/src/Microsoft.ML.Api/ApiUtils.cs index 96e821f16e..0270f6cfb2 100644 --- a/src/Microsoft.ML.Api/ApiUtils.cs +++ b/src/Microsoft.ML.Api/ApiUtils.cs @@ -23,7 +23,7 @@ private static OpCode GetAssignmentOpCode(Type t) t == typeof(DvBool) || t == typeof(DvText) || t == typeof(string) || t.IsArray || (t.IsGenericType && t.GetGenericTypeDefinition() == typeof(VBuffer<>)) || (t.IsGenericType && t.GetGenericTypeDefinition() == typeof(Nullable<>)) || - t == typeof(DvDateTime) || t == typeof(DvDateTimeZone) || t == typeof(DvTimeSpan) || t == typeof(UInt128)) + t == typeof(DateTime) || t == typeof(DateTimeOffset) || t == typeof(TimeSpan) || t == typeof(UInt128)) { return OpCodes.Stobj; } diff --git a/src/Microsoft.ML.Core/Data/ColumnType.cs b/src/Microsoft.ML.Core/Data/ColumnType.cs index 96764d68f1..1656843197 100644 --- a/src/Microsoft.ML.Core/Data/ColumnType.cs +++ b/src/Microsoft.ML.Core/Data/ColumnType.cs @@ -120,47 +120,38 @@ public bool IsBool } /// - /// Whether this type is the standard timespan type. + /// Whether this type is the standard type. /// public bool IsTimeSpan { get { - if (!(this is TimeSpanType)) - return false; - // TimeSpanType is a singleton. - Contracts.Assert(this == TimeSpanType.Instance); - return true; + Contracts.Assert((this == TimeSpanType.Instance) == (this is TimeSpanType)); + return this is TimeSpanType; } } /// - /// Whether this type is a DvDateTime. + /// Whether this type is a . /// public bool IsDateTime { get { - if (!(this is DateTimeType)) - return false; - // DateTimeType is a singleton. - Contracts.Assert(this == DateTimeType.Instance); - return true; + Contracts.Assert((this == DateTimeType.Instance) == (this is DateTimeType)); + return this is DateTimeType; } } /// - /// Whether this type is a DvDateTimeZone. + /// Whether this type is a /// public bool IsDateTimeZone { get { - if (!(this is DateTimeZoneType)) - return false; - // DateTimeZoneType is a singleton. - Contracts.Assert(this == DateTimeZoneType.Instance); - return true; + Contracts.Assert((this == DateTimeOffsetType.Instance) == (this is DateTimeOffsetType)); + return this is DateTimeOffsetType; } } @@ -319,7 +310,7 @@ public static PrimitiveType FromKind(DataKind kind) if (kind == DataKind.DT) return DateTimeType.Instance; if (kind == DataKind.DZ) - return DateTimeZoneType.Instance; + return DateTimeOffsetType.Instance; return NumberType.FromKind(kind); } } @@ -605,7 +596,7 @@ public static DateTimeType Instance } private DateTimeType() - : base(typeof(DvDateTime), DataKind.DT) + : base(typeof(DateTime), DataKind.DT) { } @@ -623,21 +614,21 @@ public override string ToString() } } - public sealed class DateTimeZoneType : PrimitiveType + public sealed class DateTimeOffsetType : PrimitiveType { - private static volatile DateTimeZoneType _instance; - public static DateTimeZoneType Instance + private static volatile DateTimeOffsetType _instance; + public static DateTimeOffsetType Instance { get { if (_instance == null) - Interlocked.CompareExchange(ref _instance, new DateTimeZoneType(), null); + Interlocked.CompareExchange(ref _instance, new DateTimeOffsetType(), null); return _instance; } } - private DateTimeZoneType() - : base(typeof(DvDateTimeZone), DataKind.DZ) + private DateTimeOffsetType() + : base(typeof(DateTimeOffset), DataKind.DZ) { } @@ -645,7 +636,7 @@ public override bool Equals(ColumnType other) { if (other == this) return true; - Contracts.Assert(!(other is DateTimeZoneType)); + Contracts.Assert(!(other is DateTimeOffsetType)); return false; } @@ -672,7 +663,7 @@ public static TimeSpanType Instance } private TimeSpanType() - : base(typeof(DvTimeSpan), DataKind.TS) + : base(typeof(TimeSpan), DataKind.TS) { } diff --git a/src/Microsoft.ML.Core/Data/DataKind.cs b/src/Microsoft.ML.Core/Data/DataKind.cs index 0249745691..634fde6dd4 100644 --- a/src/Microsoft.ML.Core/Data/DataKind.cs +++ b/src/Microsoft.ML.Core/Data/DataKind.cs @@ -165,11 +165,11 @@ public static Type ToType(this DataKind kind) case DataKind.BL: return typeof(DvBool); case DataKind.TS: - return typeof(DvTimeSpan); + return typeof(TimeSpan); case DataKind.DT: - return typeof(DvDateTime); + return typeof(DateTime); case DataKind.DZ: - return typeof(DvDateTimeZone); + return typeof(DateTimeOffset); case DataKind.UG: return typeof(UInt128); } @@ -209,11 +209,11 @@ public static bool TryGetDataKind(this Type type, out DataKind kind) kind = DataKind.TX; else if (type == typeof(DvBool) || type == typeof(bool) || type == typeof(bool?)) kind = DataKind.BL; - else if (type == typeof(DvTimeSpan)) + else if (type == typeof(TimeSpan)) kind = DataKind.TS; - else if (type == typeof(DvDateTime)) + else if (type == typeof(DateTime)) kind = DataKind.DT; - else if (type == typeof(DvDateTimeZone)) + else if (type == typeof(DateTimeOffset)) kind = DataKind.DZ; else if (type == typeof(UInt128)) kind = DataKind.UG; diff --git a/src/Microsoft.ML.Core/Data/DateTime.cs b/src/Microsoft.ML.Core/Data/DateTime.cs deleted file mode 100644 index d11be2a494..0000000000 --- a/src/Microsoft.ML.Core/Data/DateTime.cs +++ /dev/null @@ -1,550 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using Microsoft.ML.Runtime.Internal.Utilities; - -namespace Microsoft.ML.Runtime.Data -{ - using Conditional = System.Diagnostics.ConditionalAttribute; - using SysDateTime = System.DateTime; - using SysDateTimeOffset = System.DateTimeOffset; - using SysTimeSpan = System.TimeSpan; - - /// - /// A struct to represent a DateTime column type - /// - public struct DvDateTime : IEquatable, IComparable - { - public const long MaxTicks = 3155378975999999999; - private readonly DvInt8 _ticks; - - /// - /// This ctor initializes _ticks to the value of sdt.Ticks, and ignores its DateTimeKind value. - /// - public DvDateTime(SysDateTime sdt) - { - _ticks = sdt.Ticks; - AssertValid(); - } - - /// - /// This ctor accepts any value for ticks, but produces an NA if ticks is out of the legal range. - /// - public DvDateTime(DvInt8 ticks) - { - if ((ulong)ticks.RawValue > MaxTicks) - _ticks = DvInt8.NA; - else - _ticks = ticks; - AssertValid(); - } - - [Conditional("DEBUG")] - internal void AssertValid() - { - Contracts.Assert((ulong)_ticks.RawValue <= MaxTicks || _ticks.IsNA); - } - - public DvInt8 Ticks - { - get - { - AssertValid(); - return _ticks; - } - } - - // REVIEW: Add more System.DateTime members returning their corresponding 'Dv' types (task 4255). - /// - /// Gets the date component of this object. - /// - public DvDateTime Date - { - get - { - AssertValid(); - if (IsNA) - return NA; - return new DvDateTime(GetSysDateTime().Date); - } - } - - /// - /// Gets a DvDateTime object representing the current UTC date and time. - /// - public static DvDateTime UtcNow { get { return new DvDateTime(SysDateTime.UtcNow); } } - - public bool IsNA - { - get - { - AssertValid(); - return (ulong)_ticks.RawValue > MaxTicks; - } - } - - public static DvDateTime NA - { - get { return new DvDateTime(DvInt8.NA); } - } - - public static explicit operator SysDateTime?(DvDateTime dvDt) - { - if (dvDt.IsNA) - return null; - return dvDt.GetSysDateTime(); - } - - /// - /// Creates a new DvDateTime with the same number of ticks as in sdt, ignoring its DateTimeKind value. - /// - public static implicit operator DvDateTime(SysDateTime sdt) - { - return new DvDateTime(sdt); - } - - public static implicit operator DvDateTime(SysDateTime? sdt) - { - if (sdt == null) - return DvDateTime.NA; - return new DvDateTime(sdt.Value); - } - - public override string ToString() - { - AssertValid(); - if (IsNA) - return ""; - return GetSysDateTime().ToString("o"); - } - - internal SysDateTime GetSysDateTime() - { - AssertValid(); - Contracts.Assert(!IsNA); - return new SysDateTime(_ticks.RawValue); - } - - public bool Equals(DvDateTime other) - { - return _ticks.RawValue == other._ticks.RawValue; - } - - public override bool Equals(object obj) - { - return obj is DvDateTime && Equals((DvDateTime)obj); - } - - public int CompareTo(DvDateTime other) - { - if (_ticks.RawValue == other._ticks.RawValue) - return 0; - return _ticks.RawValue < other._ticks.RawValue ? -1 : 1; - } - - public override int GetHashCode() - { - return _ticks.GetHashCode(); - } - } - - /// - /// A struct to represent a DateTimeZone column type. - /// - public struct DvDateTimeZone : IEquatable, IComparable - { - public const long TicksPerMinute = 600000000; - public const long MaxMinutesOffset = 840; - public const long MinMinutesOffset = -840; - - // Stores the UTC date-time (convert to clock time by adding the offset). - private readonly DvDateTime _dateTime; - // Store the offset in minutes. - private readonly DvInt2 _offset; - - // This assumes (and asserts) that the dt/offset combination is valid. - // Callers should do the validation. - private DvDateTimeZone(DvDateTime dt, DvInt2 offset) - { - _dateTime = dt; - _offset = offset; - AssertValid(); - } - - /// - /// Given a number of ticks for the date time portion and a number of minutes for - /// the time zone offset, this constructs a new DvDateTimeZone. If anything is invalid, - /// it produces NA. - /// - /// The number of clock ticks in the date time portion - /// The time zone offset in minutes - public DvDateTimeZone(DvInt8 ticks, DvInt2 offset) - { - var dt = new DvDateTime(ticks); - if (dt.IsNA || offset.IsNA || MinMinutesOffset > offset.RawValue || offset.RawValue > MaxMinutesOffset) - { - _dateTime = DvDateTime.NA; - _offset = DvInt2.NA; - } - else - { - _offset = offset; - _dateTime = ValidateDate(dt, ref _offset); - } - AssertValid(); - } - - public DvDateTimeZone(SysDateTimeOffset dto) - { - // Since it is constructed from a SysDateTimeOffset, all the validations should work. - var success = TryValidateOffset(dto.Offset.Ticks, out _offset); - Contracts.Assert(success); - _dateTime = ValidateDate(new DvDateTime(dto.DateTime), ref _offset); - Contracts.Assert(!_dateTime.IsNA); - Contracts.Assert(!_offset.IsNA); - AssertValid(); - } - - /// - /// Constructs a DvDateTimeZone from a clock date-time and a time zone offset from UTC. - /// - /// The clock time - /// The offset - public DvDateTimeZone(DvDateTime dt, DvTimeSpan offset) - { - if (dt.IsNA || offset.IsNA || !TryValidateOffset(offset.Ticks, out _offset)) - { - _dateTime = DvDateTime.NA; - _offset = DvInt2.NA; - } - else - _dateTime = ValidateDate(dt, ref _offset); - AssertValid(); - } - - /// - /// This method takes a DvDateTime representing clock time, and a TimeSpan representing an offset, - /// validates that both the clock time and the UTC time (which is the clock time minus the offset) - /// are within the valid range, and returns a DvDateTime representing the UTC time (dateTime-offset). - /// - /// The clock time - /// The offset. This value is assumed to be validated as a legal offset: - /// a value in whole minutes, between -14 and 14 hours. - /// The UTC DvDateTime representing the input clock time minus the offset - private static DvDateTime ValidateDate(DvDateTime dateTime, ref DvInt2 offset) - { - Contracts.Assert(!dateTime.IsNA); - Contracts.Assert(!offset.IsNA); - - // Validate that both the UTC and clock times are legal. - Contracts.Assert(MinMinutesOffset <= offset.RawValue && offset.RawValue <= MaxMinutesOffset); - var offsetTicks = offset.RawValue * TicksPerMinute; - // This operation cannot overflow because offset should have already been validated to be within - // 14 hours and the DateTime instance is more than that distance from the boundaries of Int64. - long utcTicks = dateTime.Ticks.RawValue - offsetTicks; - var dvdt = new DvDateTime(utcTicks); - if (dvdt.IsNA) - offset = DvInt2.NA; - return dvdt; - } - - /// - /// This method takes a TimeSpan offset, validates that it is a legal offset for DvDateTimeZone (i.e. - /// in whole minutes, and between -14 and 14 hours), and returns the offset in number of minutes. - /// - /// - /// - /// - private static bool TryValidateOffset(DvInt8 offsetTicks, out DvInt2 offset) - { - if (offsetTicks.IsNA || offsetTicks.RawValue % TicksPerMinute != 0) - { - offset = DvInt2.NA; - return false; - } - - long mins = offsetTicks.RawValue / TicksPerMinute; - short res = (short)mins; - if (res != mins || res > MaxMinutesOffset || res < MinMinutesOffset) - { - offset = DvInt2.NA; - return false; - } - offset = res; - Contracts.Assert(!offset.IsNA); - return true; - } - - [Conditional("DEBUG")] - private void AssertValid() - { - _dateTime.AssertValid(); - if (_dateTime.IsNA) - Contracts.Assert(_offset.IsNA); - else - { - Contracts.Assert(MinMinutesOffset <= _offset.RawValue && _offset.RawValue <= MaxMinutesOffset); - Contracts.Assert((ulong)(_dateTime.Ticks.RawValue + _offset.RawValue * TicksPerMinute) - <= (ulong)DvDateTime.MaxTicks); - } - } - - public DvDateTime ClockDateTime - { - get - { - AssertValid(); - if (_dateTime.IsNA) - return DvDateTime.NA; - var res = new DvDateTime(_dateTime.Ticks.RawValue + _offset.RawValue * TicksPerMinute); - Contracts.Assert(!res.IsNA); - return res; - } - } - - /// - /// Gets the UTC date and time. - /// - public DvDateTime UtcDateTime - { - get - { - AssertValid(); - if (IsNA) - return DvDateTime.NA; - return _dateTime; - } - } - - /// - /// Gets the offset as a time span. - /// - public DvTimeSpan Offset - { - get - { - AssertValid(); - if (_offset.IsNA) - return DvTimeSpan.NA; - return new DvTimeSpan(_offset.RawValue * TicksPerMinute); - } - } - - /// - /// Gets the offset in minutes. - /// - public DvInt2 OffsetMinutes - { - get - { - AssertValid(); - return _offset; - } - } - - // REVIEW: Add more System.DateTimeOffset members returning their corresponding 'Dv' types (task 4255). - - /// - /// Gets the date component of the ClockDateTime. - /// - public DvDateTime ClockDate - { - get - { - AssertValid(); - if (IsNA) - return DvDateTime.NA; - return ClockDateTime.Date; - } - } - - /// - /// Gets the date component of the UtcDateTime. - /// - public DvDateTime UtcDate - { - get - { - AssertValid(); - if (IsNA) - return DvDateTime.NA; - return _dateTime.Date; - } - } - - /// - /// Gets a DvDateTimeZone object representing the current UTC date and time (with offset=0). - /// - public static DvDateTimeZone UtcNow { get { return new DvDateTimeZone(SysDateTimeOffset.UtcNow); } } - - public bool IsNA - { - get - { - AssertValid(); - return _dateTime.IsNA; - } - } - - // The missing value for DvDateTimeZone is represented by a DvDateTimeZone with _dateTime = DvDateTime.NA - // and _offset = 0. - public static DvDateTimeZone NA - { - get { return new DvDateTimeZone(DvDateTime.NA, DvInt2.NA); } - } - - public static explicit operator SysDateTimeOffset?(DvDateTimeZone dvDto) - { - if (dvDto.IsNA) - return null; - return dvDto.GetSysDateTimeOffset(); - } - - public static implicit operator DvDateTimeZone(SysDateTimeOffset sdto) - { - return new DvDateTimeZone(sdto); - } - - public static implicit operator DvDateTimeZone(SysDateTimeOffset? sdto) - { - if (sdto == null) - return DvDateTimeZone.NA; - return new DvDateTimeZone(sdto.Value); - } - - public override string ToString() - { - AssertValid(); - if (IsNA) - return ""; - - return GetSysDateTimeOffset().ToString("o"); - } - - private DateTimeOffset GetSysDateTimeOffset() - { - AssertValid(); - Contracts.Assert(!IsNA); - return new SysDateTimeOffset(ClockDateTime.GetSysDateTime(), new TimeSpan(0, _offset.RawValue, 0)); - } - - /// - /// Compare two values for equality. Note that this differs from System.DateTimeOffset's - /// definition of Equals, which only compares the UTC values, not the offsets. - /// - public bool Equals(DvDateTimeZone other) - { - return _offset.RawValue == other._offset.RawValue && _dateTime.Equals(other._dateTime); - } - - public override bool Equals(object obj) - { - return obj is DvDateTimeZone && Equals((DvDateTimeZone)obj); - } - - /// - /// Compare two values for ordering. Note that this differs from System.DateTimeOffset's - /// definition of CompareTo, which only compares the UTC values, not the offsets. - /// - public int CompareTo(DvDateTimeZone other) - { - AssertValid(); - other.AssertValid(); - - int res = _dateTime.CompareTo(other._dateTime); - if (res != 0) - return res; - if (_offset.RawValue == other._offset.RawValue) - return 0; - return _offset.RawValue < other._offset.RawValue ? -1 : 1; - } - - public override int GetHashCode() - { - return Hashing.CombineHash(_dateTime.GetHashCode(), _offset.GetHashCode()); - } - } - - /// - /// A struct to represent a DateTime column type - /// - public struct DvTimeSpan : IEquatable, IComparable - { - private readonly DvInt8 _ticks; - - public DvInt8 Ticks { get { return _ticks; } } - - public DvTimeSpan(DvInt8 ticks) - { - _ticks = ticks; - } - - public DvTimeSpan(SysTimeSpan sts) - { - _ticks = sts.Ticks; - } - - public DvTimeSpan(SysTimeSpan? sts) - { - _ticks = sts != null ? sts.GetValueOrDefault().Ticks : DvInt8.NA; - } - - public bool IsNA - { - get { return _ticks.IsNA; } - } - - public static DvTimeSpan NA - { - get { return new DvTimeSpan(DvInt8.NA); } - } - - public static explicit operator SysTimeSpan?(DvTimeSpan ts) - { - if (ts.IsNA) - return null; - return new SysTimeSpan(ts._ticks.RawValue); - } - - public static implicit operator DvTimeSpan(SysTimeSpan sts) - { - return new DvTimeSpan(sts); - } - - public static implicit operator DvTimeSpan(SysTimeSpan? sts) - { - return new DvTimeSpan(sts); - } - - public override string ToString() - { - if (IsNA) - return ""; - return new SysTimeSpan(_ticks.RawValue).ToString("c"); - } - - public bool Equals(DvTimeSpan other) - { - return _ticks.RawValue == other._ticks.RawValue; - } - - public override bool Equals(object obj) - { - return obj is DvTimeSpan && Equals((DvTimeSpan)obj); - } - - public int CompareTo(DvTimeSpan other) - { - if (_ticks.RawValue == other._ticks.RawValue) - return 0; - return _ticks.RawValue < other._ticks.RawValue ? -1 : 1; - } - - public override int GetHashCode() - { - return _ticks.GetHashCode(); - } - } -} diff --git a/src/Microsoft.ML.Data/Data/Conversion.cs b/src/Microsoft.ML.Data/Data/Conversion.cs index 0a9833064a..3a6c8fa329 100644 --- a/src/Microsoft.ML.Data/Data/Conversion.cs +++ b/src/Microsoft.ML.Data/Data/Conversion.cs @@ -15,8 +15,8 @@ namespace Microsoft.ML.Runtime.Data.Conversion { using BL = DvBool; - using DT = DvDateTime; - using DZ = DvDateTimeZone; + using DT = DateTime; + using DZ = DateTimeOffset; using I1 = DvInt1; using I2 = DvInt2; using I4 = DvInt4; @@ -28,7 +28,7 @@ namespace Microsoft.ML.Runtime.Data.Conversion using RawI4 = Int32; using RawI8 = Int64; using SB = StringBuilder; - using TS = DvTimeSpan; + using TS = TimeSpan; using TX = DvText; using U1 = Byte; using U2 = UInt16; @@ -252,9 +252,6 @@ private Conversions() AddIsNA(IsNA); AddIsNA(IsNA); AddIsNA(IsNA); - AddIsNA(IsNA); - AddIsNA
(IsNA); - AddIsNA(IsNA); AddGetNA(GetNA); AddGetNA(GetNA); @@ -264,9 +261,6 @@ private Conversions() AddGetNA(GetNA); AddGetNA(GetNA); AddGetNA(GetNA); - AddGetNA(GetNA); - AddGetNA
(GetNA); - AddGetNA(GetNA); AddHasNA(HasNA); AddHasNA(HasNA); @@ -276,9 +270,6 @@ private Conversions() AddHasNA(HasNA); AddHasNA(HasNA); AddHasNA(HasNA); - AddHasNA(HasNA); - AddHasNA
(HasNA); - AddHasNA(HasNA); AddIsDef(IsDefault); AddIsDef(IsDefault); @@ -853,9 +844,6 @@ public ValueGetter GetNAOrDefaultGetter(ColumnType type) private bool IsNA(ref R4 src) => src.IsNA(); private bool IsNA(ref R8 src) => src.IsNA(); private bool IsNA(ref BL src) => src.IsNA; - private bool IsNA(ref TS src) => src.IsNA; - private bool IsNA(ref DT src) => src.IsNA; - private bool IsNA(ref DZ src) => src.IsNA; private bool IsNA(ref TX src) => src.IsNA; #endregion IsNA @@ -867,9 +855,6 @@ public ValueGetter GetNAOrDefaultGetter(ColumnType type) private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA()) return true; } return false; } private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA()) return true; } return false; } private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; } - private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; } - private bool HasNA(ref VBuffer
src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; } - private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; } private bool HasNA(ref VBuffer src) { for (int i = 0; i < src.Count; i++) { if (src.Values[i].IsNA) return true; } return false; } #endregion HasNA @@ -907,9 +892,6 @@ public ValueGetter GetNAOrDefaultGetter(ColumnType type) private void GetNA(ref R4 value) => value = R4.NaN; private void GetNA(ref R8 value) => value = R8.NaN; private void GetNA(ref BL value) => value = BL.NA; - private void GetNA(ref TS value) => value = TS.NA; - private void GetNA(ref DT value) => value = DT.NA; - private void GetNA(ref DZ value) => value = DZ.NA; private void GetNA(ref TX value) => value = TX.NA; #endregion GetNA @@ -1041,9 +1023,9 @@ public void Convert(ref BL src, ref SB dst) else if (src.IsTrue) dst.Append("1"); } - public void Convert(ref TS src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA) dst.AppendFormat("{0:c}", (TimeSpan)src); } - public void Convert(ref DT src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA) dst.AppendFormat("{0:o}", (DateTime)src); } - public void Convert(ref DZ src, ref SB dst) { ClearDst(ref dst); if (!src.IsNA) dst.AppendFormat("{0:o}", (DateTimeOffset)src); } + public void Convert(ref TS src, ref SB dst) { ClearDst(ref dst); dst.AppendFormat("{0:c}", src); } + public void Convert(ref DT src, ref SB dst) { ClearDst(ref dst); dst.AppendFormat("{0:o}", src); } + public void Convert(ref DZ src, ref SB dst) { ClearDst(ref dst); dst.AppendFormat("{0:o}", src); } #endregion ToStringBuilder #region FromR4 @@ -1472,62 +1454,50 @@ public bool TryParse(ref TX src, out R8 dst) public bool TryParse(ref TX src, out TS dst) { + dst = default; if (!src.HasChars) { - if (src.IsNA) - dst = TS.NA; - else - dst = default(TS); + Contracts.Check(!src.IsNA, "Missing values cannot be converted to boolean value."); return true; } - TimeSpan res; - if (TimeSpan.TryParse(src.ToString(), CultureInfo.InvariantCulture, out res)) - { - dst = new TS(res); + + if (TimeSpan.TryParse(src.ToString(), CultureInfo.InvariantCulture, out dst)) return true; - } - dst = TS.NA; - return IsStdMissing(ref src); + + Contracts.Check(!IsStdMissing(ref src), "Missing values cannot be converted to boolean value."); + return true; } public bool TryParse(ref TX src, out DT dst) { + dst = default; if (!src.HasChars) { - if (src.IsNA) - dst = DvDateTime.NA; - else - dst = default(DvDateTime); + Contracts.Check(!src.IsNA, "Missing values cannot be converted to boolean value."); return true; } - DateTime res; - if (DateTime.TryParse(src.ToString(), CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out res)) - { - dst = new DT(res); + + if (DateTime.TryParse(src.ToString(), CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out dst)) return true; - } - dst = DvDateTime.NA; - return IsStdMissing(ref src); + + Contracts.Check(!IsStdMissing(ref src), "Missing values cannot be converted to boolean value."); + return true; } public bool TryParse(ref TX src, out DZ dst) { + dst = default; if (!src.HasChars) { - if (src.IsNA) - dst = DvDateTimeZone.NA; - else - dst = default(DvDateTimeZone); + Contracts.Check(!src.IsNA, "Missing values cannot be converted to boolean value."); return true; } - DateTimeOffset res; - if (DateTimeOffset.TryParse(src.ToString(), CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal, out res)) - { - dst = new DZ(res); + + if (DateTimeOffset.TryParse(src.ToString(), CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal, out dst)) return true; - } - dst = DvDateTimeZone.NA; - return IsStdMissing(ref src); + + Contracts.Check(!IsStdMissing(ref src), "Missing values cannot be converted to boolean value."); + return true; } // These map unparsable and overflow values to "NA", which is the value Ix.MinValue. Note that this NA @@ -1804,21 +1774,10 @@ public void Convert(ref TX src, ref SB dst) src.AddToStringBuilder(dst); } - public void Convert(ref TX span, ref TS value) - { - if (!TryParse(ref span, out value)) - Contracts.Assert(value.IsNA); - } - public void Convert(ref TX span, ref DT value) - { - if (!TryParse(ref span, out value)) - Contracts.Assert(value.IsNA); - } - public void Convert(ref TX span, ref DZ value) - { - if (!TryParse(ref span, out value)) - Contracts.Assert(value.IsNA); - } + public void Convert(ref TX span, ref TS value) => TryParse(ref span, out value); + public void Convert(ref TX span, ref DT value) => TryParse(ref span, out value); + public void Convert(ref TX span, ref DZ value) => TryParse(ref span, out value); + #endregion FromTX #region FromBL diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/CodecFactory.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/CodecFactory.cs index d04adaf099..dbddfad9b9 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/CodecFactory.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/CodecFactory.cs @@ -54,18 +54,20 @@ public CodecFactory(IHostEnvironment env, MemoryStreamPool memPool = null) RegisterSimpleCodec(new UnsafeTypeCodec(this)); RegisterSimpleCodec(new UnsafeTypeCodec(this)); RegisterSimpleCodec(new UnsafeTypeCodec(this)); - RegisterSimpleCodec(new UnsafeTypeCodec(this)); + RegisterSimpleCodec(new UnsafeTypeCodec(this)); RegisterSimpleCodec(new DvTextCodec(this)); RegisterSimpleCodec(new BoolCodec(this)); RegisterSimpleCodec(new DateTimeCodec(this)); - RegisterSimpleCodec(new DateTimeZoneCodec(this)); + RegisterSimpleCodec(new DateTimeOffsetCodec(this)); RegisterSimpleCodec(new UnsafeTypeCodec(this)); // Register the old boolean reading codec. var oldBool = new OldBoolCodec(this); RegisterOtherCodec(oldBool.LoadName, oldBool.GetCodec); - RegisterOtherCodec("VBuffer", GetVBufferCodec); + RegisterOtherCodec("DvDateTimeZone", new DateTimeOffsetCodec(this).GetCodec); + RegisterOtherCodec("DvDateTime", new DateTimeCodec(this).GetCodec); + RegisterOtherCodec("DvTimeSpan", new UnsafeTypeCodec(this).GetCodec); RegisterOtherCodec("Key", GetKeyCodec); } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs index f840773872..09bfc5636b 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/Codecs.cs @@ -179,10 +179,10 @@ public override string LoadName } // Gatekeeper to ensure T is a type that is supported by UnsafeTypeCodec. - // Throws an exception if T is neither a DvTimeSpan nor a NumberType. + // Throws an exception if T is neither a TimeSpan nor a NumberType. private static ColumnType UnsafeColumnType(Type type) { - return type == typeof(DvTimeSpan) ? (ColumnType)TimeSpanType.Instance : NumberType.FromType(type); + return type == typeof(TimeSpan) ? (ColumnType)TimeSpanType.Instance : NumberType.FromType(type); } public UnsafeTypeCodec(CodecFactory factory) @@ -598,24 +598,24 @@ public override void Get(ref DvBool value) } } - private sealed class DateTimeCodec : SimpleCodec + private sealed class DateTimeCodec : SimpleCodec { public DateTimeCodec(CodecFactory factory) : base(factory, DateTimeType.Instance) { } - public override IValueWriter OpenWriter(Stream stream) + public override IValueWriter OpenWriter(Stream stream) { return new Writer(this, stream); } - public override IValueReader OpenReader(Stream stream, int items) + public override IValueReader OpenReader(Stream stream, int items) { return new Reader(this, stream, items); } - private sealed class Writer : ValueWriterBase + private sealed class Writer : ValueWriterBase { private long _numWritten; @@ -624,11 +624,9 @@ public Writer(DateTimeCodec codec, Stream stream) { } - public override void Write(ref DvDateTime value) + public override void Write(ref DateTime value) { - var ticks = value.Ticks.RawValue; - Contracts.Assert(ticks == DvInt8.RawNA || (ulong)ticks <= DvDateTime.MaxTicks); - Writer.Write(ticks); + Writer.Write(value.Ticks); _numWritten++; } @@ -643,10 +641,10 @@ public override long GetCommitLengthEstimate() } } - private sealed class Reader : ValueReaderBase + private sealed class Reader : ValueReaderBase { private int _remaining; - private DvDateTime _value; + private DateTime _value; public Reader(DateTimeCodec codec, Stream stream, int items) : base(codec.Factory, stream) @@ -657,74 +655,63 @@ public Reader(DateTimeCodec codec, Stream stream, int items) public override void MoveNext() { Contracts.Assert(_remaining > 0, "already consumed all values"); - var value = Reader.ReadInt64(); - Contracts.CheckDecode(value == DvInt8.RawNA || (ulong)value <= DvDateTime.MaxTicks); - _value = new DvDateTime(value); + var ticks = Reader.ReadInt64(); + _value = new DateTime(ticks == long.MinValue ? default : ticks); _remaining--; } - public override void Get(ref DvDateTime value) + public override void Get(ref DateTime value) { value = _value; } } } - private sealed class DateTimeZoneCodec : SimpleCodec + private sealed class DateTimeOffsetCodec : SimpleCodec { - private readonly MadeObjectPool _shortBufferPool; private readonly MadeObjectPool _longBufferPool; + private readonly MadeObjectPool _shortBufferPool; - public DateTimeZoneCodec(CodecFactory factory) - : base(factory, DateTimeZoneType.Instance) + public DateTimeOffsetCodec(CodecFactory factory) + : base(factory, DateTimeOffsetType.Instance) { - _shortBufferPool = new MadeObjectPool(() => null); _longBufferPool = new MadeObjectPool(() => null); + _shortBufferPool = new MadeObjectPool(() => null); } - public override IValueWriter OpenWriter(Stream stream) + public override IValueWriter OpenWriter(Stream stream) { return new Writer(this, stream); } - public override IValueReader OpenReader(Stream stream, int items) + public override IValueReader OpenReader(Stream stream, int items) { return new Reader(this, stream, items); } - private sealed class Writer : ValueWriterBase + private sealed class Writer : ValueWriterBase { private List _offsets; private List _ticks; - public Writer(DateTimeZoneCodec codec, Stream stream) + public Writer(DateTimeOffsetCodec codec, Stream stream) : base(codec.Factory, stream) { _offsets = new List(); _ticks = new List(); } - public override void Write(ref DvDateTimeZone value) + public override void Write(ref DateTimeOffset value) { Contracts.Assert(_offsets != null, "writer was already committed"); - var ticks = value.ClockDateTime.Ticks; - var offset = value.OffsetMinutes; + _ticks.Add(value.DateTime.Ticks); - _ticks.Add(ticks.RawValue); - if (ticks.IsNA) - { - Contracts.Assert(offset.IsNA); - _offsets.Add(0); - } - else - { - Contracts.Assert( - offset.RawValue >= DvDateTimeZone.MinMinutesOffset && - offset.RawValue <= DvDateTimeZone.MaxMinutesOffset); - Contracts.Assert(0 <= ticks.RawValue && ticks.RawValue <= DvDateTime.MaxTicks); - _offsets.Add(offset.RawValue); - } + //DateTimeOffset exposes its offset as a TimeSpan, but internally it uses short and in minutes. + //https://github.com/dotnet/coreclr/blob/9499b08eefd895158c3f3c7834e185a73619128d/src/System.Private.CoreLib/shared/System/DateTimeOffset.cs#L51-L53 + //https://github.com/dotnet/coreclr/blob/9499b08eefd895158c3f3c7834e185a73619128d/src/System.Private.CoreLib/shared/System/DateTimeOffset.cs#L286-L292 + //From everything online(ISO8601, RFC3339, SQL Server doc, the offset supports the range -14 to 14 hours, and only supports minute precision. + _offsets.Add((short)(value.Offset.TotalMinutes)); } public override void Commit() @@ -740,13 +727,13 @@ public override void Commit() public override long GetCommitLengthEstimate() { - return (long)_offsets.Count * (sizeof(Int64) + sizeof(Int16)); + return (long)_offsets.Count * (sizeof(short) + sizeof(Int64)); } } - private sealed class Reader : ValueReaderBase + private sealed class Reader : ValueReaderBase { - private readonly DateTimeZoneCodec _codec; + private readonly DateTimeOffsetCodec _codec; private readonly int _entries; private short[] _offsets; @@ -754,7 +741,7 @@ private sealed class Reader : ValueReaderBase private int _index; private bool _disposed; - public Reader(DateTimeZoneCodec codec, Stream stream, int items) + public Reader(DateTimeOffsetCodec codec, Stream stream, int items) : base(codec.Factory, stream) { _codec = codec; @@ -764,17 +751,12 @@ public Reader(DateTimeZoneCodec codec, Stream stream, int items) _offsets = _codec._shortBufferPool.Get(); Utils.EnsureSize(ref _offsets, _entries, false); for (int i = 0; i < _entries; i++) - { _offsets[i] = Reader.ReadInt16(); - Contracts.CheckDecode(DvDateTimeZone.MinMinutesOffset <= _offsets[i] && _offsets[i] <= DvDateTimeZone.MaxMinutesOffset); - } + _ticks = _codec._longBufferPool.Get(); Utils.EnsureSize(ref _ticks, _entries, false); for (int i = 0; i < _entries; i++) - { _ticks[i] = Reader.ReadInt64(); - Contracts.CheckDecode(_ticks[i] == DvInt8.RawNA || (ulong)_ticks[i] <= DvDateTime.MaxTicks); - } } public override void MoveNext() @@ -783,10 +765,12 @@ public override void MoveNext() Contracts.Check(++_index < _entries, "reader already read all values"); } - public override void Get(ref DvDateTimeZone value) + public override void Get(ref DateTimeOffset value) { Contracts.Assert(!_disposed); - value = new DvDateTimeZone(_ticks[_index], _offsets[_index]); + var ticks = _ticks[_index]; + var offset = _offsets[_index]; + value = new DateTimeOffset(new DateTime(ticks == long.MinValue ? default : ticks), new TimeSpan(0, offset == short.MinValue ? default : offset, 0)); } public override void Dispose() diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/UnsafeTypeOps.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/UnsafeTypeOps.cs index 026228d6be..1da16662c9 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/UnsafeTypeOps.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/UnsafeTypeOps.cs @@ -46,7 +46,7 @@ static UnsafeTypeOpsFactory() _type2ops[typeof(UInt64)] = new UInt64UnsafeTypeOps(); _type2ops[typeof(Single)] = new SingleUnsafeTypeOps(); _type2ops[typeof(Double)] = new DoubleUnsafeTypeOps(); - _type2ops[typeof(DvTimeSpan)] = new DvTimeSpanUnsafeTypeOps(); + _type2ops[typeof(TimeSpan)] = new TimeSpanUnsafeTypeOps(); _type2ops[typeof(UInt128)] = new UgUnsafeTypeOps(); } @@ -227,17 +227,21 @@ public override unsafe void Apply(Double[] array, Action func) public override Double Read(BinaryReader reader) { return reader.ReadDouble(); } } - private sealed class DvTimeSpanUnsafeTypeOps : UnsafeTypeOps + private sealed class TimeSpanUnsafeTypeOps : UnsafeTypeOps { public override int Size { get { return sizeof(Int64); } } - public override unsafe void Apply(DvTimeSpan[] array, Action func) + public override unsafe void Apply(TimeSpan[] array, Action func) { - fixed (DvTimeSpan* pArray = array) + fixed (TimeSpan* pArray = array) func(new IntPtr(pArray)); } - public override void Write(DvTimeSpan a, BinaryWriter writer) { writer.Write(a.Ticks.RawValue); } - public override DvTimeSpan Read(BinaryReader reader) { return new DvTimeSpan(reader.ReadInt64()); } + public override void Write(TimeSpan a, BinaryWriter writer) { writer.Write(a.Ticks); } + public override TimeSpan Read(BinaryReader reader) + { + var ticks = reader.ReadInt64(); + return new TimeSpan(ticks == long.MinValue ? default : ticks); + } } private sealed class UgUnsafeTypeOps : UnsafeTypeOps diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs index 48f3f9ddc3..664b5184cc 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextSaver.cs @@ -99,17 +99,17 @@ protected ValueWriterBase(PrimitiveType type, int source, char sep) } else if (type.IsTimeSpan) { - ValueMapper c = MapTimeSpan; + ValueMapper c = MapTimeSpan; Conv = (ValueMapper)(Delegate)c; } else if (type.IsDateTime) { - ValueMapper c = MapDateTime; + ValueMapper c = MapDateTime; Conv = (ValueMapper)(Delegate)c; } else if (type.IsDateTimeZone) { - ValueMapper c = MapDateTimeZone; + ValueMapper c = MapDateTimeZone; Conv = (ValueMapper)(Delegate)c; } else @@ -125,17 +125,17 @@ protected void MapText(ref DvText src, ref StringBuilder sb) TextSaverUtils.MapText(ref src, ref sb, Sep); } - protected void MapTimeSpan(ref DvTimeSpan src, ref StringBuilder sb) + protected void MapTimeSpan(ref TimeSpan src, ref StringBuilder sb) { TextSaverUtils.MapTimeSpan(ref src, ref sb); } - protected void MapDateTime(ref DvDateTime src, ref StringBuilder sb) + protected void MapDateTime(ref DateTime src, ref StringBuilder sb) { TextSaverUtils.MapDateTime(ref src, ref sb); } - protected void MapDateTimeZone(ref DvDateTimeZone src, ref StringBuilder sb) + protected void MapDateTimeZone(ref DateTimeOffset src, ref StringBuilder sb) { TextSaverUtils.MapDateTimeZone(ref src, ref sb); } @@ -851,34 +851,34 @@ internal static void MapText(ref DvText src, ref StringBuilder sb, char sep) } } - internal static void MapTimeSpan(ref DvTimeSpan src, ref StringBuilder sb) + internal static void MapTimeSpan(ref TimeSpan src, ref StringBuilder sb) { if (sb == null) sb = new StringBuilder(); else sb.Clear(); - if (!src.IsNA) - sb.AppendFormat("\"{0:c}\"", (TimeSpan)src); + + sb.AppendFormat("\"{0:c}\"", src); } - internal static void MapDateTime(ref DvDateTime src, ref StringBuilder sb) + internal static void MapDateTime(ref DateTime src, ref StringBuilder sb) { if (sb == null) sb = new StringBuilder(); else sb.Clear(); - if (!src.IsNA) - sb.AppendFormat("\"{0:o}\"", (DateTime)src); + + sb.AppendFormat("\"{0:o}\"", src); } - internal static void MapDateTimeZone(ref DvDateTimeZone src, ref StringBuilder sb) + internal static void MapDateTimeZone(ref DateTimeOffset src, ref StringBuilder sb) { if (sb == null) sb = new StringBuilder(); else sb.Clear(); - if (!src.IsNA) - sb.AppendFormat("\"{0:o}\"", (DateTimeOffset)src); + + sb.AppendFormat("\"{0:o}\"", src); } } } diff --git a/src/Microsoft.ML.Data/StaticPipe/StaticSchemaShape.cs b/src/Microsoft.ML.Data/StaticPipe/StaticSchemaShape.cs index 39cc6cd316..9f8890f732 100644 --- a/src/Microsoft.ML.Data/StaticPipe/StaticSchemaShape.cs +++ b/src/Microsoft.ML.Data/StaticPipe/StaticSchemaShape.cs @@ -168,7 +168,7 @@ private static Type GetTypeOrNull(SchemaShape.Column col) pt == NumberType.I1 || pt == NumberType.I2 || pt == NumberType.I4 || pt == NumberType.I4 || pt == NumberType.U1 || pt == NumberType.U2 || pt == NumberType.U4 || pt == NumberType.U4 || pt == NumberType.R4 || pt == NumberType.R8 || pt == NumberType.UG || pt == BoolType.Instance || - pt == DateTimeType.Instance || pt == DateTimeZoneType.Instance || pt == TimeSpanType.Instance || + pt == DateTimeType.Instance || pt == DateTimeOffsetType.Instance || pt == TimeSpanType.Instance || pt == TextType.Instance)) { return (vecType ?? typeof(Scalar<>)).MakeGenericType(physType); @@ -312,7 +312,7 @@ private static Type GetTypeOrNull(IColumn col) pt == NumberType.I1 || pt == NumberType.I2 || pt == NumberType.I4 || pt == NumberType.I8 || pt == NumberType.U1 || pt == NumberType.U2 || pt == NumberType.U4 || pt == NumberType.U8 || pt == NumberType.R4 || pt == NumberType.R8 || pt == NumberType.UG || pt == BoolType.Instance || - pt == DateTimeType.Instance || pt == DateTimeZoneType.Instance || pt == TimeSpanType.Instance || + pt == DateTimeType.Instance || pt == DateTimeOffsetType.Instance || pt == TimeSpanType.Instance || pt == TextType.Instance)) { return (vecType ?? typeof(Scalar<>)).MakeGenericType(physType); diff --git a/src/Microsoft.ML.Parquet/ParquetLoader.cs b/src/Microsoft.ML.Parquet/ParquetLoader.cs index 503debae65..e7d7df0c0d 100644 --- a/src/Microsoft.ML.Parquet/ParquetLoader.cs +++ b/src/Microsoft.ML.Parquet/ParquetLoader.cs @@ -358,7 +358,7 @@ private ColumnType ConvertFieldType(DataType parquetType) case DataType.Decimal: return NumberType.R8; case DataType.DateTimeOffset: - return DateTimeZoneType.Instance; + return DateTimeOffsetType.Instance; case DataType.Interval: return TimeSpanType.Instance; default: @@ -527,9 +527,9 @@ private Delegate CreateGetterDelegate(int col) case DataType.Decimal: return CreateGetterDelegateCore(col, _parquetConversions.Conv); case DataType.DateTimeOffset: - return CreateGetterDelegateCore(col, _parquetConversions.Conv); + return CreateGetterDelegateCore(col, _parquetConversions.Conv); case DataType.Interval: - return CreateGetterDelegateCore(col, _parquetConversions.Conv); + return CreateGetterDelegateCore(col, _parquetConversions.Conv); default: return CreateGetterDelegateCore(col, _parquetConversions.Conv); } @@ -700,7 +700,7 @@ public ParquetConversions(IChannel channel) public void Conv(ref bool? src, ref DvBool dst) => dst = src ?? DvBool.NA; - public void Conv(ref DateTimeOffset src, ref DvDateTimeZone dst) => dst = src; + public void Conv(ref DateTimeOffset src, ref DateTimeOffset dst) => dst = src; public void Conv(ref IList src, ref DvText dst) => dst = new DvText(ConvertListToString(src)); @@ -727,22 +727,13 @@ public void Conv(ref BigInteger src, ref UInt128 dst) } /// - /// Converts a Parquet Interval data type value to a DvTimeSpan data type value. + /// Converts a Parquet Interval data type value to a TimeSpan data type value. /// /// Parquet Interval value (int : months, int : days, int : milliseconds). - /// DvTimeSpan object. - public void Conv(ref Interval src, ref DvTimeSpan dst) + /// TimeSpan object. + public void Conv(ref Interval src, ref TimeSpan dst) { - try - { - dst = new DvTimeSpan(TimeSpan.FromDays(src.Months * 30 + src.Days) + TimeSpan.FromMilliseconds(src.Millis)); - } - catch (Exception ex) - { - // Handle TimeSpan OverflowException - _ch.Error("Cannot convert Inteval to DvTimeSpan. Exception : '{0}'", ex.Message); - dst = DvTimeSpan.NA; - } + dst = TimeSpan.FromDays(src.Months * 30 + src.Days) + TimeSpan.FromMilliseconds(src.Millis); } private string ConvertListToString(IList list) diff --git a/src/Microsoft.ML.Transforms/NAReplaceUtils.cs b/src/Microsoft.ML.Transforms/NAReplaceUtils.cs index 2340f9b413..fe58be6503 100644 --- a/src/Microsoft.ML.Transforms/NAReplaceUtils.cs +++ b/src/Microsoft.ML.Transforms/NAReplaceUtils.cs @@ -14,7 +14,7 @@ public sealed partial class NAReplaceTransform { private static StatAggregator CreateStatAggregator(IChannel ch, ColumnType type, ReplacementKind? kind, bool bySlot, IRowCursor cursor, int col) { - ch.Assert(type.ItemType.IsNumber || type.ItemType.IsTimeSpan || type.ItemType.IsDateTime); + ch.Assert(type.ItemType.IsNumber); if (!type.IsVector) { // The type is a scalar. @@ -34,10 +34,6 @@ private static StatAggregator CreateStatAggregator(IChannel ch, ColumnType type, return new R4.MeanAggregatorOne(ch, cursor, col); case DataKind.R8: return new R8.MeanAggregatorOne(ch, cursor, col); - case DataKind.TS: - return new Long.MeanAggregatorOne(ch, type, cursor, col); - case DataKind.DT: - return new Long.MeanAggregatorOne(ch, type, cursor, col); default: break; } @@ -58,10 +54,6 @@ private static StatAggregator CreateStatAggregator(IChannel ch, ColumnType type, return new R4.MinMaxAggregatorOne(ch, cursor, col, kind == ReplacementKind.Max); case DataKind.R8: return new R8.MinMaxAggregatorOne(ch, cursor, col, kind == ReplacementKind.Max); - case DataKind.TS: - return new Long.MinMaxAggregatorOne(ch, type, cursor, col, kind == ReplacementKind.Max); - case DataKind.DT: - return new Long.MinMaxAggregatorOne(ch, type, cursor, col, kind == ReplacementKind.Max); default: break; } @@ -90,10 +82,6 @@ private static StatAggregator CreateStatAggregator(IChannel ch, ColumnType type, return new R4.MeanAggregatorBySlot(ch, type, cursor, col); case DataKind.R8: return new R8.MeanAggregatorBySlot(ch, type, cursor, col); - case DataKind.TS: - return new Long.MeanAggregatorBySlot(ch, type, cursor, col); - case DataKind.DT: - return new Long.MeanAggregatorBySlot(ch, type, cursor, col); default: break; } @@ -114,10 +102,6 @@ private static StatAggregator CreateStatAggregator(IChannel ch, ColumnType type, return new R4.MinMaxAggregatorBySlot(ch, type, cursor, col, kind == ReplacementKind.Max); case DataKind.R8: return new R8.MinMaxAggregatorBySlot(ch, type, cursor, col, kind == ReplacementKind.Max); - case DataKind.TS: - return new Long.MinMaxAggregatorBySlot(ch, type, cursor, col, kind == ReplacementKind.Max); - case DataKind.DT: - return new Long.MinMaxAggregatorBySlot(ch, type, cursor, col, kind == ReplacementKind.Max); default: break; } @@ -142,10 +126,6 @@ private static StatAggregator CreateStatAggregator(IChannel ch, ColumnType type, return new R4.MeanAggregatorAcrossSlots(ch, cursor, col); case DataKind.R8: return new R8.MeanAggregatorAcrossSlots(ch, cursor, col); - case DataKind.TS: - return new Long.MeanAggregatorAcrossSlots(ch, type, cursor, col); - case DataKind.DT: - return new Long.MeanAggregatorAcrossSlots(ch, type, cursor, col); default: break; } @@ -166,10 +146,6 @@ private static StatAggregator CreateStatAggregator(IChannel ch, ColumnType type, return new R4.MinMaxAggregatorAcrossSlots(ch, cursor, col, kind == ReplacementKind.Max); case DataKind.R8: return new R8.MinMaxAggregatorAcrossSlots(ch, cursor, col, kind == ReplacementKind.Max); - case DataKind.TS: - return new Long.MinMaxAggregatorAcrossSlots(ch, type, cursor, col, kind == ReplacementKind.Max); - case DataKind.DT: - return new Long.MinMaxAggregatorAcrossSlots(ch, type, cursor, col, kind == ReplacementKind.Max); default: break; } @@ -1655,16 +1631,9 @@ private static Converter CreateConverter(ColumnType type) { Contracts.AssertValue(type); Contracts.Assert(typeof(TItem) == type.ItemType.RawType); - Converter converter; - if (type.ItemType.IsTimeSpan) - converter = new TSConverter(); - else if (type.ItemType.IsDateTime) - converter = new DTConverter(); - else - { - Contracts.Assert(type.ItemType.RawKind == DataKind.I8); - converter = new I8Converter(); - } + Contracts.Assert(type.ItemType.RawKind == DataKind.I8); + + Converter converter = new I8Converter(); return (Converter)converter; } @@ -1694,34 +1663,6 @@ public override DvInt8 FromLong(long val) return (DvInt8)val; } } - - private sealed class TSConverter : Converter - { - public override long ToLong(DvTimeSpan val) - { - return val.Ticks.RawValue; - } - - public override DvTimeSpan FromLong(long val) - { - Contracts.Assert(DvInt8.RawNA != val); - return new DvTimeSpan(val); - } - } - - private sealed class DTConverter : Converter - { - public override long ToLong(DvDateTime val) - { - return val.Ticks.RawValue; - } - - public override DvDateTime FromLong(long val) - { - Contracts.Assert(0 <= val && val <= DvDateTime.MaxTicks); - return new DvDateTime(val); - } - } } } } \ No newline at end of file diff --git a/src/Microsoft.ML/Data/TextLoader.cs b/src/Microsoft.ML/Data/TextLoader.cs index 330412185e..27d4a9e319 100644 --- a/src/Microsoft.ML/Data/TextLoader.cs +++ b/src/Microsoft.ML/Data/TextLoader.cs @@ -184,11 +184,11 @@ private static bool TryGetDataKind(Type type, out DataKind kind) kind = DataKind.TX; else if (type == typeof(DvBool) || type == typeof(bool)) kind = DataKind.BL; - else if (type == typeof(DvTimeSpan) || type == typeof(TimeSpan)) + else if (type == typeof(TimeSpan)) kind = DataKind.TS; - else if (type == typeof(DvDateTime) || type == typeof(DateTime)) + else if (type == typeof(DateTime)) kind = DataKind.DT; - else if (type == typeof(DvDateTimeZone) || type == typeof(TimeZoneInfo)) + else if (type == typeof(DateTimeOffset)) kind = DataKind.DZ; else if (type == typeof(UInt128)) kind = DataKind.UG; diff --git a/test/BaselineOutput/SingleDebug/Command/DataTypes-1-out.txt b/test/BaselineOutput/SingleDebug/Command/DataTypes-1-out.txt new file mode 100644 index 0000000000..fe04f014c2 --- /dev/null +++ b/test/BaselineOutput/SingleDebug/Command/DataTypes-1-out.txt @@ -0,0 +1 @@ +Wrote 5 rows across 9 columns in %Time% diff --git a/test/BaselineOutput/SingleDebug/Command/DataTypes-2-out.txt b/test/BaselineOutput/SingleDebug/Command/DataTypes-2-out.txt new file mode 100644 index 0000000000..a2aaab4439 --- /dev/null +++ b/test/BaselineOutput/SingleDebug/Command/DataTypes-2-out.txt @@ -0,0 +1 @@ +Wrote 5 rows of length 9 diff --git a/test/BaselineOutput/SingleDebug/Command/Datatypes-datatypes.txt b/test/BaselineOutput/SingleDebug/Command/Datatypes-datatypes.txt index e7d128e400..e37863c293 100644 --- a/test/BaselineOutput/SingleDebug/Command/Datatypes-datatypes.txt +++ b/test/BaselineOutput/SingleDebug/Command/Datatypes-datatypes.txt @@ -16,4 +16,4 @@ bl i1 i2 i4 i8 ts dto dt tx 1 -127 -32767 -2147483647 -9223372036854775807 "7.00:00:00" "2008-11-30T00:00:00.0000000+00:00" "2013-08-05T00:00:00.0000000" xyz "7.00:00:00" "2008-11-30T00:00:00.0000000+00:00" "2013-08-05T00:00:00.0000000" 9 0:0 - + "00:00:00" "0001-01-01T00:00:00.0000000+00:00" "0001-01-01T00:00:00.0000000" diff --git a/test/BaselineOutput/SingleRelease/Command/DataTypes-1-out.txt b/test/BaselineOutput/SingleRelease/Command/DataTypes-1-out.txt new file mode 100644 index 0000000000..fe04f014c2 --- /dev/null +++ b/test/BaselineOutput/SingleRelease/Command/DataTypes-1-out.txt @@ -0,0 +1 @@ +Wrote 5 rows across 9 columns in %Time% diff --git a/test/BaselineOutput/SingleRelease/Command/DataTypes-2-out.txt b/test/BaselineOutput/SingleRelease/Command/DataTypes-2-out.txt new file mode 100644 index 0000000000..a2aaab4439 --- /dev/null +++ b/test/BaselineOutput/SingleRelease/Command/DataTypes-2-out.txt @@ -0,0 +1 @@ +Wrote 5 rows of length 9 diff --git a/test/BaselineOutput/SingleRelease/Command/Datatypes-datatypes.txt b/test/BaselineOutput/SingleRelease/Command/Datatypes-datatypes.txt index e7d128e400..e37863c293 100644 --- a/test/BaselineOutput/SingleRelease/Command/Datatypes-datatypes.txt +++ b/test/BaselineOutput/SingleRelease/Command/Datatypes-datatypes.txt @@ -16,4 +16,4 @@ bl i1 i2 i4 i8 ts dto dt tx 1 -127 -32767 -2147483647 -9223372036854775807 "7.00:00:00" "2008-11-30T00:00:00.0000000+00:00" "2013-08-05T00:00:00.0000000" xyz "7.00:00:00" "2008-11-30T00:00:00.0000000+00:00" "2013-08-05T00:00:00.0000000" 9 0:0 - + "00:00:00" "0001-01-01T00:00:00.0000000+00:00" "0001-01-01T00:00:00.0000000" diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/CoreBaseTestClass.cs b/test/Microsoft.ML.Core.Tests/UnitTests/CoreBaseTestClass.cs index 35859783ad..e24e80ae50 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/CoreBaseTestClass.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/CoreBaseTestClass.cs @@ -180,11 +180,11 @@ protected Func GetColumnComparer(IRow r1, IRow r2, int col, ColumnType typ case DataKind.Bool: return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); case DataKind.TimeSpan: - return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); + return GetComparerOne(r1, r2, col, (x, y) => x.Ticks == y.Ticks); case DataKind.DT: - return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); + return GetComparerOne(r1, r2, col, (x, y) => x.Ticks == y.Ticks); case DataKind.DZ: - return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); + return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); case DataKind.UG: return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); } @@ -223,11 +223,11 @@ protected Func GetColumnComparer(IRow r1, IRow r2, int col, ColumnType typ case DataKind.Bool: return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); case DataKind.TimeSpan: - return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); + return GetComparerVec(r1, r2, col, size, (x, y) => x.Ticks == y.Ticks); case DataKind.DT: - return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); + return GetComparerVec(r1, r2, col, size, (x, y) => x.Ticks == y.Ticks); case DataKind.DZ: - return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); + return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); case DataKind.UG: return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); } diff --git a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs index 4ab2f0e6e5..a7b7b4c640 100644 --- a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs +++ b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs @@ -1037,11 +1037,11 @@ protected Func GetColumnComparer(IRow r1, IRow r2, int col, ColumnType typ case DataKind.Bool: return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); case DataKind.TimeSpan: - return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); + return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.DT: - return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); + return GetComparerOne(r1, r2, col, (x, y) => x == y); case DataKind.DZ: - return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); + return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); case DataKind.UG: return GetComparerOne(r1, r2, col, (x, y) => x.Equals(y)); case (DataKind)0: @@ -1083,11 +1083,11 @@ protected Func GetColumnComparer(IRow r1, IRow r2, int col, ColumnType typ case DataKind.Bool: return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); case DataKind.TimeSpan: - return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); + return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.DT: - return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); + return GetComparerVec(r1, r2, col, size, (x, y) => x == y); case DataKind.DZ: - return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); + return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); case DataKind.UG: return GetComparerVec(r1, r2, col, size, (x, y) => x.Equals(y)); } diff --git a/test/Microsoft.ML.TestFramework/TestCommandBase.cs b/test/Microsoft.ML.TestFramework/TestCommandBase.cs index 943ef77b0b..a14677494b 100644 --- a/test/Microsoft.ML.TestFramework/TestCommandBase.cs +++ b/test/Microsoft.ML.TestFramework/TestCommandBase.cs @@ -2033,13 +2033,14 @@ public void CommandTrainingBinaryFieldAwareFactorizationMachineWithValidationAnd [Fact] public void DataTypes() { - //Skip for linux because DATE/TIME format is different. - if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) - return; - string idvPath = GetDataPath("datatypes.idv"); + OutputPath intermediateData = CreateOutputPath("intermediateDatatypes.idv"); OutputPath textOutputPath = CreateOutputPath("datatypes.txt"); TestCore("savedata", idvPath, "loader=binary", "saver=text", textOutputPath.Arg("dout")); + _step++; + TestCore("savedata", idvPath, "loader=binary", "saver=binary", intermediateData.ArgOnly("dout")); + _step++; + TestCore("savedata", intermediateData.Path, "loader=binary", "saver=text", textOutputPath.Arg("dout")); Done(); } }