Skip to content

Commit

Permalink
Fix ConvertingTransform bug (dotnet#1545)
Browse files Browse the repository at this point in the history
* Fix ConvertingTransform bug

* Enable NgramHash unit test

* Address PR comment.
  • Loading branch information
yaeldMS authored Nov 8, 2018
1 parent 18f7acc commit f222025
Show file tree
Hide file tree
Showing 7 changed files with 934 additions and 105 deletions.
17 changes: 16 additions & 1 deletion src/Microsoft.ML.Data/Transforms/ConvertTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,22 @@ internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDat
var item = args.Column[i];
var tempResultType = item.ResultType ?? args.ResultType;
DataKind kind;
KeyRange range = item.KeyRange ?? (item.Range != null ? KeyRange.Parse(item.Range) : null) ?? args.KeyRange ?? (args.Range != null ? KeyRange.Parse(args.Range) : null);
KeyRange range = null;
// If either KeyRange or Range is defined on this column, set range to the appropriate value.
if (item.KeyRange != null)
range = item.KeyRange;
else if (item.Range != null)
range = KeyRange.Parse(item.Range);
// If neither KeyRange nor Range is defined on this column, fall back to the value defined in
// the Arguments object, but only when the column does not define its own ResultType.
else if (item.ResultType == null)
{
if (args.KeyRange != null)
range = args.KeyRange;
else if (args.Range != null)
range = KeyRange.Parse(args.Range);
}

if (tempResultType == null)
{
if (range == null)
Expand Down
201 changes: 101 additions & 100 deletions test/BaselineOutput/Common/SavePipe/SavePipeNgramHash-Data.txt

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
TextFeatures: Vec<Text, 2>
Metadata 'SlotNames': Vec<Text, 2>: Length=2, Count=2
[0] 'weg fuer milliardenhilfe frei', [1] 'vor dem parlamentsgebaeude toben strassenkaempfe zwischen demonstranten drinnen haben die griechischen abgeordneten das drastische sparpaket am abend endgueltig beschlossen die entscheidung ist eine wichtige voraussetzung fuer die auszahlung von weiteren acht milliarden euro hilfsgeldern athen das griechische parlament hat einem umfassenden sparpaket endgueltig zugestimmt'
---- DelimitedTokenizeTransform ----
---- RowToRowMapperTransform ----
4 columns:
Label: Text
Attrs: Vec<Text, 2>
Expand Down Expand Up @@ -164,7 +164,7 @@
HashNgram7: Vec<R4, 64>
HashNgram8: Vec<R4, 64>
---- SelectColumnsDataTransform ----
8 columns:
9 columns:
NgramHashOne: Vec<R4, 16>
HashNgram1: Vec<R4, 1024>
HashNgram2: Vec<R4, 256>
Expand All @@ -173,3 +173,4 @@
HashNgram5: Vec<R4, 8>
HashNgram6: Vec<R4, 8>
HashNgram7: Vec<R4, 64>
HashNgram8: Vec<R4, 64>
13 changes: 13 additions & 0 deletions test/BaselineOutput/Common/SavePipe/SavePipeWithKey-CursLog.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Bad value at line 19 in column B
Bad value at line 19 in column E
Bad value at line 21 in column B
Bad value at line 21 in column E
Bad value at line 22 in column A
Bad value at line 22 in column C
Bad value at line 23 in column B
Bad value at line 23 in column E
Bad value at line 24 in column A
Bad value at line 24 in column B
Suppressing further bad value messages
Processed 683 rows with 1245 bad values and 0 format errors
Cursored through 683 rows
698 changes: 698 additions & 0 deletions test/BaselineOutput/Common/SavePipe/SavePipeWithKey-Data.txt

Large diffs are not rendered by default.

28 changes: 28 additions & 0 deletions test/BaselineOutput/Common/SavePipe/SavePipeWithKey-Schema.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
---- BoundLoader ----
8 columns:
Label: Key<U1, 0-1>
Features: Vec<U2, 9>
Metadata 'SlotNames': Vec<Text, 9>: Length=9, Count=9
[0] 'thickness', [1] 'uniform_size', [2] 'uniform_shape', [3] 'adhesion', [4] 'epit_size', [5] 'bare_nuclei', [6] 'bland_chromatin', [7] 'normal_nucleoli', [8] 'mitoses'
A: Key<U1, 1-5>
B: Key<U1, 3-8>
C: Key<U4, 0-5>
D: Key<U1, 1-*>
E: Key<U4, 3-*>
F: Key<U1, 0-*>
---- RowToRowMapperTransform ----
10 columns:
Label: Key<U1, 0-1>
Features: Vec<U2, 9>
Metadata 'SlotNames': Vec<Text, 9>: Length=9, Count=9
[0] 'thickness', [1] 'uniform_size', [2] 'uniform_shape', [3] 'adhesion', [4] 'epit_size', [5] 'bare_nuclei', [6] 'bland_chromatin', [7] 'normal_nucleoli', [8] 'mitoses'
A: Key<U1, 1-5>
B: Key<U1, 3-8>
C: Key<U4, 0-5>
D: Key<U1, 1-*>
E: Key<U4, 3-*>
F: Key<U1, 0-*>
Label2: Key<U2, 0-1>
Features2: Vec<R4, 9>
Metadata 'SlotNames': Vec<Text, 9>: Length=9, Count=9
[0] 'thickness', [1] 'uniform_size', [2] 'uniform_shape', [3] 'adhesion', [4] 'epit_size', [5] 'bare_nuclei', [6] 'bland_chromatin', [7] 'normal_nucleoli', [8] 'mitoses'
77 changes: 75 additions & 2 deletions test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Runtime.CommandLine;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Transforms;
using Microsoft.ML.Transforms.Conversions;
using Microsoft.ML.Transforms.Text;
using System;
Expand Down Expand Up @@ -506,7 +508,7 @@ private bool VerifyMatch<TSrc, TDst>(TSrc src, TDst dst, ValueMapper<TSrc, TDst>
return false;
}

[Fact(Skip = "Fails until issue #1342 is resolved.")]
[Fact]
public void SavePipeNgramHash()
{
string pathData = GetDataPath("lm.sample.txt");
Expand All @@ -522,7 +524,7 @@ public void SavePipeNgramHash()
"xf=NgramHash{bits=6 col=HashNgram4:HashBig,Hash rehash+}",
"xf=NgramHash{bits=3 ngram=1 col={name=HashNgram5 src=Hash src=Hash} col={name=HashNgram6 src=Hash ord-}}",
"xf=NgramHash{bits=6 col=HashNgram7:HashBig,Hash rehash+ all- col={name=HashNgram8 src=Hash all+ ord-}}",
"xf=SelectColumns{keepcol=NgramHashOne keepcol=HashNgram1 keepcol=HashNgram2 keepcol=HashNgram3 keepcol=HashNgram4 keepcol=HashNgram5 keepcol=HashNgram6 keepcol=HashNgram7 keepcol=HashNgram8, hidden=-}",
"xf=SelectColumns{keepcol=NgramHashOne keepcol=HashNgram1 keepcol=HashNgram2 keepcol=HashNgram3 keepcol=HashNgram4 keepcol=HashNgram5 keepcol=HashNgram6 keepcol=HashNgram7 keepcol=HashNgram8 hidden=-}",
});

TestCore(null, true,
Expand Down Expand Up @@ -600,6 +602,77 @@ public void SavePipeWordHash()
Done();
}

// Runs a pipeline whose text loader produces key-typed columns directly (followed by a Convert
// transform), then rebuilds the same columns by loading everything as text and converting with
// ConvertingTransform, and checks that both views hold the same values.
[Fact]
public void SavePipeWithKey()
{
    var breastCancerPath = GetDataPath("breast-cancer-withheader.txt");

    // Pipeline under test: the loader declares the key types/ranges itself.
    var pipelineSpec = new[] {
        "loader=Text{header=+",
        " col=Label:U1[0-1]:0",
        " col=Features:U2:1-*",
        " col=A:U1[1-5]:1",
        " col=B:U1[3-8]:2",
        " col=C:U4[0-5]:3",
        " col=D:U1[1-*]:4",
        " col=E:[3-*]:5",
        " col=F:U1[0-*]:6",
        "}",
        "xf=Convert{col=Label2:U2[0-1]:Label col=Features2:Features type=Num}",
    };

    TestCore(breastCancerPath, true, pipelineSpec,
        loadedPipe =>
        {
            // Reference view, step 1: read every column as text.
            const string textLoaderSpec =
                " header=+" +
                " col=Label:TX:0" +
                " col=Features:TX:1-*" +
                " col=A:TX:1" +
                " col=B:TX:2" +
                " col=C:TX:3" +
                " col=D:TX:4" +
                " col=E:TX:5" +
                " col=F:TX:6";
            var textLoaderArgs = new TextLoader.Arguments();
            bool parsedOk = CmdParser.ParseArguments(Env, textLoaderSpec, textLoaderArgs);
            Check(parsedOk, "Parsing argsText failed!");
            IDataView referenceView = TextLoader.Create(Env, textLoaderArgs, new MultiFileSource(breastCancerPath));

            // Reference view, step 2: convert the text columns in place. Columns C and E omit
            // part (or all) of their type spec, and a transform-level key={min=3} is supplied.
            const string keyConvertSpec =
                " col=Label:U1[0-1]:Label" +
                " col=Features:U2:Features" +
                " col=A:U1[1-5]:A" +
                " col=B:U1[3-8]:B" +
                " col=C:[0-5]:C" +
                " col=D:U1[1-*]:D" +
                " col=E" +
                " col=F:U1[0-*]:F" +
                " key={min=3}";
            var keyConvertArgs = new ConvertingTransform.Arguments();
            parsedOk = CmdParser.ParseArguments(Env, keyConvertSpec, keyConvertArgs);
            Check(parsedOk, "Parsing argsConv failed!");
            referenceView = ConvertingTransform.Create(Env, keyConvertArgs, referenceView);

            // Reference view, step 3: add the Label2/Features2 columns with a second conversion.
            const string extraConvertSpec = " col=Label2:U2:Label col=Features2:Num:Features";
            var extraConvertArgs = new ConvertingTransform.Arguments();
            parsedOk = CmdParser.ParseArguments(Env, extraConvertSpec, extraConvertArgs);
            Check(parsedOk, "Parsing argsConv(2) failed!");
            referenceView = ConvertingTransform.Create(Env, extraConvertArgs, referenceView);

            // Keep the same set of columns on both sides before comparing values.
            var keptColumns = new[] { "Label", "Features", "Label2", "Features2", "A", "B", "C", "D", "E", "F" };
            IDataView pipelineView = SelectColumnsTransform.CreateKeep(Env, loadedPipe, keptColumns);
            referenceView = SelectColumnsTransform.CreateKeep(Env, referenceView, keptColumns);
            CheckSameValues(pipelineView, referenceView);
        },

        logCurs: true);

    Done();
}

[Fact]
public void TestHashTransformFloat()
{
Expand Down

0 comments on commit f222025

Please sign in to comment.