Skip to content

Commit

Permalink
Get a working build (#1)
Browse files Browse the repository at this point in the history
* Set missing executable bits

* Ignore tests that need a missing dataset

* Add missing data sets.  Disable tests for external data sets.

* Respond to PR feedback

* Reformat test data README file to use quoting block.
  • Loading branch information
sandyarmstrong authored and eerhardt committed May 4, 2018
1 parent f0e639a commit 76cb2cd
Show file tree
Hide file tree
Showing 23 changed files with 1,749 additions and 27 deletions.
2 changes: 0 additions & 2 deletions Microsoft.ML.sln
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Tests", "test\
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "data", "data", "{FDA2FD2C-A708-43AC-A941-4D941B0853BF}"
ProjectSection(SolutionItems) = preProject
test\data\kc_house_data.csv = test\data\kc_house_data.csv
test\data\kc_house_test.csv = test\data\kc_house_test.csv
test\data\sentiment_data.tsv = test\data\sentiment_data.tsv
test\data\sentiment_test.tsv = test\data\sentiment_test.tsv
EndProjectSection
Expand Down
Empty file modified build.sh
100644 → 100755
Empty file.
Empty file modified init-tools.sh
100644 → 100755
Empty file.
Empty file modified run.sh
100644 → 100755
Empty file.
5 changes: 2 additions & 3 deletions src/Microsoft.ML.Core/Microsoft.ML.Core.csproj
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<IncludeInPackage>Microsoft.ML</IncludeInPackage>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<DefineConstants>CORECLR</DefineConstants>
</PropertyGroup>

<ItemGroup>
<Folder Include="CodeGeneration\" />
<Folder Include="Properties\" />
</ItemGroup>

Expand Down
Empty file modified src/Native/build.sh
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ public void TestCrossValidationBinaryMacro()
}
}

[Fact]
[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
public void TestCrossValidationMacro()
{
var dataPath = GetDataPath(@"housing.txt");
Expand Down
6 changes: 3 additions & 3 deletions test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
Original file line number Diff line number Diff line change
Expand Up @@ -855,7 +855,7 @@ public void EntryPointEvaluateMultiClass()
Assert.Equal(3, CountRows(loader));
}

[Fact]
[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
public void EntryPointEvaluateRegression()
{
var dataPath = GetDataPath("housing.txt");
Expand Down Expand Up @@ -887,7 +887,7 @@ public void EntryPointSDCAMultiClass()
TestEntryPointRoutine("iris.txt", "Trainers.StochasticDualCoordinateAscentClassifier");
}

[Fact]
[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
public void EntryPointSDCARegression()
{
TestEntryPointRoutine("housing.txt", "Trainers.StochasticDualCoordinateAscentRegressor");
Expand Down Expand Up @@ -961,7 +961,7 @@ public void EntryPointHogwildSGD()
TestEntryPointRoutine("breast-cancer.txt", "Trainers.StochasticGradientDescentBinaryClassifier");
}

[Fact]
[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
public void EntryPointPoissonRegression()
{
TestEntryPointRoutine("housing.txt", "Trainers.PoissonRegressor");
Expand Down
6 changes: 3 additions & 3 deletions test/Microsoft.ML.Predictor.Tests/TestDatasetInference.cs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ public void InferSchemaCommandTest()
{
var datasets = new[]
{
GetDataPath(Path.Combine("..", "data", "sentiment_data.tsv"))
GetDataPath(Path.Combine("..", "data", "wikipedia-detox-250-line-data.tsv"))
};

using (var env = new TlcEnvironment())
Expand Down Expand Up @@ -124,8 +124,8 @@ public void InferRecipesCommandTest()
var datasets = new Tuple<string, string>[]
{
Tuple.Create(
GetDataPath(Path.Combine("..", "data", "sentiment_data.tsv")),
GetDataPath(Path.Combine("..", "data", "sentiment_data_schema.txt")))
GetDataPath(Path.Combine("..", "data", "wikipedia-detox-250-line-data.tsv")),
GetDataPath(Path.Combine("..", "data", "wikipedia-detox-250-line-data-schema.txt")))
};

using (var env = new TlcEnvironment())
Expand Down
2 changes: 1 addition & 1 deletion test/Microsoft.ML.Tests/PredictionModelTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public class HousePricePrediction
public float Price;
}

[Fact]
[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
public async Task ReadStrongTypeModelFromStream()
{
using (var memoryStream = new MemoryStream())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ namespace Microsoft.ML.Scenarios
{
public partial class Top5Scenarios
{
public const string SentimentDataPath = "sentiment_data.tsv";
public const string SentimentTestPath = "sentiment_test.tsv";
public const string SentimentDataPath = "wikipedia-detox-250-line-data.tsv";
public const string SentimentTestPath = "wikipedia-detox-250-line-test.tsv";

[Fact]
public void TrainAndPredictSentimentModelTest()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ A real-estate firm Contoso wants to add a house price prediction to their ASP.NE
The application will let users submit information about their house, and see a price they could expect if they put the house for sale.
Because real estate transaction data is public, Contoso has historical data they intend to use to train Machine Learning prediction engine.
*/
[Fact]
[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
public async void PredictHousePriceModelTest()
{
string modelFilePath = GetOutputPath("PredictHousePriceModelTest.zip");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace Microsoft.ML.Scenarios
{
public partial class Top5Scenarios
{
[Fact]
[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
public void TrainAndPredictHousePriceModelTest()
{
string dataPath = GetDataPath("kc_house_data.csv");
Expand Down
587 changes: 587 additions & 0 deletions test/data/MNIST.Train.0-class.tiny.txt

Large diffs are not rendered by default.

29 changes: 29 additions & 0 deletions test/data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
### Wikipedia Detox

Redistributing the dataset "wikipedia-detox-250-line-data.tsv" with attribution:

> Wulczyn, Ellery; Thain, Nithum; Dixon, Lucas (2016): Wikipedia Detox. figshare. https://doi.org/10.6084/m9.figshare.4054689

The redistributed file was modified by taking a sample of rows and reducing rough language.

### UCI Adult Dataset

> Dua, D. and Karra Taniskidou, E. (2017). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.
>
> https://archive.ics.uci.edu/ml/datasets/Adult

### Breast Cancer Wisconsin

Redistributing the dataset "breast-cancer.txt" with attribution:

> O. L. Mangasarian and W. H. Wolberg: "Cancer diagnosis via linear programming", SIAM News, Volume 23, Number 5, September 1990, pp 1 & 18.
>
> Original source: http://ftp.cs.wisc.edu:80/math-prog/cpo-dataset/machine-learn/cancer/cancer1/datacum
>
> Original readme: http://ftp.cs.wisc.edu/math-prog/cpo-dataset/machine-learn/cancer/cancer1/data.doc

### MNIST

> MNIST data originally from [NIST](https://www.nist.gov) and modified by Chris Burges, Corinna Cortes, and Yann LeCun. http://yann.lecun.com/exdb/mnist/
>
> More information: https://en.wikipedia.org/wiki/MNIST_database
Loading

0 comments on commit 76cb2cd

Please sign in to comment.