Skip to content

Commit

Permalink
Logging support in CLI + Implementation of cmd args [--name,--output,…
Browse files Browse the repository at this point in the history
…--verbosity] (dotnet#121)

* added logging and helper methods

* fixing code after merge

* added resx files, added logger framework, added logging messages

* added new options

* added spacing

* minor fixes

* change command description

* rename option, add headers, include new param in test

* formatted

* build fix

*  changed option name

* Added NlogConfig file

* added back config package

* fix tests
  • Loading branch information
srsaggam authored and Dmitry-A committed Aug 22, 2019
1 parent e71ab96 commit bd43894
Show file tree
Hide file tree
Showing 15 changed files with 589 additions and 41 deletions.
9 changes: 5 additions & 4 deletions src/mlnet.Test/CommandLineTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public void TestCommandLineArgs()

// Create the handler outside so that the command line and the handler are decoupled and testable.
var handler = CommandHandler.Create<FileInfo, FileInfo, FileInfo, TaskKind, string, uint, uint>(
(trainDataset, testDataset, validationDataset, mlTask, labelColumnName, timeout, labelColumnIndex) =>
(trainDataset, validationDataset, testDataset, mlTask, labelColumnName, maxExplorationTime, labelColumnIndex) =>
{
parsingSuccessful = true;
});
Expand All @@ -43,7 +43,7 @@ public void TestCommandLineArgsFailTest()

// Create the handler outside so that the command line and the handler are decoupled and testable.
var handler = CommandHandler.Create<FileInfo, FileInfo, FileInfo, TaskKind, string, uint, uint>(
(trainDataset, testDataset, validationDataset, mlTask, labelColumnName, timeout, labelColumnIndex) =>
(trainDataset, validationDataset, testDataset, mlTask, labelColumnName, maxExplorationTime, labelColumnIndex) =>
{
parsingSuccessful = true;
});
Expand Down Expand Up @@ -87,13 +87,14 @@ public void TestCommandLineArgsValuesTest()

// Create the handler outside so that the command line and the handler are decoupled and testable.
var handler = CommandHandler.Create<FileInfo, FileInfo, FileInfo, TaskKind, string, uint, uint>(
(trainDataset, validationDataset, testDataset, mlTask, labelColumnName, timeout, labelColumnIndex) =>
(trainDataset, validationDataset, testDataset, mlTask, labelColumnName, maxExplorationTime, labelColumnIndex) =>
{
parsingSuccessful = true;
Assert.AreEqual(mlTask, TaskKind.BinaryClassification);
Assert.AreEqual(trainDataset, file1);
Assert.AreEqual(testDataset, file2);
Assert.AreEqual(labelColumnName, labelName);
Assert.AreEqual(maxExplorationTime, 5);
});

var parser = new CommandLineBuilder()
Expand All @@ -103,7 +104,7 @@ public void TestCommandLineArgsValuesTest()
.Build();

// Valid arguments: the handler should receive the values asserted above.
string[] args = new[] { "new", "--ml-task", "BinaryClassification", "--train-dataset", file1, "--label-column-name", labelName, "--test-dataset", file2 };
string[] args = new[] { "new", "--ml-task", "BinaryClassification", "--train-dataset", file1, "--label-column-name", labelName, "--test-dataset", file2, "--max-exploration-time", "5" };
parser.InvokeAsync(args).Wait();
File.Delete(file1);
File.Delete(file2);
Expand Down
12 changes: 6 additions & 6 deletions src/mlnet/CodeGenerator/CodeGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
using System.Linq;
using System.Text;
using Microsoft.ML.Auto;
using Microsoft.ML.Data;
using mlnet.Templates;
using static Microsoft.ML.Data.TextLoader;

Expand Down Expand Up @@ -80,13 +79,14 @@ internal void GenerateOutput()
var trainScoreCode = codeGen.TransformText();
var projectSourceCode = csProjGenerator.TransformText();
var consoleHelperCode = consoleHelper.TransformText();
if (!Directory.Exists("./BestModel"))
var outputFolder = Path.Combine(options.OutputBaseDir, options.OutputName);
if (!Directory.Exists(outputFolder))
{
Directory.CreateDirectory("./BestModel");
Directory.CreateDirectory(outputFolder);
}
File.WriteAllText("./BestModel/Train.cs", trainScoreCode);
File.WriteAllText("./BestModel/MyML.csproj", projectSourceCode);
File.WriteAllText("./BestModel/ConsoleHelper.cs", consoleHelperCode);
File.WriteAllText($"{outputFolder}/Train.cs", trainScoreCode);
File.WriteAllText($"{outputFolder}/{options.OutputName}.csproj", projectSourceCode);
File.WriteAllText($"{outputFolder}/ConsoleHelper.cs", consoleHelperCode);
}

internal IList<(string, string)> GenerateTransformsAndUsings()
Expand Down
9 changes: 5 additions & 4 deletions src/mlnet/CodeGenerator/CodeGeneratorOptions.cs
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.IO;
using Microsoft.ML.Auto;

namespace Microsoft.ML.CLI
{
internal class CodeGeneratorOptions
{
internal string OutputName { get; set; }

internal string OutputBaseDir { get; set; }

internal FileInfo TrainDataset { get; set; }

internal FileInfo TestDataset { get; set; }
Expand Down
33 changes: 29 additions & 4 deletions src/mlnet/Commands/CommandDefinitions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.CommandLine;
using System.CommandLine.Builder;
using System.CommandLine.Invocation;
Expand All @@ -18,13 +19,17 @@ internal static System.CommandLine.Command New(ICommandHandler handler)
{
var newCommand = new System.CommandLine.Command("new", "ML.NET CLI tool for code generation", handler: handler)
{
//Dataset(),
TrainDataset(),
ValidationDataset(),
TestDataset(),
MlTask(),
LabelName(),
Timeout(),
LabelColumnIndex()
MaxExplorationTime(),
LabelColumnIndex(),
Verbosity(),
Name(),
OutputBaseDir()
};

newCommand.Argument.AddValidator((sym) =>
Expand All @@ -46,6 +51,9 @@ internal static System.CommandLine.Command New(ICommandHandler handler)

return newCommand;

/*Option Dataset() =>
new Option("--dataset", "Dataset file path.",
new Argument<FileInfo>().ExistingOnly()); */

Option TrainDataset() =>
new Option("--train-dataset", "Train dataset file path.",
Expand All @@ -71,15 +79,32 @@ Option LabelColumnIndex() =>
new Option("--label-column-index", "Index of the label column.",
new Argument<uint>());

Option Timeout() =>
new Option("--timeout", "Timeout in seconds for exploring models.",
Option MaxExplorationTime() =>
new Option("--max-exploration-time", "Timeout in seconds for exploring models.",
new Argument<uint>(defaultValue: 10));

// Builds the "--verbosity" option: a string argument that defaults to "m" and
// offers the values from GetVerbositySuggestions() as suggestions.
Option Verbosity()
{
    var aliases = new List<string> { "--verbosity" };
    var verbosityArgument = new Argument<string>(defaultValue: "m").WithSuggestions(GetVerbositySuggestions());
    return new Option(aliases, "Verbosity of the output to be shown by the tool.", verbosityArgument);
}

// Builds the "--name" option: a string argument that defaults to "Sample".
Option Name()
{
    var aliases = new List<string> { "--name" };
    var nameArgument = new Argument<string>(defaultValue: "Sample");
    return new Option(aliases, "Name of the output files(project and folder).", nameArgument);
}

// Builds the "--output" option: a string argument holding the output folder path.
// The default is composed with Path.Combine so that it uses the directory
// separator of the current platform; on Windows it is still ".\Sample", while a
// hard-coded backslash would not be treated as a separator on Linux/macOS.
Option OutputBaseDir() =>
    new Option(new List<string>() { "--output" }, "Output folder path.",
        new Argument<string>(defaultValue: System.IO.Path.Combine(".", "Sample")));

}

/// <summary>
/// Returns the string form of every <see cref="TaskKind"/> value, in the order
/// reported by <see cref="Enum.GetValues(Type)"/>.
/// </summary>
private static string[] GetMlTaskSuggestions()
{
    var kinds = (TaskKind[])Enum.GetValues(typeof(TaskKind));
    var suggestions = new string[kinds.Length];
    for (var i = 0; i < kinds.Length; i++)
    {
        suggestions[i] = kinds[i].ToString();
    }

    return suggestions;
}

/// <summary>
/// Returns the values suggested for the verbosity option: "q", "m" and "diag".
/// </summary>
private static string[] GetVerbositySuggestions() => new string[] { "q", "m", "diag" };
}
}
68 changes: 51 additions & 17 deletions src/mlnet/Commands/NewCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,20 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.Data.DataView;
using Microsoft.ML.Auto;
using Microsoft.ML.Core.Data;
using Microsoft.ML.Data;
using mlnet;
using mlnet.Utilities;
using NLog;

namespace Microsoft.ML.CLI
{
internal class NewCommand
{
private Options options;
private static Logger logger = LogManager.GetCurrentClassLogger();

internal NewCommand(Options options)
{
Expand All @@ -26,12 +29,13 @@ internal void Run()
{
if (options.MlTask == TaskKind.MulticlassClassification)
{
Console.WriteLine($"Unsupported ml-task: {options.MlTask}");
Console.WriteLine($"{Strings.UnsupportedMlTask}: {options.MlTask}");
}

var context = new MLContext();

// Check which overload of InferColumns needs to be called.
logger.Log(LogLevel.Info, Strings.InferColumns);
(TextLoader.Arguments TextLoaderArgs, IEnumerable<(string Name, ColumnPurpose Purpose)> ColumnPurpopses) columnInference = default((TextLoader.Arguments TextLoaderArgs, IEnumerable<(string Name, ColumnPurpose Purpose)> ColumnPurpopses));
if (options.LabelName != null)
{
Expand All @@ -42,50 +46,80 @@ internal void Run()
columnInference = context.Data.InferColumns(options.TrainDataset.FullName, options.LabelIndex, groupColumns: false);
}

logger.Log(LogLevel.Info, Strings.CreateDataLoader);
var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderArgs);

logger.Log(LogLevel.Info, Strings.LoadData);
IDataView trainData = textLoader.Read(options.TrainDataset.FullName);
IDataView validationData = options.ValidationDataset == null ? null : textLoader.Read(options.ValidationDataset.FullName);

//Explore the models
Pipeline pipeline = null;
var result = ExploreModels(context, trainData, validationData, pipeline);
(Pipeline, ITransformer) result = default;
Console.WriteLine($"{Strings.ExplorePipeline}: {options.MlTask}");
try
{
result = ExploreModels(context, trainData, validationData);
}
catch (Exception e)
{
logger.Log(LogLevel.Error, $"{Strings.ExplorePipelineException}:");
logger.Log(LogLevel.Error, e.StackTrace);
logger.Log(LogLevel.Error, Strings.Exiting);
return;
}

//Get the best pipeline
Pipeline pipeline = null;
pipeline = result.Item1;
var model = result.Item2;

//Save the model
logger.Log(LogLevel.Info, Strings.SavingBestModel);
var modelPath = Path.Combine(@options.OutputBaseDir, options.OutputName);
SaveModel(model, modelPath, $"{options.OutputName}_model.zip", context);


//Generate code
var codeGenerator = new CodeGenerator(pipeline, columnInference, new CodeGeneratorOptions() { TrainDataset = options.TrainDataset, MlTask = options.MlTask, TestDataset = options.TestDataset });
logger.Log(LogLevel.Info, Strings.GenerateProject);
var codeGenerator = new CodeGenerator(
pipeline,
columnInference,
new CodeGeneratorOptions()
{
TrainDataset = options.TrainDataset,
MlTask = options.MlTask,
TestDataset = options.TestDataset,
OutputName = options.OutputName,
OutputBaseDir = options.OutputBaseDir
});
codeGenerator.GenerateOutput();

//Save the model
SaveModel(model, @"./BestModel", "model.zip", context);
}

private (Pipeline, ITransformer) ExploreModels(
MLContext context,
IDataView trainData,
IDataView validationData,
Pipeline pipeline)
IDataView validationData)
{
ITransformer model = null;
string label = options.LabelName ?? "Label"; // It is guaranteed training dataview to have Label column
Pipeline pipeline = null;

if (options.MlTask == TaskKind.BinaryClassification)
{
var result = context.BinaryClassification.AutoFit(trainData, label, validationData, options.Timeout);
result = result.OrderByDescending(t => t.Metrics.Accuracy).ToList();
var bestIteration = result.FirstOrDefault();
var progressReporter = new ProgressHandlers.BinaryClassificationHandler();
var result = context.BinaryClassification.AutoFit(trainData, label, validationData, options.Timeout, progressCallback: progressReporter);
logger.Log(LogLevel.Info, Strings.RetrieveBestPipeline);
var bestIteration = result.Best();
pipeline = bestIteration.Pipeline;
model = bestIteration.Model;
}

if (options.MlTask == TaskKind.Regression)
{
var result = context.Regression.AutoFit(trainData, label, validationData, options.Timeout);
result = result.OrderByDescending(t => t.Metrics.RSquared).ToList();
var bestIteration = result.FirstOrDefault();
var progressReporter = new ProgressHandlers.RegressionHandler();
var result = context.Regression.AutoFit(trainData, label, validationData, options.Timeout, progressCallback: progressReporter);
logger.Log(LogLevel.Info, Strings.RetrieveBestPipeline);
var bestIteration = result.Best();
pipeline = bestIteration.Pipeline;
model = bestIteration.Model;
}
Expand All @@ -105,7 +139,7 @@ private static void SaveModel(ITransformer model, string ModelPath, string model
{
Directory.CreateDirectory(ModelPath);
}
ModelPath = ModelPath + "/" + modelName;
ModelPath = Path.Combine(ModelPath, modelName);
using (var fs = File.Create(ModelPath))
model.SaveTo(mlContext, fs);
}
Expand Down
4 changes: 4 additions & 0 deletions src/mlnet/Data/Options.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ namespace Microsoft.ML.CLI
{
internal class Options
{
internal string OutputName { get; set; }

internal string Name { get; set; }

internal FileInfo Dataset { get; set; }
Expand All @@ -27,5 +29,7 @@ internal class Options

internal uint Timeout { get; set; }

internal string OutputBaseDir { get; set; }

}
}
13 changes: 13 additions & 0 deletions src/mlnet/NLog.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="utf-8" ?>
<!-- NLog configuration for the mlnet CLI: declares a file target and a console target. -->
<nlog xmlns="http://www.nlog-project.org/schemas/NLog.xsd"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">

<targets>
<!-- File target. Program.Main reassigns its FileName at startup. -->
<target name="logfile" xsi:type="File" fileName="debug_log.txt" />
<!-- Console target that prints only the message text. No rule below references it;
     Program.Main adds a rule for it at runtime. -->
<target name="logconsole" xsi:type="Console" layout="${message}" />
</targets>

<rules>
<!-- Every logger writes messages of level Debug and above to the file target. -->
<logger name="*" minlevel="Debug" writeTo="logfile" />
</rules>
</nlog>
33 changes: 29 additions & 4 deletions src/mlnet/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.CommandLine.Builder;
using System.CommandLine.Invocation;
using System.IO;
using Microsoft.ML.Auto;
using NLog;
using NLog.Config;
using NLog.Targets;

namespace Microsoft.ML.CLI
{
Expand All @@ -16,18 +18,41 @@ public static void Main(string[] args)
{
// Create the handler outside so that the command line and the handler are decoupled and testable.
var handler = CommandHandler.Create<FileInfo, FileInfo, FileInfo, TaskKind, string, uint, uint>(
(trainDataset, validationDataset, testDataset, mlTask, labelColumnName, timeout, labelColumnIndex) =>
(trainDataset, validationDataset, testDataset, mlTask, labelColumnName, maxExplorationTime, labelColumnIndex) =>
{
/* The variables below should be initialized via the command-line API. At the moment there is a
restriction on the number of arguments and their bindings. The .NET team is working on a
feature that makes it possible to bind directly to a type; until then we keep these
placeholders and initialize the fields below by hand.
The PR that addresses this issue: https://github.com/dotnet/command-line-api/pull/408
*/
var basedir = "."; // This needs to be obtained from command line args.
var name = "Sample"; // This needs to be obtained from command line args.
// TODO: q, m and diag need to be mapped to LogLevel here.
var verbosity = LogLevel.Info;
var command = new NewCommand(new Options()
{
TrainDataset = trainDataset,
ValidationDataset = validationDataset,
TestDataset = testDataset,
MlTask = mlTask,
LabelName = labelColumnName,
Timeout = timeout,
LabelIndex = labelColumnIndex
Timeout = maxExplorationTime,
LabelIndex = labelColumnIndex,
OutputBaseDir = basedir,
OutputName = name
});
// Override the Logger Configuration
var logconsole = LogManager.Configuration.FindTargetByName("logconsole");
var logfile = (FileTarget)LogManager.Configuration.FindTargetByName("logfile");
logfile.FileName = $"{basedir}/debug_log.txt";
var config = LogManager.Configuration;
config.AddRule(verbosity, LogLevel.Fatal, logconsole);
// Run the command
command.Run();
});

Expand Down
Loading

0 comments on commit bd43894

Please sign in to comment.