using System;

namespace simulator
{
    /// <summary>
    /// Command-line entry point of the simulator: parses the positional arguments and
    /// forwards them to <see cref="VowpalWabbitSimulator.Run"/>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Number of mandatory positional arguments (<c>args[0]</c> through <c>args[8]</c>).
        /// An optional tenth argument overrides the default SNIPS learner arguments.
        /// </summary>
        private const int RequiredArgumentCount = 9;

        // Lists the arguments in the order Main reads them: args[0] is ml_args and the
        // trailing ml_args_snips is optional.
        private static readonly string help_string = "usage: simulator ml_args initial_random tot_iter mod_iter reward_seed vw_seed exp_iter num_contexts num_actions [ml_args_snips]";

        /// <summary>
        /// Parses the command line and starts the simulation. Prints the usage text and
        /// returns when an argument is missing or is not a valid number.
        /// </summary>
        /// <param name="args">
        /// Positional arguments: ml_args, initial_random, tot_iter, mod_iter, reward_seed,
        /// vw_seed, exp_iter, num_contexts, num_actions and, optionally, ml_args_snips.
        /// </param>
        static void Main(string[] args)
        {
            // Without this guard a short command line fails with IndexOutOfRangeException
            // instead of showing the usage text.
            if (args == null || args.Length < RequiredArgumentCount)
            {
                Console.WriteLine(help_string);
                return;
            }

            int initial_random;
            int tot_iter;
            int mod_iter;
            int reward_seed;
            ulong vw_seed;
            int exp_iter;
            int num_contexts;
            int num_actions;

            // Short-circuits on the first argument that fails to parse, exactly like the
            // one-check-per-argument form did.
            if (!int.TryParse(args[1], out initial_random)
                || !int.TryParse(args[2], out tot_iter)
                || !int.TryParse(args[3], out mod_iter)
                || !int.TryParse(args[4], out reward_seed)
                || !ulong.TryParse(args[5], out vw_seed)
                || !int.TryParse(args[6], out exp_iter)
                || !int.TryParse(args[7], out num_contexts)
                || !int.TryParse(args[8], out num_actions))
            {
                Console.WriteLine(help_string);
                return;
            }

            // "--quiet" is always appended to user-supplied learner arguments.
            string ml_args = args[0] + " --quiet";

            string ml_args_snips = args.Length > RequiredArgumentCount
                ? args[RequiredArgumentCount] + " --quiet"
                : "--cb_explore_adf --epsilon .05 --cb_type mtr -l 1e-8 --power_t 0 --quiet";

            VowpalWabbitSimulator.Run(ml_args, initial_random, tot_iter, mod_iter, reward_seed, vw_seed, exp_iter, num_contexts, num_actions, ml_args_snips);
        }
    }
}
//using Microsoft.Research.MultiWorldTesting.ExploreLibrary;
using Newtonsoft.Json;
using System;
using System.IO;
using System.Linq;
using System.Text;
using VW;
using VW.Labels;

namespace simulator
{
    /// <summary>
    /// Runs a simulated contextual-bandit loop: a scorer model picks actions for randomly
    /// drawn contexts, clicks are simulated from a fixed per-context probability table, and
    /// two learners are trained on the resulting labelled examples while statistics are
    /// printed to the console as JSON.
    /// </summary>
    public static class VowpalWabbitSimulator
    {
        /// <summary>
        /// One simulated context: its click-probability table and the pre-serialized
        /// JSON example that is parsed again on every iteration.
        /// </summary>
        public class SimulatorExample
        {
            // Number of valid bytes in exampleBuffer, including the trailing \0.
            private readonly int length;

            // UTF-8 encoded JSON example followed by a terminating \0 byte.
            private readonly byte[] exampleBuffer;

            /// <summary>
            /// Click probability per action: 0.03 for the action whose index equals the
            /// context index, 0.005 for every other action.
            /// </summary>
            public float[] PDF { get; }

            /// <summary>
            /// Builds the probability table and the serialized example for one context.
            /// </summary>
            /// <param name="numActions">Number of actions in the example.</param>
            /// <param name="sharedContext">
            /// Context index; also used as the index of the preferred action, so it must be
            /// smaller than <paramref name="numActions"/>.
            /// </param>
            /// <exception cref="ArgumentOutOfRangeException">
            /// <paramref name="sharedContext"/> is not a valid action index.
            /// </exception>
            public SimulatorExample(int numActions, int sharedContext)
            {
                // The context index doubles as an index into PDF; fail with a clear message
                // instead of a bare IndexOutOfRangeException.
                if (sharedContext < 0 || sharedContext >= numActions)
                {
                    throw new ArgumentOutOfRangeException(
                        nameof(sharedContext),
                        sharedContext,
                        "The context index is used as the preferred action index and must be in [0, numActions).");
                }

                // generate a distinct per-user context with a separate preferred action
                this.PDF = Enumerable.Range(0, numActions).Select(_ => 0.005f).ToArray();
                this.PDF[sharedContext] = 0.03f;

                var str = JsonConvert.SerializeObject(
                    new
                    {
                        Version = "1",
                        EventId = "1", // can be ignored
                        a = Enumerable.Range(1, numActions).ToArray(),
                        c = new
                        {
                            // shared user context
                            U = new { C = sharedContext.ToString() },
                            _multi = Enumerable
                                .Range(0, numActions)
                                .Select(i => new { A = new { Constant = 1, Id = i.ToString() }, B = new { Id = i.ToString() } })
                                .ToArray()
                        },
                        p = Enumerable.Range(0, numActions).Select(i => 0.0f).ToArray()
                    });

                Console.WriteLine(str);

                // Size the buffer from the payload (plus one byte for the trailing \0) so a
                // large number of actions cannot overflow a fixed-size buffer.
                this.exampleBuffer = new byte[Encoding.UTF8.GetByteCount(str) + 1];
                this.length = Encoding.UTF8.GetBytes(str, 0, str.Length, this.exampleBuffer, 0);
                this.exampleBuffer[this.length] = 0;
                this.length++;
            }

            /// <summary>
            /// Parses the stored JSON with <paramref name="vw"/> and wraps the result in a
            /// multi-line example collection.
            /// </summary>
            /// <param name="vw">Instance used to parse the example and own the collection.</param>
            /// <returns>A new collection; the caller is responsible for disposing it.</returns>
            public VowpalWabbitMultiLineExampleCollection CreateExample(VowpalWabbit vw)
            {
                VowpalWabbitDecisionServiceInteractionHeader header;
                var examples = vw.ParseDecisionServiceJson(this.exampleBuffer, 0, this.length, true, out header);

                // The first parsed example is passed separately from the remaining ones.
                // NOTE(review): presumably the shared example followed by one example per
                // action - confirm against the VW API.
                var adf = new VowpalWabbitExample[examples.Count - 1];
                examples.CopyTo(1, adf, 0, examples.Count - 1);

                return new VowpalWabbitMultiLineExampleCollection(vw, examples[0], adf);
            }
        }

        /// <summary>
        /// Replaces <paramref name="scorer"/> with a fresh instance loaded from the current
        /// model of <paramref name="learner"/>, disposing the previous scorer.
        /// </summary>
        /// <param name="learner">Model to snapshot.</param>
        /// <param name="scorer">Scorer to replace; may be <c>null</c> on entry.</param>
        private static void ExportScoringModel(VowpalWabbit learner, ref VowpalWabbit scorer)
        {
            VowpalWabbit replacement;
            using (var memStream = new MemoryStream())
            {
                learner.SaveModel(memStream);

                memStream.Seek(0, SeekOrigin.Begin);

                // Note: the learner doesn't use save-resume as done online
                replacement = new VowpalWabbit(new VowpalWabbitSettings { Arguments = "--quiet", ModelStream = memStream });
            }

            // Swap only after the replacement exists, so that a failure above never leaves
            // the caller holding an already-disposed scorer.
            scorer?.Dispose();
            scorer = replacement;
        }

        /// <summary>
        /// Runs the simulation and prints progress statistics to the console as JSON.
        /// </summary>
        /// <param name="ml_args">Arguments for the primary learner.</param>
        /// <param name="initial_random">
        /// Iteration threshold: the scorer's top predictions are only counted after it, and
        /// the scorer is only refreshed from the learner at or after it.
        /// </param>
        /// <param name="tot_iter">Total number of iterations.</param>
        /// <param name="mod_iter">Statistics are printed every <paramref name="mod_iter"/> iterations and on the last one.</param>
        /// <param name="rewardSeed">Seed of the generator used for action sampling and click simulation.</param>
        /// <param name="vwSeed">Currently not used by this method.</param>
        /// <param name="exp_iter">The scorer is refreshed every <paramref name="exp_iter"/> iterations.</param>
        /// <param name="numContexts">Number of simulated contexts; must not exceed <paramref name="numActions"/>.</param>
        /// <param name="numActions">Number of actions per example.</param>
        /// <param name="ml_args_snips">Arguments for the second learner, whose predictions feed the SNIPS estimate.</param>
        public static void Run(string ml_args, int initial_random, int tot_iter, int mod_iter, int rewardSeed, ulong vwSeed, int exp_iter, int numContexts, int numActions, string ml_args_snips)
        {
            var randGen = new Random(rewardSeed);
            // Context sampling is intentionally left unseeded, as before.
            var userGen = new Random();

            var simExamples = Enumerable.Range(0, numContexts)
                .Select(i => new SimulatorExample(numActions, i))
                .ToArray();

            // NOTE: several locals below are emitted through an anonymous object whose JSON
            // property names are inferred from the local names; renaming them changes the
            // program's output.
            var scorerPdf = new float[numActions];
            var histPred = new int[numActions, numContexts];
            var histPred2 = new int[numActions, numContexts];
            var histActions = new int[numActions, numContexts];
            var histCost = new int[numActions, numContexts];
            var histContext = new int[numContexts];
            int clicks = 0;
            double snips_num = 0, snips_den = 0;

            using (var learner = new VowpalWabbit(ml_args))
            using (var learner2 = new VowpalWabbit(ml_args_snips))
            {
                // Start with a scorer configured with epsilon 1 until the first export.
                VowpalWabbit scorer = new VowpalWabbit("--cb_explore_adf --epsilon 1 --quiet");
                try
                {
                    for (int i = 1; i <= tot_iter; i++)
                    {
                        // sample uniform among users
                        int userIndex = userGen.Next(simExamples.Length);
                        var simExample = simExamples[userIndex];
                        var pdf = simExample.PDF;

                        histContext[userIndex]++;

                        using (var ex = simExample.CreateExample(learner))
                        {
                            var scores = ex.Predict(VowpalWabbitPredictionType.ActionProbabilities, scorer);

                            var total = 0.0;

                            foreach (var actionScore in scores)
                            {
                                total += actionScore.Score;
                                scorerPdf[actionScore.Action] = actionScore.Score;
                            }

                            // Sample an action proportionally to the scorer's probabilities.
                            var draw = randGen.NextDouble() * total;
                            var sum = 0.0;
                            uint topAction = 0;
                            foreach (var actionScore in scores)
                            {
                                sum += actionScore.Score;
                                if (sum > draw)
                                {
                                    topAction = actionScore.Action;
                                    break;
                                }
                            }

                            int modelAction = (int)scores[0].Action;
                            if (i > initial_random)
                            {
                                histPred[modelAction, userIndex] += 1;
                            }

                            histActions[topAction, userIndex] += 1;

                            // simulate behavior
                            float cost = 0;
                            if (randGen.NextDouble() < pdf[topAction])
                            {
                                cost = -1;
                                histCost[topAction, userIndex] += 1;
                                clicks += 1;
                            }

                            ex.Examples[topAction].Label = new ContextualBanditLabel((uint)topAction, cost, scorerPdf[topAction]);

                            // simulate delay
                            if (i >= initial_random && (i % exp_iter == 0))
                            {
                                ExportScoringModel(learner, ref scorer);
                            }

                            // invoke learning
                            var oneStepAheadScores = ex.Learn(VowpalWabbitPredictionType.ActionProbabilities, learner);
                            histPred2[oneStepAheadScores[0].Action, userIndex] += 1;

                            var oneStepAheadScores2 = ex.Learn(VowpalWabbitPredictionType.ActionProbabilities, learner2);

                            // SNIPS: look the chosen action up once and reuse it for both the
                            // numerator and the denominator.
                            var chosenScore = oneStepAheadScores2.First(f => f.Action == topAction).Score;
                            snips_num -= chosenScore * cost / scorerPdf[topAction];
                            snips_den += chosenScore / scorerPdf[topAction];

                            if (i % mod_iter == 0 || i == tot_iter)
                            {
                                Console.WriteLine(JsonConvert.SerializeObject(new
                                {
                                    Iter = i,
                                    clicks,
                                    CTR = clicks / (float)i,
                                    aveLoss = learner.PerformanceStatistics.AverageLoss,
                                    CTR_snips = snips_num / snips_den,
                                    CTR_ips = snips_num / (float)i,
                                    aveLoss2 = learner2.PerformanceStatistics.AverageLoss,
                                    snips_num,
                                    snips_den,
                                    histActions,
                                    histPred,
                                    histCost,
                                    histContext,
                                    pdf
                                }));
                            }
                        }
                    }

                    Console.WriteLine("---------------------");
                }
                finally
                {
                    // Release the scorer even when the loop throws.
                    scorer?.Dispose();
                }
            }
        }
    }
}
"..\python\windo EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vw_unit_test", "..\test\unit_test\unit_test.vcxproj", "{E02E3869-D9AD-4513-B352-93F90B7D6FE3}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "simulator", "..\cs\examples\simulator\simulator.csproj", "{2A9C6717-3B6C-4DB7-A626-16C04FCFECCF}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -230,6 +232,18 @@ Global {E02E3869-D9AD-4513-B352-93F90B7D6FE3}.Release|x64.Build.0 = Release|x64 {E02E3869-D9AD-4513-B352-93F90B7D6FE3}.Release|x86.ActiveCfg = Release|Win32 {E02E3869-D9AD-4513-B352-93F90B7D6FE3}.Release|x86.Build.0 = Release|Win32 + {2A9C6717-3B6C-4DB7-A626-16C04FCFECCF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {2A9C6717-3B6C-4DB7-A626-16C04FCFECCF}.Debug|Any CPU.Build.0 = Debug|Any CPU + {2A9C6717-3B6C-4DB7-A626-16C04FCFECCF}.Debug|x64.ActiveCfg = Debug|x64 + {2A9C6717-3B6C-4DB7-A626-16C04FCFECCF}.Debug|x64.Build.0 = Debug|x64 + {2A9C6717-3B6C-4DB7-A626-16C04FCFECCF}.Debug|x86.ActiveCfg = Debug|Any CPU + {2A9C6717-3B6C-4DB7-A626-16C04FCFECCF}.Debug|x86.Build.0 = Debug|Any CPU + {2A9C6717-3B6C-4DB7-A626-16C04FCFECCF}.Release|Any CPU.ActiveCfg = Release|Any CPU + {2A9C6717-3B6C-4DB7-A626-16C04FCFECCF}.Release|Any CPU.Build.0 = Release|Any CPU + {2A9C6717-3B6C-4DB7-A626-16C04FCFECCF}.Release|x64.ActiveCfg = Release|Any CPU + {2A9C6717-3B6C-4DB7-A626-16C04FCFECCF}.Release|x64.Build.0 = Release|Any CPU + {2A9C6717-3B6C-4DB7-A626-16C04FCFECCF}.Release|x86.ActiveCfg = Release|Any CPU + {2A9C6717-3B6C-4DB7-A626-16C04FCFECCF}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -238,7 +252,7 @@ Global {7D381808-CC76-4A1A-A8E6-3F71278858BB} = {5AF0ECDA-A523-4F8E-8537-E741983119CC} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {14CFCE53-7467-4021-BD9A-978C2FC5BCC8} 
EnterpriseLibraryConfigurationToolBinariesPathV6 = packages\EnterpriseLibrary.TransientFaultHandling.6.0.1304.0\lib\portable-net45+win+wp8 + SolutionGuid = {14CFCE53-7467-4021-BD9A-978C2FC5BCC8} EndGlobalSection EndGlobal