-
Notifications
You must be signed in to change notification settings - Fork 1.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Move DataFrame to machinelearning #5641
Changes from 50 commits
df1661b
f09c30c
afe3e61
5c4b1b4
5de343c
81f3d42
c6eb2f7
7cee9d9
e64cbad
303ba62
dc4f9b0
838350b
0fa210d
430ac09
70bb9e9
82c315f
afdbc5b
355d3fb
9e10004
1544c23
8d7fb66
7ef10ba
4072f96
a6c34d0
9c80608
d120982
59df417
d79dd2f
28140bd
5c3ac8b
0bef531
b215eb4
3b4aafa
8d08434
7dcf184
881886b
6e60307
6c2d800
7ebe8bc
54633a2
4e6d801
81d0ba5
db5c49e
cb7ab00
cff30e3
e7a9c42
881c619
1c1c3a8
fea6bd2
0b8541a
bf82179
9d74a83
fa39b74
216554a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Threading.Tasks; | ||
using Microsoft.AspNetCore.Html; | ||
using Microsoft.DotNet.Interactive; | ||
using Microsoft.DotNet.Interactive.Formatting; | ||
using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags; | ||
|
||
namespace Microsoft.Data.Analysis.Interactive | ||
{ | ||
public class DataFrameKernelExtension : IKernelExtension | ||
{ | ||
/// <summary>
/// Kernel-extension entry point: installs the DataFrame HTML formatter.
/// </summary>
/// <param name="kernel">The kernel loading this extension; it is not used by this method.</param>
/// <returns>An already-completed task, since the registration runs synchronously.</returns>
public Task OnLoadAsync(Kernel kernel)
{
    // All of the work is synchronous formatter registration.
    DataFrameKernelExtension.RegisterDataFrame();

    Task alreadyDone = Task.CompletedTask;
    return alreadyDone;
}
|
||
/// <summary>
/// Registers the "text/html" formatter used to render a <see cref="DataFrame"/>.
/// </summary>
/// <remarks>
/// A frame with more than 10 rows is written as a paginated table: at most 10000 rows are
/// emitted, all initially hidden, followed by a script that reveals the first page and a footer
/// of navigation buttons whose <c>onclick</c> handlers are built from the script helpers in this
/// class. A frame with 10 rows or fewer is written as a plain table with every row visible.
/// </remarks>
public static void RegisterDataFrame()
{
    Formatter<DataFrame>.Register((df, writer) =>
    {
        const int MaxDisplayedRows = 10000;
        const int PageSize = 10;

        // Suffix for the element ids ("table_<id>", "page_<id>") referenced by the generated scripts.
        var uniqueId = DateTime.Now.Ticks;

        var header = new List<IHtmlContent>
        {
            th(i("index"))
        };
        header.AddRange(df.Columns.Select(c => (IHtmlContent)th(c.Name)));

        if (df.Rows.Count > PageSize)
        {
            var maxMessage = df.Rows.Count > MaxDisplayedRows ? $" (showing a max of {MaxDisplayedRows} rows)" : string.Empty;
            var title = h3[style: "text-align: center;"]($"DataFrame - {df.Rows.Count} rows {maxMessage}");

            // table body
            var maxRows = Math.Min(MaxDisplayedRows, df.Rows.Count);
            var rows = BuildRowCells(df, maxRows);

            // navigator: every button hides all rows, moves the page index, then shows the new page.
            var lastPage = (maxRows - 1) / PageSize;
            var hideRows = BuildHideRowsScript(uniqueId);
            var showPage = BuildPageScript(uniqueId, PageSize);
            var footer = new List<IHtmlContent>();

            var paginateScriptFirst = hideRows + GotoPageIndex(uniqueId, 0) + showPage;
            footer.Add(button[style: "margin: 2px;", onclick: paginateScriptFirst]("⏮"));

            var paginateScriptPrevTen = hideRows + UpdatePageIndex(uniqueId, -10, lastPage) + showPage;
            footer.Add(button[style: "margin: 2px;", onclick: paginateScriptPrevTen]("⏪"));

            var paginateScriptPrev = hideRows + UpdatePageIndex(uniqueId, -1, lastPage) + showPage;
            footer.Add(button[style: "margin: 2px;", onclick: paginateScriptPrev]("◀️"));

            footer.Add(b[style: "margin: 2px;"]("Page"));
            footer.Add(b[id: $"page_{uniqueId}", style: "margin: 2px;"]("1"));

            var paginateScriptNext = hideRows + UpdatePageIndex(uniqueId, 1, lastPage) + showPage;
            footer.Add(button[style: "margin: 2px;", onclick: paginateScriptNext]("▶️"));

            var paginateScriptNextTen = hideRows + UpdatePageIndex(uniqueId, 10, lastPage) + showPage;
            footer.Add(button[style: "margin: 2px;", onclick: paginateScriptNextTen]("⏩"));

            var paginateScriptLast = hideRows + GotoPageIndex(uniqueId, lastPage) + showPage;
            footer.Add(button[style: "margin: 2px;", onclick: paginateScriptLast]("⏭️"));

            //table
            var t = table[id: $"table_{uniqueId}"](
                caption(title),
                thead(tr(header)),
                tbody(rows.Select(r => tr[style: "display: none"](r))),
                tfoot(tr(td[colspan: df.Columns.Count + 1, style: "text-align: center;"](footer)))
            );
            writer.Write(t);

            //show first page
            writer.Write($"<script>{showPage}</script>");
        }
        else
        {
            var rows = BuildRowCells(df, df.Rows.Count);

            //table
            var t = table[id: $"table_{uniqueId}"](
                thead(tr(header)),
                tbody(rows.Select(r => tr(r)))
            );
            writer.Write(t);
        }
    }, "text/html");
}

/// <summary>
/// Builds the cells for the first <paramref name="rowCount"/> rows of <paramref name="df"/>:
/// an italic row-index cell followed by one cell per value in the row.
/// </summary>
private static List<List<IHtmlContent>> BuildRowCells(DataFrame df, long rowCount)
{
    var rows = new List<List<IHtmlContent>>();
    for (var index = 0; index < rowCount; index++)
    {
        var cells = new List<IHtmlContent>
        {
            td(i((index)))
        };
        foreach (var obj in df.Rows[index])
        {
            cells.Add(td(obj));
        }
        rows.Add(cells);
    }

    return rows;
}
|
||
/// <summary>
/// Produces JavaScript that sets <c>display='none'</c> on every body row of the table
/// whose element id is <c>table_{uniqueId}</c>.
/// </summary>
/// <param name="uniqueId">Suffix of the target table's element id.</param>
/// <returns>The script text, terminated by a trailing space so further statements can be appended.</returns>
private static string BuildHideRowsScript(long uniqueId)
{
    string selectAllRows = $"var allRows = document.querySelectorAll('#table_{uniqueId} tbody tr:nth-child(n)'); ";
    const string hideEachRow = "for (let i = 0; i < allRows.length; i++) { allRows[i].style.display='none'; } ";

    return selectAllRows + hideEachRow;
}
|
||
/// <summary>
/// Produces JavaScript that reveals the rows of the page currently shown in the
/// <c>page_{uniqueId}</c> element of the table <c>table_{uniqueId}</c>.
/// </summary>
/// <param name="uniqueId">Suffix of the table and page-label element ids.</param>
/// <param name="size">Number of rows per page.</param>
/// <returns>The script text, terminated by a trailing space so further statements can be appended.</returns>
private static string BuildPageScript(long uniqueId, int size)
{
    // The page label is 1-based; the script converts it to a 0-based page index.
    var script = $"var page = parseInt(document.querySelector('#page_{uniqueId}').innerHTML) - 1; ";

    // Selects every body row from the first row of the current page onwards (JS template literal).
    script += $"var pageRows = document.querySelectorAll(`#table_{uniqueId} tbody tr:nth-child(n + ${{page * {size} + 1 }})`); ";

    // The last page may hold fewer than `size` rows; stop at pageRows.length so the script
    // never dereferences an undefined element.
    script += $"for (let j = 0; j < {size} && j < pageRows.length; j++) {{ pageRows[j].style.display='table-row'; }} ";
    return script;
}
|
||
/// <summary>
/// Produces JavaScript that writes a page number into the <c>page_{uniqueId}</c> element.
/// </summary>
/// <param name="uniqueId">Suffix of the page-label element id.</param>
/// <param name="page">Zero-based page index; the label is written as <c>page + 1</c>.</param>
/// <returns>The script text, terminated by a trailing space so further statements can be appended.</returns>
private static string GotoPageIndex(long uniqueId, long page)
{
    long displayedPage = page + 1;
    return $"document.querySelector('#page_{uniqueId}').innerHTML = {displayedPage}; ";
}
|
||
/// <summary>
/// Produces JavaScript that moves the page shown in the <c>page_{uniqueId}</c> element by
/// <paramref name="step"/> pages, clamped to the range 0..<paramref name="maxPage"/>.
/// </summary>
/// <param name="uniqueId">Suffix of the page-label element id.</param>
/// <param name="step">Number of pages to move; negative values move backwards.</param>
/// <param name="maxPage">Zero-based index of the last page.</param>
/// <returns>The script text, terminated by a trailing space so further statements can be appended.</returns>
private static string UpdatePageIndex(long uniqueId, int step, long maxPage)
{
    // The numbers are embedded in JavaScript source, so they must be formatted culture-invariantly:
    // a culture-specific negative sign (e.g. U+2212) would make "parseInt(<step>)" invalid script.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    string stepText = step.ToString(invariant);
    string maxPageText = maxPage.ToString(invariant);

    // The page label is 1-based; the script works on a 0-based index and writes back index + 1.
    var script = $"var page = parseInt(document.querySelector('#page_{uniqueId}').innerHTML) - 1; ";
    script += $"page = parseInt(page) + parseInt({stepText}); ";
    script += "page = page < 0 ? 0 : page; ";
    script += $"page = page > {maxPageText} ? {maxPageText} : page; ";
    script += $"document.querySelector('#page_{uniqueId}').innerHTML = page + 1; ";
    return script;
}
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<TargetFramework>netcoreapp3.1</TargetFramework> | ||
<IsPackable>false</IsPackable> | ||
<NoWarn>$(NoWarn);MSML_ParameterLocalVarName;SA1028</NoWarn> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. SA1028 is about code comments not formatting correctly. That seems like something we should be able to fix (either here or in a follow up PR) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yup. My plan is to do it in a follow up PR. SA1028 and a bunch of the other warnings are easily fixable |
||
</PropertyGroup> | ||
|
||
<ItemGroup> | ||
<PackageReference Include="Microsoft.CodeAnalysis.Analyzers" Version="3.0.0"> | ||
pgovind marked this conversation as resolved.
Show resolved
Hide resolved
|
||
<PrivateAssets>all</PrivateAssets> | ||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets> | ||
pgovind marked this conversation as resolved.
Show resolved
Hide resolved
|
||
</PackageReference> | ||
<PackageReference Include="Microsoft.DotNet.Interactive" Version="1.0.0-beta.20410.1" /> | ||
<PackageReference Include="Microsoft.DotNet.Interactive.Formatting" Version="1.0.0-beta.20410.1" /> | ||
</ItemGroup> | ||
|
||
<ItemGroup> | ||
<ProjectReference Include="..\Microsoft.Data.Analysis\Microsoft.Data.Analysis.csproj" /> | ||
</ItemGroup> | ||
|
||
</Project> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tabs hurt my eyes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ugh, this setting must be mis-matched locally on my machine between VSCode and VS :\ Will fix in a bit
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Out of curiosity, how did you see tabs in this file in GH? I changed all tabs to spaces (no idea how tabs occurred in the first place) in 4 csprojs in the PR, but unfortunately there is no "Change all tabs to spaces in all files" in VS I think. For now, a solution wide regex search yields no tabs in source code
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looked too indented, and then I saw the tabs while trying to select the whitespace.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I use CodeFlow to review PRs, and you can tell it to show whitespace. Arrows are tabs, dots are spaces.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Has codeflow been fixed to work better with Github comment threads? That was a deal breaker in the past.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, it still has some issues with comment threads. Typically I only create new conversations from CodeFlow. And respond to threads in GH. I usually have them both open on 2 different windows. I just like seeing the full file, searching across the change, seeing the tree view of files, etc in CodeFlow. It's a much better experience for me.