Skip to content

Commit

Permalink
Various updates
Browse files Browse the repository at this point in the history
Various updates: bugfix for the empty field names check, easier entry of fixed-width column positions, and correct rendering of skipped lines (issue #46)
  • Loading branch information
BdR76 committed Apr 9, 2023
1 parent c144219 commit 6ffc6f3
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 43 deletions.
9 changes: 5 additions & 4 deletions CSVLintNppPlugin/CsvLint/CsvAnalyze.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
using CSVLint.Tools;
using CsvQuery.PluginInfrastructure;
using Kbg.NppPluginNET.PluginInfrastructure;
using System.Diagnostics;

namespace CSVLint
{
Expand Down Expand Up @@ -315,7 +316,7 @@ public static CsvDefinition InferFromData(bool autodetect, char mansep, string m
foundfieldWidths.Sort();
if (foundfieldWidths.Count < 3) return result; // unlikely fixed width

// widths contain line positions, convert to individual column widths, example pos [8, 14, 15, 22, 25] -> widths [8, 6, 1, 7, 3]
// widths now contain column end positions, convert to individual column widths, example pos [8, 14, 15, 22, 25] -> widths [8, 6, 1, 7, 3]
var pos1 = 0;
for (var i = 0; i < foundfieldWidths.Count; i++)
{
Expand Down Expand Up @@ -408,11 +409,11 @@ public static CsvDefinition InferFromData(bool autodetect, char mansep, string m
// if value in first row (=Names) is not of valid datatype, then first row probably contains column names
var str = csvvalid.EvaluateDataValue(namcol.Name, namcol, namcol.Index);
if (str != "") count++;

// if value in first row (=Names) is empty then probably not header names
if (namcol.Name == "") emptyname = true;
}

// if value in first row (=Names) is empty then probably not header names
if (namcol.Name == "") emptyname = true;

// TODO: carriage returns in header name not supported
// replace with space so that schema.ini can at least be validated
namcol.Name = namcol.Name.Replace("\r\n", " ").Replace('\r', ' ').Replace('\n', ' ');
Expand Down
22 changes: 11 additions & 11 deletions CSVLintNppPlugin/Forms/DetectColumnsForm.Designer.cs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

53 changes: 52 additions & 1 deletion CSVLintNppPlugin/Forms/DetectColumnsForm.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;

Expand All @@ -30,11 +31,61 @@ public void InitialiseSetting()
numSkipLines.Value = Main.Settings.DetectSkipLines;
}

/// <summary>
/// Normalises the text entered in <c>txtFixedWidthPos</c> into a comma separated
/// list of column end positions.
/// </summary>
/// <remarks>
/// Quality-of-life pre-processing of the list of column positions:
/// 1) values may be separated by commas, semicolons and/or spaces; tokens that are not valid integers are ignored,
/// 2) absolute end positions are expected, but when the values are not strictly increasing
///    they are taken to be column widths and converted to running end positions,
/// 3) a leading 0 is removed, because the start position 0 is always implicit.
/// </remarks>
/// <returns>
/// The end positions joined with commas, or an empty string when the input contains no valid integers.
/// </returns>
private string GetProcessedColWidths()
{
    // 1) split on every accepted separator; empty tokens (e.g. from ", " or ",,") are dropped right away
    var separators = new[] { ',', ';', ' ' };
    var positions = new List<int>();
    foreach (var token in txtFixedWidthPos.Text.Split(separators, StringSplitOptions.RemoveEmptyEntries))
    {
        // filter out anything that is not a valid integer
        int value;
        if (Int32.TryParse(token, out value)) positions.Add(value);
    }

    // nothing usable entered; return early instead of indexing into an empty list below
    if (positions.Count == 0) return "";

    // 2) the list should contain column end positions, check if the user entered column widths instead,
    //    example expected pos [8, 14, 15, 22, 25] -> but user entered [8, 6, 1, 7, 3].
    //    End positions are strictly increasing, so any value that is not larger than
    //    its predecessor means the list must be widths.
    var enteredWidths = false;
    for (int i = 1; i < positions.Count; i++)
    {
        if (positions[i - 1] >= positions[i])
        {
            enteredWidths = true;
            break;
        }
    }

    if (enteredWidths)
    {
        // change all widths to end positions by taking the running total
        var endpos = 0;
        for (int i = 0; i < positions.Count; i++)
        {
            endpos += positions[i];
            positions[i] = endpos;
        }
    }

    // 3) remove 0 as start position, because it is always implicitly expected
    if (positions[0] == 0) positions.RemoveAt(0);

    return string.Join(",", positions.Select(p => p.ToString()).ToArray());
}

private void DetectColumnsForm_FormClosing(object sender, FormClosingEventArgs e)
{
// pass new values to previous form
Separator = (cmbColumnSeparator.Text.Length > 0 ? cmbColumnSeparator.Text[0] : '\0');
ManWidths = txtFixedWidthPos.Text.Replace(' ', ',').Replace(",,", ","); // Replace = allow both comma separated "10, 12, 15, 20" and space separated "10 12 15 20"
ManWidths = GetProcessedColWidths();
HeaderNames = chkHeaderNames.Checked;
SkipLines = Convert.ToInt32(numSkipLines.Value);

Expand Down
31 changes: 4 additions & 27 deletions CSVLintNppPlugin/PluginInfrastructure/Lexer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -535,33 +535,11 @@ public static void Lex(IntPtr instance, UIntPtr start_pos, IntPtr length_doc, in
if (lineCurrent < skipLines)
{
//i = (int)vtable.PositionFromLine(p_access, (IntPtr)skipLines);

var skipcount = skipLines;
i = start;
isEOL = false;

// skip the first X lines
while ((skipcount > 0) && (i < length))
{
// next character
byte cur = contentBytes[i];
i++;

// new line can be single character \r or \n or two characters \r\n
if ((cur == '\n') || (cur == '\r'))
{
if (!isEOL) skipcount--;
isEOL = true;
}
else
{
isEOL = false;
}
}
i = (int)vtable.LineStart(p_access, (IntPtr)skipLines);

// set no color
vtable.StartStyling(p_access, (IntPtr)(start));
vtable.SetStyleFor(p_access, (IntPtr)(i), (char)0);
vtable.SetStyleFor(p_access, (IntPtr)(i-start), (char)0);
}


Expand Down Expand Up @@ -640,7 +618,7 @@ public static void Lex(IntPtr instance, UIntPtr start_pos, IntPtr length_doc, in
bool whitespace = true; // to catch where value is just two quotes "" right at start of line

// fixed widths
while (i < length-1)
while (i < length - 1)
{
byte cur = contentBytes[i];
byte next = contentBytes[i + 1];
Expand Down Expand Up @@ -689,7 +667,7 @@ public static void Lex(IntPtr instance, UIntPtr start_pos, IntPtr length_doc, in
if (!quote)
{
// next color
if ((idx++ > IDX_MAX) || isEOL) idx = 1; // reset end of line
if ((++idx > IDX_MAX) || isEOL) idx = 1; // reset end of line
}

if (isEOL)
Expand Down Expand Up @@ -724,7 +702,6 @@ public static void Lex(IntPtr instance, UIntPtr start_pos, IntPtr length_doc, in
}
}


// free allocated buffer
Marshal.FreeHGlobal(buffer_ptr);
}
Expand Down
Binary file modified CSVLintNppPlugin/bin/Release-x64/CSVLint.dll
Binary file not shown.
Binary file modified CSVLintNppPlugin/bin/Release/CSVLint.dll
Binary file not shown.

0 comments on commit 6ffc6f3

Please sign in to comment.