Skip to content

Commit

Permalink
Split column bugfix
Browse files Browse the repository at this point in the history
Split column fix for issue #35: handle values containing quotes and separators more robustly; update readme/docs
  • Loading branch information
BdR76 committed Aug 5, 2022
1 parent 5aaf320 commit 5ddb5ce
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 97 deletions.
199 changes: 107 additions & 92 deletions CSVLintNppPlugin/CsvLint/CsvEdit.cs
Original file line number Diff line number Diff line change
Expand Up @@ -771,13 +771,13 @@ public static void ColumnSplit(CsvDefinition csvdef, int ColumnIndex, int SplitC
var IntPar2 = -1 * IntPar;

// decode
List<string> decode1 = new List<string>();
List<string> decodelist = new List<string>();
List<string> decode2 = new List<string>();

// decode
if (SplitCode == 5)
{
decode1 = Parameter1.Split(Parameter2[0]).Select(item => item.Trim()).ToList();
decodelist = Parameter1.Split(Parameter2[0]).Select(item => item.Trim()).ToList();
}

StringBuilder datanew = new StringBuilder();
Expand All @@ -793,23 +793,23 @@ public static void ColumnSplit(CsvDefinition csvdef, int ColumnIndex, int SplitC
csvdef.ParseNextLine(strdata);

// add header column names
for (int c = 0; c < csvdef.Fields.Count; c++)
for (int head = 0; head < csvdef.Fields.Count; head++)
{

// add column header to output, except when remove original column
if ((c != ColumnIndex) || (bRemove == false))
if ((head != ColumnIndex) || (bRemove == false))
{
datanew.Append((c > 0 ? sep : "") + csvdef.Fields[c].Name);
datanew.Append((head > 0 ? sep : "") + csvdef.Fields[head].Name);
}

// add new split columns headers
if (c == ColumnIndex)
if (head == ColumnIndex)
{
// determine new column header names, check for existing postfix
var newname = csvdef.GetUniqueColumnName(csvdef.Fields[c].Name, out int postfix);
var newname = csvdef.GetUniqueColumnName(csvdef.Fields[head].Name, out int postfix);

// when decoding csv values (SplitCode == 5) add more new columns, when normal split then just 2
var addmax = SplitCode == 5 ? decode1.Count + 1 : 2; // +1 = one extra column of any left-over values
var addmax = SplitCode == 5 ? decodelist.Count + 1 : 2; // +1 = one extra column of any left-over values
for (var cnew = 0; cnew < addmax; cnew++)
{
datanew.Append(string.Format("{0}{1} ({2})", sep, newname, postfix + cnew));
Expand All @@ -819,130 +819,145 @@ public static void ColumnSplit(CsvDefinition csvdef, int ColumnIndex, int SplitC
datanew.Append("\n");
}

// list for building new columns
List<string> newcols = new List<string>();

// read all lines
while (!strdata.EndOfStream)
{

// clear temp list
newcols.Clear();

// get values from line
List<string> values = csvdef.ParseNextLine(strdata);

linenr++;

// reformat data line to new line
for (int c = 0; c < values.Count; c++)
for (int col = 0; col < values.Count; col++)
{
// next value
string val = values[c];

// if value contains separator character then put value in quotes
if (val.IndexOf(sep) >= 0) val = string.Format("\"{0}\"", val);
string val = values[col];

// add column to output, except when remove original column
if ((c != ColumnIndex) || (bRemove == false))
if ( (col != ColumnIndex) || (bRemove == false) )
{
datanew.Append(val);
datanew.Append(sep);
newcols.Add(val);
}

// add new split columns values
if (c == ColumnIndex)
if (col == ColumnIndex)
{
string val0 = values[c]; // original value without quotes
string val1 = val0;
string val2 = "";

// how to split value
if (SplitCode == 1)
if (SplitCode == 5)
{
// valid/invalid
var str = csvvalid.EvaluateDataValue(val, csvdef.Fields[ColumnIndex], ColumnIndex);
if (str != "")
// decode multiple values, example val = "1;2;3"
decode2.Clear();
decode2 = val.Split(Parameter2[0]).Select(item => item.Trim()).ToList();

// split value into into columns
foreach (var dec in decodelist)
{
val1 = "";
val2 = val0; // invalid value
// check if separated value in list of decode values
var decidx = decode2.IndexOf(dec);

// add value or empty if not found
newcols.Add((decidx >= 0 ? dec : ""));

// remove from original values list
if (decidx >= 0) decode2.RemoveAt(decidx);
}
}
else if (SplitCode == 2)
{
// split on char
int pos = val0.IndexOf(Parameter1);
if (pos >= 0)

// put any left-over values in the extra column
var remain = "";
foreach (var dec in decode2)
{
val1 = val0.Substring(0, pos);
val2 = val0.Substring(pos + Parameter1.Length, val0.Length - pos - Parameter1.Length);
remain += dec + Parameter2[0];
}
if (remain.Length > 0) remain = remain.Remove(remain.Length - 1); // remove last separator"; "
// add left-over values in last column
newcols.Add(remain);
}
else if (SplitCode == 3)
else
{
// split on position
int pos = val0.IndexOf(Parameter1);
if ((IntPar > 0) && (IntPar < val0.Length))
{
// positive, left string
val1 = val0.Substring(0, IntPar);
val2 = val0.Substring(IntPar, val0.Length - IntPar);
}
else if (IntPar < 0)
// split column
string val1 = val;
string val2 = "";

if (SplitCode == 1)
{
// negative, right string
if (IntPar2 < val0.Length)
{
val1 = val0.Substring(0, val0.Length - IntPar2);
val2 = val0.Substring(val0.Length - IntPar2);
}
else
// valid/invalid
var str = csvvalid.EvaluateDataValue(val, csvdef.Fields[ColumnIndex], ColumnIndex);
if (str != "")
{
// take all as right string
val1 = "";
val2 = val;
val2 = val; // invalid value
}
}
}
else if (SplitCode == 4)
{
// split when contains
int pos = val0.IndexOf(Parameter1);
if (pos >= 0)
else if (SplitCode == 2)
{
val1 = "";
val2 = val0;
// split on char
int pos = val.IndexOf(Parameter1);
if (pos >= 0)
{
val1 = val.Substring(0, pos);
val2 = val.Substring(pos + Parameter1.Length, val.Length - pos - Parameter1.Length);
}
}
}
else if (SplitCode == 5)
{
// decode multiple values, example val0 = "1;2;3"
decode2.Clear();
decode2 = val0.Split(Parameter2[0]).Select(item => item.Trim()).ToList();

// split value into into columns
val1 = "";
foreach (var dec in decode1)
else if (SplitCode == 3)
{
// check if separated value in list of decode values
var decidx = decode2.IndexOf(dec);

// separate column
val1 += (decidx >= 0 ? dec : "") + sep;

// remove from original value
if (decidx >= 0) decode2.RemoveAt(decidx);
// split on position
int pos = val.IndexOf(Parameter1);
if ((IntPar > 0) && (IntPar < val.Length))
{
// positive, left string
val1 = val.Substring(0, IntPar);
val2 = val.Substring(IntPar, val.Length - IntPar);
}
else if (IntPar < 0)
{
// negative, right string
if (IntPar2 < val.Length)
{
val1 = val.Substring(0, val.Length - IntPar2);
val2 = val.Substring(val.Length - IntPar2);
}
else
{
// take all as right string
val1 = "";
val2 = val;
}
}
}
val1 = val1.Remove(val1.Length - 1); // remove last separator

// put any left-over values in the extra column
val2 = "";
foreach (var dec in decode2)
else if (SplitCode == 4)
{
val2 += dec + Parameter2[0];
// split when contains
int pos = val.IndexOf(Parameter1);
if (pos >= 0)
{
val1 = "";
val2 = val;
}
}
if (val2.Length > 0) val2 = val2.Remove(val2.Length - 1); // remove last separator"; "
// add split column values
newcols.Add(val1);
newcols.Add(val2);
}

// add split column values
datanew.Append(val1 + sep);
datanew.Append(val2 + sep);
}
};

// if value contains separator character then put value in quotes
// reformat data line to new line
for (int c = 0; c < newcols.Count; c++)
{
// next value
string val = newcols[c];
if (val.IndexOf('"') >= 0) val = val.Replace("\"", "\"\"");
if (val.IndexOf(sep) >= 0) val = string.Format("\"{0}\"", val);
datanew.Append(val + sep);
}

// remove last separator
datanew.Length -= 1;

Expand Down
2 changes: 1 addition & 1 deletion docs/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ but rather it's a quality control tool to examine, verify or polish up a
dataset before further processing.

First install and open Notepad++, then go to the menu item `Plugins > Plugins Admin...`,
search for "csv lint", check the checkbox and press Intall. This will add
search for "csv lint", check the checkbox and press Install. This will add
CSV Lint under the `Plugins > CSV Lint` menu item and a CSV Lint icon in the
toolbar.

Expand Down
11 changes: 7 additions & 4 deletions readme.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
CSV Lint - Notepad++ plugin
===========================
![Release version](https://img.shields.io/github/v/release/BdR76/CSVLint) ![GitHub all releases](https://img.shields.io/github/downloads/BdR76/CSVLint/total) ![GitHub latest release](https://img.shields.io/github/downloads/BdR76/CSVLint/latest/total)
CSV Lint is a plug-in for [Notepad++](http://notepad-plus-plus.org/) for metadata discovery, technical data
validation and reformatting on tabular data files.

Use CSV Lint to quickly detect any technical errors in csv data or fix datetime and decimal formatting.
CSV Lint is a plug-in for [Notepad++](http://notepad-plus-plus.org/) which
adds syntax highlighting to csv and fixed width data files to make them more
readable. It can also detect technical data errors and fix datetime and
decimal formatting errors.

Use CSV Lint for metadata discovery, technical data validation and reformatting on tabular data files.
It is _not_ meant to be a replacement for spreadsheet programs like Excel or SPSS,
but rather it's a quality control tool to examine, verify or polish up a dataset before further processing.

Expand Down Expand Up @@ -38,7 +41,7 @@ The CSV Lint plugin is available in the Plugins Admin in Notepad++ v8.1.9.1 or n
* Install [Notepad++](https://notepad-plus-plus.org/) v8.1.9.1 or newer
* In Notepad++ go to menu item `Plugins > Plugins Admin...`
* On tab `Available` search for `csv lint`
* Check the checkbox and press `Intall` button
* Check the checkbox and press `Install` button
* Click `Yes` to quit Notepad++ and "continue the operations"
* Click `Yes` on the Windows notification "Allow app to make changes"

Expand Down

0 comments on commit 5ddb5ce

Please sign in to comment.