Skip to content

Commit

Permalink
gracefully handle dupe column names #68
Browse files: browse the repository at this point in the history
  • Loading branch information
mukunku committed Jan 29, 2023
1 parent ac12cba commit f4d75e7
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 44 deletions.
84 changes: 44 additions & 40 deletions src/ParquetFileViewer/FieldSelectionDialog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ private void RenderFieldsCheckboxes(IEnumerable<Field> availableFields, IEnumera
int locationX = 0;
int locationY = 5;
bool isFirst = true;
HashSet<string> fieldNames = new HashSet<string>();
bool isClearingSelectAllCheckbox = false;

var checkboxControls = new List<CheckBox>();
foreach (Field field in availableFields)
{
if (isFirst) //Add toggle all checkbox and some other setting changes
Expand Down Expand Up @@ -97,7 +97,6 @@ private void RenderFieldsCheckboxes(IEnumerable<Field> availableFields, IEnumera
if (checkbox.Enabled)
{
checkbox.Checked = selectAllCheckBox.Checked;
//this.PreSelectedFields.Remove((string)checkbox.Tag);
}
}
}
Expand All @@ -108,57 +107,62 @@ private void RenderFieldsCheckboxes(IEnumerable<Field> availableFields, IEnumera
locationY += DynamicFieldCheckboxYIncrement;
}

if (!fieldNames.Contains(field.Name.ToLowerInvariant())) //Normally two fields with the same name shouldn't exist but lets make sure
bool isUnsupportedFieldType = UnsupportedSchemaTypes.Contains(field.SchemaType);
var fieldCheckbox = new CheckBox()
{
bool isUnsupportedFieldType = UnsupportedSchemaTypes.Contains(field.SchemaType);
var fieldCheckbox = new CheckBox()
Name = string.Concat("checkbox_", field.Name),
Text = string.Concat(field.Name, isUnsupportedFieldType ? "(Unsupported)" : string.Empty),
Tag = field.Name,
Checked = preSelectedFields.Contains(field.Name),
Location = new Point(locationX, locationY),
AutoSize = true,
Enabled = !isUnsupportedFieldType
};
fieldCheckbox.CheckedChanged += (object checkboxSender, EventArgs checkboxEventArgs) =>
{
var fieldCheckBox = (CheckBox)checkboxSender;

if (fieldCheckBox.Checked)
{
Name = string.Concat("checkbox_", field.Name),
Text = string.Concat(field.Name, isUnsupportedFieldType ? "(Unsupported)" : string.Empty),
Tag = field.Name,
Checked = preSelectedFields.Contains(field.Name),
Location = new Point(locationX, locationY),
AutoSize = true,
Enabled = !isUnsupportedFieldType
};
fieldCheckbox.CheckedChanged += (object checkboxSender, EventArgs checkboxEventArgs) =>
this.PreSelectedFields.Add((string)fieldCheckBox.Tag);
}
else
{
var fieldCheckBox = (CheckBox)checkboxSender;

if (fieldCheckBox.Checked)
{
this.PreSelectedFields.Add((string)fieldCheckBox.Tag);
}
else
{
this.PreSelectedFields.Remove((string)fieldCheckBox.Tag);
}
this.PreSelectedFields.Remove((string)fieldCheckBox.Tag);
}


if (!fieldCheckBox.Checked)
if (!fieldCheckBox.Checked)
{
foreach (Control control in this.fieldsPanel.Controls)
{
foreach (Control control in this.fieldsPanel.Controls)
if (control.Tag.Equals(SelectAllCheckboxName) && control is CheckBox checkbox)
{
if (control.Tag.Equals(SelectAllCheckboxName) && control is CheckBox checkbox)
if (checkbox.Enabled && checkbox.Checked)
{
if (checkbox.Enabled && checkbox.Checked)
{
isClearingSelectAllCheckbox = true;
checkbox.Checked = false;
this.PreSelectedFields.Remove((string)fieldCheckBox.Tag);
isClearingSelectAllCheckbox = false;
break;
}
isClearingSelectAllCheckbox = true;
checkbox.Checked = false;
this.PreSelectedFields.Remove((string)fieldCheckBox.Tag);
isClearingSelectAllCheckbox = false;
break;
}
}
}
};
this.fieldsPanel.Controls.Add(fieldCheckbox);
}
};
checkboxControls.Add(fieldCheckbox);

locationY += DynamicFieldCheckboxYIncrement;
fieldNames.Add(field.Name.ToLowerInvariant());
}
locationY += DynamicFieldCheckboxYIncrement;
}

//Disable fields whose names differ only by letter case, because case-sensitive field names are not supported right now
var duplicateFields = checkboxControls?.GroupBy(f => f.Text.ToUpperInvariant()).Where(g => g.Count() > 1).SelectMany(g => g).ToList();
foreach(var duplicateField in duplicateFields)
{
duplicateField.Enabled = false;
}

this.fieldsPanel.Controls.AddRange(checkboxControls.ToArray<Control>());
}
}
catch (Exception ex)
Expand Down
12 changes: 10 additions & 2 deletions src/ParquetFileViewer/Helpers/UtilityMethods.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace ParquetFileViewer.Helpers
{
Expand All @@ -13,7 +14,7 @@ public static class UtilityMethods
public static async Task<DataTable> ParquetReaderToDataTable(ParquetReader parquetReader, List<string> selectedFields, int offset, int recordCount, CancellationToken cancellationToken)
{
//Get list of data fields and construct the DataTable
DataTable dataTable = new DataTable();
var dataTable = new DataTable();
var fields = new List<(Parquet.Thrift.SchemaElement, Parquet.Schema.DataField)>();
var dataFields = parquetReader.Schema.GetDataFields();
foreach (string selectedField in selectedFields)
Expand All @@ -25,7 +26,14 @@ public static async Task<DataTable> ParquetReaderToDataTable(ParquetReader parqu

fields.Add((thriftSchema, dataField));
DataColumn newColumn = new DataColumn(dataField.Name, ParquetNetTypeToCSharpType(thriftSchema, dataField.DataType));
dataTable.Columns.Add(newColumn);

//We don't support case sensitive field names unfortunately
if (dataTable.Columns.Contains(newColumn.ColumnName))
{
throw new NotSupportedException("Duplicate column detected. Column names are case insensitive and must be unique.");
}

dataTable.Columns.Add(newColumn);
}
else
throw new Exception(string.Format("Field '{0}' does not exist", selectedField));
Expand Down
16 changes: 14 additions & 2 deletions src/ParquetFileViewer/MainForm.cs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,19 @@ private List<string> SelectedFields
set
{
this.selectedFields = value;
if (value != null && value.Count > 0)

//Check for duplicate fields (We don't support case sensitive field names unfortunately)
var duplicateFields = this.selectedFields?.GroupBy(f => f.ToUpperInvariant()).Where(g => g.Count() > 1).SelectMany(g => g).ToList();
if (duplicateFields?.Count() > 0)
{
this.selectedFields = this.selectedFields.Where(f => !duplicateFields.Any(df => df.Equals(f, StringComparison.InvariantCultureIgnoreCase))).ToList();

MessageBox.Show($"The following duplicate fields could not be loaded: {string.Join(',', duplicateFields)}. " +
$"\r\n\r\nCase sensitive field names are not currently supported.", "Duplicate fields detected",
MessageBoxButtons.OK, MessageBoxIcon.Warning);
}

if (value?.Count > 0)
{
LoadFileToGridview();
}
Expand Down Expand Up @@ -546,7 +558,7 @@ private async void LoadFileToGridview()
{
int i = 0;
var fieldGroups = new List<(int, List<string>)>();
foreach (List<string> fields in UtilityMethods.Split(this.SelectedFields, (int)(this.selectedFields.Count / Environment.ProcessorCount)))
foreach (List<string> fields in UtilityMethods.Split(this.SelectedFields, (int)(this.SelectedFields.Count / Environment.ProcessorCount)))
{
fieldGroups.Add((i++, fields));
}
Expand Down

0 comments on commit f4d75e7

Please sign in to comment.