Skip to content

Commit

Permalink
Merge pull request #5438 from ales-erjavec/csvimport-indicate-encoding-errors
Browse files Browse the repository at this point in the history

[ENH] textimport: Mark encoding errors in the preview
  • Loading branch information
ajdapretnar authored May 14, 2021
2 parents 3773093 + 49d3da6 commit 5b4a89e
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 33 deletions.
39 changes: 20 additions & 19 deletions Orange/widgets/data/owcsvimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,21 +579,32 @@ def selectedFileFormat(self) -> FileFormat:

# Choose default CSV import settings for the file at `path`, using
# `mime_type` as a hint and the sniffer's result where it succeeds.
#
# NOTE(review): this listing is a rendered diff whose +/- markers and
# indentation were stripped, so superseded and replacement lines appear back
# to back (two return annotations, two default assignments, two sniffing
# steps). Presumably the variant returning `Options` is the current one --
# TODO confirm against the repository before relying on either reading.
def default_options_for_mime_type(
path: str, mime_type: str
) -> Tuple[csv.Dialect, bool]:
) -> Options:
# Known MIME types map to a (dialect, header) pair.
defaults = {
"text/csv": (csv.excel(), True),
"text/tab-separated-values": (csv.excel_tab(), True)
}
# Fallback values, kept if sniffing never succeeds.
dialect, header = csv.excel(), True
dialect, header, encoding = csv.excel(), True, "utf-8"
delimiters = None
# Encodings attempted, in this order, by the loop further down.
try_encodings = ["utf-8", "utf-16", "iso8859-1"]
# For a recognised MIME type, start from its dialect and hand only that
# dialect's delimiter to the sniffer.
if mime_type in defaults:
dialect, header = defaults[mime_type]
delimiters = [dialect.delimiter]
# NOTE(review): this single-shot sniff with its early return and the
# per-encoding loop below look like the superseded and the replacement form
# of the same step -- TODO confirm.
try:
dialect, header = sniff_csv_with_path(path, delimiters=delimiters)
except (OSError, UnicodeDecodeError, csv.Error):
pass
return dialect, header

# Try each candidate encoding and stop at the first one that sniffs without
# raising; a failed attempt leaves the previous dialect/header/encoding
# values untouched.
for encoding_ in try_encodings:
try:
dialect, header = sniff_csv_with_path(
path, encoding=encoding_, delimiters=delimiters)
encoding = encoding_
except (OSError, UnicodeError, csv.Error):
pass
else:
break
# When a header is reported, mark row 0 as RowSpec.Header; otherwise no rows
# get a special role.
if header:
rowspec = [(range(0, 1), RowSpec.Header)]
else:
rowspec = []
return Options(dialect=dialect, encoding=encoding, rowspec=rowspec)


class OWCSVFileImport(widget.OWWidget):
Expand Down Expand Up @@ -910,11 +921,10 @@ def browse(self, prefixname=None, directory=None):
mb = self._might_be_binary_mb(path)
if mb.exec() == QMessageBox.Cancel:
return
# initialize dialect based on selected format
dialect, header = default_options_for_mime_type(
# initialize options based on selected format
options = default_options_for_mime_type(
path, selected_filter.mime_type,
)
options = None
# Search for path in history.
# If found use the stored params to initialize the import dialog
items = self.itemsFromSettings()
Expand All @@ -923,15 +933,6 @@ def browse(self, prefixname=None, directory=None):
_, options_ = items[idx]
if options_ is not None:
options = options_

if options is None:
if not header:
rowspec = []
else:
rowspec = [(range(0, 1), RowSpec.Header)]
options = Options(
encoding="utf-8", dialect=dialect, rowspec=rowspec)

dlg = CSVImportDialog(
self, windowTitle="Import Options", sizeGripEnabled=True)
dlg.setWindowModality(Qt.WindowModal)
Expand Down
36 changes: 22 additions & 14 deletions Orange/widgets/utils/textimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -991,9 +991,11 @@ def __resetPreview(self):
base = CachedBytesIOWrapper(self.__sample, self.__buffer)

wrapper = io.TextIOWrapper(
base, encoding=self.encoding(), errors="replace"
base, encoding=self.encoding(),
# use surrogate escape to validate/detect encoding errors in
# delegates
errors="surrogateescape"
)

rows = csv.reader(
wrapper, dialect=self.dialect()
)
Expand Down Expand Up @@ -1372,6 +1374,11 @@ def sizeHint(self):
return sh.expandedTo(QSize(8 * hsection, 20 * vsection))


def is_surrogate_escaped(text: str) -> bool:
    """Return True if `text` contains any surrogate escape characters.

    Decoding bytes with ``errors="surrogateescape"`` represents every
    undecodable byte as a lone low surrogate in the range U+DC80..U+DCFF.
    Finding such a code point in `text` therefore indicates that the
    underlying data did not decode cleanly with the chosen encoding.

    Parameters
    ----------
    text : str
        The decoded string to inspect.

    Returns
    -------
    bool
        True if at least one character lies in U+DC80..U+DCFF (inclusive),
        False otherwise (including for the empty string).
    """
    # Single-character strings compare by code point, so this is an
    # inclusive range test on each character.
    return any("\udc80" <= c <= "\udcff" for c in text)


class PreviewItemDelegate(QStyledItemDelegate):
def initStyleOption(self, option, index):
# type: (QStyleOptionViewItem, QModelIndex) -> None
Expand All @@ -1389,6 +1396,18 @@ def initStyleOption(self, option, index):
if coltype == ColumnType.Numeric or coltype == ColumnType.Time:
option.displayAlignment = Qt.AlignRight | Qt.AlignVCenter

if not self.validate(option.text):
option.palette.setBrush(
QPalette.All, QPalette.Text, QBrush(Qt.red, Qt.SolidPattern)
)
option.palette.setBrush(
QPalette.All, QPalette.HighlightedText,
QBrush(Qt.red, Qt.SolidPattern)
)

def validate(self, value: str) -> bool: # pylint: disable=no-self-use
return not is_surrogate_escaped(value)

def helpEvent(self, event, view, option, index):
# type: (QHelpEvent, QAbstractItemView, QStyleOptionViewItem, QModelIndex) -> bool
if event.type() == QEvent.ToolTip:
Expand Down Expand Up @@ -1467,17 +1486,6 @@ def __init__(self, *args, converter=None, **kwargs):
super().__init__(*args, **kwargs)
self.converter = converter or float

def initStyleOption(self, option, index):
super().initStyleOption(option, index)
if not self.validate(option.text):
option.palette.setBrush(
QPalette.All, QPalette.Text, QBrush(Qt.red, Qt.SolidPattern)
)
option.palette.setBrush(
QPalette.All, QPalette.HighlightedText,
QBrush(Qt.red, Qt.SolidPattern)
)

def validate(self, value):
if value in {"NA", "Na", "na", "n/a", "N/A", "?", "", "."}:
return True
Expand All @@ -1486,7 +1494,7 @@ def validate(self, value):
except ValueError:
return False
else:
return True
return super().validate(value)


def number_parser(groupsep, decimalsep):
Expand Down

0 comments on commit 5b4a89e

Please sign in to comment.