Skip to content

Commit

Permalink
Merge pull request #2989 from ales-erjavec/fixes/io-quoted
Browse files (browse the repository at this point in the history)
[FIX] Fix reading double quoted text fields
  • Loading branch information
kernc authored Apr 6, 2018
2 parents d2cd142 + 0448496 commit d1b60f2
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 6 deletions.
15 changes: 9 additions & 6 deletions Orange/data/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -822,20 +822,23 @@ def read(self):
try:
dialect = csv.Sniffer().sniff(
# Take first couple of *complete* lines as sample
''.join(file.readline() for _ in range(5)),
''.join(file.readline() for _ in range(10)),
self.DELIMITERS)
delimiter = dialect.delimiter
quotechar = dialect.quotechar
except UnicodeDecodeError as e:
error = e
continue
except csv.Error:
dialect = csv.excel()
dialect.delimiter = self.DELIMITERS[0]
delimiter = self.DELIMITERS[0]
quotechar = csv.excel.quotechar

file.seek(0)
dialect.skipinitialspace = True

try:
reader = csv.reader(file, dialect=dialect)
reader = csv.reader(
file, delimiter=delimiter, quotechar=quotechar,
skipinitialspace=True,
)
data = self.data_table(reader)

# TODO: Name can be set unconditionally when/if
Expand Down
21 changes: 21 additions & 0 deletions Orange/tests/test_tab_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,27 @@ def test_read_easy(self):
np.testing.assert_almost_equal(table.X, np.array([[0, 0], [np.nan, 1], [1, 0]]))
np.testing.assert_almost_equal(table.Y, np.array([[1, 1], [2, 0], [0, np.nan]]))

def test_read_save_quoted(self):
# Check that double-quoted text fields are read correctly and that they
# survive a write / re-read round trip unchanged.
#
# The sample has three header rows (presumably the names, types and flags
# rows of the tab format -- confirm against the reader) followed by three
# data rows. In column S every value is wrapped in double quotes with the
# inner quotes doubled, and the last value additionally contains a tab,
# i.e. the column delimiter, inside the quoted field.
quoted = '''\
S\tA
s\td
m\t
"""a"""\ti
"""b"""\tj
"""c\td"""\tk
'''
# After unquoting, each value keeps one literal pair of double quotes; the
# embedded tab must remain part of the value rather than split the field.
expected = ['"a"', '"b"', '"c\td"']
f = io.StringIO(quoted)
table = read_tab_file(f)
# The values of column S are checked in the first meta column.
self.assertSequenceEqual(table.metas[:, 0].tolist(), expected)

# Second half: write the table back out and read the written text again.
f = io.StringIO()
# Turn close() into a no-op so getvalue() can still be called afterwards
# (presumably write_file closes the stream it is given -- confirm).
f.close = lambda: None
TabReader.write_file(f, table)
saved = f.getvalue()
table1 = read_tab_file(io.StringIO(saved))
# The re-read values must equal the originally expected ones.
self.assertSequenceEqual(table1.metas[:, 0].tolist(), expected)

def test_read_and_save_attributes(self):
samplefile = """\
Feature 1\tFeature 2\tClass 1\tClass 42
Expand Down

0 comments on commit d1b60f2

Please sign in to comment.