diff --git a/Orange/data/io.py b/Orange/data/io.py index 958b836bb52..77b4ea89e46 100644 --- a/Orange/data/io.py +++ b/Orange/data/io.py @@ -822,20 +822,23 @@ def read(self): try: dialect = csv.Sniffer().sniff( # Take first couple of *complete* lines as sample - ''.join(file.readline() for _ in range(5)), + ''.join(file.readline() for _ in range(10)), self.DELIMITERS) + delimiter = dialect.delimiter + quotechar = dialect.quotechar except UnicodeDecodeError as e: error = e continue except csv.Error: - dialect = csv.excel() - dialect.delimiter = self.DELIMITERS[0] + delimiter = self.DELIMITERS[0] + quotechar = csv.excel.quotechar file.seek(0) - dialect.skipinitialspace = True - try: - reader = csv.reader(file, dialect=dialect) + reader = csv.reader( + file, delimiter=delimiter, quotechar=quotechar, + skipinitialspace=True, + ) data = self.data_table(reader) # TODO: Name can be set unconditionally when/if diff --git a/Orange/tests/test_tab_reader.py b/Orange/tests/test_tab_reader.py index 93fffcca36c..1e842f0ff7e 100644 --- a/Orange/tests/test_tab_reader.py +++ b/Orange/tests/test_tab_reader.py @@ -50,6 +50,27 @@ def test_read_easy(self): np.testing.assert_almost_equal(table.X, np.array([[0, 0], [np.nan, 1], [1, 0]])) np.testing.assert_almost_equal(table.Y, np.array([[1, 1], [2, 0], [0, np.nan]])) + def test_read_save_quoted(self): + quoted = '''\ + S\tA + s\td + m\t + """a"""\ti + """b"""\tj + """c\td"""\tk + ''' + expected = ['"a"', '"b"', '"c\td"'] + f = io.StringIO(quoted) + table = read_tab_file(f) + self.assertSequenceEqual(table.metas[:, 0].tolist(), expected) + + f = io.StringIO() + f.close = lambda: None + TabReader.write_file(f, table) + saved = f.getvalue() + table1 = read_tab_file(io.StringIO(saved)) + self.assertSequenceEqual(table1.metas[:, 0].tolist(), expected) + def test_read_and_save_attributes(self): samplefile = """\ Feature 1\tFeature 2\tClass 1\tClass 42