Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minor change to csv reading #146

Closed
wants to merge 3 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 21 additions & 23 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,17 @@
from pandas.core.index import Index
from pandas.core.frame import DataFrame

def read_csv(filepath_or_buffer, header=0, skiprows=None, index_col=0,
def read_csv(filepath_or_buffer, sep=None, header=0, skiprows=None, index_col=0,
na_values=None, date_parser=None, names=None):
"""
Read CSV file into DataFrame

Parameters
----------
filepath_or_buffer : string or file handle / StringIO
sep : string, default None
Delimiter to use. If None, the delimiter is detected automatically
by sniffing the first line of the input
header : int, default 0
Row to use for the column labels of the parsed DataFrame
skiprows : list-like
Expand Down Expand Up @@ -50,7 +53,20 @@ def read_csv(filepath_or_buffer, header=0, skiprows=None, index_col=0,
except Exception: # pragma: no cover
f = open(filepath_or_buffer, 'r')

reader = csv.reader(f, dialect='excel')
sniff_sep = True
# default dialect
dia = csv.excel
if sep is not None:
sniff_sep = False
dia.delimiter = sep
# attempt to sniff the delimiter
if sniff_sep:
sample = f.readline()
sniffed = csv.Sniffer().sniff(sample)
dia.delimiter = sniffed.delimiter
f.seek(0)

reader = csv.reader(f, dialect=dia)

if skiprows is not None:
skiprows = set(skiprows)
Expand All @@ -63,8 +79,7 @@ def read_csv(filepath_or_buffer, header=0, skiprows=None, index_col=0,
date_parser=date_parser)

def read_table(filepath_or_buffer, sep='\t', header=0, skiprows=None,
index_col=0, na_values=None, names=None,
date_parser=None):
index_col=0, na_values=None, date_parser=None, names=None):
"""
Read delimited file into DataFrame

Expand Down Expand Up @@ -92,25 +107,8 @@ def read_table(filepath_or_buffer, sep='\t', header=0, skiprows=None,
-------
parsed : DataFrame
"""
if hasattr(filepath_or_buffer, 'read'):
reader = filepath_or_buffer
else:
try:
# universal newline mode
reader = open(filepath_or_buffer, 'U')
except Exception: # pragma: no cover
reader = open(filepath_or_buffer, 'r')

if skiprows is not None:
skiprows = set(skiprows)
lines = [l for i, l in enumerate(reader) if i not in skiprows]
else:
lines = [l for l in reader]

lines = [re.split(sep, l.rstrip()) for l in lines]
return _simple_parser(lines, header=header, indexCol=index_col,
colNames=names, na_values=na_values,
date_parser=date_parser)
return read_csv(filepath_or_buffer, sep, header, skiprows,
index_col, na_values, date_parser, names)

def _simple_parser(lines, colNames=None, header=0, indexCol=0,
na_values=None, date_parser=None, parse_dates=True):
Expand Down