Skip to content

Commit

Permalink
BUG: fix to_latex() when using MultiIndex with NaN in (pandas-dev#20797)
Browse files Browse the repository at this point in the history
  • Loading branch information
tomneep authored and toobaz committed Apr 25, 2018
1 parent d1ace10 commit 7ec74e5
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 29 deletions.
4 changes: 4 additions & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1156,6 +1156,10 @@ I/O
- Bug in :func:`read_csv` causing heap corruption on 32-bit, big-endian architectures (:issue:`20785`)
- Bug in :func:`read_sas` where a file with 0 variables gave an ``AttributeError`` incorrectly. Now it gives an ``EmptyDataError`` (:issue:`18184`)
- Bug in :func:`DataFrame.to_latex()` where pairs of braces meant to serve as invisible placeholders were escaped (:issue:`18667`)
- Bug in :func:`DataFrame.to_latex()` where a ``NaN`` in a ``MultiIndex`` would cause an ``IndexError`` or incorrect output (:issue:`14249`)
- Bug in :func:`DataFrame.to_latex()` where a non-string index-level name would result in an ``AttributeError`` (:issue:`19981`)
- Bug in :func:`DataFrame.to_latex()` where the combination of an index name and the ``index_names=False`` option would result in incorrect output (:issue:`18326`)
- Bug in :func:`DataFrame.to_latex()` where a ``MultiIndex`` with an empty string as its name would result in incorrect output (:issue:`18669`)
- Bug in :func:`read_json` where large numeric values were causing an ``OverflowError`` (:issue:`18842`)
- Bug in :func:`DataFrame.to_parquet` where an exception was raised if the write destination is S3 (:issue:`19134`)
- :class:`Interval` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`)
Expand Down
55 changes: 26 additions & 29 deletions pandas/io/formats/latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,35 +64,32 @@ def get_col_type(dtype):

# reestablish the MultiIndex that has been joined by _to_str_column
if self.fmt.index and isinstance(self.frame.index, MultiIndex):
out = self.frame.index.format(
adjoin=False, sparsify=self.fmt.sparsify,
names=self.fmt.has_index_names, na_rep=self.fmt.na_rep
)

# index.format will sparsify repeated entries with empty strings
# so pad these with some empty space
def pad_empties(x):
    """Replace sparsified (empty) labels of one index level with blanks.

    Parameters
    ----------
    x : list of str
        Formatted labels of a single index level. Repeated entries are
        expected to have been sparsified to empty strings.

    Returns
    -------
    list of str
        A new list. The first entry is kept unchanged; every later empty
        entry is replaced by a run of spaces as wide as the last non-empty
        entry of ``x`` (width 0 when there is none). An empty input gives
        an empty list instead of raising ``IndexError``.
    """
    if not x:
        return []
    # Width of the last non-empty label; computed explicitly rather than
    # relying on the loop variable leaking out of a ``for`` statement.
    width = next((len(label) for label in reversed(x) if label), 0)
    blank = ' ' * width
    return [x[0]] + [label if label else blank for label in x[1:]]
out = (pad_empties(i) for i in out)

# Add empty spaces for each column level
clevels = self.frame.columns.nlevels
strcols.pop(0)
name = any(self.frame.index.names)
cname = any(self.frame.columns.names)
lastcol = self.frame.index.nlevels - 1
previous_lev3 = None
for i, lev in enumerate(self.frame.index.levels):
lev2 = lev.format()
blank = ' ' * len(lev2[0])
# display column names in last index-column
if cname and i == lastcol:
lev3 = [x if x else '{}' for x in self.frame.columns.names]
else:
lev3 = [blank] * clevels
if name:
lev3.append(lev.name)
current_idx_val = None
for level_idx in self.frame.index.labels[i]:
if ((previous_lev3 is None or
previous_lev3[len(lev3)].isspace()) and
lev2[level_idx] == current_idx_val):
# same index as above row and left index was the same
lev3.append(blank)
else:
# different value than above or left index different
lev3.append(lev2[level_idx])
current_idx_val = lev2[level_idx]
strcols.insert(i, lev3)
previous_lev3 = lev3
out = [[' ' * len(i[-1])] * clevels + i for i in out]

# Add the column names to the last index column
cnames = self.frame.columns.names
if any(cnames):
new_names = [i if i else '{}' for i in cnames]
out[self.frame.index.nlevels - 1][:clevels] = new_names

# Get rid of old multiindex column and add new ones
strcols = out + strcols[1:]

column_format = self.column_format
if column_format is None:
Expand All @@ -118,7 +115,7 @@ def get_col_type(dtype):
ilevels = self.frame.index.nlevels
clevels = self.frame.columns.nlevels
nlevels = clevels
if any(self.frame.index.names):
if self.fmt.has_index_names and self.fmt.show_index_names:
nlevels += 1
strrows = list(zip(*strcols))
self.clinebuf = []
Expand Down
71 changes: 71 additions & 0 deletions pandas/tests/io/formats/test_to_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,3 +621,74 @@ def test_to_latex_multiindex_names(self, name0, name1, axes):
\end{tabular}
""" % tuple(list(col_names) + [idx_names_row])
assert observed == expected

# Regression test for GH 14249: to_latex() on a frame whose MultiIndex
# contains a missing value. Parametrized so it runs once with only the row
# holding the missing index value and once with a second, regular row.
@pytest.mark.parametrize('one_row', [True, False])
def test_to_latex_multiindex_nans(self, one_row):
# GH 14249
# Column 'a' has a missing value in its first row; 'a' and 'b' are turned
# into the two index levels below, leaving 'c' as the only data column.
df = pd.DataFrame({'a': [None, 1], 'b': [2, 3], 'c': [4, 5]})
if one_row:
# keep only the first row, i.e. the one whose 'a' value is missing
df = df.iloc[[0]]
observed = df.set_index(['a', 'b']).to_latex()
# The expected table is assembled in pieces so the second body row can be
# appended only in the two-row case.
# NOTE(review): the cell padding inside these literals looks collapsed in
# this copy; the comparison is exact, so confirm against upstream.
expected = r"""\begin{tabular}{llr}
\toprule
& & c \\
a & b & \\
\midrule
NaN & 2 & 4 \\
"""
if not one_row:
expected += r"""1.0 & 3 & 5 \\
"""
expected += r"""\bottomrule
\end{tabular}
"""
assert observed == expected

# Regression test for GH 19981: to_latex() when the index-level names are
# not strings.
def test_to_latex_non_string_index(self):
# GH 19981
# Two identical rows [1, 2, 3]; columns 0 and 1 are moved into the index,
# so the index-level names passed to set_index are the integers 0 and 1.
observed = pd.DataFrame([[1, 2, 3]] * 2).set_index([0, 1]).to_latex()
# In the expected output the first cell of the second body row is blank.
# NOTE(review): the cell padding inside this literal looks collapsed in
# this copy; the comparison is exact, so confirm against upstream.
expected = r"""\begin{tabular}{llr}
\toprule
& & 2 \\
0 & 1 & \\
\midrule
1 & 2 & 3 \\
& 2 & 3 \\
\bottomrule
\end{tabular}
"""
assert observed == expected

# Regression test for GH 18326: to_latex(index_names=False) on a frame whose
# index has a name.
def test_to_latex_midrule_location(self):
# GH 18326
df = pd.DataFrame({'a': [1, 2]})
df.index.name = 'foo'
# The index is named, but index names are switched off for the output.
observed = df.to_latex(index_names=False)
# Expected: a single header row (with a `{}` placeholder over the index
# column) directly followed by \midrule; the name 'foo' does not appear.
# NOTE(review): the cell padding inside this literal looks collapsed in
# this copy; the comparison is exact, so confirm against upstream.
expected = r"""\begin{tabular}{lr}
\toprule
{} & a \\
\midrule
0 & 1 \\
1 & 2 \\
\bottomrule
\end{tabular}
"""

assert observed == expected

# Regression test for GH 18669: to_latex() on a frame indexed by a
# MultiIndex whose (only) level name is the empty string.
def test_to_latex_multiindex_empty_name(self):
# GH 18669
# Single-level MultiIndex over the values 1 and 2, named ''.
mi = pd.MultiIndex.from_product([[1, 2]], names=[''])
# Every cell is -1; the four columns are labelled 0..3.
df = pd.DataFrame(-1, index=mi, columns=range(4))
observed = df.to_latex()
# Expected: the row below the column header starts with a `{}` placeholder.
# NOTE(review): the cell padding inside this literal looks collapsed in
# this copy; the comparison is exact, so confirm against upstream.
expected = r"""\begin{tabular}{lrrrr}
\toprule
& 0 & 1 & 2 & 3 \\
{} & & & & \\
\midrule
1 & -1 & -1 & -1 & -1 \\
2 & -1 & -1 & -1 & -1 \\
\bottomrule
\end{tabular}
"""
assert observed == expected

0 comments on commit 7ec74e5

Please sign in to comment.