diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 69d41f29506d1..43e384b01ad2c 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -1156,6 +1156,10 @@ I/O - Bug in :func:`read_csv` causing heap corruption on 32-bit, big-endian architectures (:issue:`20785`) - Bug in :func:`read_sas` where a file with 0 variables gave an ``AttributeError`` incorrectly. Now it gives an ``EmptyDataError`` (:issue:`18184`) - Bug in :func:`DataFrame.to_latex()` where pairs of braces meant to serve as invisible placeholders were escaped (:issue:`18667`) +- Bug in :func:`DataFrame.to_latex()` where a ``NaN`` in a ``MultiIndex`` would cause an ``IndexError`` or incorrect output (:issue:`14249`) +- Bug in :func:`DataFrame.to_latex()` where a non-string index-level name would result in an ``AttributeError`` (:issue:`19981`) +- Bug in :func:`DataFrame.to_latex()` where the combination of an index name and the ``index_names=False`` option would result in incorrect output (:issue:`18326`) +- Bug in :func:`DataFrame.to_latex()` where a ``MultiIndex`` with an empty string as its name would result in incorrect output (:issue:`18669`) - Bug in :func:`read_json` where large numeric values were causing an ``OverflowError`` (:issue:`18842`) - Bug in :func:`DataFrame.to_parquet` where an exception was raised if the write destination is S3 (:issue:`19134`) - :class:`Interval` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 67b0a4f0e034e..94d48e7f98286 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -64,35 +64,32 @@ def get_col_type(dtype): # reestablish the MultiIndex that has been joined by _to_str_column if self.fmt.index and isinstance(self.frame.index, MultiIndex): + out = self.frame.index.format( + adjoin=False, sparsify=self.fmt.sparsify, + names=self.fmt.has_index_names, 
na_rep=self.fmt.na_rep + ) + + # index.format will sparsify repeated entries with empty strings + # so pad these with some empty space + def pad_empties(x): + for pad in reversed(x): + if pad: + break + return [x[0]] + [i if i else ' ' * len(pad) for i in x[1:]] + out = (pad_empties(i) for i in out) + + # Add empty spaces for each column level clevels = self.frame.columns.nlevels - strcols.pop(0) - name = any(self.frame.index.names) - cname = any(self.frame.columns.names) - lastcol = self.frame.index.nlevels - 1 - previous_lev3 = None - for i, lev in enumerate(self.frame.index.levels): - lev2 = lev.format() - blank = ' ' * len(lev2[0]) - # display column names in last index-column - if cname and i == lastcol: - lev3 = [x if x else '{}' for x in self.frame.columns.names] - else: - lev3 = [blank] * clevels - if name: - lev3.append(lev.name) - current_idx_val = None - for level_idx in self.frame.index.labels[i]: - if ((previous_lev3 is None or - previous_lev3[len(lev3)].isspace()) and - lev2[level_idx] == current_idx_val): - # same index as above row and left index was the same - lev3.append(blank) - else: - # different value than above or left index different - lev3.append(lev2[level_idx]) - current_idx_val = lev2[level_idx] - strcols.insert(i, lev3) - previous_lev3 = lev3 + out = [[' ' * len(i[-1])] * clevels + i for i in out] + + # Add the column names to the last index column + cnames = self.frame.columns.names + if any(cnames): + new_names = [i if i else '{}' for i in cnames] + out[self.frame.index.nlevels - 1][:clevels] = new_names + + # Get rid of old multiindex column and add new ones + strcols = out + strcols[1:] column_format = self.column_format if column_format is None: @@ -118,7 +115,7 @@ def get_col_type(dtype): ilevels = self.frame.index.nlevels clevels = self.frame.columns.nlevels nlevels = clevels - if any(self.frame.index.names): + if self.fmt.has_index_names and self.fmt.show_index_names: nlevels += 1 strrows = list(zip(*strcols)) self.clinebuf = [] 
diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 5ebf196be094e..ddd34e0c1626d 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -621,3 +621,74 @@ def test_to_latex_multiindex_names(self, name0, name1, axes): \end{tabular} """ % tuple(list(col_names) + [idx_names_row]) assert observed == expected + + @pytest.mark.parametrize('one_row', [True, False]) + def test_to_latex_multiindex_nans(self, one_row): + # GH 14249 + df = pd.DataFrame({'a': [None, 1], 'b': [2, 3], 'c': [4, 5]}) + if one_row: + df = df.iloc[[0]] + observed = df.set_index(['a', 'b']).to_latex() + expected = r"""\begin{tabular}{llr} +\toprule + & & c \\ +a & b & \\ +\midrule +NaN & 2 & 4 \\ +""" + if not one_row: + expected += r"""1.0 & 3 & 5 \\ +""" + expected += r"""\bottomrule +\end{tabular} +""" + assert observed == expected + + def test_to_latex_non_string_index(self): + # GH 19981 + observed = pd.DataFrame([[1, 2, 3]] * 2).set_index([0, 1]).to_latex() + expected = r"""\begin{tabular}{llr} +\toprule + & & 2 \\ +0 & 1 & \\ +\midrule +1 & 2 & 3 \\ + & 2 & 3 \\ +\bottomrule +\end{tabular} +""" + assert observed == expected + + def test_to_latex_midrule_location(self): + # GH 18326 + df = pd.DataFrame({'a': [1, 2]}) + df.index.name = 'foo' + observed = df.to_latex(index_names=False) + expected = r"""\begin{tabular}{lr} +\toprule +{} & a \\ +\midrule +0 & 1 \\ +1 & 2 \\ +\bottomrule +\end{tabular} +""" + + assert observed == expected + + def test_to_latex_multiindex_empty_name(self): + # GH 18669 + mi = pd.MultiIndex.from_product([[1, 2]], names=['']) + df = pd.DataFrame(-1, index=mi, columns=range(4)) + observed = df.to_latex() + expected = r"""\begin{tabular}{lrrrr} +\toprule + & 0 & 1 & 2 & 3 \\ +{} & & & & \\ +\midrule +1 & -1 & -1 & -1 & -1 \\ +2 & -1 & -1 & -1 & -1 \\ +\bottomrule +\end{tabular} +""" + assert observed == expected