From 5c5bb46068f58b8285fff2bb04a0091db58d2e39 Mon Sep 17 00:00:00 2001
From: Pietro Battiston <me@pietrobattiston.it>
Date: Mon, 28 Aug 2017 23:55:45 +0200
Subject: [PATCH] BUG: make order of index from pd.concat deterministic

closes #17344
---
 doc/source/whatsnew/v0.21.0.txt     |  1 +
 pandas/core/common.py               | 14 ++++++++++++++
 pandas/core/indexes/api.py          |  9 ++-------
 pandas/tests/reshape/test_concat.py | 13 ++++++++++++-
 4 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index fcadd26156b1d4..4fbe289f25acf1 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -405,6 +405,7 @@ Reshaping
 - Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`)
 - :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`).
 - Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`)
+- Bug in :func:`concat` where the order of the resulting index along the common dimension was non-deterministic (:issue:`17344`)
 
 Numeric
 ^^^^^^^
diff --git a/pandas/core/common.py b/pandas/core/common.py
index 44cb36b8a32076..515a4010961205 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -629,3 +629,17 @@ def _random_state(state=None):
     else:
         raise ValueError("random_state must be an integer, a numpy "
                          "RandomState, or None")
+
+
+def _get_distinct_objs(objs):
+    """
+    Return a list of the distinct objects in ``objs``, compared by identity
+    (``id``) rather than equality; first-occurrence order is preserved.
+    """
+    ids = set()
+    res = []
+    for obj in objs:
+        if id(obj) not in ids:
+            ids.add(id(obj))
+            res.append(obj)
+    return res
diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index db73a6878258ad..323d50166e7b6f 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -23,8 +23,7 @@
            'PeriodIndex', 'DatetimeIndex',
            '_new_Index', 'NaT',
            '_ensure_index', '_get_na_value', '_get_combined_index',
-           '_get_objs_combined_axis',
-           '_get_distinct_indexes', '_union_indexes',
+           '_get_objs_combined_axis', '_union_indexes',
            '_get_consensus_names',
            '_all_indexes_same']
 
@@ -41,7 +40,7 @@ def _get_objs_combined_axis(objs, intersect=False, axis=0):
 
 def _get_combined_index(indexes, intersect=False):
     # TODO: handle index names!
-    indexes = _get_distinct_indexes(indexes)
+    indexes = com._get_distinct_objs(indexes)
     if len(indexes) == 0:
         return Index([])
     if len(indexes) == 1:
@@ -55,10 +54,6 @@ def _get_combined_index(indexes, intersect=False):
     return _ensure_index(union)
 
 
-def _get_distinct_indexes(indexes):
-    return list(dict((id(x), x) for x in indexes).values())
-
-
 def _union_indexes(indexes):
     if len(indexes) == 0:
         raise AssertionError('Must have at least 1 Index to union')
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 52cd18126859a1..6e646f9b294429 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -5,7 +5,7 @@
 from numpy.random import randn
 
 from datetime import datetime
-from pandas.compat import StringIO, iteritems
+from pandas.compat import StringIO, iteritems, PY2
 import pandas as pd
 from pandas import (DataFrame, concat,
                     read_csv, isna, Series, date_range,
@@ -1944,6 +1944,17 @@ def test_concat_categoricalindex(self):
                            index=exp_idx)
         tm.assert_frame_equal(result, exp)
 
+    def test_concat_order(self):
+        # GH 17344: the order of the combined columns must be deterministic
+        dfs = [pd.DataFrame(index=range(3), columns=['a', 1, None])]
+        dfs += [pd.DataFrame(index=range(3), columns=[None, 1, 'a'])
+                for i in range(100)]
+        result = pd.concat(dfs).columns
+        expected = dfs[0].columns
+        if PY2:
+            expected = expected.sort_values()
+        tm.assert_index_equal(result, expected)
+
 
 @pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel])
 @pytest.mark.parametrize('dt', np.sctypes['float'])