Skip to content

Commit

Permalink
Merge pull request #3649 from hoechenberger/master
Browse files Browse the repository at this point in the history
ENH: Allow for custom variable/value column names when melt()'ing
  • Loading branch information
jreback committed May 21, 2013
2 parents 79cda50 + f36d7a8 commit 4c12848
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 17 deletions.
5 changes: 4 additions & 1 deletion doc/source/reshaping.rst
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,9 @@ Reshaping by Melt
The ``melt`` function found in ``pandas.core.reshape`` is useful to massage a
DataFrame into a format where one or more columns are identifier variables,
while all other columns, considered measured variables, are "pivoted" to the
row axis, leaving just two non-identifier columns, "variable" and "value".
row axis, leaving just two non-identifier columns, "variable" and "value". The
names of those columns can be customized by supplying the ``var_name`` and
``value_name`` parameters.

For instance,

Expand All @@ -212,6 +214,7 @@ For instance,
'weight' : [130, 150]})
cheese
melt(cheese, id_vars=['first', 'last'])
melt(cheese, id_vars=['first', 'last'], var_name='quantity')
Combining with stats and GroupBy
--------------------------------
Expand Down
3 changes: 3 additions & 0 deletions doc/source/v0.11.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@ Enhancements
import os
os.remove('mi.csv')

- ``pd.melt()`` now accepts the optional parameters ``var_name`` and ``value_name``
to specify custom names for the "variable" and "value" columns of the returned DataFrame.

Bug Fixes
~~~~~~~~~

Expand Down
30 changes: 20 additions & 10 deletions pandas/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,16 +600,19 @@ def _stack_multi_columns(frame, level=-1, dropna=True):
return result


def melt(frame, id_vars=None, value_vars=None):
def melt(frame, id_vars=None, value_vars=None,
var_name='variable', value_name='value'):
"""
"Unpivots" a DataFrame from wide format to long format, optionally leaving
id variables set
Parameters
----------
frame : DataFrame
id_vars :
value_vars :
id_vars : tuple, list, or ndarray
value_vars : tuple, list, or ndarray
var_name : scalar
value_name : scalar
Examples
--------
Expand All @@ -621,9 +624,16 @@ def melt(frame, id_vars=None, value_vars=None):
>>> melt(df, id_vars=['A'], value_vars=['B'])
A variable value
a B 1
b B 3
c B 5
a B 1
b B 3
c B 5
>>> melt(df, id_vars=['A'], value_vars=['B'],
... var_name='myVarname', value_name='myValname')
A myVarname myValname
a B 1
b B 3
c B 5
"""
# TODO: what about the existing index?
if id_vars is not None:
Expand All @@ -648,11 +658,11 @@ def melt(frame, id_vars=None, value_vars=None):
for col in id_vars:
mdata[col] = np.tile(frame.pop(col).values, K)

mcolumns = id_vars + ['variable', 'value']
mcolumns = id_vars + [var_name, value_name]

mdata['value'] = frame.values.ravel('F')

mdata['variable'] = np.asarray(frame.columns).repeat(N)
mdata[value_name] = frame.values.ravel('F')
mdata[var_name] = np.asarray(frame.columns).repeat(N)

return DataFrame(mdata, columns=mcolumns)


Expand Down
71 changes: 65 additions & 6 deletions pandas/tests/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,73 @@ def test_melt():
df['id1'] = (df['A'] > 0).astype(int)
df['id2'] = (df['B'] > 0).astype(int)

molten1 = melt(df)
molten2 = melt(df, id_vars=['id1'])
molten3 = melt(df, id_vars=['id1', 'id2'])
molten4 = melt(df, id_vars=['id1', 'id2'],
var_name = 'var'
value_name = 'val'

# Default column names
result = melt(df)
result1 = melt(df, id_vars=['id1'])
result2 = melt(df, id_vars=['id1', 'id2'])
result3 = melt(df, id_vars=['id1', 'id2'],
value_vars='A')
molten5 = melt(df, id_vars=['id1', 'id2'],
result4 = melt(df, id_vars=['id1', 'id2'],
value_vars=['A', 'B'])


expected4 = DataFrame({'id1': df['id1'].tolist() * 2,
'id2': df['id2'].tolist() * 2,
'variable': ['A']*10 + ['B']*10,
'value': df['A'].tolist() + df['B'].tolist()},
columns=['id1', 'id2', 'variable', 'value'])
tm.assert_frame_equal(result4, expected4)

# Supply custom name for the 'variable' column
result5 = melt(df, var_name=var_name)
result6 = melt(df, id_vars=['id1'], var_name=var_name)
result7 = melt(df, id_vars=['id1', 'id2'], var_name=var_name)
result8 = melt(df, id_vars=['id1', 'id2'],
value_vars='A', var_name=var_name)
result9 = melt(df, id_vars=['id1', 'id2'],
value_vars=['A', 'B'], var_name=var_name)

expected9 = DataFrame({'id1': df['id1'].tolist() * 2,
'id2': df['id2'].tolist() * 2,
var_name: ['A']*10 + ['B']*10,
'value': df['A'].tolist() + df['B'].tolist()},
columns=['id1', 'id2', var_name, 'value'])
tm.assert_frame_equal(result9, expected9)

# Supply custom name for the 'value' column
result10 = melt(df, value_name=value_name)
result11 = melt(df, id_vars=['id1'], value_name=value_name)
result12 = melt(df, id_vars=['id1', 'id2'], value_name=value_name)
result13 = melt(df, id_vars=['id1', 'id2'],
value_vars='A', value_name=value_name)
result14 = melt(df, id_vars=['id1', 'id2'],
value_vars=['A', 'B'], value_name=value_name)

expected14 = DataFrame({'id1': df['id1'].tolist() * 2,
'id2': df['id2'].tolist() * 2,
'variable': ['A']*10 + ['B']*10,
value_name: df['A'].tolist() + df['B'].tolist()},
columns=['id1', 'id2', 'variable', value_name])
tm.assert_frame_equal(result14, expected14)

# Supply custom names for the 'variable' and 'value' columns
result15 = melt(df, var_name=var_name, value_name=value_name)
result16 = melt(df, id_vars=['id1'], var_name=var_name, value_name=value_name)
result17 = melt(df, id_vars=['id1', 'id2'],
var_name=var_name, value_name=value_name)
result18 = melt(df, id_vars=['id1', 'id2'],
value_vars='A', var_name=var_name, value_name=value_name)
result19 = melt(df, id_vars=['id1', 'id2'],
value_vars=['A', 'B'], var_name=var_name, value_name=value_name)

expected19 = DataFrame({'id1': df['id1'].tolist() * 2,
'id2': df['id2'].tolist() * 2,
var_name: ['A']*10 + ['B']*10,
value_name: df['A'].tolist() + df['B'].tolist()},
columns=['id1', 'id2', var_name, value_name])
tm.assert_frame_equal(result19, expected19)

def test_convert_dummies():
df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
Expand Down

0 comments on commit 4c12848

Please sign in to comment.