Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Allow for custom variable/value column names when melt()'ing #3649

Merged
merged 3 commits into from
May 21, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion doc/source/reshaping.rst
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,9 @@ Reshaping by Melt
The ``melt`` function found in ``pandas.core.reshape`` is useful to massage a
DataFrame into a format where one or more columns are identifier variables,
while all other columns, considered measured variables, are "pivoted" to the
row axis, leaving just two non-identifier columns, "variable" and "value".
row axis, leaving just two non-identifier columns, "variable" and "value". The
names of those columns can be customized by supplying the ``var_name`` and
``value_name`` parameters.

For instance,

Expand All @@ -212,6 +214,7 @@ For instance,
'weight' : [130, 150]})
cheese
melt(cheese, id_vars=['first', 'last'])
melt(cheese, id_vars=['first', 'last'], var_name='quantity')

Combining with stats and GroupBy
--------------------------------
Expand Down
3 changes: 3 additions & 0 deletions doc/source/v0.11.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,9 @@ Enhancements
import os
os.remove('mi.csv')

- ``pd.melt()`` now accepts the optional parameters ``var_name`` and ``value_name``
to specify custom names for the "variable" and "value" columns of the returned DataFrame.

Bug Fixes
~~~~~~~~~

Expand Down
30 changes: 20 additions & 10 deletions pandas/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,16 +600,19 @@ def _stack_multi_columns(frame, level=-1, dropna=True):
return result


def melt(frame, id_vars=None, value_vars=None):
def melt(frame, id_vars=None, value_vars=None,
var_name='variable', value_name='value'):
"""
"Unpivots" a DataFrame from wide format to long format, optionally leaving
id variables set

Parameters
----------
frame : DataFrame
id_vars :
value_vars :
id_vars : tuple, list, or ndarray
value_vars : tuple, list, or ndarray
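var_name : scalar
Name to use for the "variable" column (default 'variable')
value_name : scalar
Name to use for the "value" column (default 'value')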

Examples
--------
Expand All @@ -621,9 +624,16 @@ def melt(frame, id_vars=None, value_vars=None):

>>> melt(df, id_vars=['A'], value_vars=['B'])
A variable value
a B 1
b B 3
c B 5
a B 1
b B 3
c B 5

>>> melt(df, id_vars=['A'], value_vars=['B'],
... var_name='myVarname', value_name='myValname')
A myVarname myValname
a B 1
b B 3
c B 5
"""
# TODO: what about the existing index?
if id_vars is not None:
Expand All @@ -648,11 +658,11 @@ def melt(frame, id_vars=None, value_vars=None):
for col in id_vars:
mdata[col] = np.tile(frame.pop(col).values, K)

mcolumns = id_vars + ['variable', 'value']
mcolumns = id_vars + [var_name, value_name]

mdata['value'] = frame.values.ravel('F')

mdata['variable'] = np.asarray(frame.columns).repeat(N)
mdata[value_name] = frame.values.ravel('F')
mdata[var_name] = np.asarray(frame.columns).repeat(N)

return DataFrame(mdata, columns=mcolumns)


Expand Down
71 changes: 65 additions & 6 deletions pandas/tests/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,73 @@ def test_melt():
df['id1'] = (df['A'] > 0).astype(int)
df['id2'] = (df['B'] > 0).astype(int)

molten1 = melt(df)
molten2 = melt(df, id_vars=['id1'])
molten3 = melt(df, id_vars=['id1', 'id2'])
molten4 = melt(df, id_vars=['id1', 'id2'],
var_name = 'var'
value_name = 'val'

# Default column names
result = melt(df)
result1 = melt(df, id_vars=['id1'])
result2 = melt(df, id_vars=['id1', 'id2'])
result3 = melt(df, id_vars=['id1', 'id2'],
value_vars='A')
molten5 = melt(df, id_vars=['id1', 'id2'],
result4 = melt(df, id_vars=['id1', 'id2'],
value_vars=['A', 'B'])


expected4 = DataFrame({'id1': df['id1'].tolist() * 2,
'id2': df['id2'].tolist() * 2,
'variable': ['A']*10 + ['B']*10,
'value': df['A'].tolist() + df['B'].tolist()},
columns=['id1', 'id2', 'variable', 'value'])
tm.assert_frame_equal(result4, expected4)

# Supply custom name for the 'variable' column
result5 = melt(df, var_name=var_name)
result6 = melt(df, id_vars=['id1'], var_name=var_name)
result7 = melt(df, id_vars=['id1', 'id2'], var_name=var_name)
result8 = melt(df, id_vars=['id1', 'id2'],
value_vars='A', var_name=var_name)
result9 = melt(df, id_vars=['id1', 'id2'],
value_vars=['A', 'B'], var_name=var_name)

expected9 = DataFrame({'id1': df['id1'].tolist() * 2,
'id2': df['id2'].tolist() * 2,
var_name: ['A']*10 + ['B']*10,
'value': df['A'].tolist() + df['B'].tolist()},
columns=['id1', 'id2', var_name, 'value'])
tm.assert_frame_equal(result9, expected9)

# Supply custom name for the 'value' column
result10 = melt(df, value_name=value_name)
result11 = melt(df, id_vars=['id1'], value_name=value_name)
result12 = melt(df, id_vars=['id1', 'id2'], value_name=value_name)
result13 = melt(df, id_vars=['id1', 'id2'],
value_vars='A', value_name=value_name)
result14 = melt(df, id_vars=['id1', 'id2'],
value_vars=['A', 'B'], value_name=value_name)

expected14 = DataFrame({'id1': df['id1'].tolist() * 2,
'id2': df['id2'].tolist() * 2,
'variable': ['A']*10 + ['B']*10,
value_name: df['A'].tolist() + df['B'].tolist()},
columns=['id1', 'id2', 'variable', value_name])
tm.assert_frame_equal(result14, expected14)

# Supply custom names for the 'variable' and 'value' columns
result15 = melt(df, var_name=var_name, value_name=value_name)
result16 = melt(df, id_vars=['id1'], var_name=var_name, value_name=value_name)
result17 = melt(df, id_vars=['id1', 'id2'],
var_name=var_name, value_name=value_name)
result18 = melt(df, id_vars=['id1', 'id2'],
value_vars='A', var_name=var_name, value_name=value_name)
result19 = melt(df, id_vars=['id1', 'id2'],
value_vars=['A', 'B'], var_name=var_name, value_name=value_name)

expected19 = DataFrame({'id1': df['id1'].tolist() * 2,
'id2': df['id2'].tolist() * 2,
var_name: ['A']*10 + ['B']*10,
value_name: df['A'].tolist() + df['B'].tolist()},
columns=['id1', 'id2', var_name, value_name])
tm.assert_frame_equal(result19, expected19)

def test_convert_dummies():
df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
Expand Down