Skip to content

Commit

Permalink
Fix merge conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Jul 11, 2012
2 parents 2ea9339 + f09b6a6 commit d2656ba
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 0 deletions.
2 changes: 2 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ pandas 0.8.1
**New features**

- Can pass dict of per-column line styles to DataFrame.plot (#1559)
- Add new ``bootstrap_plot`` function

**Improvements to existing features**

Expand All @@ -43,6 +44,7 @@ pandas 0.8.1
- Fix resampling bug to lower case daily frequency (#1588)
- Fix kendall/spearman DataFrame.corr bug with no overlap (#1595)
- Fix bug in DataFrame.set_index (#1592)
- Don't ignore axes in boxplot if by specified (#1565)

pandas 0.8.0
============
Expand Down
21 changes: 21 additions & 0 deletions doc/source/visualization.rst
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,27 @@ of the same class will usually be closer together and form larger structures.
@savefig andrews_curves.png width=6in
andrews_curves(data, 'Name')
Parallel Coordinates
~~~~~~~~~~~~~~~~~~~~

Parallel coordinates is a technique for plotting multivariate data.
It allows one to see clusters in data and to estimate other statistics visually.
Using parallel coordinates, points are represented as connected line segments.
Each vertical line represents one attribute. One set of connected line segments
represents one data point. Points that tend to cluster will appear closer together.

.. ipython:: python
from pandas import read_csv
from pandas.tools.plotting import parallel_coordinates
data = read_csv('data/iris.data')
plt.figure()
@savefig parallel_coordinates.png width=6in
parallel_coordinates(data, 'Name')
Lag Plot
~~~~~~~~

Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/test_graphics.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,14 @@ def test_andrews_curves(self):
df = read_csv(path)
_check_plot_works(andrews_curves, df, 'Name')

@slow
def test_parallel_coordinates(self):
    """Check that ``parallel_coordinates`` plots 'data/iris.csv' with
    'Name' as the class column."""
    from pandas import read_csv
    from pandas.tools.plotting import parallel_coordinates
    # The CSV path is resolved relative to whatever curpath() returns.
    iris = read_csv(os.path.join(curpath(), 'data/iris.csv'))
    _check_plot_works(parallel_coordinates, iris, 'Name')

@slow
def test_plot_int_columns(self):
df = DataFrame(np.random.randn(100, 4)).cumsum()
Expand Down
47 changes: 47 additions & 0 deletions pandas/tools/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,53 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds):
plt.setp(axis.get_yticklabels(), fontsize=8)
return fig

def parallel_coordinates(data, class_column, cols=None, ax=None, **kwds):
    """Parallel coordinates plotting.

    Every row of ``data`` is drawn as one polyline whose vertices are the
    row's values in the selected columns; a vertical line marks each
    column.  Rows sharing the same value in ``class_column`` share a
    colour, and each class appears once in the legend.

    Parameters:
    -----------
    data: A DataFrame containing data to be plotted
    class_column: Column name containing class names
    cols: A list of column names to use, optional
        (defaults to every column except ``class_column``)
    ax: matplotlib axis object, optional
        (defaults to the current pyplot axes)
    kwds: Keywords passed through to the matplotlib plot method

    Returns:
    --------
    ax: matplotlib axis object
    """
    import random

    def class_color(value):
        # A private generator yields the same numbers ``random.seed(value)``
        # followed by ``random.random()`` would, without resetting the
        # global random state for the caller.
        if not isinstance(value, (int, float, str, bytes, bytearray)):
            # Recent Python versions reject other seed types (e.g. numpy
            # integers); their string form is still a stable per-class seed.
            value = str(value)
        rng = random.Random(value)
        return [rng.random() for _ in range(3)]

    # ``is None`` rather than ``== None``: equality is element-wise (and
    # ambiguous in an ``if``) when ``cols`` is an array-like.
    if cols is None:
        col_names = [col for col in data.columns if col != class_column]
    else:
        col_names = list(cols)

    # Use the raw values so rows are addressed by position; indexing the
    # Series with 0..n-1 is label-based and breaks on a non-default index.
    columns = [data[col].values for col in col_names]
    class_values = data[class_column].values

    x = list(range(len(col_names)))

    if ax is None:
        # pyplot is only needed to look up the current axes.
        import matplotlib.pyplot as plt
        ax = plt.gca()

    used_legends = set()
    colors = {}  # class value -> colour, computed once per class
    for i, class_value in enumerate(class_values):
        y = [column[i] for column in columns]
        # Only the first row of each class carries a legend label.
        label = str(class_value)
        if label in used_legends:
            label = None
        else:
            used_legends.add(label)
        if class_value not in colors:
            colors[class_value] = class_color(class_value)
        ax.plot(x, y, color=colors[class_value], label=label, **kwds)

    for position in x:
        ax.axvline(position, linewidth=1, color='black')

    ax.set_xticks(x)
    # Label the ticks with the columns actually plotted (honours ``cols``).
    ax.set_xticklabels(col_names)
    ax.legend(loc='upper right')
    ax.grid()
    return ax

def lag_plot(series, ax=None, **kwds):
"""Lag plot for time series.
Expand Down

0 comments on commit d2656ba

Please sign in to comment.