Skip to content

Commit

Permalink
Remove unneeded expression matrix code (#1078)
Browse files Browse the repository at this point in the history
* remove code from IntensityTable.to_expression_matrix that references regions object produced by deleted code
  • Loading branch information
ambrosejcarr authored Apr 2, 2019
1 parent b909295 commit 8210145
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 26 deletions.
62 changes: 62 additions & 0 deletions notebooks/ISS_Pipeline_-_Breast_-_1_FOV.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,68 @@
"seg.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Assign spots to cells and create cell x gene count matrix"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from starfish.spots import TargetAssignment\n",
"al = TargetAssignment.Label()\n",
"labeled = al.run(label_image, decoded)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from starfish.expression_matrix.expression_matrix import ExpressionMatrix"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cg = labeled.to_expression_matrix()\n",
"cg"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot the (x, y) centroids of segmented cells in small cyan dots. Plot cells expressing VIM in blue, and cells expressing HER2 in red. Compare with the following plot of the displayed _spots_ below. This demonstrates that (1) the expression matrix is being properly created but (2) many of the spots are occurring outside segmented cells, suggesting that the segmentation may be too restrictive."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Test data is from a region of the tissue slice containing cancer cells, there is no VIM.\n",
"if not use_test_data:\n",
" vim_mask = cg.loc[:, 'VIM'] > 0\n",
" her2_mask = cg.loc[:, 'HER2'] > 0\n",
" plt.scatter(cg['x'], -cg['y'], s=5, c='c')\n",
" plt.scatter(cg['x'][vim_mask], -cg['y'][vim_mask], s=12, c='b')\n",
" plt.scatter(cg['x'][her2_mask], -cg['y'][her2_mask], s=12, c='r')\n",
"else:\n",
" assert np.sum(cg.loc[:, 'HER2'] > 0) > 0\n",
""
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
36 changes: 36 additions & 0 deletions notebooks/py/ISS_Pipeline_-_Breast_-_1_FOV.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,42 @@
seg.show()
# EPY: END code

# EPY: START markdown
#### Assign spots to cells and create cell x gene count matrix
# EPY: END markdown

# EPY: START code
from starfish.spots import TargetAssignment
al = TargetAssignment.Label()
labeled = al.run(label_image, decoded)
# EPY: END code

# EPY: START code
from starfish.expression_matrix.expression_matrix import ExpressionMatrix
# EPY: END code

# EPY: START code
cg = labeled.to_expression_matrix()
cg
# EPY: END code

# EPY: START markdown
#Plot the (x, y) centroids of segmented cells in small cyan dots. Plot cells expressing VIM in blue, and cells expressing HER2 in red. Compare with the following plot of the displayed _spots_ below. This demonstrates that (1) the expression matrix is being properly created but (2) many of the spots are occurring outside segmented cells, suggesting that the segmentation may be too restrictive.
# EPY: END markdown

# EPY: START code
#Test data is from a region of the tissue slice containing cancer cells, there is no VIM.
if not use_test_data:
vim_mask = cg.loc[:, 'VIM'] > 0
her2_mask = cg.loc[:, 'HER2'] > 0
plt.scatter(cg['x'], -cg['y'], s=5, c='c')
plt.scatter(cg['x'][vim_mask], -cg['y'][vim_mask], s=12, c='b')
plt.scatter(cg['x'][her2_mask], -cg['y'][her2_mask], s=12, c='r')
else:
assert np.sum(cg.loc[:, 'HER2'] > 0) > 0

# EPY: END code

# EPY: START markdown
#### Visualize results
#
Expand Down
45 changes: 19 additions & 26 deletions starfish/intensity_table/intensity_table.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from itertools import product
from json import loads
from typing import Dict, List, Optional, Union
from typing import Dict, List, Union

import numpy as np
import pandas as pd
import regional
import xarray as xr

from starfish.expression_matrix.expression_matrix import ExpressionMatrix
Expand Down Expand Up @@ -419,41 +418,35 @@ def to_decoded_spots(self) -> DecodedSpots:
df = df.drop(pixel_coordinates.intersection(df.columns), axis=1).drop(Features.AXIS, axis=1)
return DecodedSpots(df)

def to_expression_matrix(self, regions: Optional[regional.many]=None) -> ExpressionMatrix:
def to_expression_matrix(self) -> ExpressionMatrix:
"""Generates a cell x gene count matrix where each cell is annotated with spatial metadata
Parameters
----------
regions: Optional[regional.Many]
cell segmentation results that were used to assign points to cells. If not provided, the
centers of the cells will be estimated by taking the midpoint between the extreme-valued
spots on each axis.
Requires that spots in the IntensityTable have been assigned to cells.
Returns
-------
ExpressionMatrix :
cell x gene expression table
"""
try:
grouped = self.to_features_dataframe().groupby(['cell_id', 'target'])
except KeyError as e:
if "cell_id" in str(e):
raise RuntimeError(
"IntensityTable must have 'cell_id' assignments for each cell before "
"this function can be called. See starfish.TargetAssignment.Label."
)
else:
raise

# create the 2-d counts matrix
grouped = self.to_features_dataframe().groupby(['cell_id', 'target'])
counts = grouped.count().iloc[:, 0].unstack().fillna(0)

if regions:
# counts.index stores cell_id, extract cell information from the regional.many object
metadata = {
"area": ("cells", [regions[id_].area for id_ in counts.index]),
"x": ("cells", [regions[id_].center[0] for id_ in counts.index]),
"y": ("cells", [regions[id_].center[1] for id_ in counts.index]),
"z": ("cells", np.zeros(counts.shape[0]))
}
else:
grouped = self.to_features_dataframe().groupby(['cell_id'])[['x', 'y', 'z']]
min_ = grouped.min()
max_ = grouped.max()
coordinate_df = min_ + (max_ - min_) / 2
metadata = {name: ("cells", data.values) for name, data in coordinate_df.items()}
metadata['area'] = ("cells", np.full(counts.shape[0], fill_value=np.nan))
grouped = self.to_features_dataframe().groupby(['cell_id'])[['x', 'y', 'z']]
min_ = grouped.min()
max_ = grouped.max()
coordinate_df = min_ + (max_ - min_) / 2
metadata = {name: ("cells", data.values) for name, data in coordinate_df.items()}
metadata['area'] = ("cells", np.full(counts.shape[0], fill_value=np.nan))

# add genes to the metadata
metadata.update({"genes": counts.columns.values})
Expand Down

2 comments on commit 8210145

@ttung
Copy link
Collaborator

@ttung ttung commented on 8210145 Apr 2, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ambrosejcarr this commit looks like it has a lot more than what your commit message suggests. was there a mixup?

@ambrosejcarr
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, commit message was missing a piece: I also added an example using the relevant code to the ISS vignette.

Otherwise, it's accurate.

Please sign in to comment.