Remove unneeded expression matrix code (#1078)

* remove code from IntensityTable.to_expression_matrix that references regions object produced by deleted code
spacetx · Apr 2, 2019 · 8210145 · 8210145 · ttung · Apr 2, 2019
1 parent b909295
commit 8210145
Show file tree

Hide file tree

Showing 3 changed files with 117 additions and 26 deletions.
diff --git a/notebooks/ISS_Pipeline_-_Breast_-_1_FOV.ipynb b/notebooks/ISS_Pipeline_-_Breast_-_1_FOV.ipynb
@@ -378,6 +378,68 @@
     "seg.show()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Assign spots to cells and create cell x gene count matrix"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from starfish.spots import TargetAssignment\n",
+    "al = TargetAssignment.Label()\n",
+    "labeled = al.run(label_image, decoded)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from starfish.expression_matrix.expression_matrix import ExpressionMatrix"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cg = labeled.to_expression_matrix()\n",
+    "cg"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Plot the (x, y) centroids of segmented cells in small cyan dots. Plot cells expressing VIM in blue, and cells expressing HER2 in red. Compare with the following plot of the displayed _spots_ below. This demonstrates that (1) the expression matrix is being properly created but (2) many of the spots are occurring outside segmented cells, suggesting that the segmentation may be too restrictive."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Test data is from a region of the tissue slice containing cancer cells; there is no VIM.\n",
+    "if not use_test_data:\n",
+    "    vim_mask = cg.loc[:, 'VIM'] > 0\n",
+    "    her2_mask = cg.loc[:, 'HER2'] > 0\n",
+    "    plt.scatter(cg['x'], -cg['y'], s=5, c='c')\n",
+    "    plt.scatter(cg['x'][vim_mask], -cg['y'][vim_mask], s=12, c='b')\n",
+    "    plt.scatter(cg['x'][her2_mask], -cg['y'][her2_mask], s=12, c='r')\n",
+    "else:\n",
+    "    assert np.sum(cg.loc[:, 'HER2'] > 0) > 0\n",
+    ""
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},

diff --git a/notebooks/py/ISS_Pipeline_-_Breast_-_1_FOV.py b/notebooks/py/ISS_Pipeline_-_Breast_-_1_FOV.py
@@ -245,6 +245,42 @@
 seg.show()
 # EPY: END code
 
+# EPY: START markdown
+#### Assign spots to cells and create cell x gene count matrix
+# EPY: END markdown
+
+# EPY: START code
+from starfish.spots import TargetAssignment
+al = TargetAssignment.Label()
+labeled = al.run(label_image, decoded)
+# EPY: END code
+
+# EPY: START code
+from starfish.expression_matrix.expression_matrix import ExpressionMatrix
+# EPY: END code
+
+# EPY: START code
+cg = labeled.to_expression_matrix()
+cg
+# EPY: END code
+
+# EPY: START markdown
+#Plot the (x, y) centroids of segmented cells in small cyan dots. Plot cells expressing VIM in blue, and cells expressing HER2 in red. Compare with the following plot of the displayed _spots_ below. This demonstrates that (1) the expression matrix is being properly created but (2) many of the spots are occurring outside segmented cells, suggesting that the segmentation may be too restrictive.
+# EPY: END markdown
+
+# EPY: START code
+# Test data is from a region of the tissue slice containing cancer cells; there is no VIM.
+if not use_test_data:
+    vim_mask = cg.loc[:, 'VIM'] > 0
+    her2_mask = cg.loc[:, 'HER2'] > 0
+    plt.scatter(cg['x'], -cg['y'], s=5, c='c')
+    plt.scatter(cg['x'][vim_mask], -cg['y'][vim_mask], s=12, c='b')
+    plt.scatter(cg['x'][her2_mask], -cg['y'][her2_mask], s=12, c='r')
+else:
+    assert np.sum(cg.loc[:, 'HER2'] > 0) > 0
+
+# EPY: END code
+
 # EPY: START markdown
 #### Visualize results
 #

diff --git a/starfish/intensity_table/intensity_table.py b/starfish/intensity_table/intensity_table.py
@@ -1,10 +1,9 @@
 from itertools import product
 from json import loads
-from typing import Dict, List, Optional, Union
+from typing import Dict, List, Union
 
 import numpy as np
 import pandas as pd
-import regional
 import xarray as xr
 
 from starfish.expression_matrix.expression_matrix import ExpressionMatrix
@@ -419,41 +418,35 @@ def to_decoded_spots(self) -> DecodedSpots:
         df = df.drop(pixel_coordinates.intersection(df.columns), axis=1).drop(Features.AXIS, axis=1)
         return DecodedSpots(df)
 
-    def to_expression_matrix(self, regions: Optional[regional.many]=None) -> ExpressionMatrix:
+    def to_expression_matrix(self) -> ExpressionMatrix:
         """Generates a cell x gene count matrix where each cell is annotated with spatial metadata
 
-        Parameters
-        ----------
-        regions: Optional[regional.Many]
-            cell segmentation results that were used to assign points to cells. If not provided, the
-            centers of the cells will be estimated by taking the midpoint between the extreme-valued
-            spots on each axis.
+        Requires that spots in the IntensityTable have been assigned to cells.
 
         Returns
         -------
         ExpressionMatrix :
             cell x gene expression table
         """
+        try:
+            grouped = self.to_features_dataframe().groupby(['cell_id', 'target'])
+        except KeyError as e:
+            if "cell_id" in str(e):
+                raise RuntimeError(
+                    "IntensityTable must have 'cell_id' assignments for each cell before "
+                    "this function can be called. See starfish.TargetAssignment.Label."
+                )
+            else:
+                raise
 
-        # create the 2-d counts matrix
-        grouped = self.to_features_dataframe().groupby(['cell_id', 'target'])
         counts = grouped.count().iloc[:, 0].unstack().fillna(0)
 
-        if regions:
-            # counts.index stores cell_id, extract cell information from the regional.many object
-            metadata = {
-                "area": ("cells", [regions[id_].area for id_ in counts.index]),
-                "x": ("cells", [regions[id_].center[0] for id_ in counts.index]),
-                "y": ("cells", [regions[id_].center[1] for id_ in counts.index]),
-                "z": ("cells", np.zeros(counts.shape[0]))
-            }
-        else:
-            grouped = self.to_features_dataframe().groupby(['cell_id'])[['x', 'y', 'z']]
-            min_ = grouped.min()
-            max_ = grouped.max()
-            coordinate_df = min_ + (max_ - min_) / 2
-            metadata = {name: ("cells", data.values) for name, data in coordinate_df.items()}
-            metadata['area'] = ("cells", np.full(counts.shape[0], fill_value=np.nan))
+        grouped = self.to_features_dataframe().groupby(['cell_id'])[['x', 'y', 'z']]
+        min_ = grouped.min()
+        max_ = grouped.max()
+        coordinate_df = min_ + (max_ - min_) / 2
+        metadata = {name: ("cells", data.values) for name, data in coordinate_df.items()}
+        metadata['area'] = ("cells", np.full(counts.shape[0], fill_value=np.nan))
 
         # add genes to the metadata
         metadata.update({"genes": counts.columns.values})