From 7158982c6f3aae3520bac6af652cf05b5e54ec0b Mon Sep 17 00:00:00 2001
From: cklamann <12862284+cklamann@users.noreply.github.com>
Date: Fri, 13 Sep 2024 15:07:27 -0400
Subject: [PATCH 1/2] add info about input file, fix spelling

---
 docs/source/tutorial/getting-started.ipynb | 39 ++++++++++++----------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/docs/source/tutorial/getting-started.ipynb b/docs/source/tutorial/getting-started.ipynb
index 73be150..53948ab 100644
--- a/docs/source/tutorial/getting-started.ipynb
+++ b/docs/source/tutorial/getting-started.ipynb
@@ -50,7 +50,6 @@
    },
    "outputs": [],
    "source": [
-    "# pl.utilities.seed.seed_everything(10, workers=True)\n",
     "seed_everything(10, workers=True)"
    ]
   },
@@ -67,7 +66,7 @@
    "id": "69ef8c1d",
    "metadata": {},
    "source": [
-    "The example below runs Kmeans with 10 clusters read from \"sample_input.h5ad\" object.\n"
+    "The example below runs Kmeans with 10 clusters read from \"sample_input.h5ad\" object."
    ]
   },
   {
@@ -91,8 +90,9 @@
    "id": "52d3d9fb",
    "metadata": {},
    "source": [
-    "- Users might want to arcsinh protein expressions in \\*.h5ad (for example, 'sample_input.h5ad').\n",
-    "- The utility.py provides an easy setup of GMM, KM (Kmeans) or PG (PhenoGraph).\n",
+    "- The input anndata object should contain a cell-by-protein matrix of segmented single-cell expression profiles in the `.X` position. Optionally, cell size information can also be provided as a column of the `.obs` DataFrame. In this case `model_cell_size` should be set to `True` and the column specified via the `cell_size_col_name` argument.\n",
+    "- Users might want to arcsinh protein expressions in \\*.h5ad (for example, `sample_input.h5ad`).\n",
+    "- The `utility.py` provides an easy setup of GMM, KM (Kmeans) or PG (PhenoGraph).\n",
     "- Default settings are applied to each method.\n",
     "- k can be omitted when PG is used.\n"
    ]
@@ -132,7 +132,7 @@
     "\n",
     "- adata: annDATA object of the sample\n",
     "- dist_option (default: 'T'): T for Student-T (df=2) and N for Normal (Gaussian)\n",
-    "- the proportion of anticipated segmentation error free cells (default: 0.6)\n",
+    "- singlet_prop (default: 0.6): the proportion of anticipated segmentation-error-free cells\n",
     "- model_cell_size (default: 'Y'): Y for incoporating cell size in the model and N otherwise\n",
     "- cell_size_col_name (default: 'area'): area is the column name in anndata.obs dataframe\n",
     "- model_zplane_overlap (default: 'Y'): Y for modeling z-plane overlap when cell size is modelled and N otherwise\n",
@@ -140,8 +140,10 @@
     "- model_regularizer (default: 1): Regularizier term impose on synthetic doublet loss (BCE)\n",
     "- learning_rate (default: 1e-3): The learning rate of ADAM optimizer for STARLING\n",
     "\n",
-    "Equivalent as the above example:\n",
-    "st = starling.ST(adata, 'T', 'Y', 'area', 'Y', 1, 1e-3)\n"
+    "Equivalent to the above example:\n",
+    "```python\n",
+    "st = starling.ST(adata, 'T', 'Y', 'area', 'Y', 1, 1e-3)\n",
+    "```\n"
    ]
   },
   {
@@ -149,7 +151,7 @@
    "id": "63939215",
    "metadata": {},
    "source": [
-    "## Setting trainning log\n"
+    "## Setting training log\n"
    ]
   },
   {
@@ -157,7 +159,7 @@
    "id": "d721258f",
    "metadata": {},
    "source": [
-    "Once training starts, a new directory 'log' will created.\n"
+    "Once training starts, a new directory 'log' will be created."
    ]
   },
   {
@@ -237,7 +239,7 @@
    "id": "3ba887b2",
    "metadata": {},
    "source": [
-    "## Appending STARLING results to annData object\n"
+    "## Appending STARLING results to the annData object\n"
    ]
   },
   {
@@ -258,7 +260,7 @@
    "id": "a705d895",
    "metadata": {},
    "source": [
-    "## The following information can be retrived from annData object:\n",
+    "## The following information can be retrieved from the annData object:\n",
     "\n",
     "- st.adata.varm['init_exp_centroids'] -- initial expression cluster centroids (P x C matrix)\n",
     "- st.adata.varm['st_exp_centroids'] -- ST expression cluster centroids (P x C matrix)\n",
@@ -271,7 +273,8 @@
     "- st.adata.obs['init_label'] -- initial assignments\n",
     "- st.adata.obs['st_label'] -- ST assignments\n",
     "- st.adata.obs['max_assign_prob'] -- ST max probabilites of assignments\n",
-    "  - N: # of cells; C: # of clusters; P: # of proteins\n"
+    "\n",
+    "_N: # of cells; C: # of clusters; P: # of proteins_\n"
    ]
   },
   {
@@ -354,7 +357,7 @@
    "id": "80e61208",
    "metadata": {},
    "source": [
-    "## Showing initial expression centriods:\n"
+    "## Showing initial expression centroids:\n"
    ]
   },
   {
@@ -364,7 +367,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "## initial expression centriods (p x c) matrix\n",
+    "## initial expression centroids (p x c) matrix\n",
     "pd.DataFrame(result.varm[\"init_exp_centroids\"], index=result.var_names)"
    ]
   },
@@ -381,7 +384,7 @@
    "id": "f0bc41a8",
    "metadata": {},
    "source": [
-    "## Showing Starling expression centriods:\n"
+    "## Showing Starling expression centroids:\n"
    ]
   },
   {
@@ -391,7 +394,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "## starling expression centriods (p x c) matrix\n",
+    "## starling expression centroids (p x c) matrix\n",
     "pd.DataFrame(result.varm[\"st_exp_centroids\"], index=result.var_names)"
    ]
   },
@@ -400,7 +403,7 @@
    "id": "a2cccf9d",
    "metadata": {},
    "source": [
-    "From here one could easily annotate cluster centriods to cell type.\n"
+    "From here one could easily annotate cluster centroids to cell type.\n"
    ]
   },
   {
@@ -427,7 +430,7 @@
    "id": "b203933c",
    "metadata": {},
    "source": [
-    "Currently, we assign a cell label based on the maximum probability among all possible clusters. However, there could be mislabeled because maximum and second highest probabilies can be very close that the user might be interested.\n"
+    "Currently, we assign a cell label based on the maximum probability among all possible clusters. However, these could be mislabeled because the maximum and second-highest probabilities can be very close."
    ]
   }
  ],

From 64d3d3dbabb917beee24a1c4cba08eb7dd79e1d5 Mon Sep 17 00:00:00 2001
From: cklamann <12862284+cklamann@users.noreply.github.com>
Date: Fri, 13 Sep 2024 15:08:55 -0400
Subject: [PATCH 2/2] update readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a70ba5c..9ed3c10 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ A **tutorial** outlining basic usage is available [here][tutorial].
 
 ## Requirements
 
-Python 3.9 or 3.10 are required to run starling. If your current version of python is not one of these, we recommend using [pyenv](https://github.com/pyenv/pyenv) to install a compatible version alongside your current one. Alternately, you could use the Docker configuration described below.
+Python 3.9 or above is required to run starling. If your current version of Python is older than this, we recommend using [pyenv](https://github.com/pyenv/pyenv) to install a compatible version alongside your current one. Alternately, you could use the Docker configuration described below.
 
 ## Installation