Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
jkobject committed Nov 18, 2024
2 parents e3eb39c + ccf28da commit 97cd5f1
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions scdataloader/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def __init__(
filter_gene_by_counts: Union[int, bool] = False,
filter_cell_by_counts: Union[int, bool] = False,
normalize_sum: float = 1e4,
subset_hvg: int = 0,
n_hvg_for_postp: int = 0,
use_layer: Optional[str] = None,
is_symbol: bool = False,
hvg_flavor: str = "seurat_v3",
Expand Down Expand Up @@ -65,7 +65,7 @@ def __init__(
Defaults to 1e4.
log1p (bool, optional): Determines whether to apply log1p transform to the normalized data.
Defaults to True.
subset_hvg (int or bool, optional): Determines whether to subset highly variable genes.
n_hvg_for_postp (int, optional): Number of highly variable genes to subset to before the PCA; 0 skips the subsetting.
Defaults to 0.
hvg_flavor (str, optional): Specifies the flavor of highly variable genes selection.
See :func:`scanpy.pp.highly_variable_genes` for more details. Defaults to "seurat_v3".
Expand Down Expand Up @@ -310,13 +310,15 @@ def __call__(self, adata) -> AnnData:
)["X"]
)
# step 5: subset hvg
if self.subset_hvg:
if self.n_hvg_for_postp:
sc.pp.highly_variable_genes(
adata,
n_top_genes=self.subset_hvg,
n_top_genes=self.n_hvg_for_postp,
batch_key=self.batch_key,
flavor=self.hvg_flavor,
subset=False,
subset=True,
layer="norm",

)
sc.pp.log1p(adata, layer="norm")
sc.pp.pca(
Expand Down

0 comments on commit 97cd5f1

Please sign in to comment.