Skip to content

Commit

Permalink
add feedback to selectivityFuncs.py
Browse files Browse the repository at this point in the history
  • Loading branch information
armaan-abraham committed Jun 7, 2024
1 parent 3db9b0c commit 1e62f2b
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 14 deletions.
24 changes: 22 additions & 2 deletions bicytok/MBmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,30 +7,50 @@


def getKxStar():
# Armaan: redefining this as a constant would make more sense.
return 2.24e-12


def cytBindingModel(
recCount: np.ndarray, recXaffs: np.ndarray, dose: float, vals: np.ndarray
):
# Armaan: update this docstring, particularly the Return section
"""Runs binding model for a given mutein, valency, dose, and cell type
Armaan: I think saying that the function runs the binding model for a
particular cell type is a bit misleading. It's more that the function runs
for a specific set of receptor abundances.
Args:
Armaan: Be very specific that the function only returns the number of
bound receptors corresponding to the first element of this `recCount`
array.
recCount: total count of signaling and targeting receptors
recXaffs: Ka for monomer ligand to receptors
Armaan: this argument's name could be more relevant.
recXaffs: Ka for monomer ligand to receptors
dose: ligand concentration/dose that is being modeled
Armaan: change this variable name to valencies. `vals` is often used as
an abbreviation for `values`.
vals: array of valencies of each ligand epitope
Return:
Armaan: This output description is outdated. Only the amount of the
first receptor bound is returned.
output: amount of receptor bound of each kind of receptor
"""
Kx = getKxStar()
# Armaan: Why 1e9? Again, it should be clear why literals are chosen.
# Additionally, are you using this value elsewhere? If so, define it in a
# separate file as a constant, and then import it.
ligandConc = dose / (vals[0][0] * 1e9)

output = polyc(
ligandConc,
Kx,
recCount,
vals,
# Only one complex, so its ratio to the overall number of complexes is
# 1.
np.array([1]),
recXaffs,
)[1][0, 0]
Expand Down
2 changes: 1 addition & 1 deletion bicytok/distanceMetricFuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from sklearn.neighbors import KernelDensity

from .imports import importCITE
from .selectivityFuncs import convFactCalc, calcReceptorAbundances
from .selectivityFuncs import calcReceptorAbundances, convFactCalc

path_here = dirname(dirname(__file__))

Expand Down
2 changes: 1 addition & 1 deletion bicytok/figures/figure1.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import seaborn as sns

from ..selectivityFuncs import (
get_cell_bindings,
calcReceptorAbundances,
get_cell_bindings,
optimizeSelectivityAffs,
)
from .common import getSetup
Expand Down
63 changes: 53 additions & 10 deletions bicytok/selectivityFuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,17 @@ def calcReceptorAbundances(
CITE_DF = importCITE()
CITE_DF_new = CITE_DF[epitopes + [cellCat]]
CITE_DF_new = CITE_DF_new.loc[CITE_DF_new[cellCat].isin(cellList)]
# Armaan: Could you immediately rename the cellCat column (e.g. "Cell
# Type")? That way, I think you could avoid passing cellCat to other
# functions (e.g. optimizeSelectivityAffs, get_rec_vecs). But if you've used
# varying values of cellCat in other places before and envision it being
# used in the future, then of course feel free to keep it.

# Get conv factors, average them to use on epitopes with unlisted conv facts
meanConv = convFactCalc(CITE_DF).Weight.mean()
# Armaan: I think it would make more sense to move this confFactDict into
# confFactCalc, as it better falls under the responsibility of that
# function.
# convFactDict values calculated by convFactCalc
convFactDict = {"CD25": 77.136987, "CD122": 332.680090, "CD127": 594.379215}

Expand All @@ -49,6 +57,7 @@ def calcReceptorAbundances(


# Vectorization function for cytBindingModel
# Armaan: What does this function name mean? Could you improve it?
bispecOpt_Vec = np.vectorize(
cytBindingModel, excluded=["recXaffs", "vals"], signature="(n),()->()"
)
Expand All @@ -57,8 +66,11 @@ def calcReceptorAbundances(
def minOffTargSelec(
recXaffs: np.ndarray,
targRecs: pd.DataFrame,
# Armaan: Could you rename all occurrences of the use of "T" to refer to
# target to either "targ" or "target"?
offTRecs: pd.DataFrame,
dose: float,
# Armaan: rename to valencies
vals: np.ndarray,
):
"""Serves as the function which will have its return value minimized to get
Expand All @@ -82,11 +94,23 @@ def minOffTargSelec(
affs = get_affs(recXaffs)

targetBound = (
# Armaan: I don't think you need to sum here, as bispecOpt_Vec should
# just return the number of bound signaling receptor for the signal
# targeting cell. You may need a flatten() or reshape() though, which I
# think is the only purpose of the sum here.
np.sum(
bispecOpt_Vec(
recCount=targRecs.to_numpy(), recXaffs=affs, dose=dose, vals=vals
)
)
# Armaan: I believe that you can delete this denominator, as targRecs should
# always have one row consisting of the one target cell. Alternatively
# (this also pertains to my previous comment), if you want to make this
# code work for cases where there are multiple target cells (which I
# assume was once the case), then you could keep this here. However,
# there would need to be several other changes to the codebase to
# reflect this generality (e.g. in get_rec_vecs). I would recommend just
# keeping it specific and changing these lines for now.
/ targRecs.shape[0]
)
offTargetBound = (
Expand Down Expand Up @@ -158,6 +182,7 @@ def optimizeSelectivityAffs(
return optSelectivity, optAffs


# Armaan: Rename to calc_CITE_conv_factors or something?
def convFactCalc(CITE_DF: pd.DataFrame) -> pd.DataFrame:
"""Returns conversion factors by marker for converting CITEseq signal into abundance
Args:
Expand All @@ -167,6 +192,8 @@ def convFactCalc(CITE_DF: pd.DataFrame) -> pd.DataFrame:
weightDF: factor to convert unprocessed CITE-seq receptor values to
numeric receptor counts
"""
# Armaan: Could you rename cellToI to something more relevant? The "To"
# implies a mapping, but this is just a list.
cellToI = [
"CD4 TCM",
"CD8 Naive",
Expand All @@ -191,7 +218,6 @@ def convFactCalc(CITE_DF: pd.DataFrame) -> pd.DataFrame:
"Treg": "Treg",
}

markDict = {"CD25": "IL2Ra", "CD122": "IL2Rb", "CD127": "IL7Ra", "CD132": "gc"}

markers = ["CD122", "CD127", "CD25"]
markerDF = pd.DataFrame()
Expand All @@ -206,14 +232,23 @@ def convFactCalc(CITE_DF: pd.DataFrame) -> pd.DataFrame:
"Number": cellTDF.size,
}
)
markerDF = (
dftemp
if isinstance(markerDF, pd.DataFrame)
else pd.concat([markerDF, dftemp])
)

if markerDF is pd.DataFrame():
markerDF = dftemp
else:
markerDF = pd.concat([markerDF, dftemp])

markDict = {"CD25": "IL2Ra", "CD122": "IL2Rb", "CD127": "IL7Ra", "CD132": "gc"}
markerDF = markerDF.replace({"Marker": markDict, "Cell Type": cellDict})
markerDFw = pd.DataFrame()
# Armaan: You might need to step through the code to confirm this, but I
# believe that this loop can be simplified to a loop over the rows in
# markerDF, because there shouldn't be more than one row with the same cell
# type and marker, this wouldn't even make sense (correct me if I'm wrong
# though). This would allow you to avoid the use of unique(), the boolean &
# indexing, and the use of wAvg.
# Wait, actually, if my above statement is correct, then you can just delete
# this whole double for loop and calculate the average in the loop above.
for marker in markerDF.Marker.unique():
for cell in markerDF["Cell Type"].unique():
subDF = markerDF.loc[
Expand All @@ -234,6 +269,8 @@ def convFactCalc(CITE_DF: pd.DataFrame) -> pd.DataFrame:
recDF = importReceptors()
weightDF = None

# Armaan: If my comment above is correct, you can also move this logic into
# the initial loop over markerDF and delete this loop.
for rec in markerDFw.Marker.unique():
CITEval = np.array([])
Quantval = np.array([])
Expand Down Expand Up @@ -263,15 +300,14 @@ def convFactCalc(CITE_DF: pd.DataFrame) -> pd.DataFrame:
}
)

if weightDF is None:
weightDF = dftemp
else:
weightDF = pd.concat([weightDF, dftemp])
weightDF = dftemp if weightDF is None else pd.concat([weightDF, dftemp])

return weightDF


def get_rec_vecs(
# Armaan: Could you make this parameter name a bit more descriptive? The
# docstring is good, but would help to have the name be descriptive too.
df: pd.DataFrame,
targCell: str,
offTCells: list,
Expand All @@ -293,6 +329,8 @@ def get_rec_vecs(
countOffT: dataframe of receptor counts of off-target cell types,
no cell type naming column
"""
# Armaan: put column indexing first, then separate by rows into target and
# off target, so you don't need to repeat the column indexing.
dfTargCell = df.loc[df[cellCat] == targCell]
countTarg = dfTargCell[[signal] + targets + [cellCat]]

Expand All @@ -302,6 +340,11 @@ def get_rec_vecs(
return countTarg, countOffT


# Armaan: Can you refactor this and minOffTargSelec to use the same logic for
# infering the number of bound signaling receptors? These two function share a
# lot of the same functionality. One reason to avoid this is if this function is
# a lot slower, and you don't want to call it during optimization, but it
# doesn't seem obvious that it would be.
def get_cell_bindings(
df: np.ndarray,
signal: str,
Expand Down

0 comments on commit 1e62f2b

Please sign in to comment.