Skip to content

Commit

Permalink
seascapes built
Browse files Browse the repository at this point in the history
  • Loading branch information
DavisWeaver committed May 14, 2024
1 parent 27946d3 commit 30470f4
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 4 deletions.
66 changes: 64 additions & 2 deletions evodm/data.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import pandas as pd
import os

def clean_seascapes():
drugs = ['paclitaxel', 'gefitinib', 'osimertinib', 'savolatinib']
file = '../../../evodm_cancer/data/combined_'

def process_genotype(Y):
genotypes = []
for y in Y:
Expand All @@ -24,16 +25,37 @@ def process_genotype(Y):
return genotypes

def compute_fitness(df):
    """Compute each genotype's fitness relative to the wild type.

    Fitness is the row's luminescence divided by the luminescence of the
    wild-type genotype ('0000') measured at the same concentration.

    Args:
        df: DataFrame with at least the columns 'genotype',
            'concentration' and 'luminescence'.

    Returns:
        A new DataFrame containing the columns of ``df`` plus
        'ref_luminescence' and 'fitness'. The merge is an inner join, so
        rows whose concentration has no '0000' row are dropped.
    """
    is_wild_type = df['genotype'] == '0000'
    # Keep only the join key and the reference value: carrying the other
    # columns of df through the merge would duplicate them with _x/_y
    # suffixes in the result.
    ref = df.loc[is_wild_type, ['concentration', 'luminescence']]
    ref = ref.rename(columns={'luminescence': 'ref_luminescence'})
    df = df.merge(ref, on='concentration')
    df['fitness'] = df['luminescence'] / df['ref_luminescence']
    return df

def rank_conc(df):
    """Attach a 1-based concentration rank to every row of ``df``.

    The distinct values of the 'concentration' column are sorted in
    ascending order and numbered 1..k; that number is merged back onto
    ``df`` as the column 'conc_rank', so the rank can be used instead of
    the absolute concentration.

    Args:
        df: DataFrame with a 'concentration' column.

    Returns:
        A new DataFrame with the columns of ``df`` plus 'conc_rank'.
    """
    unique_concs = pd.unique(df['concentration'])
    unique_concs.sort()  # ascending, so rank 1 is the lowest concentration
    # One row per distinct concentration, paired with its rank.
    rank_table = pd.DataFrame({
        'concentration': unique_concs,
        'conc_rank': list(range(1, len(unique_concs) + 1)),
    })
    return df.merge(rank_table, on='concentration')

dfs = []


file = '../../../evodm_cancer/data/combined_'

for drug in drugs:
filed = file + drug + '.xlsx'
df = pd.read_excel(filed)
try:
df = pd.read_excel(filed)
except:
df = pd.read_csv(filed.replace("../../../", "../../"))
try:
df = df.drop(columns = 'cond')
except:
Expand All @@ -47,6 +69,8 @@ def compute_fitness(df):
df = df.groupby(['genotype', 'concentration']).mean().reset_index()
df = compute_fitness(df)
df['drug'] = drug

df = rank_conc(df)
dfs.append(df)

df = pd.concat(dfs)
Expand All @@ -55,6 +79,44 @@ def compute_fitness(df):
df.to_csv(final_file, index=False)
return df

def load_seascapes(file='../../../evodm_cancer/data/combined_seascapes_cleaned.csv'):
    """Convenience function to load the cleaned seascapes data.

    Reads ``file`` as CSV with the 'genotype' column forced to ``str`` so
    that bit-string genotypes such as '0000' keep their leading zeros.
    If the file does not exist, ``clean_seascapes()`` is run and the read
    is retried once (this assumes clean_seascapes writes the CSV to this
    path -- confirm if a non-default ``file`` is passed).

    Args:
        file: Path of the cleaned seascapes CSV.

    Returns:
        The seascapes data as a DataFrame.

    Raises:
        FileNotFoundError: if the file is still missing after the rebuild.
    """
    read_opts = {'dtype': {'genotype': str}}
    try:
        return pd.read_csv(file, **read_opts)
    except FileNotFoundError:
        # Only a missing file triggers a rebuild; any other failure
        # (parse error, interrupt, ...) propagates rather than being
        # hidden behind an expensive regeneration.
        clean_seascapes()
        return pd.read_csv(file, **read_opts)

def define_dag_seascapes():
    """Build a nested dict of fitness values keyed by drug and concentration rank.

    Loads the seascapes table with ``load_seascapes()``, drops the rows for
    the drug 'paclitaxel', and returns ``{drug: {conc_rank: [fitness, ...]}}``.
    Each fitness list follows the row order of the loaded table.

    The concentration ranks are collected across all remaining drugs, so a
    drug that has no rows at a given rank gets an empty list for it.

    Returns:
        dict mapping each drug name to a dict that maps each concentration
        rank to a list of fitness values.
    """
    seascapes = load_seascapes()
    # paclitaxel is excluded just for now because it is missing some key data
    seascapes = seascapes[seascapes['drug'] != 'paclitaxel']

    drug_names = pd.unique(seascapes['drug'])
    # Absolute concentrations were not standardized between drugs, so the
    # rank order of concentration is used as the key instead.
    conc_ranks = pd.unique(seascapes['conc_rank'])

    by_drug = {}
    for drug in drug_names:
        drug_rows = seascapes[seascapes['drug'] == drug]
        by_drug[drug] = {
            rank: drug_rows.loc[drug_rows['conc_rank'] == rank, 'fitness'].tolist()
            for rank in conc_ranks
        }

    return by_drug

#iterate through drug, concentration combos to generate appropriate dictionaries for landscapes

################################################Ignore below here unless you like looking at data structures #########################
def get_example_drug(N=5):

Expand Down
6 changes: 4 additions & 2 deletions evodm/evol_game.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from evodm.landscapes import Landscape
from evodm.data import load_seascapes
import numpy as np
from tensorflow.keras.utils import to_categorical
import math
Expand Down Expand Up @@ -151,8 +152,8 @@ def define_landscapes(self, drugs, normalize_drugs):
# [i.get_TM() for i in self.landscapes] #pre-compute TM
[i.get_TM_phenom(phenom = self.PHENOM) for i in self.landscapes]

return

return

def step(self):

Expand Down Expand Up @@ -537,6 +538,7 @@ def define_mira_landscapes(as_dict = False):
drugs.append([2.59, 2.572, 2.393, 2.832, 2.44, 2.808, 2.652, 0.611, 2.067, 2.446, 2.957, 2.633, 2.735, 2.863, 2.796, 3.203]) #FEP
return drugs


#Function to compute reward for a given simulation step - used by the environment class.
#Could have defined this in-line but made it a separate function in case we want to make it
#more sophisticated in the future.
Expand Down

0 comments on commit 30470f4

Please sign in to comment.