seascapes built

DavisWeaver · May 14, 2024 · 30470f4 · 30470f4
1 parent 27946d3
commit 30470f4
Show file tree

Hide file tree

Showing 2 changed files with 68 additions and 4 deletions.
diff --git a/evodm/data.py b/evodm/data.py
@@ -1,8 +1,9 @@
 import pandas as pd
+import os
 
 def clean_seascapes():
     drugs = ['paclitaxel', 'gefitinib', 'osimertinib', 'savolatinib']
-    file = '../../../evodm_cancer/data/combined_'
+
     def process_genotype(Y):
         genotypes = []
         for y in Y:
@@ -24,16 +25,37 @@ def process_genotype(Y):
         return genotypes
 
     def compute_fitness(df):
+         #Function to compute the fitness of each genotype at each concentration in a given drug relative to the wild type.
          ref = df[df['genotype'] == '0000']
          ref = ref.rename(columns = {'luminescence': 'ref_luminescence'}).drop(columns = ['genotype'])
          df = df.merge(ref, on='concentration')
          df['fitness'] = df['luminescence'] / df['ref_luminescence']
          return df
+
+    def rank_conc(df):
+        #Function to add the concentration rank to the dataframe to use instead of absolute concentration. 
+        concs = pd.unique(df['concentration'])
+        concs.sort() #make sure they are in ascending order. 
+        ranks = [i + 1 for i in range(len(concs))]
+        #create a dataframe with just the unique concentration values for a given drug
+        df_ranks = pd.DataFrame(concs)
+        df_ranks['rank'] = ranks
+        df_ranks.columns = ['concentration', 'conc_rank']
+        #merge the rank dataframe with the original dataframe
+        df = df.merge(df_ranks, on='concentration')
+        return df
 
     dfs = []
+
+
+    file = '../../../evodm_cancer/data/combined_'
+
     for drug in drugs:
         filed = file + drug + '.xlsx' 
-        df = pd.read_excel(filed)
+        try:
+            df = pd.read_excel(filed)
+        except:
+            df = pd.read_csv(filed.replace("../../../", "../../"))
         try: 
            df = df.drop(columns = 'cond')
         except: 
@@ -47,6 +69,8 @@ def compute_fitness(df):
         df = df.groupby(['genotype', 'concentration']).mean().reset_index()
         df = compute_fitness(df)
         df['drug'] = drug 
+
+        df = rank_conc(df)
         dfs.append(df) 
 
     df = pd.concat(dfs)
@@ -55,6 +79,44 @@ def compute_fitness(df):
     df.to_csv(final_file, index=False)
     return df
 
+#Convenience function to load the seascapes data
+def load_seascapes(file='../../../evodm_cancer/data/combined_seascapes_cleaned.csv'):
+    try:
+        df = pd.read_csv(file, dtype = {'genotype': str})
+    except:
+        clean_seascapes()
+        df = pd.read_csv(file, dtype = {'genotype': str})
+    return df
+
+def define_dag_seascapes():
+
+    #load the data
+    df = load_seascapes()
+    df = df[df['drug'] != 'paclitaxel'] #get rid of the drug paclitaxel just for now  because it is missing some key data
+    #setup reference variables 
+    drugs = pd.unique(df['drug'])
+    concs = pd.unique(df['conc_rank'])
+
+    #iterate through drugs 
+    ls_i = dict.fromkeys(drugs)
+    for i in drugs:
+        df_i = df[df['drug'] == i]
+        ls_j = {}
+        #iterate through concentration ranks -
+        #absolute concentrations were not standardized between drugs. Do we need to just use the rank order of concentration?
+        for j in concs:
+            df_j = df_i[df_i['conc_rank'] == j]
+            #ls_g = dict.fromkeys(genotypes)
+           # for g in range(len(df_j)):
+            #    ls_g[df_j['genotype'].iloc[g]] = df_j['fitness'].iloc[g]
+            fitness = df_j['fitness'].tolist()
+            ls_j[j] = fitness
+        ls_i[i] = ls_j
+
+    return ls_i
+
+    #iterate through drug, concentration combos to generate appropriate dictionaries for landscapes
+
 ################################################Ignore below here unless you like looking at data structures #########################
 def get_example_drug(N=5):
 

diff --git a/evodm/evol_game.py b/evodm/evol_game.py
@@ -1,4 +1,5 @@
 from evodm.landscapes import Landscape
+from evodm.data import load_seascapes
 import numpy as np
 from tensorflow.keras.utils import to_categorical
 import math
@@ -151,8 +152,8 @@ def define_landscapes(self, drugs, normalize_drugs):
         #    [i.get_TM() for i in self.landscapes] #pre-compute TM
         [i.get_TM_phenom(phenom = self.PHENOM) for i in self.landscapes]
 
-        return 
-
+        return
+ 
 
     def step(self):
 
@@ -537,6 +538,7 @@ def define_mira_landscapes(as_dict = False):
         drugs.append([2.59, 2.572, 2.393, 2.832, 2.44, 2.808, 2.652, 0.611, 2.067, 2.446, 2.957, 2.633, 2.735, 2.863, 2.796, 3.203])     #FEP
     return drugs
 
+
 #Function to compute reward for a given simulation step - used by the environment class. 
 #Could have defined this in-line but made it a separate function in case we want to make it 
 #more sophisticated in the future.