diff --git a/models/extract_small_tiles.py b/models/extract_small_tiles.py
new file mode 100644
index 0000000..0979721
--- /dev/null
+++ b/models/extract_small_tiles.py
@@ -0,0 +1,73 @@
+import os
+import cv2
+import skimage.io
+from tqdm.notebook import tqdm
+import zipfile
+import numpy as np
+
+
+##
+## Extract fixed-size tiles from each large slide image and store them in zips.
+##
+
+
+TRAIN = './kaggle/train_images/'
+MASKS = './kaggle/train_label_masks/'
+OUT_TRAIN = 'train.zip'
+OUT_MASKS = 'masks.zip'
+sz = 128   # tile side in pixels
+N = 16     # number of tiles kept per image
+
+
+def tile(img, mask):
+    result = []
+    shape = img.shape
+    # pad so that both dimensions are divisible by sz
+    pad0, pad1 = (sz - shape[0] % sz) % sz, (sz - shape[1] % sz) % sz
+    img = np.pad(img, [[pad0 // 2, pad0 - pad0 // 2], [pad1 // 2, pad1 - pad1 // 2], [0, 0]],
+                 constant_values=255)
+    mask = np.pad(mask, [[pad0 // 2, pad0 - pad0 // 2], [pad1 // 2, pad1 - pad1 // 2], [0, 0]],
+                  constant_values=0)
+    # cut the padded image into a grid of sz x sz tiles
+    img = img.reshape(img.shape[0] // sz, sz, img.shape[1] // sz, sz, 3)
+    img = img.transpose(0, 2, 1, 3, 4).reshape(-1, sz, sz, 3)
+    mask = mask.reshape(mask.shape[0] // sz, sz, mask.shape[1] // sz, sz, 3)
+    mask = mask.transpose(0, 2, 1, 3, 4).reshape(-1, sz, sz, 3)
+    # if the image yields fewer than N tiles, pad with blank tiles
+    if len(img) < N:
+        mask = np.pad(mask, [[0, N - len(img)], [0, 0], [0, 0], [0, 0]], constant_values=0)
+        img = np.pad(img, [[0, N - len(img)], [0, 0], [0, 0], [0, 0]], constant_values=255)
+    # keep the N darkest tiles (lowest pixel sum), i.e. those with the most tissue
+    idxs = np.argsort(img.reshape(img.shape[0], -1).sum(-1))[:N]
+    img = img[idxs]
+    mask = mask[idxs]
+    for i in range(len(img)):
+        result.append({'img': img[i], 'mask': mask[i], 'idx': i})
+    return result
+
+
+x_tot, x2_tot = [], []
+# strip the '_mask.tiff' suffix to recover the image ids
+names = [name[:-10] for name in os.listdir(MASKS)]
+with zipfile.ZipFile(OUT_TRAIN, 'w') as img_out,\
+     zipfile.ZipFile(OUT_MASKS, 'w') as mask_out:
+    for name in tqdm(names):
+        img_pages = skimage.io.MultiImage(os.path.join(TRAIN, name + '.tiff'))
+        mask_pages = skimage.io.MultiImage(os.path.join(MASKS, name + '_mask.tiff'))
+        if len(img_pages) and len(mask_pages):
+            # use the last (lowest-resolution) page of each pyramidal TIFF
+            img = img_pages[-1]
+            mask = mask_pages[-1]
+            tiles = tile(img, mask)
+            for t in tiles:
+                img, mask, idx = t['img'], t['mask'], t['idx']
+                x_tot.append((img / 255.0).reshape(-1, 3).mean(0))
+                x2_tot.append(((img / 255.0) ** 2).reshape(-1, 3).mean(0))
+                # OpenCV encodes BGR, so convert before writing the PNG
+                img = cv2.imencode('.png', cv2.cvtColor(img, cv2.COLOR_RGB2BGR))[1]
+                img_out.writestr(f'{name}_{idx}.png', img)
+                mask = cv2.imencode('.png', mask[:, :, 0])[1]
+                mask_out.writestr(f'{name}_{idx}.png', mask)
+
+
+# image stats for later normalization (img_std is already the std, no second sqrt)
+img_avr = np.array(x_tot).mean(0)
+img_std = np.sqrt(np.array(x2_tot).mean(0) - img_avr ** 2)
+print('mean:', img_avr, ', std:', img_std)
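
The reshape/transpose idiom in tile() is the heart of the extractor and is easy to misread; the following is a minimal standalone sketch of the same idea on a toy array (the sizes are illustrative, not the script's values):

import numpy as np

sz = 2
img = np.arange(4 * 6 * 3).reshape(4, 6, 3)   # toy 4x6 "image", already divisible by sz

# split height and width into (grid, sz) pairs, then flatten the two grid axes
tiles = img.reshape(4 // sz, sz, 6 // sz, sz, 3).transpose(0, 2, 1, 3, 4).reshape(-1, sz, sz, 3)
print(tiles.shape)   # (6, 2, 2, 3): six 2x2 tiles

# the tiles with the lowest pixel sums are the darkest, i.e. the most tissue on a white slide
idxs = np.argsort(tiles.reshape(tiles.shape[0], -1).sum(-1))[:3]
print(idxs)
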
diff --git a/models/model_resnet50.py b/models/model_resnet50.py
new file mode 100644
index 0000000..f4cf749
--- /dev/null
+++ b/models/model_resnet50.py
@@ -0,0 +1,342 @@
+# basics
+import os
+import os.path
+import sys, getopt
+import math
+
+import numpy as np
+import pandas as pd
+from PIL import Image
+
+import keras
+from keras.models import load_model
+from keras.callbacks import ModelCheckpoint
+from sklearn.model_selection import train_test_split
+
+from tensorflow.python.client import device_lib
+
+#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+print(device_lib.list_local_devices())
+
+
+BASE_FOLDER = "/datadrive/"
+
+mask_dir = f'{BASE_FOLDER}/train_label_masks'
+
+extracted_mask_dir = f'{BASE_FOLDER}/extracted_masks'
+extracted_train_dir = f'{BASE_FOLDER}/extracted_train'
+
+
+train = pd.read_csv(BASE_FOLDER + "train.csv")
+test = pd.read_csv(BASE_FOLDER + "test.csv")
+sub = pd.read_csv(BASE_FOLDER + "sample_submission.csv")
+
+print(train.head())
+
+print("unique ids : ", len(train.image_id.unique()))
+print("unique data provider : ", len(train.data_provider.unique()))
+print("unique isup_grade (target) : ", len(train.isup_grade.unique()))
+print("unique gleason_score : ", len(train.gleason_score.unique()))
+
+
+# sanity checks on the gleason_score / isup_grade relationship
+print(train[train['gleason_score'] == '0+0']['isup_grade'].unique())
+print(train[train['gleason_score'] == 'negative']['isup_grade'].unique())
+
+print(len(train[train['gleason_score'] == '0+0']['isup_grade']))
+print(len(train[train['gleason_score'] == 'negative']['isup_grade']))
+
+print(train[(train['gleason_score'] == '3+4') | (train['gleason_score'] == '4+3')]['isup_grade'].unique())
+print(train[(train['gleason_score'] == '3+5') | (train['gleason_score'] == '5+3')]['isup_grade'].unique())
+print(train[(train['gleason_score'] == '5+4') | (train['gleason_score'] == '4+5')]['isup_grade'].unique())
+
+print(train[train['gleason_score'] == '3+4']['isup_grade'].unique())
+print(train[train['gleason_score'] == '4+3']['isup_grade'].unique())
+
+# row 7273 is the one inconsistently labeled sample (gleason 4+3 with isup_grade 2)
+print(train[(train['isup_grade'] == 2) & (train['gleason_score'] == '4+3')])
+train.drop([7273], inplace=True)
+
+# 'negative' and '0+0' mean the same thing, so unify them
+train['gleason_score'] = train['gleason_score'].apply(lambda x: "0+0" if x == "negative" else x)
+
+
+# the gleason score is the training target; drop the unused columns
+train.drop(["data_provider", "isup_grade"], axis=1, inplace=True)
+
+print("before")
+print(train.head())
+
+temp = train.groupby('gleason_score').count()['image_id'].reset_index().sort_values(by='image_id', ascending=False)
+print(temp)
+
+
+# map each gleason score to a class index 0..9
+gleason_to_class = {"0+0": "0", "3+3": "1", "3+4": "2", "4+3": "3", "4+4": "4",
+                    "4+5": "5", "5+4": "6", "5+5": "7", "3+5": "8", "5+3": "9"}
+train['gleason_score'] = train['gleason_score'].map(gleason_to_class).fillna("-1")
+
+print("after")
+print(train.head())
+
+temp = train.groupby('gleason_score').count()['image_id'].reset_index().sort_values(by='image_id', ascending=False)
+print(temp)
+
+print("rows:")
+print(train.shape)
+
+
+data_array = []
+train_value = []
+
+dimension = 16                         # tiles per image
+nb_lines = int(math.sqrt(dimension))   # tiles per side of the mosaic
+lines = nb_lines * 128                 # mosaic side in pixels
+
+
+start = 0
+def main(argv):
+    global start
+    outputfile = ''
+    try:
+        opts, args = getopt.getopt(argv, "hi:o:", ["ifile=", "ofile="])
+    except getopt.GetoptError:
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            sys.exit()
+        elif opt in ("-i", "--ifile"):
+            start = int(arg)
+            print(start)
+        elif opt in ("-o", "--ofile"):
+            outputfile = arg
+
+
+BATCH_IMAGES = 700
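+
+
+# A quick worked example of the mosaic geometry defined above (the numbers are
+# the script's defaults, reproduced here purely for illustration): with
+# dimension = 16 tiles of 128 x 128 px, nb_lines = sqrt(16) = 4 tiles per side,
+# so each assembled input is lines x lines = 512 x 512 px:
+#
+#   >>> row = np.concatenate([np.zeros((128, 128, 3))] * nb_lines, axis=1)
+#   >>> mosaic = np.concatenate([row] * nb_lines, axis=0)
+#   >>> mosaic.shape
+#   (512, 512, 3)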
+
+
+def compute():
+    print("lines:" + str(lines))
+    j = 0
+    for index, row in train.iterrows():
+        images_id = row['image_id']
+        score = row['gleason_score']
+        # process only BATCH_IMAGES images past `start` (the comparison was
+        # truncated in the original and is restored here as an assumption)
+        if j > start + BATCH_IMAGES:
+            break
+        j = j + 1
+
+        # read the extracted tiles and assemble them into one mosaic image
+        tile_imgs = []
+        for k in range(dimension):
+            try:
+                img = Image.open(f'{extracted_train_dir}/{images_id}_{k}.png').convert('RGB')
+                tile_imgs.append(np.array(img))
+                img.close()
+            except (FileNotFoundError, OSError):
+                break   # skip samples with missing or unreadable tiles
+        if len(tile_imgs) == dimension:
+            # concatenate nb_lines tiles per row, then stack the rows
+            rows = [np.concatenate(tile_imgs[r * nb_lines:(r + 1) * nb_lines], axis=1)
+                    for r in range(nb_lines)]
+            data_array.append(np.concatenate(rows, axis=0))
+            train_value.append(score)
+
+    mat_data = np.array(data_array)
+    print('mat_data:')
+    print(mat_data.shape)
+    print(mat_data.shape[0])
+
+    mat_train = np.array(train_value)
+    print('mat_train:')
+    print(mat_train.shape)
+
+    X = mat_data.reshape(mat_data.shape[0], lines, lines, 3)
+    input_shape = (lines, lines, 3)
+    # convert class indices to one-hot vectors
+    Y = keras.utils.to_categorical(mat_train.astype(int), num_classes=10)
+    X = X.astype('float32')
+    X /= 255
+    print('x_train shape:', X.shape)
+    print(X.shape[0], 'train samples')
+
+    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
+
+    batch_size = 1
+    num_classes = 10
+    epochs = 30
+    print("model")
+
+    from keras_applications.resnext import ResNeXt50
+
+    filepath = "model.h5"
+    if not os.path.exists(filepath):
+        # keras_applications models need the backend/layers/models/utils handles passed in
+        resnext50_imagenet_model = ResNeXt50(include_top=False, weights='imagenet',
+                                             input_shape=(lines, lines, 3),
+                                             backend=keras.backend, layers=keras.layers,
+                                             models=keras.models, utils=keras.utils)
+
+        # flatten the ResNeXt output
+        flattened = keras.layers.Flatten()(resnext50_imagenet_model.output)
+
+        # fully connected layer 1
+        fc1 = keras.layers.Dense(128, activation='relu', name="AddedDense1")(flattened)
+
+        # fully connected output layer, one unit per gleason class
+        fc2 = keras.layers.Dense(10, activation='softmax', name="AddedDense2")(fc1)
+
+        model = keras.models.Model(inputs=resnext50_imagenet_model.input, outputs=fc2)
+        model.summary()
+
+        model.compile(loss=keras.losses.categorical_crossentropy,
+                      optimizer=keras.optimizers.Adadelta(),
+                      metrics=['accuracy'])
+    else:
+        # resume from the last checkpoint
+        model = load_model(filepath)
+        model.summary()
+
+    checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
+    callbacks_list = [checkpoint]
+
+    # fit the model, checkpointing the best weights by training loss
+    hist = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1,
+                     validation_data=(X_test, y_test), callbacks=callbacks_list)
+    print("The model has successfully trained")
+    model.save('model_final.h5')
+    print("Saving the model as model_final.h5")
+
+    print("end")
+
+    score = model.evaluate(X_test, y_test, verbose=0)
+    print('Test loss:', score[0])
+    print('Test accuracy:', score[1])
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
+    compute()
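
The diff also checks in a TensorFlow SavedModel (models/saved_model.pb plus the variables/ shards). Assuming the export came from Keras and lives under models/, a minimal loading sketch looks like this; the input shape follows the 512x512x3 mosaics built above, but that is an assumption about this particular export:

import numpy as np
import tensorflow as tf

model = tf.keras.models.load_model('models/')          # directory containing saved_model.pb
dummy = np.zeros((1, 512, 512, 3), dtype=np.float32)   # assumed mosaic-shaped input
print(model.predict(dummy))
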
diff --git a/models/saved_model.pb b/models/saved_model.pb
new file mode 100644
index 0000000..d537387
Binary files /dev/null and b/models/saved_model.pb differ
diff --git a/models/variables/variables.data-00000-of-00001 b/models/variables/variables.data-00000-of-00001
new file mode 100644
index 0000000..5cfbedd
Binary files /dev/null and b/models/variables/variables.data-00000-of-00001 differ
diff --git a/models/variables/variables.index b/models/variables/variables.index
new file mode 100644
index 0000000..3b64713
Binary files /dev/null and b/models/variables/variables.index differ
diff --git a/service/include/BasicLoader.hpp b/service/include/BasicLoader.hpp
new file mode 100644
index 0000000..d6bfdd2
--- /dev/null
+++ b/service/include/BasicLoader.hpp
@@ -0,0 +1,32 @@
+#pragma once
+
+#include <string>
+#include <utility>
+
+#include "Model.hpp"
+#include "Loader.hpp"
+
+namespace ML {
+
+template <class T>
+class BasicLoader : public Loader<T> {
+public:
+    explicit BasicLoader(std::string resource_name)
+        : _resource_name(std::move(resource_name)) {}
+
+private:
+    T* doLoad() override
+    {
+        _m = new T(_resource_name);
+        return _m;
+    }
+
+    void doUnload() override
+    {
+        delete _m;
+        _m = nullptr;
+    }
+
+    void doEstimate() override {}
+
+    std::string _resource_name;
+    T* _m = nullptr;
+};
+
+} // namespace ML
diff --git a/service/include/BasicModelManger.hpp b/service/include/BasicModelManger.hpp
new file mode 100644
index 0000000..07d3a1f
--- /dev/null
+++ b/service/include/BasicModelManger.hpp
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <functional>
+
+#include "ModelManager.hpp"
+
+template <class T>
+class BasicModelManager : public ModelManager<T> {
+public:
+    // hook run around (un)loading; the original template arguments were lost,
+    // a niladic callback is assumed here
+    using PreHook = std::function<void()>;
+
+    ~BasicModelManager() override = default;
+};
diff --git a/service/include/Loader.hpp b/service/include/Loader.hpp
new file mode 100644
index 0000000..b7262f5
--- /dev/null
+++ b/service/include/Loader.hpp
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <memory>
+#include <string>
+
+namespace ML {
+
+enum class Status
+{
+    LOAD_STARTED,
+    LOAD_ONGOING,
+    LOAD_ENDED
+};
+
+template <class T>
+class Loader {
+public:
+    virtual ~Loader() = default;
+
+    T* Load()
+    {
+        return doLoad();
+    }
+
+    void Unload()
+    {
+        doUnload();
+    }
+
+    void EstimateResource()
+    {
+        doEstimate();
+    }
+
+private:
+    // non-virtual interface: subclasses implement only the do* hooks
+    virtual T* doLoad() = 0;
+    virtual void doUnload() = 0;
+    virtual void doEstimate() = 0;
+};
+
+// Source<> is expected to come from a separate header; the template arguments
+// below are a reconstruction of the original, stripped declaration
+template <class T>
+using LoaderSource = Source<std::unique_ptr<Loader<T>>>;
+
+} // namespace ML
diff --git a/service/include/ModelManager.hpp b/service/include/ModelManager.hpp
new file mode 100644
index 0000000..bc883be
--- /dev/null
+++ b/service/include/ModelManager.hpp
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <cstdint>
+#include <map>
+#include <string>
+#include <vector>
+
+// T: the managed model type
+template <class T>
+class ModelManager {
+public:
+    virtual ~ModelManager() = default;
+    // the element type was stripped from the original; names are assumed
+    virtual std::vector<std::string> AvailableModels() const = 0;
+
+private:
+    struct ModelId {
+        uint32_t _id;
+        std::string _name;
+    };
+    // key/value types were stripped from the original; id -> entry is assumed
+    std::map<uint32_t, ModelId> list_models;
+};
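
A minimal sketch of how these headers are meant to compose, in C++ to match the surrounding code. The Model struct below is a hypothetical stand-in for whatever Model.hpp (referenced but not part of this diff) actually declares; the non-virtual interface keeps Load/Unload public while subclasses implement only the do* hooks:

#include <iostream>
#include <string>
#include <utility>

#include "BasicLoader.hpp"

// hypothetical stand-in for the real Model declared in Model.hpp
struct Model {
    explicit Model(std::string name) : name(std::move(name)) {}
    std::string name;
};

int main()
{
    ML::BasicLoader<Model> loader("models/saved_model.pb");
    Model* model = loader.Load();   // dispatches to BasicLoader::doLoad()
    std::cout << model->name << '\n';
    loader.Unload();                // frees the model via doUnload()
}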