# shopee_custom.py

# -*- coding: utf-8 -*-
"""Shopee_old_cnn.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/19zJ573_Ic4y6eW5s4nRcw1MPP0ZorMg5
"""

# Colab session setup: attach Google Drive, then make the Shopee project
# folder the working directory.
import os
from google.colab import drive

drive.mount('/content/drive')
os.chdir("/content/drive/MyDrive/Shopee")

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data is read from the working directory: the training table from
# 'train.csv' and the image files from the 'train_images/' folder.

import os
import gensim
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
from nltk.stem import WordNetLemmatizer, SnowballStemmer
from nltk.stem.porter import *
import numpy as np
np.random.seed(2018)
from gensim.models import Word2Vec
import nltk
nltk.download('wordnet')
stemmer = SnowballStemmer('english')

from numpy import dot
from numpy.linalg import norm


# In[2]:


import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
import numpy as np


# In[3]:


# Load the training table from the working directory.
train_df = pd.read_csv('train.csv')


# In[4]:


train_df


# In[4]:


# Image file names, in the same row order as train_df.
train_image_list = train_df['image'].to_list()
train_image_list


# In[5]:


import cv2

base_path = 'train_images/'
train_images = []
for image in train_image_list:
    image_path = base_path + image
    print(image_path)
    pixels = cv2.imread(image_path)
    # cv2.imread reports a missing or undecodable file by returning None
    # rather than raising. Stop here with the offending path instead of
    # letting cv2.resize fail with an error that does not name the file.
    # Skipping the image is not an option: it would shift every later image
    # relative to its row in train_df.
    if pixels is None:
        raise OSError("cv2.imread could not read image: " + image_path)
    # Shrink every image to 32x32 so they all share one shape.
    train_images.append(cv2.resize(pixels, (32, 32)))

# # Image resizing

# In[6]:


import cv2


# In[7]:


# res = []
# for img in train_images:
#     try:
#       res.append(cv2.resize(img, (32,32)))
#     except:
#       print("error during image resize", img)
#       continue


# In[8]:


# The image list is used as-is (no further per-image processing here).
res = train_images


# In[9]:


# Stack the list of per-image arrays into one NumPy array.
x_arr = np.asarray(res)


# In[10]:


x_arr.shape


# # Data normalization

# In[11]:


# True division by 255; integer pixel values become floats.
x_arr = np.divide(x_arr, 255)


# In[12]:


# Target column: one 'label_group' value per training row.
y_train = train_df['label_group']
y_train


# In[13]:


# Peek at the first five targets.
y_train.head(5)


# In[14]:


# Same values as a plain NumPy array.
y_arr = y_train.to_numpy()
y_arr.shape


# In[15]:


y_arr


# In[16]:


x_arr


# In[17]:


y_arr


# In[18]:


# Re-index the raw label values as consecutive integers starting at 0,
# numbered in order of first appearance in y_arr.
#   my_dict          raw label -> class index
#   modified_labels  class index of every row, in row order
#   ptr              highest class index issued (-1 when y_arr is empty)
my_dict = {}
modified_labels = []
for label in y_arr:
    # setdefault inserts only for an unseen label, handing it the next free
    # index; for a known label it simply returns the stored index.
    modified_labels.append(my_dict.setdefault(label, len(my_dict)))
ptr = len(my_dict) - 1


# In[19]:


ptr


# In[20]:


y_arr.size


# In[21]:


len(modified_labels)


# In[22]:


# Distinct raw labels.
uniqueKeys = set(my_dict)
len(uniqueKeys)


# In[23]:


# Distinct class indices; there should be as many as distinct raw labels.
uniqueValues = set(my_dict.values())
len(uniqueValues)


# In[24]:


my_dict


# In[25]:


modified_labels


# In[26]:


len(modified_labels)


# In[27]:


# Class indices as a NumPy array, ready to be used as training targets.
modified_label_arr = np.array(modified_labels)
modified_label_arr


# In[28]:


# Sorted distinct raw labels (independent of the first-seen numbering above).
categories = np.unique(y_arr)
categories


# In[29]:


categories.size


# In[30]:


modified_label_arr


# In[31]:


len(uniqueKeys)

# # Model training

# In[32]:

import torch
import torch.nn as nn

class ResidualBlock(torch.nn.Module):
    """Five-stage convolutional feature extractor.

    Every stage applies, in order: a 3x3 convolution (padding 1), for stages
    3-5 an additional 1x1 convolution (padding 1), 2x2 average pooling with
    stride 2, batch normalisation and ReLU. Despite the class name there are
    no skip connections; the stages simply run one after another.

    Spatial size through the stages for square inputs:
        224 -> 112 -> 57 -> 30 -> 17 -> 10
         32 ->  16 ->  9 ->  6 ->  5 ->  4
    """

    def __init__(self, channels):
        """
        Initialize residual block with given configs
        :param channels: sequence of at least five ints; channels[0] is the
            number of input channels and channels[1..4] are the output
            channels of stages 1, 2/3, 4 and 5 (stage 3 keeps channels[2]).
        :raises ValueError: if fewer than five channel counts are given.
        """
        super(ResidualBlock, self).__init__()

        if len(channels) < 5:
            raise ValueError(
                "channels must hold at least 5 entries, got %d" % len(channels))

        self.block1 = self._make_stage(channels[0], channels[1],
                                       pointwise=False, pool_padding=0)
        self.block2 = self._make_stage(channels[1], channels[2],
                                       pointwise=False, pool_padding=1)
        self.block3 = self._make_stage(channels[2], channels[2],
                                       pointwise=True, pool_padding=1)
        self.block4 = self._make_stage(channels[2], channels[3],
                                       pointwise=True, pool_padding=1)
        self.block5 = self._make_stage(channels[3], channels[4],
                                       pointwise=True, pool_padding=1)

    @staticmethod
    def _make_stage(in_channels, out_channels, pointwise, pool_padding):
        """
        Build one conv -> [1x1 conv] -> avg-pool -> batch-norm -> ReLU stage
        :param in_channels: channels entering the stage
        :param out_channels: channels leaving the stage
        :param pointwise: add the extra 1x1 convolution after the 3x3 one
        :param pool_padding: zero padding applied by the pooling layer
        :return: the stage as a torch.nn.Sequential
        """
        layers = [torch.nn.Conv2d(in_channels=in_channels,
                                  out_channels=out_channels,
                                  kernel_size=3,
                                  padding=1)]
        if pointwise:
            layers.append(torch.nn.Conv2d(in_channels=out_channels,
                                          out_channels=out_channels,
                                          kernel_size=1,
                                          padding=1))
        # AvgPool2d, not AdaptiveAvgPool2d: the adaptive layer accepts only an
        # output size, so passing (2, 2) or padding=... raises TypeError. The
        # (kernel_size, stride, padding) arguments belong to AvgPool2d.
        layers.append(torch.nn.AvgPool2d(kernel_size=2,
                                         stride=2,
                                         padding=pool_padding))
        layers.append(torch.nn.BatchNorm2d(out_channels))
        layers.append(torch.nn.ReLU(inplace=True))
        return torch.nn.Sequential(*layers)

    def forward(self, x):
        """
        Run the input through the five stages in order
        :param x: image batch of shape (N, channels[0], H, W)
        :return: feature map of shape (N, channels[4], H', W')
        """
        for stage in (self.block1, self.block2, self.block3,
                      self.block4, self.block5):
            x = stage(x)
        return x

class MultiLabelClassifier(torch.nn.Module):
    """Image classifier: ResidualBlock features followed by a 3-layer MLP head.

    The head ends in a plain Linear layer, so the output is raw, unnormalised
    class scores (logits), one per class.
    """

    def __init__(self, num_classes=11014):
        """
        Initialize with given params
        :param num_classes: number of output classes (defaults to the value
            that used to be hard-coded, 11014)
        """
        super(MultiLabelClassifier, self).__init__()

        self.residual_block = ResidualBlock(channels=[3, 64, 128, 256, 512])
        # The first Linear layer needs exactly 512*10*10 features. Adaptive
        # pooling to a 10x10 grid guarantees that for any input resolution
        # and leaves a feature map that is already 10x10 unchanged.
        self.pool = nn.AdaptiveAvgPool2d((10, 10))
        self.classifier = nn.Sequential(
            nn.Linear(in_features=512*10*10,
                      out_features=1024),
            torch.nn.ReLU(inplace=True),
            # Plain Dropout: the activations here are flat (N, features)
            # vectors, and Dropout2d is meant for (N, C, H, W) feature maps.
            nn.Dropout(),
            nn.Linear(in_features=1024,
                      out_features=1024),
            torch.nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(in_features=1024,
                      out_features=num_classes),
        )

    def forward(self, x):
        """
        Extract features, flatten them per sample and classify
        :param x: image batch of shape (N, 3, H, W)
        :return: logits of shape (N, num_classes)
        """
        x = self.residual_block(x)
        x = self.pool(x)

        # Flatten everything except the batch dimension, so a shape mismatch
        # can never be silently folded into the batch axis.
        x = torch.flatten(x, 1)
        out = self.classifier(x)
        return out


# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

image_model = MultiLabelClassifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(image_model.parameters(), lr=1e-4)
num_epochs = 20
test_frequency = 5  # not referenced by the training loop below
batch_size = 32

# A DataLoader must be given an indexable dataset of tensors, not the raw
# DataFrame. Conv2d expects float32 input laid out as (N, C, H, W).
# NOTE(review): assumes x_arr is (N, H, W, 3), i.e. stacked cv2 images --
# confirm if the loading code changes.
image_tensor = torch.as_tensor(x_arr, dtype=torch.float32).permute(0, 3, 1, 2).contiguous()
# CrossEntropyLoss takes int64 class indices as targets.
label_tensor = torch.as_tensor(modified_label_arr, dtype=torch.long)
train_dataset = torch.utils.data.TensorDataset(image_tensor, label_tensor)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=1)


# In[33]:


# In[34]:


# torch.nn.Module has no Keras-style fit(); the optimisation loop is explicit.
for epoch in range(num_epochs):
    image_model.train()
    epoch_loss = 0.0
    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        loss = criterion(image_model(batch_images), batch_labels)
        loss.backward()
        optimizer.step()

        # Weight by batch size so a shorter final batch is averaged correctly.
        epoch_loss += loss.item() * batch_images.size(0)
    print("epoch %d/%d - loss: %.4f"
          % (epoch + 1, num_epochs, epoch_loss / max(len(train_dataset), 1)))

# # Save model

# In[36]:

# torch.nn.Module has no save() either; persist the learned weights with
# torch.save. The file name is kept as-is, but the content is a PyTorch
# state_dict, not an HDF5 file; reload it with load_state_dict(torch.load(...)).
torch.save(image_model.state_dict(), 'cnn_sivyati_model_25.h5')