# captcha_main.py

# -*- coding: UTF-8 -*-
import logging
import os

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable

import captcha_setting
import my_dataset
import one_hot_encoding as ohe
from captcha_cnn_model import CNN

# Hyper Parameters
num_epochs = 2000   # upper bound on the number of training epochs
batch_size = 256    # batch size passed to every data loader
patience = 20       # epochs without validation improvement before early stop

# The log file below (and the checkpoints written during training) live under
# ./result. logging.basicConfig opens the file immediately, so the directory
# must exist first or the import itself fails with FileNotFoundError.
os.makedirs('./result', exist_ok=True)

# Append to a single log file shared by all runs.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(message)s',
                    datefmt='%m-%d %H:%M:%S',
                    filename='./result/log_info.log',
                    filemode='a')


def get_char_from_output_and_acc_count(val_out, val_lab, test_flag=False):
    """
    Decode every prediction into a captcha string and count the exact matches.

    Each output row is read as MAX_CAPTCHA consecutive segments of
    ALL_CHAR_SET_LEN scores; the highest score in a segment selects the
    character at that position from ALL_CHAR_SET. The number of characters is
    taken from captcha_setting.MAX_CAPTCHA rather than being fixed at four.

    :param val_out: the output of cnn model. shape(n_batch, d_encode) e.g.(512,144)
    :param val_lab: the label of val_data. shape(n_batch, d_encode) e.g.(512,144)
    :param test_flag: when True, print the true and predicted string of every sample
    :return:acc_number (int), samples whose whole string was predicted correctly
    """
    char_set = captcha_setting.ALL_CHAR_SET
    char_set_len = captcha_setting.ALL_CHAR_SET_LEN
    max_captcha = captcha_setting.MAX_CAPTCHA

    # Move the whole batch to host memory once instead of once per sample.
    scores = val_out.detach().cpu().numpy()
    labels = val_lab.detach().cpu().numpy()

    acc_number = 0
    for sample_scores, sample_label in zip(scores, labels):
        predict_char = ''.join(
            str(char_set[np.argmax(sample_scores[k * char_set_len:(k + 1) * char_set_len])])
            for k in range(max_captcha)
        )
        true_char = ohe.decode(sample_label)
        is_correct = predict_char == true_char
        if is_correct:
            acc_number += 1
        # show result
        if test_flag:
            if is_correct:
                print("true_label:{}\t\tpredict:{}\t\tTrue!".format(true_char, predict_char))
            else:
                print("true_label:{}\t\tpredict:{}".format(true_char, predict_char))
    return acc_number


def acc_count(val_out, val_lab):
    """
    count the correct data in val_out according to val_lab

    A sample counts as correct only when the predicted index of every
    character position equals the index of the 1 in the label's segment.

    :param val_out: the output of cnn model. shape(n_batch, d_encode) e.g.(512,144)
    :param val_lab: the label of val_data. shape(n_batch, d_encode) e.g.(512,144)
    :return:acc_number (int)
    :raises ValueError: if val_out and val_lab do not have the same shape
    """
    char_set_len = captcha_setting.ALL_CHAR_SET_LEN     # e.g. 36
    max_captcha = captcha_setting.MAX_CAPTCHA           # e.g. 4
    if val_out.shape != val_lab.shape:
        # Raise instead of print + exit(): exit() would end the process with
        # a success status and is not guaranteed to exist outside the REPL.
        raise ValueError("shape is different! val_out:{} val_lab:{}".format(
            tuple(val_out.shape), tuple(val_lab.shape)))

    # Labels: column index of every 1, one row of max_captcha indices per sample.
    labels = val_lab.detach().cpu().numpy()            # one-hot-encoding
    label_index = np.where(labels == 1)[1].reshape(-1, max_captcha)

    # Predictions: argmax inside each character segment, shifted by the
    # segment offset so the indices are comparable with the label columns.
    # argmax returns the first maximum, matching the original tie-breaking.
    outputs = val_out.detach().cpu().numpy()
    segments = outputs[:, :max_captcha * char_set_len].reshape(
        outputs.shape[0], max_captcha, char_set_len)
    predict_index = segments.argmax(axis=2) + np.arange(max_captcha) * char_set_len

    # A sample is correct only if all of its character positions agree.
    return int(np.sum(np.all(predict_index == label_index, axis=1)))


def model_evaluate(cnn, val_data, test_flag=False):
    """
    evaluate the model of cnn by using val_data

    The model is switched to eval mode and is left in that mode on return;
    callers that continue training must call ``cnn.train()`` again.

    :param cnn:         captcha broking model, generated by captcha_cnn_model.py
    :param val_data:    validation data, load by my_dataset.get_data() method
    :param test_flag:   forwarded to get_char_from_output_and_acc_count; when
                        True every sample's true/predicted string is printed
    :return:accuracy (float) rounded to 5 decimals; 0.0 when val_data yields no samples
    """
    # Run the inputs on whatever device the model lives on. A model without
    # parameters falls back to the GPU, which is what the code used before.
    first_param = next(cnn.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')

    acc_num = 0
    total_num = 0
    with torch.no_grad():
        cnn.eval()
        for val_img, val_lab in val_data:
            val_out = cnn(val_img.to(device))
            total_num += val_lab.shape[0]
            acc_num += get_char_from_output_and_acc_count(val_out, val_lab, test_flag)

    # An empty loader would otherwise divide by zero.
    if total_num == 0:
        return 0.0
    accuracy = (acc_num*1.0) / total_num
    return round(accuracy, 5)


def main(lr=None):
    """
    Train a CNN with early stopping on validation accuracy, then reload the
    best checkpoint and report its accuracy on the train and test sets.

    Progress is printed and written to the log; the best weights are saved to
    ``./result/-lr:<lr>.pkl``.

    :param lr: learning rate for the Adam optimizer. When None, the
               module-level ``learning_rate`` (bound by the ``__main__``
               sweep) is used, which keeps ``main()`` working as before.
    :return: None
    """
    if lr is None:
        lr = learning_rate

    cnn = CNN()
    cnn.cuda()          # use gpu
    cnn.train()
    print('init net')
    criterion = nn.MultiLabelSoftMarginLoss()
    # optim is important
    optimizer = torch.optim.Adam(cnn.parameters(), lr=lr)    # weight_decay=1e-08

    # get data
    train_data = my_dataset.get_data(captcha_setting.TRAIN_DATASET_PATH, batch=batch_size, shuffle=True)
    val_data = my_dataset.get_data(captcha_setting.VAL_DATASET_PATH, batch=batch_size, shuffle=False)
    test_data = my_dataset.get_data(captcha_setting.TEST_DATASET_PATH, batch=batch_size, shuffle=False)

    # logging information
    # 123.456 and -1 are sentinels that the first epoch's values replace.
    info_dic = {'-train_loss_best': 123.456, '-val_acc_best': -1, '-train_epoch': 0, '-val_epoch': 0}
    info_log = "-num_epochs:{}\t-batch:{}\t-lr:{}".format(num_epochs, batch_size, lr)
    logging.info(info_log)
    print(info_log)

    # train and validation epoch
    train_patience = 0
    model_file = "./result/-lr:{}.pkl".format(lr)
    for epoch in range(1, num_epochs+1):
        # train (model_evaluate leaves the model in eval mode, so reset it)
        cnn.train()
        train_loss_item = list()
        for images, labels in train_data:
            images = images.cuda()              # use gpu
            labels = labels.float().cuda()      # use gpu
            predict_labels = cnn(images)
            train_loss = criterion(predict_labels, labels)
            train_loss_item.append(train_loss.item())
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
        # Plain float so the value prints cleanly inside info_dic as well.
        train_loss = round(float(np.mean(np.array(train_loss_item, dtype=float))), 5)

        # validation every epoch
        val_acc = model_evaluate(cnn, val_data)

        # early stop
        if val_acc > info_dic['-val_acc_best']:
            info_dic['-val_acc_best'] = val_acc
            info_dic['-val_epoch'] = epoch
            train_patience = 0
            # save model into .pkl file
            torch.save(cnn.state_dict(), model_file)
        else:
            train_patience += 1
            if train_patience >= patience:
                info_log = "Early stop! -real_epoch:{}".format(epoch)
                print(info_log)
                logging.info(info_log)
                break

        # log information and show
        if train_loss < info_dic['-train_loss_best']:
            info_dic['-train_loss_best'] = train_loss
            info_dic['-train_epoch'] = epoch
        info_log = "epoch:{}\t-train_loss:{}\t-val_acc:{}\t-train_patience:{}" \
                   "\t-train_loss_best:{}\t-val_acc_best:{}".\
            format(epoch, train_loss, val_acc, train_patience,
                   info_dic['-train_loss_best'], info_dic['-val_acc_best'])
        logging.info(info_log)
        print(info_log)

    # test after training: evaluate the best checkpoint, not the last epoch
    cnn = CNN()
    cnn.cuda()      # use gpu
    cnn.load_state_dict(torch.load(model_file))
    train_acc = model_evaluate(cnn, train_data)
    test_acc = model_evaluate(cnn, test_data)
    info_log = "-info_dic:{}\t-train_acc:{}\t-test_acc:{}\n\n".format(info_dic, train_acc, test_acc)
    logging.info(info_log)
    print(info_log)


if __name__ == '__main__':
    # Learning-rate sweep: one complete training run per candidate value.
    # main() reads the module-level name `learning_rate`, so the loop binds
    # each candidate under exactly that name before calling it.
    lr_list = [0.0009, 0.001, 0.0014, 0.0017, 0.002]
    for learning_rate in lr_list:
        main()