diff --git a/.gitignore b/.gitignore index 24dae53..e3db644 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,5 @@ nosetests.xml .pydevproject test train +*.pkl +*.pckl diff --git a/.idea/workspace.xml b/.idea/workspace.xml index e6bac21..36e3c7f 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -15,7 +15,7 @@ - + @@ -35,8 +35,8 @@ - - + + @@ -44,11 +44,21 @@ + + + + + + + + + + - - + + @@ -75,6 +85,8 @@ thresho threshol threshold + 2 + 25 @@ -111,7 +123,6 @@ - @@ -192,11 +203,12 @@ + - + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + + + + + + + + + + + + + + + + + + @@ -395,9 +520,6 @@ - - - @@ -441,9 +563,6 @@ - - - @@ -487,9 +606,6 @@ - - - @@ -541,46 +657,43 @@ - + - - - - - + + - + - - + + - + - - + + - + - + - - + + - - + + diff --git a/src.tgz b/src.tgz new file mode 100644 index 0000000..90253d4 Binary files /dev/null and b/src.tgz differ diff --git a/src/de/fu/violajones/AdaBoost.py b/src/de/fu/violajones/AdaBoost.py index 965310d..c0fb87c 100644 --- a/src/de/fu/violajones/AdaBoost.py +++ b/src/de/fu/violajones/AdaBoost.py @@ -3,6 +3,10 @@ from HaarLikeFeature import HaarLikeFeature from de.fu.violajones.HaarLikeFeature import FeatureTypes import sys +import pickle +import os +from pympler import summary, muppy + class AdaBoost(object): ''' @@ -16,53 +20,72 @@ def __init__(self, params): ''' def learn(positives, negatives, T): + if os.path.isfile('votes.pkl'): + images = [] + print('loading preprocessed votes..') + with open('votes.pkl', 'rb') as file: + votes = pickle.load(file) + f_votes = next(iter(votes.values())).tolist() + for img, _ in f_votes: + images.append(img) + images = np.array(images) + + else: + print('Generating data from scratch') + # construct initial weights + pos_weight = 1. / (2 * len(positives)) + neg_weight = 1. / (2 * len(negatives)) + for p in positives: + p.set_weight(pos_weight) + for n in negatives: + n.set_weight(neg_weight) - # construct initial weights - pos_weight = 1. / (2 * len(positives)) - neg_weight = 1. / (2 * len(negatives)) - for p in positives: - p.set_weight(pos_weight) - for n in negatives: - n.set_weight(neg_weight) - - # create column vector - images = np.hstack((positives, negatives)) + # create column vector + images = np.hstack((positives, negatives)) - print('Creating haar like features..') - features = [] - for f in FeatureTypes: - for width in range(f[0],25,f[0]): - for height in range(f[1],25,f[1]): - for x in range(25-width): - for y in range(25-height): - features.append(HaarLikeFeature(f, (x,y), width, height, 0, 1)) - print('..done.\n' + str(len(features)) + ' features created.\n') + print('Creating haar like features..') + features = [] + for f in FeatureTypes: + for width in range(f[0],20,f[0]): + for height in range(f[1],20,f[1]): + for x in range(20-width): + for y in range(20-height): + features.append(HaarLikeFeature(f, (x,y), width, height, 0, 1)) + print('..done.\n' + str(len(features)) + ' features created.\n') - print('Calculating scores for features..') - # dictionary of feature -> list of vote for each image: matrix[image, weight, vote]) - votes = dict() - i = 0 - for feature in features: - # calculate score for each image, also associate the image - feature_votes = np.array(list(map(lambda im: [im, feature.get_vote(im)], images))) - votes[feature] = feature_votes - i += 1 - if i % 1000 == 0: -# break #@todo: remove - print(str(i) + ' features of ' + str(len(features)) + ' done') + print('Calculating scores for features..') + # dictionary of feature -> list of vote for each image: matrix[image, weight, vote]) + votes = dict() + + i = 0 + + for feature in features: + # calculate score for each image, also associate the image + votes[feature] = np.array(list(map(lambda im: [im, feature.get_vote(im)], images))) + + i += 1 + if i % 1000 == 0: + print(str(i) + ' features of ' + str(len(features)) + ' done') + + # pickle our work from before + print('storing generated votes..') + with open('votes.pkl', 'wb') as file: + pickle.dump(votes, file) + print('..done.\n') - + + + # select classifiers - classifiers = [] used = [] - + n_features = len(votes) + print('Selecting classifiers..') -# sys.stdout.write('[' + ' '*20 + ']\r') -# sys.stdout.flush() + for i in range(T): - + print('picking feature # %d ..'%(i+1)) classification_errors = dict() # normalize weights @@ -71,6 +94,7 @@ def learn(positives, negatives, T): image.set_weight(image.weight * norm_factor) # compute information gains of the classifiers over the images + i_feature = 1 for feature, feature_votes in votes.items(): if feature in used: @@ -80,7 +104,12 @@ def learn(positives, negatives, T): # map error -> feature, use error as key to select feature with # smallest error later classification_errors[error] = feature - + if i_feature % 1000 == 0: + print('[ %d of %d ]\r'%(i_feature, n_features)) + + i_feature += 1 + + print("") # get best feature, i.e. with smallest error errors = list(classification_errors.keys()) best_error = errors[np.argmin(errors)] @@ -100,12 +129,9 @@ def learn(positives, negatives, T): else: im.set_weight(im.weight * np.sqrt(best_error/(1-best_error))) - if i % 100 == 0: - print(str(i) + ' features of ' + str(T) + ' selected') - -# sys.stdout.write('[' + '='*int((((i+1)*20)/T)) + ' '*int((20-(((i+1)*20)/T))) + ']\r') -# sys.stdout.flush() - print('..done.\n') + if (i+1) % 10 == 0: + with open('classifiers.pckl', 'wb') as file: + pickle.dump(classifiers, file) return classifiers \ No newline at end of file diff --git a/src/de/fu/violajones/IntegralImage.py b/src/de/fu/violajones/IntegralImage.py index fa6fec4..1379402 100644 --- a/src/de/fu/violajones/IntegralImage.py +++ b/src/de/fu/violajones/IntegralImage.py @@ -16,7 +16,7 @@ class IntegralImage: def __init__(self, imageSrc, label): - self.original = np.array(Image.open(imageSrc).resize((25,25))) + self.original = np.array(Image.open(imageSrc)) self.sum = 0 self.label = label self.calculate_integral() diff --git a/src/de/fu/violajones/ViolaJones.py b/src/de/fu/violajones/ViolaJones.py index d9f9626..65520e8 100644 --- a/src/de/fu/violajones/ViolaJones.py +++ b/src/de/fu/violajones/ViolaJones.py @@ -20,11 +20,12 @@ def classify(classifiers, image): # TODO: select optimal threshold for each feature # TODO: attentional cascading - + faces = [] + non_faces = [] print('Loading faces..') - faces = load_images('train/face', 1) +# faces = load_images('train/face', 1) print('..done. ' + str(len(faces)) + ' faces loaded.\n\nLoading non faces..') - non_faces = load_images('train/non-face', -1) +# non_faces = load_images('train/non-face', -1) print('..done. ' + str(len(non_faces)) + ' non faces loaded.\n') T = 3000 @@ -36,9 +37,6 @@ def classify(classifiers, image): non_faces = load_images('test/non-face', -1) print('..done. ' + str(len(non_faces)) + ' non faces loaded.\n') - with open('classifiers_' + str(T) + '_' + hex(random.getrandbits(16)) + '.pckl', 'wb') as file: - pickle.dump(classifiers, file) - print('Validating selected classifiers..') correct_faces = 0 correct_non_faces = 0