Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimisation #57

Merged
merged 8 commits into from
Nov 4, 2021
7 changes: 7 additions & 0 deletions docs/src/benchmarking.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,10 @@ Platform Info:
LIBM: libopenlibm
LLVM: libLLVM-9.0.1 (ORCJIT, skylake)
```

## 1.6 Update

A few months after the release of Julia 1.6, I revisited performance (1.6 ships quite a few nice improvements). The updated benchmarking results are below (see [`benchmark/basic.jl`](https://github.com/jakewilliami/FaceDetection.jl/blob/master/benchmark/basic.jl)).
Language of Implementation | Commit | Run Time in Seconds | Number of Allocations | Memory Usage
--- | --- | --- | --- | ---
[Julia](https://github.com/jakewilliami/FaceDetection.jl/) | [???]() | 8.165 | 249021919 | 5.01 GiB
124 changes: 124 additions & 0 deletions examples/basic_ffhq_things.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Adapted from https://github.com/Simon-Hohberg/Viola-Jones/

# Faces dataset: [FFHQ](https://github.com/NVlabs/ffhq-dataset/) 70_001 images of faces
# Non-faces dataset: [THINGS](https://osf.io/3fu6z/); 26_107 object images

@info "Loading required libraries (it will take a moment to precompile if it is your first time doing this)..."

using FaceDetection
using Images: imresize
using Printf: @printf
using Random: randperm

@info("...done")

"Return a random subset of the contents of directory `path` of size `n`."
function rand_subset_ls(path::String, n::Int)
dir_contents = readdir(path, join=true, sort=false)
filter!(f -> !occursin(r".*\.DS_Store", f), dir_contents)
@assert(length(dir_contents) >= n, "Not enough files in given directory to select `n` random.")

subset_ls = Vector{String}(undef, n)
for i in 1:n
j = rand(1:length(dir_contents))
subset_ls[i] = dir_contents[j]
deleteat!(dir_contents, j)
end

return subset_ls
end

"""
    main(num_pos::Int, num_neg::Int; smart_choose_feats=false, scale=true, scale_to=(128, 128))

Train a boosted ensemble of Haar-like-feature classifiers on `num_pos` face images
(FFHQ) and `num_neg` non-face images (THINGS), then evaluate it on an equally-sized
held-out set and print the recognition rates.

# Arguments
- `num_pos`/`num_neg`: number of positive/negative *training* images; the same
  number again is drawn and held out for testing (hence `2num_pos`/`2num_neg` below).
- `smart_choose_feats`: if `true`, call `determine_feature_size` to pick feature
  size bounds from the data; otherwise use hard-coded bounds.
- `scale`, `scale_to`: whether (and to what size) images are resized during
  feature computation; forwarded to the FaceDetection.jl routines.
"""
function main(
    num_pos::Int,
    num_neg::Int;
    smart_choose_feats::Bool=false,
    scale::Bool=true,
    scale_to::Tuple=(128, 128)
)
    # Datasets are expected under <repo>/data (this file lives in <repo>/examples)
    data_path = joinpath(dirname(@__DIR__), "data")

    pos_training_path = joinpath(data_path, "ffhq", "thumbnails128x128")
    neg_training_path = joinpath(data_path, "things", "object_images")

    # Draw twice the requested count so the second half can be held out for testing
    all_pos_images = rand_subset_ls(pos_training_path, 2num_pos)
    all_neg_images = rand_subset_ls(neg_training_path, 2num_neg)

    pos_training_images = all_pos_images[1:num_pos]
    neg_training_images = all_neg_images[1:num_neg]

    num_classifiers = 10
    # Declared up front so the binding is visible after the `if`; always a 2-tuple of Ints
    local min_size_img::Tuple{Int, Int}

    if smart_choose_feats
        # For performance reasons restricting feature size
        @info("Selecting best feature width and height...")

        max_feature_width, max_feature_height, min_feature_height, min_feature_width, min_size_img =
            determine_feature_size(vcat(pos_training_images, neg_training_images); scale = scale, scale_to = scale_to, show_progress = true)

        @info("...done. Maximum feature width selected is $max_feature_width pixels; minimum feature width is $min_feature_width; maximum feature height is $max_feature_height pixels; minimum feature height is $min_feature_height.\n")
    else
        # Hard-coded bounds — presumably precomputed for these 128x128 datasets; TODO confirm
        max_feature_width, max_feature_height, min_feature_height, min_feature_width = (67, 67, 65, 65)
        min_size_img = (128, 128)
    end

    # classifiers are haar like features
    # NOTE(review): arguments here are passed as (min_h, max_h, min_w, max_w), but the
    # `learn` method in src/AdaBoost.jl declares widths before heights — harmless while
    # the width/height bounds are equal, but confirm the intended order.
    classifiers = learn(pos_training_images, neg_training_images, num_classifiers, min_feature_height, max_feature_height, min_feature_width, max_feature_width; scale = scale, scale_to = scale_to)

    @info("Testing selected classifiers...")
    sleep(3) # sleep here because sometimes the threads from `learn` are still catching up and then `ensemble_vote_all` errors

    # Held-out second halves of the sampled file lists
    pos_testing_images = all_pos_images[(num_pos + 1):2num_pos]
    neg_testing_images = all_neg_images[(num_neg + 1):2num_neg]
    num_faces = length(pos_testing_images)
    num_non_faces = length(neg_testing_images)

    # `ensemble_vote_all` appears to return 1 per image voted "face" and 0 otherwise,
    # so its sum counts face votes — NOTE(review): confirm against FaceDetection.jl
    correct_faces = sum(ensemble_vote_all(pos_testing_images, classifiers, scale=scale, scale_to=scale_to))
    correct_non_faces = num_non_faces - sum(ensemble_vote_all(neg_testing_images, classifiers, scale=scale, scale_to=scale_to))
    correct_faces_percent = (correct_faces / num_faces) * 100
    correct_non_faces_percent = (correct_non_faces / num_non_faces) * 100

    # Human-readable summary strings for the report printed below
    faces_frac = string(correct_faces, "/", num_faces)
    faces_percent = string("(", correct_faces_percent, "% of faces were recognised as faces)")
    non_faces_frac = string(correct_non_faces, "/", num_non_faces)
    non_faces_percent = string("(", correct_non_faces_percent, "% of non-faces were identified as non-faces)")

    @info("...done.\n")
    @info("Result:\n")

    @printf("%10.9s %10.15s %15s\n", "Faces:", faces_frac, faces_percent)
    @printf("%10.9s %10.15s %15s\n\n", "Non-faces:", non_faces_frac, non_faces_percent)
end

@time main(2000, 2000; smart_choose_feats = false, scale = true, scale_to = (128, 128))

#=
[ Info: Loading required libraries (it will take a moment to precompile if it is your first time doing this)...
[ Info: ...done
[ Info: Creating Haar-like features...
[ Info: ...finished processing; 169880 features created.
[ Info: Loading images (1000 positive and 1000 negative images) and calculating their scores...
Progress: 100%|███████████████████████████████████████████████████████████████████████████████████████████| Time: 0:02:13
[ Info: Selecting classifiers...
Progress: 100%|███████████████████████████████████████████████████████████████████████████████████████████| Time: 0:00:17
[ Info: Testing selected classifiers...
[ Info: ...done.
[ Info: Result:
Faces: 757/1000 (75.7% of faces were recognised as faces)
Non-faces 749/1000 (74.9% of non-faces were identified as non-faces)
=#

#=
[ Info: Loading required libraries (it will take a moment to precompile if it is your first time doing this)...
[ Info: ...done
[ Info: Creating Haar-like features...
[ Info: ...finished processing; 169880 features created.
[ Info: Loading images (2000 positive and 2000 negative images) and calculating their scores...
Progress: 100%|███████████████████████████████████████████████████████████████████████████████████████████| Time: 0:07:06
[ Info: Selecting classifiers...
Progress: 100%|███████████████████████████████████████████████████████████████████████████████████████████| Time: 0:00:52
[ Info: Testing selected classifiers...
[ Info: ...done.
[ Info: Result:
Faces: 1547/2000 (77.35% of faces were recognised as faces)
Non-faces 1400/2000 (70.0% of non-faces were identified as non-faces)
=#
122 changes: 77 additions & 45 deletions src/AdaBoost.jl
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
# TODO: select optimal threshold for each feature
# TODO: attentional cascading

using Base.Threads: @threads
using Base.Iterators: partition
using ProgressMeter: @showprogress, Progress, next!

"""
    β(err::T) -> T

AdaBoost classifier weight for a weighted error `err`:
`β = ½·ln((1 − err)/err)`. Returns the same numeric type as its input.
"""
function β(err::T)::T where T
    # Keep every operation inside `@fastmath`, matching the original evaluation
    return @fastmath(T(0.5) * log((one(err) - err) / err))
end

function get_feature_votes(
positive_path::AbstractString,
negative_path::AbstractString,
positive_files::Vector{String},
negative_files::Vector{String},
num_classifiers::Integer=-one(Int32),
min_feature_width::Integer=one(Int32),
max_feature_width::Integer=-one(Int32),
Expand Down Expand Up @@ -39,8 +35,6 @@ function get_feature_votes(
_1 = _Int(1)

# get number of positive and negative image
positive_files = filtered_ls(positive_path)
negative_files = filtered_ls(negative_path)
num_pos = length(positive_files)
num_neg = length(negative_files)
num_imgs = num_pos + num_neg
Expand Down Expand Up @@ -80,19 +74,40 @@ function get_feature_votes(

return votes, features
end
# Path-based convenience method: list the image files in each directory and
# delegate to the file-list method of `get_feature_votes`.
function get_feature_votes(
    positive_path::String,
    negative_path::String,
    num_classifiers::Integer=-one(Int32),
    min_feature_width::Integer=one(Int32),
    max_feature_width::Integer=-one(Int32),
    min_feature_height::Integer=one(Int32),
    max_feature_height::Integer=-one(Int32);
    scale::Bool = false,
    scale_to::Tuple = (Int32(200), Int32(200)),
    show_progress::Bool = true
)
    return get_feature_votes(
        filtered_ls(positive_path),
        filtered_ls(negative_path),
        num_classifiers,
        min_feature_width, max_feature_width,
        min_feature_height, max_feature_height;
        scale = scale, scale_to = scale_to,
        show_progress = show_progress
    )
end

function learn(
positive_path::AbstractString,
negative_path::AbstractString,
num_pos::Int, num_neg::Int,
features::Array{HaarLikeObject, 1},
votes::Matrix{Int8},
num_classifiers::Integer=-one(Int32);
show_progress::Bool = true
)
)

# get number of positive and negative images (and create a global variable of the total number of images——global for the @everywhere scope)
num_pos = length(filtered_ls(positive_path))
num_neg = length(filtered_ls(negative_path))
num_imgs = num_pos + num_neg

# Initialise weights $w_{1,i} = \frac{1}{2m}, \frac{1}{2l}$, for $y_i=0,1$ for negative and positive examples respectively
Expand All @@ -107,10 +122,10 @@ function learn(
_neg1 = -_1
labels = Vector{Int8}(undef, num_imgs)
for i in 1:num_pos
labels[i] = _1
@inbounds labels[i] = _1
end
for j in (num_pos + 1):num_imgs
labels[j] = _neg1
@inbounds labels[j] = _neg1
end

# get number of features
Expand All @@ -120,7 +135,6 @@ function learn(

# select classifiers
@info("Selecting classifiers...")
# classifiers = HaarLikeObject[]
classifiers = Vector{HaarLikeObject}(undef, num_classifiers)
classification_errors = Vector{Float64}(undef, num_features)

Expand All @@ -132,20 +146,18 @@ function learn(
weights .*= inv(sum(weights))

# For each feature j, train a classifier $h_j$ which is restricted to using a single feature. The error is evaluated with respect to $w_j,\varepsilon_j = \sum_i w_i\left|h_j\left(x_i\right)-y_i\right|$
@threads for j in 1:length(feature_indices)
@inbounds @threads for j in 1:length(feature_indices)
feature_idx = feature_indices[j]
classification_errors[j] = sum(weights[img_idx] for img_idx in 1:num_imgs if labels[img_idx] !== votes[img_idx, feature_idx])
end

# choose the classifier $h_t$ with the lowest error $\varepsilon_t$
best_error, min_error_idx = findmin(classification_errors)
best_feature_idx = feature_indices[min_error_idx]
best_feature = features[best_feature_idx]

# set feature weight
best_feature = features[best_feature_idx]
feature_weight = β(best_error)
best_feature.weight = feature_weight
best_feature.weight = β(best_error)

# append selected features
classifiers[t] = best_feature
Expand All @@ -162,7 +174,7 @@ function learn(
end

# remove feature (a feature can't be selected twice)
filter!(e -> e ∉ best_feature_idx, feature_indices) # note: without unicode operators, `e ∉ [a, b]` is `!(e in [a, b])`
deleteat!(feature_indices, best_feature_idx)
resize!(classification_errors, length(feature_indices))
next!(p) # increment progress bar
end
Expand All @@ -171,8 +183,8 @@ function learn(
end

function learn(
positive_path::AbstractString,
negative_path::AbstractString,
positive_files::Vector{String},
negative_files::Vector{String},
num_classifiers::Int=-1,
min_feature_width::Int=1,
max_feature_width::Int=-1,
Expand All @@ -184,29 +196,50 @@ function learn(
)

votes, features = get_feature_votes(
positive_path,
negative_path,
positive_files, negative_files,
num_classifiers,
min_feature_width,
max_feature_width,
min_feature_height,
max_feature_height,
scale = scale,
scale_to = scale_to,
min_feature_width, max_feature_width,
min_feature_height, max_feature_height,
scale = scale, scale_to = scale_to,
show_progress = show_progress
)

return learn(positive_path, negative_path, features, votes, num_classifiers; show_progress = show_progress)
num_pos, num_neg = length(positive_files), length(negative_files)

return learn(num_pos, num_neg, features, votes, num_classifiers; show_progress = show_progress)
end

# Path-based convenience method: resolve each directory to a concrete file list,
# then delegate to the file-list method of `learn`.
function learn(
    positive_path::String,
    negative_path::String,
    num_classifiers::Int=-1,
    min_feature_width::Int=1,
    max_feature_width::Int=-1,
    min_feature_height::Int=1,
    max_feature_height::Int=-1;
    scale::Bool = false,
    scale_to::Tuple = (200, 200),
    show_progress::Bool = true
)
    pos_files = filtered_ls(positive_path)
    neg_files = filtered_ls(negative_path)

    return learn(
        pos_files, neg_files,
        num_classifiers,
        min_feature_width, max_feature_width,
        min_feature_height, max_feature_height;
        scale = scale, scale_to = scale_to,
        show_progress = show_progress
    )
end

"""
create_features(
img_height::Integer,
img_width::Integer,
min_feature_width::Integer,
max_feature_width::Integer,
min_feature_height::Integer,
max_feature_height::Integer
img_height::Int, img_width::Int,
min_feature_width::Int,
max_feature_width::Int,
min_feature_height::Int,
max_feature_height::Int
) -> Array{HaarLikeObject, 1}

Iteratively creates the Haar-like features
Expand All @@ -225,27 +258,26 @@ Iteratively creates the Haar-like feautures
- `features::AbstractArray`: an array of Haar-like features found for an image
"""
function create_features(
img_height::Int,
img_width::Int,
img_height::Int, img_width::Int,
min_feature_width::Int,
max_feature_width::Int,
min_feature_height::Int,
max_feature_height::Int
)
@info("Creating Haar-like features...")
features = HaarLikeObject[]

if img_width < max_feature_width || img_height < max_feature_height
error("""
Cannot possibly find classifiers whose size is greater than the image itself [(width,height) = ($img_width,$img_height)].
""")
end

@info("Creating Haar-like features...")
features = HaarLikeObject[]

for (feature_first, feature_last) in values(FEATURE_TYPES) # (feature_types are just tuples)
feature_start_width = max(min_feature_width, feature_first)
for feature_width in feature_start_width:feature_first:(max_feature_width)
for feature_width in feature_start_width:feature_first:max_feature_width
feature_start_height = max(min_feature_height, feature_last)
for feature_height in feature_start_height:feature_last:(max_feature_height)
for feature_height in feature_start_height:feature_last:max_feature_height
for x in 1:(img_width - feature_width)
for y in 1:(img_height - feature_height)
push!(features, HaarLikeObject((feature_first, feature_last), (x, y), feature_width, feature_height, 0, 1))
Expand Down
6 changes: 4 additions & 2 deletions src/FaceDetection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ module FaceDetection

import Base: size, getindex, LinearIndices
using Images: Images, coords_spatial
using ProgressMeter: Progress, next!
using Base.Threads: @threads
using Base.Iterators: partition

export to_integral_image, sum_region
export learn, get_feature_votes
Expand All @@ -10,10 +13,9 @@ export displaymatrix, filtered_ls, load_image, ensemble_vote_all,
reconstruct, get_random_image, generate_validation_image,
get_faceness, determine_feature_size


include("IntegralImage.jl")
include("HaarLikeFeature.jl")
include("Utils.jl") # Utils.jl exports HaarLikeFeature.jl functions
include("IntegralImage.jl")
include("AdaBoost.jl")

end # end module
Loading