Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimisation #57

Merged
merged 8 commits into from
Nov 4, 2021
7 changes: 7 additions & 0 deletions docs/src/benchmarking.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,10 @@ Platform Info:
LIBM: libopenlibm
LLVM: libLLVM-9.0.1 (ORCJIT, skylake)
```

## 1.6 Update

A few months after the release of Julia 1.6, I revisited performance (1.6 ships quite a few nice improvements). The updated benchmarking results are below (see [`benchmark/basic.jl`](https://github.com/jakewilliami/FaceDetection.jl/blob/master/benchmark/basic.jl)).
Language of Implementation | Commit | Run Time in Seconds | Number of Allocations | Memory Usage
--- | --- | --- | --- | ---
[Julia](https://github.com/jakewilliami/FaceDetection.jl/) | [???]() | 8.165 | 249021919 | 5.01 GiB
124 changes: 124 additions & 0 deletions examples/basic_ffhq_things.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Adapted from https://github.com/Simon-Hohberg/Viola-Jones/

# Faces dataset: [FFHQ](https://github.com/NVlabs/ffhq-dataset/) 70_001 images of faces
# Non-faces dataset: [THINGS](https://osf.io/3fu6z/); 26_107 object images

@info "Loading required libraries (it will take a moment to precompile if it is your first time doing this)..."

using FaceDetection
using Images: imresize
using Printf: @printf
using Random: randperm

@info("...done")

"Return a random subset of the contents of directory `path` of size `n`."
function rand_subset_ls(path::String, n::Int)
dir_contents = readdir(path, join=true, sort=false)
filter!(f -> !occursin(r".*\.DS_Store", f), dir_contents)
@assert(length(dir_contents) >= n, "Not enough files in given directory to select `n` random.")

subset_ls = Vector{String}(undef, n)
for i in 1:n
j = rand(1:length(dir_contents))
subset_ls[i] = dir_contents[j]
deleteat!(dir_contents, j)
end

return subset_ls
end

"""
    main(num_pos::Int, num_neg::Int; smart_choose_feats=false, scale=true, scale_to=(128, 128))

Train a boosted ensemble of Haar-like-feature classifiers on `num_pos` face images
(FFHQ) and `num_neg` non-face images (THINGS), then evaluate it on an equally-sized
held-out set and print the recognition rates.

# Arguments
- `num_pos`/`num_neg`: number of positive/negative *training* images; the same
  number again is drawn and held out for testing (hence `2num_pos`/`2num_neg` below).
- `smart_choose_feats`: if `true`, call `determine_feature_size` to pick feature
  size bounds from the data; otherwise use hard-coded bounds.
- `scale`, `scale_to`: whether (and to what size) images are resized during
  feature computation; forwarded to the FaceDetection.jl routines.
"""
function main(
    num_pos::Int,
    num_neg::Int;
    smart_choose_feats::Bool=false,
    scale::Bool=true,
    scale_to::Tuple=(128, 128)
)
    # Datasets are expected under <repo>/data (this file lives in <repo>/examples)
    data_path = joinpath(dirname(@__DIR__), "data")

    pos_training_path = joinpath(data_path, "ffhq", "thumbnails128x128")
    neg_training_path = joinpath(data_path, "things", "object_images")

    # Draw twice the requested count so the second half can be held out for testing
    all_pos_images = rand_subset_ls(pos_training_path, 2num_pos)
    all_neg_images = rand_subset_ls(neg_training_path, 2num_neg)

    pos_training_images = all_pos_images[1:num_pos]
    neg_training_images = all_neg_images[1:num_neg]

    num_classifiers = 10
    # Declared up front so the binding is visible after the `if`; always a 2-tuple of Ints
    local min_size_img::Tuple{Int, Int}

    if smart_choose_feats
        # For performance reasons restricting feature size
        @info("Selecting best feature width and height...")

        max_feature_width, max_feature_height, min_feature_height, min_feature_width, min_size_img =
            determine_feature_size(vcat(pos_training_images, neg_training_images); scale = scale, scale_to = scale_to, show_progress = true)

        @info("...done. Maximum feature width selected is $max_feature_width pixels; minimum feature width is $min_feature_width; maximum feature height is $max_feature_height pixels; minimum feature height is $min_feature_height.\n")
    else
        # Hard-coded bounds — presumably precomputed for these 128x128 datasets; TODO confirm
        max_feature_width, max_feature_height, min_feature_height, min_feature_width = (67, 67, 65, 65)
        min_size_img = (128, 128)
    end

    # classifiers are haar like features
    # NOTE(review): arguments here are passed as (min_h, max_h, min_w, max_w), but the
    # `learn` method in src/AdaBoost.jl declares widths before heights — harmless while
    # the width/height bounds are equal, but confirm the intended order.
    classifiers = learn(pos_training_images, neg_training_images, num_classifiers, min_feature_height, max_feature_height, min_feature_width, max_feature_width; scale = scale, scale_to = scale_to)

    @info("Testing selected classifiers...")
    sleep(3) # sleep here because sometimes the threads from `learn` are still catching up and then `ensemble_vote_all` errors

    # Held-out second halves of the sampled file lists
    pos_testing_images = all_pos_images[(num_pos + 1):2num_pos]
    neg_testing_images = all_neg_images[(num_neg + 1):2num_neg]
    num_faces = length(pos_testing_images)
    num_non_faces = length(neg_testing_images)

    # `ensemble_vote_all` appears to return 1 per image voted "face" and 0 otherwise,
    # so its sum counts face votes — NOTE(review): confirm against FaceDetection.jl
    correct_faces = sum(ensemble_vote_all(pos_testing_images, classifiers, scale=scale, scale_to=scale_to))
    correct_non_faces = num_non_faces - sum(ensemble_vote_all(neg_testing_images, classifiers, scale=scale, scale_to=scale_to))
    correct_faces_percent = (correct_faces / num_faces) * 100
    correct_non_faces_percent = (correct_non_faces / num_non_faces) * 100

    # Human-readable summary strings for the report printed below
    faces_frac = string(correct_faces, "/", num_faces)
    faces_percent = string("(", correct_faces_percent, "% of faces were recognised as faces)")
    non_faces_frac = string(correct_non_faces, "/", num_non_faces)
    non_faces_percent = string("(", correct_non_faces_percent, "% of non-faces were identified as non-faces)")

    @info("...done.\n")
    @info("Result:\n")

    @printf("%10.9s %10.15s %15s\n", "Faces:", faces_frac, faces_percent)
    @printf("%10.9s %10.15s %15s\n\n", "Non-faces:", non_faces_frac, non_faces_percent)
end

@time main(2000, 2000; smart_choose_feats = false, scale = true, scale_to = (128, 128))

#=
[ Info: Loading required libraries (it will take a moment to precompile if it is your first time doing this)...
[ Info: ...done
[ Info: Creating Haar-like features...
[ Info: ...finished processing; 169880 features created.
[ Info: Loading images (1000 positive and 1000 negative images) and calculating their scores...
Progress: 100%|███████████████████████████████████████████████████████████████████████████████████████████| Time: 0:02:13
[ Info: Selecting classifiers...
Progress: 100%|███████████████████████████████████████████████████████████████████████████████████████████| Time: 0:00:17
[ Info: Testing selected classifiers...
[ Info: ...done.
[ Info: Result:
Faces: 757/1000 (75.7% of faces were recognised as faces)
Non-faces 749/1000 (74.9% of non-faces were identified as non-faces)
=#

#=
[ Info: Loading required libraries (it will take a moment to precompile if it is your first time doing this)...
[ Info: ...done
[ Info: Creating Haar-like features...
[ Info: ...finished processing; 169880 features created.
[ Info: Loading images (2000 positive and 2000 negative images) and calculating their scores...
Progress: 100%|███████████████████████████████████████████████████████████████████████████████████████████| Time: 0:07:06
[ Info: Selecting classifiers...
Progress: 100%|███████████████████████████████████████████████████████████████████████████████████████████| Time: 0:00:52
[ Info: Testing selected classifiers...
[ Info: ...done.
[ Info: Result:
Faces: 1547/2000 (77.35% of faces were recognised as faces)
Non-faces 1400/2000 (70.0% of non-faces were identified as non-faces)
=#
122 changes: 77 additions & 45 deletions src/AdaBoost.jl
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
# TODO: select optimal threshold for each feature
# TODO: attentional cascading

using Base.Threads: @threads
using Base.Iterators: partition
using ProgressMeter: @showprogress, Progress, next!

"""
    β(err::T) -> T

AdaBoost classifier weight for a weighted error `err`:
`β = ½·ln((1 − err)/err)`. Returns the same numeric type as its input.
"""
function β(err::T)::T where T
    # Keep every operation inside `@fastmath`, matching the original evaluation
    return @fastmath(T(0.5) * log((one(err) - err) / err))
end

function get_feature_votes(
positive_path::AbstractString,
negative_path::AbstractString,
positive_files::Vector{String},
negative_files::Vector{String},
num_classifiers::Integer=-one(Int32),
min_feature_width::Integer=one(Int32),
max_feature_width::Integer=-one(Int32),
Expand Down Expand Up @@ -39,8 +35,6 @@ function get_feature_votes(
_1 = _Int(1)

# get number of positive and negative image
positive_files = filtered_ls(positive_path)
negative_files = filtered_ls(negative_path)
num_pos = length(positive_files)
num_neg = length(negative_files)
num_imgs = num_pos + num_neg
Expand Down Expand Up @@ -80,19 +74,40 @@ function get_feature_votes(

return votes, features
end
# Path-based convenience method: list the image files in each directory and
# delegate to the file-list method of `get_feature_votes`.
function get_feature_votes(
    positive_path::String,
    negative_path::String,
    num_classifiers::Integer=-one(Int32),
    min_feature_width::Integer=one(Int32),
    max_feature_width::Integer=-one(Int32),
    min_feature_height::Integer=one(Int32),
    max_feature_height::Integer=-one(Int32);
    scale::Bool = false,
    scale_to::Tuple = (Int32(200), Int32(200)),
    show_progress::Bool = true
)
    return get_feature_votes(
        filtered_ls(positive_path),
        filtered_ls(negative_path),
        num_classifiers,
        min_feature_width, max_feature_width,
        min_feature_height, max_feature_height;
        scale = scale, scale_to = scale_to,
        show_progress = show_progress
    )
end

function learn(
positive_path::AbstractString,
negative_path::AbstractString,
num_pos::Int, num_neg::Int,
features::Array{HaarLikeObject, 1},
votes::Matrix{Int8},
num_classifiers::Integer=-one(Int32);
show_progress::Bool = true
)
)

# get number of positive and negative images (and create a global variable of the total number of images——global for the @everywhere scope)
num_pos = length(filtered_ls(positive_path))
num_neg = length(filtered_ls(negative_path))
num_imgs = num_pos + num_neg

# Initialise weights $w_{1,i} = \frac{1}{2m}, \frac{1}{2l}$, for $y_i=0,1$ for negative and positive examples respectively
Expand All @@ -107,10 +122,10 @@ function learn(
_neg1 = -_1
labels = Vector{Int8}(undef, num_imgs)
for i in 1:num_pos
labels[i] = _1
@inbounds labels[i] = _1
end
for j in (num_pos + 1):num_imgs
labels[j] = _neg1
@inbounds labels[j] = _neg1
end

# get number of features
Expand All @@ -120,7 +135,6 @@ function learn(

# select classifiers
@info("Selecting classifiers...")
# classifiers = HaarLikeObject[]
classifiers = Vector{HaarLikeObject}(undef, num_classifiers)
classification_errors = Vector{Float64}(undef, num_features)

Expand All @@ -132,20 +146,18 @@ function learn(
weights .*= inv(sum(weights))

# For each feature j, train a classifier $h_j$ which is restricted to using a single feature. The error is evaluated with respect to $w_j,\varepsilon_j = \sum_i w_i\left|h_j\left(x_i\right)-y_i\right|$
@threads for j in 1:length(feature_indices)
@inbounds @threads for j in 1:length(feature_indices)
feature_idx = feature_indices[j]
classification_errors[j] = sum(weights[img_idx] for img_idx in 1:num_imgs if labels[img_idx] !== votes[img_idx, feature_idx])
end

# choose the classifier $h_t$ with the lowest error $\varepsilon_t$
best_error, min_error_idx = findmin(classification_errors)
best_feature_idx = feature_indices[min_error_idx]
best_feature = features[best_feature_idx]

# set feature weight
best_feature = features[best_feature_idx]
feature_weight = β(best_error)
best_feature.weight = feature_weight
best_feature.weight = β(best_error)

# append selected features
classifiers[t] = best_feature
Expand All @@ -162,7 +174,7 @@ function learn(
end

# remove feature (a feature can't be selected twice)
filter!(e -> e ∉ best_feature_idx, feature_indices) # note: without unicode operators, `e ∉ [a, b]` is `!(e in [a, b])`
deleteat!(feature_indices, best_feature_idx)
resize!(classification_errors, length(feature_indices))
next!(p) # increment progress bar
end
Expand All @@ -171,8 +183,8 @@ function learn(
end

function learn(
positive_path::AbstractString,
negative_path::AbstractString,
positive_files::Vector{String},
negative_files::Vector{String},
num_classifiers::Int=-1,
min_feature_width::Int=1,
max_feature_width::Int=-1,
Expand All @@ -184,29 +196,50 @@ function learn(
)

votes, features = get_feature_votes(
positive_path,
negative_path,
positive_files, negative_files,
num_classifiers,
min_feature_width,
max_feature_width,
min_feature_height,
max_feature_height,
scale = scale,
scale_to = scale_to,
min_feature_width, max_feature_width,
min_feature_height, max_feature_height,
scale = scale, scale_to = scale_to,
show_progress = show_progress
)

return learn(positive_path, negative_path, features, votes, num_classifiers; show_progress = show_progress)
num_pos, num_neg = length(positive_files), length(negative_files)

return learn(num_pos, num_neg, features, votes, num_classifiers; show_progress = show_progress)
end

# Path-based convenience method: resolve each directory to a concrete file list,
# then delegate to the file-list method of `learn`.
function learn(
    positive_path::String,
    negative_path::String,
    num_classifiers::Int=-1,
    min_feature_width::Int=1,
    max_feature_width::Int=-1,
    min_feature_height::Int=1,
    max_feature_height::Int=-1;
    scale::Bool = false,
    scale_to::Tuple = (200, 200),
    show_progress::Bool = true
)
    pos_files = filtered_ls(positive_path)
    neg_files = filtered_ls(negative_path)

    return learn(
        pos_files, neg_files,
        num_classifiers,
        min_feature_width, max_feature_width,
        min_feature_height, max_feature_height;
        scale = scale, scale_to = scale_to,
        show_progress = show_progress
    )
end

"""
create_features(
img_height::Integer,
img_width::Integer,
min_feature_width::Integer,
max_feature_width::Integer,
min_feature_height::Integer,
max_feature_height::Integer
img_height::Int, img_width::Int,
min_feature_width::Int,
max_feature_width::Int,
min_feature_height::Int,
max_feature_height::Int
) -> Array{HaarLikeObject, 1}

Iteratively creates the Haar-like features
Expand All @@ -225,27 +258,26 @@ Iteratively creates the Haar-like feautures
- `features::AbstractArray`: an array of Haar-like features found for an image
"""
function create_features(
img_height::Int,
img_width::Int,
img_height::Int, img_width::Int,
min_feature_width::Int,
max_feature_width::Int,
min_feature_height::Int,
max_feature_height::Int
)
@info("Creating Haar-like features...")
features = HaarLikeObject[]

if img_width < max_feature_width || img_height < max_feature_height
error("""
Cannot possibly find classifiers whose size is greater than the image itself [(width,height) = ($img_width,$img_height)].
""")
end

@info("Creating Haar-like features...")
features = HaarLikeObject[]

for (feature_first, feature_last) in values(FEATURE_TYPES) # (feature_types are just tuples)
feature_start_width = max(min_feature_width, feature_first)
for feature_width in feature_start_width:feature_first:(max_feature_width)
for feature_width in feature_start_width:feature_first:max_feature_width
feature_start_height = max(min_feature_height, feature_last)
for feature_height in feature_start_height:feature_last:(max_feature_height)
for feature_height in feature_start_height:feature_last:max_feature_height
for x in 1:(img_width - feature_width)
for y in 1:(img_height - feature_height)
push!(features, HaarLikeObject((feature_first, feature_last), (x, y), feature_width, feature_height, 0, 1))
Expand Down
6 changes: 4 additions & 2 deletions src/FaceDetection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ module FaceDetection

import Base: size, getindex, LinearIndices
using Images: Images, coords_spatial
using ProgressMeter: Progress, next!
using Base.Threads: @threads
using Base.Iterators: partition

export to_integral_image, sum_region
export learn, get_feature_votes
Expand All @@ -10,10 +13,9 @@ export displaymatrix, filtered_ls, load_image, ensemble_vote_all,
reconstruct, get_random_image, generate_validation_image,
get_faceness, determine_feature_size


include("IntegralImage.jl")
include("HaarLikeFeature.jl")
include("Utils.jl") # Utils.jl exports HaarLikeFeature.jl functions
include("IntegralImage.jl")
include("AdaBoost.jl")

end # end module
Loading