Skip to content

Commit

Permalink
Merge pull request #59 from jakewilliami/faceness
Browse files Browse the repository at this point in the history
Improve/correct faceness measure
  • Loading branch information
jakewilliami authored Jun 5, 2022
2 parents 2a1b1e3 + db6685a commit aad2bda
Show file tree
Hide file tree
Showing 25 changed files with 1,614 additions and 115 deletions.
23 changes: 22 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,25 @@ docs/site/
Manifest.toml

# output data
data/
examples/data/
data/*.csv
data/*.pdf
data/all-non-faces/
data/alt/
data/classifiers_*
data/faceness-scores-*
data/haarcascades
data/lfw-all
data/lizzie-testset
data/main
data/scores
data/wider
data/ffhq/thumbnails128x128/
data/ffhq/LICENSE.txt
data/ffhq/*.py
data/ffhq/*.json
data/things/object_images/
data/things/object_images_all/
data/things/password.txt
data/things/all_categories.txt
data/things/all_categories_filtered.txt
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "FaceDetection"
uuid = "00808967-75e2-4046-a522-2ca211e35506"
authors = ["Jake W. Ireland <jakewilliami@icloud.com> and contributors"]
version = "1.0.2"
version = "1.1.0"

[deps]
ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
Expand All @@ -11,6 +11,7 @@ ImageIO = "82e4d734-157c-48bb-816b-45c225c6df19"
ImageMagick = "6218d12a-5da1-5696-b52f-db25d2ecc6d1"
ImageView = "86fae568-95e7-573e-a6b2-d8a6b900c9ef"
Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0"
IntegralArrays = "1d092043-8f09-5a30-832f-7509e371ab51"
Netpbm = "f09324ee-3d7c-5217-9330-fc30815ba969"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
QuartzImageIO = "dca85d43-d64c-5e67-8c65-017450d5d020"
Expand Down
6 changes: 6 additions & 0 deletions data/ffhq/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
The [FFHQ database](https://github.com/NVlabs/ffhq-dataset/) is a great dataset for positive training images, as it contains 70,001 images of faces, most of them showing a single face on its own.

To download this dataset, please run
```shell
$ bash setup.sh
```
22 changes: 22 additions & 0 deletions data/ffhq/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
# Download the FFHQ 128x128 thumbnails through the Google Drive API (PyDrive2)
# and flatten them into the single directory thumbnails128x128/.
# Run from the directory containing this script: all paths are relative.

echo "Please ensure you have followed the Google Drive API instructions listed here: https://docs.iterative.ai/PyDrive2/quickstart/"
# Give the user a moment to read the notice (and abort) before anything is installed
sleep 5

pip3 install pydrive2
curl 'https://gist.githubusercontent.com/jakewilliami/6e361ca59df521c874a9021bde1d2c81/raw/2f277c36bcd725df71d30174e13f920d7bee7b97/download_ffhq_pydrive.py' > download_ffhq_pydrive.py
echo "Downloading image thumbnails"
# NOTE(review): the gist above is saved as download_ffhq_pydrive.py, but the
# command below runs download_ffhq.py, which this script never fetches --
# confirm that download_ffhq.py is expected to already exist in this directory.
python3 download_ffhq.py -t --pydrive --cmd_auth

echo "Moving the images into one directory and deleting subdirectories."
# move images out of their subdirectories
for d in thumbnails128x128/*; do
    [ -d "$d" ] || continue
    for f in "$d"/*; do
        # an empty subdirectory leaves the glob unexpanded; skip the literal pattern
        [ -e "$f" ] || continue
        mv "$f" "thumbnails128x128/$(basename "$f")"
    done
done
# clean up the (now empty) subdirectories; rm -d refuses to remove non-empty ones
for d in thumbnails128x128/*; do
    if [ -d "$d" ]; then
        rm -d "$d"
    fi
done
83 changes: 83 additions & 0 deletions data/things/misc_filter_categories.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
baby
bandanna
beanie
beard
blindfold
bobsled
bowler hat
braid
breathalyzer
chick
chicken2
chihuahua
cockatoo
costume
dalmatian
denture
doll
duckling
ear
earplug
eye
eye patch
eyeliner
face
figurine
football helmet
gargoyle
gas mask
gingerbread man
girl
glasses
goggles
gondola
groundhog
hair
hairnet
hat
headband
headdress
headlamp
headscarf
hearing aid
helmet
hood
jetski
kitten
lamb
man
mannequin
mascara
mask
mouth
mouthpiece
mustache
piggy bank
piglet
playpen
pogo stick
poodle
poster
pug
puppet
puppy
racehorse
ram
rickshaw
robot
sarcophagus
scarecrow
scarf
seagull
seal
skeleton
skull
snorkel
snowman
statue
tadpole
teddy bear
totem pole
toy
warthog
woman
79 changes: 79 additions & 0 deletions data/things/object_categories.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""
    get_category_from_image_name(s::String)

Return the object category encoded in the image file name `s`.

The directory part of `s` is dropped, the file name is split on underscores, the
final underscore-separated piece is discarded, and the remaining pieces are joined
with spaces (e.g. `"bowler_hat_01b.jpg"` gives `"bowler hat"`).  A name without
any underscore yields the empty string.
"""
function get_category_from_image_name(s::String)
    name_parts = split(basename(s), '_')
    category_parts = name_parts[1:(end - 1)]
    return join(category_parts, ' ')
end

"""
    get_object_categories(object_images::Vector{String})
    get_object_categories(object_image_dir::String)

Return the unique object categories of the given images, in order of first
appearance.

`object_images` is a list of image file names or paths; only the base name of each
entry is used, and its category is obtained with `get_category_from_image_name`.
When a directory is given instead, the entries returned by `readdir` for that
directory are used.
"""
function get_object_categories(object_images::Vector{String})
    categories = String[
        get_category_from_image_name(basename(object_image)) for
        object_image in object_images
    ]
    # `unique` keeps the first occurrence of each category and preserves order
    return unique(categories)
end
get_object_categories(object_image_dir::String) =
    get_object_categories(readdir(object_image_dir))

"""
    filter_out_animals(object_image_categories::Vector{String})
    filter_out_animals(object_image_dir::String)

Return a new vector holding the categories that are not animal names.

The animal names are downloaded from a public gist (this requires network access)
and lowercased before comparison.  Only exact matches are removed: a category that
merely starts with an animal name is kept.  When a directory is given, its
categories are first collected with `get_object_categories`.
"""
function filter_out_animals(object_image_categories::Vector{String})
    animals_list = download("https://gist.githubusercontent.com/atduskgreg/3cf8ef48cb0d29cf151bedad81553a54/raw/82f142562cf50b0f6fb8010f890b2f934093553e/animals.txt")
    # A Set gives constant-time membership tests for the filter below
    animals = Set{String}(lowercase(animal) for animal in readlines(animals_list))
    # `filter` returns a fresh vector, so the input is left unmodified
    return filter(category -> !(category in animals), object_image_categories)
end
filter_out_animals(object_image_dir::String) =
    filter_out_animals(get_object_categories(object_image_dir))

"""
    main(all_object_image_dir::String)

Compute the category lists for the images in `all_object_image_dir`, write them to
file, and delete every image whose category was filtered out.

Steps:
1. Collect all categories and write them to `all_categories.txt`.
2. Remove animals (via `filter_out_animals`) and every category listed in
   `misc_filter_categories.txt`; write the remainder to
   `all_categories_filtered.txt`.
3. Delete (`rm`) each image in the directory whose category is not in the filtered
   list, warning once per removed category.

All three text files are resolved relative to the current working directory.
Returns `nothing`.
"""
function main(all_object_image_dir::String)
    outfile_all_categories_list = "all_categories.txt"
    outfile_all_categories_filtered_list = "all_categories_filtered.txt"
    misc_filter_categories_list = "misc_filter_categories.txt"

    # Full paths are needed so that the images can be deleted later on
    all_object_images = readdir(all_object_image_dir, sort = true, join = true)

    all_categories = get_object_categories(all_object_images)
    all_categories_filtered = filter_out_animals(all_categories)
    misc_filter_categories = readlines(misc_filter_categories_list)
    # Keep only the categories that were NOT manually flagged for removal
    filter!(
        category -> !(category in misc_filter_categories), all_categories_filtered
    )

    open(outfile_all_categories_list, "w") do io
        for category in all_categories
            write(io, category, '\n')
        end
    end

    open(outfile_all_categories_filtered_list, "w") do io
        for category in all_categories_filtered
            write(io, category, '\n')
        end
    end

    @info "There are currently $(length(all_object_images)) images in your object directory"
    categories_to_keep = Set(all_categories_filtered)
    categories_warned = Set{String}()
    removed = 0
    for object_image in all_object_images
        object_category = get_category_from_image_name(object_image)
        if !(object_category in categories_to_keep)
            # Warn only the first time an image of this category is removed
            if !(object_category in categories_warned)
                @warn("Removing images of the category \"$object_category\"")
                push!(categories_warned, object_category)
            end
            rm(object_image)
            removed += 1
        end
    end
    @info "We have removed all of the images that needed removing, and are left with $(length(all_object_images) - removed) images in your object directory"

    return nothing
end

# Entry point: runs whenever this file is loaded.  The image directory is given
# as a relative path, so it is resolved against the current working directory.
main("object_images/")
22 changes: 22 additions & 0 deletions data/things/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
The [THINGS dataset](https://osf.io/3fu6z/) is a great dataset for object images, containing 26,107 object images across 1854 unique categories. However, some of these categories may interfere with our face detection results if we use the images as negative training images. After filtering out [animals](https://gist.github.com/atduskgreg/3cf8ef48cb0d29cf151bedad81553a54) from this dataset, 1702 unique categories remain. After further removing some manually selected categories that contain humans or facial features (see below), 1619 unique categories remain.

To download the THINGS dataset in its entirety, run
```shell
$ bash setup.sh
```

Now that you have the dataset, please run
```shell
$ julia object_categories.jl
```

This will create two text files: `all_categories.txt` lists all unique categories of images, and `all_categories_filtered.txt` contains the same list after removing categories that are:
- Animals;
- Hat or hair related objects;
- Human-like objects;
- Specific parts of faces;
- Activities requiring humans.

The Julia script will also delete the downloaded images that belong to these categories, as they contain too many faces/facial features. Beyond animals, this filtering uses the manually selected categories listed in `misc_filter_categories.txt`.

After filtering all the potentially interfering images out of the THINGS dataset, we are left with 22,558 images.
27 changes: 27 additions & 0 deletions data/things/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash
# Download the THINGS object images, unpack the password-protected archives and
# flatten every image into the single directory object_images/.
# Run from the directory containing this script: all paths are relative.

wget -q 'https://files.osf.io/v1/resources/jum2f/providers/osfstorage/5d4d7ec80f488d0017907d30?action=download&direct&version=2' -O 'password.txt'
echo "Downloading object_images_A-C.zip"
wget 'https://files.osf.io/v1/resources/jum2f/providers/osfstorage/5f89eef1d85b700286657a33?action=download&direct&version=1' -O 'object_images_A-C.zip'
echo "Downloading object_images_D-K.zip"
wget 'https://files.osf.io/v1/resources/jum2f/providers/osfstorage/5f89f02b37b6bb0248309053?action=download&direct&version=1' -O 'object_images_D-K.zip'
echo "Downloading object_images_L-Q.zip"
wget 'https://files.osf.io/v1/resources/jum2f/providers/osfstorage/5f89f10e37b6bb02483092bb?action=download&direct&version=2' -O 'object_images_L-Q.zip'
echo "Downloading object_images_R-S.zip"
wget 'https://files.osf.io/v1/resources/jum2f/providers/osfstorage/5f89f218d85b700291656821?action=download&direct&version=1' -O 'object_images_R-S.zip'
echo "Downloading object_images_T-Z.zip"
wget 'https://files.osf.io/v1/resources/jum2f/providers/osfstorage/5f89f30a37b6bb02483098c8?action=download&direct&version=1' -O 'object_images_T-Z.zip'

# -p: do not fail when the directory already exists (e.g. on a re-run)
mkdir -p object_images
for z in ./*.zip; do
    unzip -P 'things4all' "$z"
done

# Flatten <archive dir>/<subdir>/<image> into object_images/<image>.
# The *.zip files also match the glob below; the -d test skips them.
for d in ./object_images_*; do
    [ -d "$d" ] || continue
    for d2 in "$d"/*; do
        [ -d "$d2" ] || continue
        for f in "$d2"/*; do
            # an empty directory leaves the glob unexpanded; skip the literal pattern
            [ -e "$f" ] || continue
            mv "$f" ./object_images/"$(basename "$f")"
        done
        # remove the emptied subdirectory so that its parent can be removed too
        rm -d "$d2"
    done
    rm -d "$d"
done
Loading

0 comments on commit aad2bda

Please sign in to comment.