-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #59 from jakewilliami/faceness
Improve/correct faceness measure
- Loading branch information
Showing
25 changed files
with
1,614 additions
and
115 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
The [FFHQ database](https://github.com/NVlabs/ffhq-dataset/) is a great dataset for positive training images, as it has some 70,001 images of faces, mostly alone in the image. | ||
|
||
To download this dataset, please run | ||
```shell | ||
$ bash setup.sh | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#!/bin/bash
# Download the FFHQ 128x128 thumbnails through Google Drive (PyDrive2) and
# flatten them into a single `thumbnails128x128/` directory.

echo "Please ensure you have followed the Google Drive API instructions listed here: https://docs.iterative.ai/PyDrive2/quickstart/"
sleep 5

pip3 install pydrive2

# Fetch the download helper and run that same file; previously the script was
# saved under one name and a different, never-downloaded name was executed.
download_script='download_ffhq_pydrive.py'
curl -fL 'https://gist.githubusercontent.com/jakewilliami/6e361ca59df521c874a9021bde1d2c81/raw/2f277c36bcd725df71d30174e13f920d7bee7b97/download_ffhq_pydrive.py' -o "$download_script"
echo "Downloading image thumbnails"
python3 "$download_script" -t --pydrive --cmd_auth

echo "Moving the images into one directory and deleting subdirectories."
# move images out of their subdirectories
for d in thumbnails128x128/*; do
    [ -d "$d" ] || continue
    for f in "$d"/*; do
        # an empty subdirectory leaves the glob unexpanded; nothing to move
        [ -e "$f" ] || continue
        mv "$f" "thumbnails128x128/$(basename "$f")"
    done
done
# clean up the (now empty) subdirectories
for d in thumbnails128x128/*; do
    if [ -d "$d" ]; then
        rmdir "$d"
    fi
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
baby | ||
bandanna | ||
beanie | ||
beard | ||
blindfold | ||
bobsled | ||
bowler hat | ||
braid | ||
breathalyzer | ||
chick | ||
chicken2 | ||
chihuahua | ||
cockatoo | ||
costume | ||
dalmatian | ||
denture | ||
doll | ||
duckling | ||
ear | ||
earplug | ||
eye | ||
eye patch | ||
eyeliner | ||
face | ||
figurine | ||
football helmet | ||
gargoyle | ||
gas mask | ||
gingerbread man | ||
girl | ||
glasses | ||
goggles | ||
gondola | ||
groundhog | ||
hair | ||
hairnet | ||
hat | ||
headband | ||
headdress | ||
headlamp | ||
headscarf | ||
hearing aid | ||
helmet | ||
hood | ||
jetski | ||
kitten | ||
lamb | ||
man | ||
mannequin | ||
mascara | ||
mask | ||
mouth | ||
mouthpiece | ||
mustache | ||
piggy bank | ||
piglet | ||
playpen | ||
pogo stick | ||
poodle | ||
poster | ||
pug | ||
puppet | ||
puppy | ||
racehorse | ||
ram | ||
rickshaw | ||
robot | ||
sarcophagus | ||
scarecrow | ||
scarf | ||
seagull | ||
seal | ||
skeleton | ||
skull | ||
snorkel | ||
snowman | ||
statue | ||
tadpole | ||
teddy bear | ||
totem pole | ||
toy | ||
warthog | ||
woman |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
# Derive the category of an image from its file name: take the base name, split
# it on underscores, drop the last piece, and join the rest with spaces
# (e.g. "bowler_hat_01b.jpg" gives "bowler hat")
function get_category_from_image_name(s::String)
    tokens = split(basename(s), '_')
    category_tokens = tokens[1:(end - 1)]
    return join(category_tokens, ' ')
end
|
||
# Return the unique object categories found in a list of image names or paths,
# in order of first appearance
function get_object_categories(object_images::Vector{String})
    categories = String[
        get_category_from_image_name(image) for image in object_images
    ]
    # `unique` keeps the first occurrence of each category, like the manual
    # membership loop it replaces, without rescanning the result per image
    return unique(categories)
end

# Return the unique object categories of the entries in `object_image_dir`
get_object_categories(object_image_dir::String) =
    get_object_categories(readdir(object_image_dir))
|
||
# Download the animal word list and return its lines; the temporary file
# created by `download` is removed afterwards
function _fetch_animal_names()
    url = "https://gist.githubusercontent.com/atduskgreg/3cf8ef48cb0d29cf151bedad81553a54/raw/82f142562cf50b0f6fb8010f890b2f934093553e/animals.txt"
    path = download(url)
    try
        return readlines(path)
    finally
        rm(path; force = true)
    end
end

# Filter out animals from the categories
#
# Returns a new vector holding the categories that are not animal names, in
# their original order; the input is not modified. Animal names are lowercased
# before the comparison, the categories are compared as given.
#
# Keywords:
#   - `animals`: collection of animal names to exclude. When `nothing` (the
#     default) the list is downloaded, which requires network access.
function filter_out_animals(object_image_categories::Vector{String}; animals = nothing)
    names = animals === nothing ? _fetch_animal_names() : animals
    # A set gives constant-time membership tests instead of scanning the list
    animal_set = Set{String}(lowercase(name) for name in names)
    return filter(category -> category ∉ animal_set, object_image_categories)
end

# Filter out animals from the categories of the images in `object_image_dir`
filter_out_animals(object_image_dir::String; animals = nothing) =
    filter_out_animals(get_object_categories(object_image_dir); animals = animals)
|
||
# Build the category lists for the images in `all_object_image_dir`, write them
# to `all_categories.txt` and `all_categories_filtered.txt`, then delete every
# image whose category is not in the filtered list
function main(all_object_image_dir::String)
    all_object_images = readdir(all_object_image_dir, sort = true, join = true)

    # Every category on disk, then the subset that survives both filters
    all_categories = get_object_categories(all_object_images)
    all_categories_filtered = filter_out_animals(all_categories)
    misc_filter_categories = readlines("misc_filter_categories.txt")
    filter!(!in(misc_filter_categories), all_categories_filtered)

    # Save both lists, one category per line
    outputs = (
        ("all_categories.txt", all_categories),
        ("all_categories_filtered.txt", all_categories_filtered),
    )
    for (outfile, categories) in outputs
        open(outfile, "w") do io
            foreach(category -> write(io, category, '\n'), categories)
        end
    end

    @info "There are currently $(length(all_object_images)) images in your object directory"
    kept_categories = Set(all_categories_filtered)
    categories_warned = Set{String}()
    removed = 0
    for object_image in all_object_images
        object_category = get_category_from_image_name(object_image)
        object_category in kept_categories && continue
        # Warn only once per category, however many images it has
        if !(object_category in categories_warned)
            @warn("Removing images of the category \"$object_category\"")
            push!(categories_warned, object_category)
        end
        rm(object_image)
        removed += 1
    end
    @info "We have removed all of the images that needed removing, and are left with $(length(all_object_images) - removed) images in your object directory"

    return nothing
end
|
||
# `main` deletes files, so only run it when this file is executed as a script
# (`julia object_categories.jl`), not when it is `include`d from elsewhere
if abspath(PROGRAM_FILE) == @__FILE__
    main("object_images/")
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
The [THINGS dataset](https://osf.io/3fu6z/) is a great dataset for object images, containing 26,107 object images. However, there are some categories of images that may interfere with our face detection results, if we are to use these images as negative training images. Of these images, there are 1854 unique categories. After filtering out [animals](https://gist.github.com/atduskgreg/3cf8ef48cb0d29cf151bedad81553a54) from this dataset, there are 1702 unique categories. Further removing some categories (manually selected) that contained humans or facial features (see below), there are 1619 unique categories. | ||
|
||
To download the THINGS dataset in its entirety, run | ||
```shell | ||
$ bash setup.sh | ||
``` | ||
|
||
Now that you have the dataset, please run | ||
```shell | ||
$ julia object_categories.jl | ||
``` | ||
|
||
This will create two text files: `all_categories.txt`, which lists every unique image category, and `all_categories_filtered.txt`, which contains the same list with the following kinds of categories removed:
- Animals; | ||
- Hat or hair related objects; | ||
- Human-like objects; | ||
- Specific parts of faces; | ||
- Activities requiring humans. | ||
|
||
The Julia script will also delete the images belonging to these categories from the downloaded dataset, as they contain too many faces/facial features. Animals are identified using the animal list linked above; the remaining categories to remove are listed in `misc_filter_categories.txt`, which was compiled manually.
|
||
After filtering all the potentially interfering images out of the THINGS dataset, we are left with 22,558 images. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
#!/bin/bash
# Download the THINGS object image archives, extract them, and flatten every
# image into a single `object_images/` directory.

wget -q 'https://files.osf.io/v1/resources/jum2f/providers/osfstorage/5d4d7ec80f488d0017907d30?action=download&direct&version=2' -O 'password.txt'
echo "Downloading object_images_A-C.zip"
wget 'https://files.osf.io/v1/resources/jum2f/providers/osfstorage/5f89eef1d85b700286657a33?action=download&direct&version=1' -O 'object_images_A-C.zip'
echo "Downloading object_images_D-K.zip"
wget 'https://files.osf.io/v1/resources/jum2f/providers/osfstorage/5f89f02b37b6bb0248309053?action=download&direct&version=1' -O 'object_images_D-K.zip'
echo "Downloading object_images_L-Q.zip"
wget 'https://files.osf.io/v1/resources/jum2f/providers/osfstorage/5f89f10e37b6bb02483092bb?action=download&direct&version=2' -O 'object_images_L-Q.zip'
echo "Downloading object_images_R-S.zip"
wget 'https://files.osf.io/v1/resources/jum2f/providers/osfstorage/5f89f218d85b700291656821?action=download&direct&version=1' -O 'object_images_R-S.zip'
echo "Downloading object_images_T-Z.zip"
wget 'https://files.osf.io/v1/resources/jum2f/providers/osfstorage/5f89f30a37b6bb02483098c8?action=download&direct&version=1' -O 'object_images_T-Z.zip'

# -p: do not fail when the script is re-run and the directory already exists
mkdir -p object_images
# NOTE(review): the archive password is hard-coded; presumably it matches the
# contents of password.txt downloaded above -- confirm.
for z in ./*.zip; do
    unzip -P 'things4all' "$z"
done

for d in ./object_images_*; do
    # the glob also matches the .zip archives; only descend into directories
    [ -d "$d" ] || continue
    for d2 in "$d"/*; do
        [ -d "$d2" ] || continue
        for f in "$d2"/*; do
            # an empty directory leaves the glob unexpanded; nothing to move
            [ -e "$f" ] || continue
            mv "$f" ./object_images/"$(basename "$f")"
        done
        # remove the emptied subdirectory, otherwise its parent is never empty
        rmdir "$d2"
    done
    rmdir "$d"
done
Oops, something went wrong.