Skip to content

Commit

Permalink
add PerceptionHashExtend function (#18)
Browse files Browse the repository at this point in the history
goimagehash: Implement PerceptionHashExtend
  • Loading branch information
TokyoWolFrog authored and corona10 committed Feb 8, 2019
1 parent 81672d7 commit c41a59a
Show file tree
Hide file tree
Showing 5 changed files with 257 additions and 0 deletions.
1 change: 1 addition & 0 deletions AUTHORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
- [Dominik Honnef](https://github.com/dominikh) dominik@honnef.co
- [Dong-hee Na](https://github.com/corona10/) donghee.na92@gmail.com
- [Gustavo Brunoro](https://github.com/brunoro/) git@hitnail.net
- [Alex Higashino](https://github.com/TokyoWolFrog/) TokyoWolFrog@mayxyou.com
29 changes: 29 additions & 0 deletions hashcompute.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,32 @@ func PerceptionHash(img image.Image) (*ImageHash, error) {
}
return phash, nil
}

// PerceptionHashExtend returns a perception hash (pHash) of img whose size
// can be larger than a single uint64.
//
// The hash is built from a hashSize x hashSize region requested from the DCT
// of the resized grayscale image, so it holds hashSize*hashSize bits packed
// into 64-bit words: hashSize=8 gives a 64-bit hash and hashSize=16 gives a
// 256-bit hash.
//
// An error is returned when img is nil, when hashSize is not positive, or
// when hashSize*hashSize is not a multiple of 64 (the bits would not fill
// whole uint64 words).
//
// Some variable names follow
// https://github.com/JohannesBuchner/imagehash/blob/master/imagehash/__init__.py
func PerceptionHashExtend(img image.Image, hashSize int) (*ExtImageHash, error) {
	if img == nil {
		return nil, errors.New("Image object can not be nil")
	}

	const lenOfUnit = 64 // number of bits stored in one uint64 word
	if hashSize <= 0 {
		return nil, errors.New("Hash size should be positive")
	}
	lenOfPhash := hashSize * hashSize
	if lenOfPhash%lenOfUnit != 0 {
		// Without this guard the word slice below is too short and the
		// bit-setting loop indexes out of range.
		return nil, errors.New("Hash size squared should be a multiple of 64")
	}

	// The image is scaled to highFreqFactor times the hash side before the DCT.
	highFreqFactor := 8
	imgSize := hashSize * highFreqFactor

	resized := resize.Resize(uint(imgSize), uint(imgSize), img, resize.Bilinear)
	pixels := transforms.Rgb2Gray(resized)
	dct := transforms.DCT2D(pixels, imgSize, imgSize)
	flattens := transforms.FlattenPixels(dct, hashSize, hashSize)
	median := etcs.MedianOfPixels(flattens)

	phash := make([]uint64, lenOfPhash/lenOfUnit)
	for idx, p := range flattens {
		// Earlier coefficients land in later words; within a word the bit
		// position grows with idx.
		indexOfArray := (lenOfPhash - 1 - idx) / lenOfUnit
		indexOfBit := idx % lenOfUnit
		if p > median {
			phash[indexOfArray] |= 1 << uint(indexOfBit)
		}
	}
	return NewExtImageHash(phash, PHash), nil
}
97 changes: 97 additions & 0 deletions hashcompute_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,3 +141,100 @@ func BenchmarkDistanceDifferent(b *testing.B) {
h1.Distance(h2)
}
}

// TestExtImageHashCompute hashes pairs of sample images with
// PerceptionHashExtend and checks that the distance is symmetric and equals
// the expected value. For hashSize 8 it also checks that the hex string
// matches the one produced by PerceptionHash.
//
// Each case runs as a subtest so that its files are closed as soon as the case
// finishes, and so that a failed open/decode/hash aborts only that case
// instead of continuing with nil values.
func TestExtImageHashCompute(t *testing.T) {
	for _, tt := range []struct {
		img1     string
		img2     string
		hashSize int
		name     string
		distance int
	}{
		{"_examples/sample1.jpg", "_examples/sample1.jpg", 8, "PerceptionHashExtend", 0},
		{"_examples/sample2.jpg", "_examples/sample2.jpg", 8, "PerceptionHashExtend", 0},
		{"_examples/sample3.jpg", "_examples/sample3.jpg", 8, "PerceptionHashExtend", 0},
		{"_examples/sample4.jpg", "_examples/sample4.jpg", 8, "PerceptionHashExtend", 0},
		{"_examples/sample1.jpg", "_examples/sample2.jpg", 8, "PerceptionHashExtend", 32},
		{"_examples/sample1.jpg", "_examples/sample3.jpg", 8, "PerceptionHashExtend", 2},
		{"_examples/sample1.jpg", "_examples/sample4.jpg", 8, "PerceptionHashExtend", 30},
		{"_examples/sample2.jpg", "_examples/sample3.jpg", 8, "PerceptionHashExtend", 34},
		{"_examples/sample2.jpg", "_examples/sample4.jpg", 8, "PerceptionHashExtend", 20},
		{"_examples/sample1.jpg", "_examples/sample1.jpg", 16, "PerceptionHashExtend", 0},
		{"_examples/sample2.jpg", "_examples/sample2.jpg", 16, "PerceptionHashExtend", 0},
		{"_examples/sample3.jpg", "_examples/sample3.jpg", 16, "PerceptionHashExtend", 0},
		{"_examples/sample4.jpg", "_examples/sample4.jpg", 16, "PerceptionHashExtend", 0},
	} {
		t.Run(tt.name, func(t *testing.T) {
			file1, err := os.Open(tt.img1)
			if err != nil {
				t.Fatalf("%s", err)
			}
			defer file1.Close()

			file2, err := os.Open(tt.img2)
			if err != nil {
				t.Fatalf("%s", err)
			}
			defer file2.Close()

			img1, err := jpeg.Decode(file1)
			if err != nil {
				t.Fatalf("%s", err)
			}

			img2, err := jpeg.Decode(file2)
			if err != nil {
				t.Fatalf("%s", err)
			}

			hash1, err := PerceptionHashExtend(img1, tt.hashSize)
			if err != nil {
				t.Fatalf("%s", err)
			}
			hash2, err := PerceptionHashExtend(img2, tt.hashSize)
			if err != nil {
				t.Fatalf("%s", err)
			}

			dis1, err := hash1.Distance(hash2)
			if err != nil {
				t.Errorf("%s", err)
			}

			dis2, err := hash2.Distance(hash1)
			if err != nil {
				t.Errorf("%s", err)
			}

			if dis1 != dis2 {
				t.Errorf("Distance should be identical %v vs %v", dis1, dis2)
			}

			if dis1 != tt.distance {
				t.Errorf("%s: Distance between %v and %v is expected %v but got %v", tt.name, tt.img1, tt.img2, tt.distance, dis1)
			}

			// A 64-bit extended hash must serialize like the plain pHash.
			if tt.hashSize == 8 {
				hash0, err := PerceptionHash(img1)
				if err != nil {
					t.Fatalf("%s", err)
				}
				hex0 := hash0.ToString()
				hex1 := hash1.ToString()
				if hex0 != hex1 {
					t.Errorf("Hex is expected %v but got %v", hex0, hex1)
				}
			}
		})
	}
}

// BenchmarkExtImageHashDistanceDifferent times ExtImageHash.Distance on two
// single-word hashes that differ in 8 bit positions.
func BenchmarkExtImageHashDistanceDifferent(b *testing.B) {
	left := &ExtImageHash{hash: []uint64{0xe48ae53c05e502f7}}
	right := &ExtImageHash{hash: []uint64{0x678be53815e510f7}} // 8 bits flipped relative to left

	for n := 0; n < b.N; n++ {
		if _, err := left.Distance(right); err != nil {
			b.Errorf("%s", err)
		}
	}
}
107 changes: 107 additions & 0 deletions imagehash.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
package goimagehash

import (
"encoding/binary"
"encoding/hex"
"errors"
"fmt"
)
Expand All @@ -18,6 +20,12 @@ type ImageHash struct {
kind Kind
}

// ExtImageHash is a struct of big hash computation.
// It stores the hash as a slice of 64-bit words, so the hash can be wider
// than a single uint64.
type ExtImageHash struct {
// hash holds the hash bits, packed into 64-bit words.
hash []uint64
// kind records which hash algorithm the bits belong to.
kind Kind
}

const (
// Unknown is a enum value of the unknown hash.
Unknown Kind = iota
Expand Down Expand Up @@ -104,3 +112,102 @@ func (h *ImageHash) ToString() string {
}
return fmt.Sprintf(strFmt, kindStr, h.hash)
}

// NewExtImageHash builds an ExtImageHash from the given 64-bit words and hash
// kind. The slice is stored as passed in; it is not copied.
func NewExtImageHash(hash []uint64, kind Kind) *ExtImageHash {
	ext := new(ExtImageHash)
	ext.hash = hash
	ext.kind = kind
	return ext
}

// Distance returns the distance between two big hashes: the number of bit
// positions in which h and other differ, summed over all 64-bit words.
//
// It returns -1 and an error when either hash is nil, when the two hashes are
// of different kinds, or when they do not hold the same number of words.
func (h *ExtImageHash) Distance(other *ExtImageHash) (int, error) {
	// Guard first: the accessors below dereference both pointers.
	if h == nil || other == nil {
		return -1, errors.New("Extended Image hashes should not be nil")
	}

	if h.GetKind() != other.GetKind() {
		return -1, errors.New("Extended Image hashes's kind should be identical")
	}

	lHash := h.GetHash()
	rHash := other.GetHash()
	if len(lHash) != len(rHash) {
		return -1, errors.New("Extended Image hashes's size should be identical")
	}

	var distance int
	for idx, lh := range lHash {
		// XOR leaves a 1 bit wherever the two words disagree.
		distance += popcnt(lh ^ rHash[idx])
	}
	return distance, nil
}

// GetHash method returns a big hash value
// The returned slice is the hash's own storage, not a copy, so writes to it
// change the hash.
func (h *ExtImageHash) GetHash() []uint64 {
return h.hash
}

// GetKind method returns a kind of big hash
// This is the Kind value the hash was created with.
func (h *ExtImageHash) GetKind() Kind {
return h.kind
}

// extStrFmt is the layout used by ExtImageHash.ToString and
// ExtImageHashFromString: a one-character kind tag, a colon, then the hash
// as a hex string.
const extStrFmt = "%1s:%s"

// ExtImageHashFromString returns a big hash from a hex representation.
//
// s must have the form "<kind>:<hex>", where <kind> is one character ("a",
// "p", "d" or "w"; any other character yields the Unknown kind) and <hex> is
// the hash as big-endian 64-bit words, i.e. a multiple of 16 hex digits.
//
// An error is returned when s does not match that form, when <hex> is not
// valid hexadecimal, or when it does not decode to whole 64-bit words.
func ExtImageHashFromString(s string) (*ExtImageHash, error) {
	var kindStr string
	var hashStr string
	_, err := fmt.Sscanf(s, extStrFmt, &kindStr, &hashStr)
	if err != nil {
		return nil, errors.New("Couldn't parse string " + s)
	}

	hexBytes, err := hex.DecodeString(hashStr)
	if err != nil {
		return nil, err
	}

	const lenOfByte = 8 // bytes per uint64 word
	if len(hexBytes)%lenOfByte != 0 {
		// Reject instead of silently dropping the trailing partial word.
		return nil, errors.New("Couldn't parse string " + s + ": hash should be a multiple of 16 hex digits")
	}

	hash := make([]uint64, 0, len(hexBytes)/lenOfByte)
	for start := 0; start < len(hexBytes); start += lenOfByte {
		hash = append(hash, binary.BigEndian.Uint64(hexBytes[start:start+lenOfByte]))
	}

	kind := Unknown
	switch kindStr {
	case "a":
		kind = AHash
	case "p":
		kind = PHash
	case "d":
		kind = DHash
	case "w":
		kind = WHash
	}
	return NewExtImageHash(hash, kind), nil
}

// ToString renders the big hash as "<kind>:<hex>": a one-character kind tag
// ("a", "p", "d" or "w", empty for any other kind) followed by every 64-bit
// word written big-endian as hex, in slice order.
func (h *ExtImageHash) ToString() string {
	const wordBytes = 8

	// Serialize all words into one buffer, word i at offset i*wordBytes.
	raw := make([]byte, wordBytes*len(h.hash))
	for i, word := range h.hash {
		binary.BigEndian.PutUint64(raw[i*wordBytes:], word)
	}

	var tag string
	switch h.kind {
	case AHash:
		tag = "a"
	case PHash:
		tag = "p"
	case DHash:
		tag = "d"
	case WHash:
		tag = "w"
	}
	return fmt.Sprintf(extStrFmt, tag, hex.EncodeToString(raw))
}
23 changes: 23 additions & 0 deletions imagehash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,5 +99,28 @@ func TestSerialization(t *testing.T) {
t.Errorf("Original and unserialized objects should be identical, got distance=%v; %v of '%v'", distance, methodStr, ex)
}
}

// test for ExtImageHash
hashSizeList := []int{8, 16}
for _, hashSize := range hashSizeList {
hash, err := PerceptionHashExtend(img, hashSize)
checkErr(err)

hex := hash.ToString()
// len(kind) == 1, len(":") == 1
if len(hex) != hashSize*hashSize/4+2 {
t.Errorf("Got invalid hex string '%v'; %v of '%v'", hex, "PerceptionHashExtend", ex)
}

reHash, err := ExtImageHashFromString(hex)
checkErr(err)

distance, err := hash.Distance(reHash)
checkErr(err)

if distance != 0 {
t.Errorf("Original and unserialized objects should be identical, got distance=%v; %v of '%v'", distance, "PerceptionHashExtend", ex)
}
}
}
}

0 comments on commit c41a59a

Please sign in to comment.