-
Notifications
You must be signed in to change notification settings - Fork 13
/
distance.go
115 lines (94 loc) · 3.19 KB
/
distance.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
// Distance metrics for Tor objects.
package main
import (
"fmt"
tor "git.torproject.org/user/phw/zoossh.git"
cluster "github.com/NullHypothesis/mlgo/cluster"
levenshtein "github.com/arbovm/levenshtein"
statistics "github.com/mcgrew/gostats"
)
// RelayDistances pairs relays with their distance to another relay. The two
// slices are parallel: the value at a given index in Distances belongs to the
// relay at the same index in Relays.
type RelayDistances struct {
	// Distances holds one distance value per relay.
	Distances []float32
	// Relays holds the router statuses the distances refer to.
	Relays []*tor.RouterStatus
}
// Len reports how many distances are stored. It is the Len method of
// sort.Interface.
func (rd RelayDistances) Len() int {
	count := len(rd.Distances)
	return count
}
// Swap exchanges the entries at indices i and j in both the distance and the
// relay slice, so that every relay stays paired with its distance. It is the
// Swap method of sort.Interface.
func (rd RelayDistances) Swap(i, j int) {
	distI, distJ := rd.Distances[i], rd.Distances[j]
	rd.Distances[i] = distJ
	rd.Distances[j] = distI

	relayI, relayJ := rd.Relays[i], rd.Relays[j]
	rd.Relays[i] = relayJ
	rd.Relays[j] = relayI
}
// Less reports whether the distance at index i is strictly smaller than the
// distance at index j. It is the Less method of sort.Interface.
func (rd RelayDistances) Less(i, j int) bool {
	left := rd.Distances[i]
	right := rd.Distances[j]
	return left < right
}
// Add appends the given relay and its distance to the struct. Both slices
// grow by one element, which keeps them index-aligned.
func (rd *RelayDistances) Add(relay *tor.RouterStatus, dist float32) {
	rd.Relays = append(rd.Relays, relay)
	rd.Distances = append(rd.Distances, dist)
}
// Distance is the signature of a distance metric: a function that takes two
// "Tor objects" (e.g., router statuses or descriptors) and expresses how far
// apart they are as a 32-bit float.
type Distance func(obj1 tor.Object, obj2 tor.Object) float32
// Levenshtein returns the Levenshtein distance, a string metric, between the
// given router statuses and descriptors. It delegates to LevenshteinVerbose
// and discards the verbose string, keeping only the distance.
func Levenshtein(stat1, stat2 *tor.RouterStatus, desc1, desc2 *tor.RouterDescriptor) float32 {
	dist, _ := LevenshteinVerbose(stat1, stat2, desc1, desc2)
	return dist
}
// PearsonWrapper wraps PearsonCorrelation and returns one minus the
// correlation coefficient of the two given vectors.
func PearsonWrapper(a, b cluster.Vector) float64 {
	coefficient := PearsonCorrelation(a, b)
	return 1 - coefficient
}
// PearsonCorrelation returns the Pearson correlation coefficient of a and b
// as computed by the statistics package.
func PearsonCorrelation(a, b []float64) float64 {
	coefficient := statistics.PearsonCorrelation(a, b)
	return coefficient
}
// LevenshteinVerbose determines the Levenshtein distance, a string metric,
// between the given router statuses and descriptors.
//
// Each relay is rendered into a single string made of its nickname, address,
// OR port, directory port, flags, Tor version and port list (taken from the
// status), followed by its average and burst bandwidth, operating system,
// publication time, uptime and contact (taken from the descriptor). The
// Levenshtein distance between the two rendered strings is returned as a
// float32.
//
// The second return value holds two lines, one per relay, each consisting of
// the first eight characters of the relay's fingerprint (or the entire
// fingerprint if it is shorter than that) and the rendered string.
//
// status1 and status2 must not be nil because they are dereferenced
// unconditionally. A nil descriptor is allowed and is treated like a
// zero-valued descriptor.
func LevenshteinVerbose(status1, status2 *tor.RouterStatus, desc1, desc2 *tor.RouterDescriptor) (float32, string) {
	// Number of leading fingerprint characters shown in the verbose output.
	const prefixLen = 8

	// render turns one relay into the string that is fed to the distance
	// function. Using a single closure for both relays guarantees that the
	// two strings are always built from the same fields in the same order.
	render := func(status *tor.RouterStatus, desc *tor.RouterDescriptor) string {
		if desc == nil {
			// No descriptor available: fall back to zero values.
			desc = new(tor.RouterDescriptor)
		}
		return fmt.Sprintf("%s%s%d%d%s%s%s%d%d%s%s%d%s",
			status.Nickname,
			status.Address,
			status.Address.IPv4ORPort,
			status.Address.IPv4DirPort,
			RouterFlagsToString(&status.Flags),
			status.TorVersion,
			status.PortList,
			desc.BandwidthAvg,
			desc.BandwidthBurst,
			desc.OperatingSystem,
			desc.Published,
			desc.Uptime,
			desc.Contact)
	}

	str1 := render(status1, desc1)
	str2 := render(status2, desc2)

	// Only slice the fingerprints when they are long enough; slicing beyond
	// the length of a short (e.g., empty) fingerprint would panic.
	fpr1, fpr2 := status1.Fingerprint, status2.Fingerprint
	if len(fpr1) > prefixLen {
		fpr1 = fpr1[:prefixLen]
	}
	if len(fpr2) > prefixLen {
		fpr2 = fpr2[:prefixLen]
	}

	verbose := fmt.Sprintf("%s: %s\n%s: %s",
		fpr1, str1,
		fpr2, str2)

	return float32(levenshtein.Distance(str1, str2)), verbose
}