Skip to content

Commit

Permalink
Terminology 'measure' used in place of 'metric' to reduce confusion w…
Browse files Browse the repository at this point in the history
…ith mathematical metric concept.
  • Loading branch information
grahamkirby committed May 5, 2022
1 parent c26178d commit 9cea2d6
Show file tree
Hide file tree
Showing 62 changed files with 865 additions and 826 deletions.
8 changes: 4 additions & 4 deletions src/main/java/uk/ac/standrews/cs/utilities/JSONReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -558,14 +558,14 @@ private void stringToValue(final String s) {
}
try {
if (s.indexOf('.') > -1 || s.indexOf('e') > -1 || s.indexOf('E') > -1) {
theDouble = Double.valueOf(s);
theDouble = Double.parseDouble(s);
setNextSymbol(DOUBLE);
return;
}

final Long myLong = new Long(s);
if (myLong == myLong.intValue()) {
theInteger = myLong.intValue();
final long myLong = Long.parseLong(s);
if (myLong == (int) myLong) {
theInteger = (int) myLong;
setNextSymbol(INTEGER);
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
*/
package uk.ac.standrews.cs.utilities.all_pairs;

import uk.ac.standrews.cs.utilities.metrics.Cosine;
import uk.ac.standrews.cs.utilities.metrics.implementation.FeatureVector;
import uk.ac.standrews.cs.utilities.metrics.implementation.KeyFreqPair;
import uk.ac.standrews.cs.utilities.measures.Cosine;
import uk.ac.standrews.cs.utilities.measures.implementation.FeatureVector;
import uk.ac.standrews.cs.utilities.measures.implementation.KeyFreqPair;

import java.util.ArrayList;
import java.util.HashMap;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
*/
package uk.ac.standrews.cs.utilities.all_pairs;

import uk.ac.standrews.cs.utilities.metrics.implementation.FeatureVector;
import uk.ac.standrews.cs.utilities.metrics.implementation.KeyFreqPair;
import uk.ac.standrews.cs.utilities.measures.implementation.FeatureVector;
import uk.ac.standrews.cs.utilities.measures.implementation.KeyFreqPair;

import java.util.ArrayList;
import java.util.HashMap;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
*/
package uk.ac.standrews.cs.utilities.all_pairs;

import uk.ac.standrews.cs.utilities.metrics.implementation.FeatureVector;
import uk.ac.standrews.cs.utilities.measures.implementation.FeatureVector;

/**
* Created by al on 27/09/2017.
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/uk/ac/standrews/cs/utilities/lsh/MinHash.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
*/
package uk.ac.standrews.cs.utilities.lsh;

import uk.ac.standrews.cs.utilities.metrics.coreConcepts.StringMetric;
import uk.ac.standrews.cs.utilities.measures.coreConcepts.StringMeasure;

import java.util.*;

Expand Down Expand Up @@ -77,7 +77,7 @@ private static int hashFunction(Object inputData, int seedOne, int seedTwo) {
*/
public static Integer[] createMinHashSignature(String src, int sig_size, int shingle_size) {

Set<String> set_ngrams = StringMetric.extractNGrams(src, shingle_size);
Set<String> set_ngrams = StringMeasure.extractNGrams(src, shingle_size);

// Create a min hash array initialized to all int max values
Integer[] signature = new Integer[sig_size];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
*/
package uk.ac.standrews.cs.utilities.lsh;

import uk.ac.standrews.cs.utilities.metrics.Jaccard;
import uk.ac.standrews.cs.utilities.metrics.coreConcepts.StringMetric;
import uk.ac.standrews.cs.utilities.measures.Jaccard;
import uk.ac.standrews.cs.utilities.measures.coreConcepts.StringMeasure;

import java.util.Arrays;
import java.util.Set;
Expand All @@ -32,8 +32,8 @@ public static void main(String[] args) {
String input1 = "The attribute to awe and majesty.";
String input2 = "But there's but one in all doth hold his place.";

Set<String> input1_2grams = StringMetric.extractNGrams(input1, 2);
Set<String> input2_2grams = StringMetric.extractNGrams(input2, 2);
Set<String> input1_2grams = StringMeasure.extractNGrams(input1, 2);
Set<String> input2_2grams = StringMeasure.extractNGrams(input2, 2);

Integer[] input1_minHashSignature = MinHash.createMinHashSignature(input1, 50, 2);
Integer[] input2_minHashSignature = MinHash.createMinHashSignature(input2, 50, 2);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
*/
package uk.ac.standrews.cs.utilities.lsh;

import uk.ac.standrews.cs.utilities.metrics.Jaccard;
import uk.ac.standrews.cs.utilities.metrics.coreConcepts.StringMetric;
import uk.ac.standrews.cs.utilities.measures.Jaccard;
import uk.ac.standrews.cs.utilities.measures.coreConcepts.StringMeasure;

import java.util.Arrays;
import java.util.HashMap;
Expand Down Expand Up @@ -89,8 +89,8 @@ public static void showSimilarities(MinHash mh) {
for (String sentence : ozs_words) {
for (String key : map.keySet()) {

Set<String> sentence_2grams = StringMetric.extractNGrams(sentence, 2);
Set<String> key_2grams = StringMetric.extractNGrams(key, 2);
Set<String> sentence_2grams = StringMeasure.extractNGrams(sentence, 2);
Set<String> key_2grams = StringMeasure.extractNGrams(key, 2);

Integer[] sentence_minHashSignature = MinHash.createMinHashSignature(sentence, 50, 2);
Integer[] key_minHashSignature = MinHash.createMinHashSignature(key, 50, 2);
Expand Down
10 changes: 5 additions & 5 deletions src/main/java/uk/ac/standrews/cs/utilities/m_tree/MTree.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
package uk.ac.standrews.cs.utilities.m_tree;

import uk.ac.standrews.cs.utilities.archive.Diagnostic;
import uk.ac.standrews.cs.utilities.metrics.coreConcepts.DataDistance;
import uk.ac.standrews.cs.utilities.metrics.coreConcepts.Metric;
import uk.ac.standrews.cs.utilities.measures.coreConcepts.DataDistance;
import uk.ac.standrews.cs.utilities.measures.coreConcepts.Measure;

import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -47,15 +47,15 @@ public class MTree<T> {
public Node root = null;
int number_of_entries = 0;
private final int max_level_size; // size of a level
final Metric<T> distance_wrapper;
final Measure<T> distance_wrapper;

public MTree(final Metric<T> d, final int max_level_size) {
public MTree(final Measure<T> d, final int max_level_size) {

distance_wrapper = d;
this.max_level_size = max_level_size;
}

public MTree(final Metric<T> d) {
public MTree(final Measure<T> d) {

this(d, DEFAULT_MAX_LEVEL_SIZE);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public class MTreeSizeCheck {
private int increment = 5000000; // 5 million
private int max = 30000000; // 30 million.

private EuclideanDistance distance_metric = new EuclideanDistance();
private EuclideanDistance distance_measure = new EuclideanDistance();

private void loadTest() {

Expand All @@ -50,7 +50,7 @@ private void loadTest() {
*/
private void createTree(int size) {

MTree<Point> tree = new MTree<>(distance_metric);
MTree<Point> tree = new MTree<>(distance_measure);

long time = System.currentTimeMillis();
System.out.println("Creating tree of size " + size);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@

import uk.ac.standrews.cs.utilities.FileManipulation;
import uk.ac.standrews.cs.utilities.m_tree.MTree;
import uk.ac.standrews.cs.utilities.metrics.Levenshtein;
import uk.ac.standrews.cs.utilities.metrics.coreConcepts.DataDistance;
import uk.ac.standrews.cs.utilities.metrics.coreConcepts.Metric;
import uk.ac.standrews.cs.utilities.measures.Levenshtein;
import uk.ac.standrews.cs.utilities.measures.coreConcepts.DataDistance;

import java.io.IOException;
import java.nio.file.Paths;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,25 @@
*/
package uk.ac.standrews.cs.utilities.m_tree.experiments.euclidean;

import uk.ac.standrews.cs.utilities.metrics.coreConcepts.Metric;
import uk.ac.standrews.cs.utilities.measures.coreConcepts.Measure;

public class EuclideanDistance extends Metric<Point> {
public class EuclideanDistance extends Measure<Point> {

public double calculateDistance(Point p1, Point p2) {

double x_distance = p1.x - p2.x;
double y_distance = p1.y - p2.y;

return normaliseArbitraryPositiveDistance(Math.sqrt((x_distance * x_distance) + (y_distance * y_distance)));
return Math.sqrt((x_distance * x_distance) + (y_distance * y_distance));
}

@Override
public String getMetricName() {
public String getMeasureName() {
return "EuclideanDistance (2D)";
}

@Override
public boolean maxDistanceIsOne() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
* You should have received a copy of the GNU General Public License along with utilities. If not, see
* <http://www.gnu.org/licenses/>.
*/
package uk.ac.standrews.cs.utilities.metrics;
package uk.ac.standrews.cs.utilities.measures;

import uk.ac.standrews.cs.utilities.metrics.coreConcepts.StringMetric;
import uk.ac.standrews.cs.utilities.measures.coreConcepts.StringMeasure;

import java.util.ArrayList;
import java.util.List;
Expand All @@ -30,43 +30,40 @@
* "String Matching with Metric Trees Using an Approximate Distance"
* Ilaria Bartolini, Paolo Ciaccia and Marco Patella,
* in Proceedings of the 9th International Symposium on String Processing
* and Information Retrieval, Lisbone, Purtugal, September 2002.
* and Information Retrieval, Lisbon, Portugal, September 2002.
*/
public class BagDistance extends StringMetric {
public class BagDistance extends StringMeasure {

@Override
public String getMetricName() {
public String getMeasureName() {
return "BagDistance";
}

public double calculateStringDistance(String str1, String str2) {
return 1 - similarity(str1, str2);
}

public double similarity(String str1, String str2) {
@Override
public boolean maxDistanceIsOne() { return true; }

int n = str1.length();
int m = str2.length();
@Override
public double calculateDistance(final String x, final String y) {

List<Character> list1 = toList(str1);
List<Character> list2 = toList(str2);
final List<Character> list1 = toList(x);
final List<Character> list2 = toList(y);

for (Character ch : list1) {
list2.remove(ch); // only removes if in the list
}

// ch must be typed as Character not char, since otherwise we call List.remove(index)...

for (Character ch : toList(str2)) { // note make a copy of list2 because we have removed characters from original above.
for (Character ch : toList(y)) { // note make a copy of list2 because we have removed characters from original above.
list1.remove(ch); // only removes if in the list
}

return 1.0 - (((double) Math.max(list1.size(), list2.size())) / Math.max(n, m));
return (((double) Math.max(list1.size(), list2.size())) / Math.max(x.length(), y.length()));
}

private List<Character> toList(String input) {
private static List<Character> toList(String input) {

List<Character> result = new ArrayList<>();
final List<Character> result = new ArrayList<>();
for (char ch : input.toCharArray()) {
result.add(ch);
}
Expand Down
75 changes: 75 additions & 0 deletions src/main/java/uk/ac/standrews/cs/utilities/measures/Cosine.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Copyright 2021 Systems Research Group, University of St Andrews:
* <https://github.com/stacs-srg>
*
* This file is part of the module utilities.
*
* utilities is free software: you can redistribute it and/or modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* utilities is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with utilities. If not, see
* <http://www.gnu.org/licenses/>.
*/
package uk.ac.standrews.cs.utilities.measures;

import uk.ac.standrews.cs.utilities.measures.coreConcepts.StringMeasure;
import uk.ac.standrews.cs.utilities.measures.implementation.FeatureVector;
import uk.ac.standrews.cs.utilities.measures.implementation.QgramDistribution;
import uk.ac.standrews.cs.utilities.measures.implementation.SparseDistribution;

/**
 * String measure derived from the cosine of the angle between two sparse q-gram distributions.
 *
 * The cosine similarity of the two distributions is converted to an angular distance,
 * {@code 2 * acos(similarity) / PI}, which is the value reported as the distance.
 */
public class Cosine extends StringMeasure {

    @Override
    public String getMeasureName() {
        return "Cosine";
    }

    /**
     * @return always {@code true} for this measure
     */
    @Override
    public boolean maxDistanceIsOne() {
        return true;
    }

    /**
     * Calculates the distance between two strings by building a sparse distribution
     * from each (after applying {@code topAndTail}) and comparing the distributions.
     *
     * @param x the first string
     * @param y the second string
     * @return the angular distance between the two distributions
     */
    @Override
    protected double calculateDistance(final String x, final String y) {

        final SparseDistribution first = new SparseDistribution(topAndTail(x));
        final SparseDistribution second = new SparseDistribution(topAndTail(y));

        return distance(first, second);
    }

    /**
     * Calculates the distance between two feature vectors, each of which is first
     * wrapped in a new sparse distribution.
     *
     * @param x the first feature vector
     * @param y the second feature vector
     * @return the angular distance between the two distributions
     */
    public double distance(final FeatureVector x, final FeatureVector y) {

        final SparseDistribution first = new SparseDistribution(x);
        final SparseDistribution second = new SparseDistribution(y);

        return distance(first, second);
    }

    /**
     * Core calculation shared by the public entry points.
     *
     * Note that both arguments are modified: {@code convertToProbabilityBased()} is invoked on each
     * (presumably normalising the counts - confirm against SparseDistribution).
     *
     * @param x the first distribution
     * @param y the second distribution
     * @return {@code 2 * acos(min(similarity, 1)) / PI}, where similarity is the dot product of the
     * distributions divided by the product of their magnitudes
     * @throws RuntimeException if the calculated distance is NaN
     */
    private double distance(final SparseDistribution x, final SparseDistribution y) {

        x.convertToProbabilityBased();
        y.convertToProbabilityBased();

        // Only keys present in both distributions contribute to the dot product.
        double dot_product = 0.0d;

        for (QgramDistribution entry_in_x : x) {

            final QgramDistribution entry_in_y = y.getEntry(entry_in_x.key);

            if (entry_in_y == null) {
                continue;
            }

            dot_product += entry_in_x.count * entry_in_y.count;
        }

        final double magnitude_product = x.magnitude() * y.magnitude();
        final double cosine_similarity = dot_product / magnitude_product;

        // Cap at 1.0 so that rounding error cannot push the argument outside the domain of acos.
        final double capped_similarity = Math.min(cosine_similarity, 1d);
        final double angular_distance = 2.0 * Math.acos(capped_similarity) / Math.PI;

        if (!Double.isNaN(angular_distance)) {
            return angular_distance;
        }

        throw new RuntimeException("Cosine.distance returned Nan");
    }

    public static void main(String[] args) {

        final Cosine measure = new Cosine();
        measure.printExamples();
    }
}
Loading

0 comments on commit 9cea2d6

Please sign in to comment.