Skip to content

Commit

Permalink
Removed shift and alphabet size from SA
Browse files Browse the repository at this point in the history
  • Loading branch information
williamfiset committed Apr 1, 2019
1 parent e0f960e commit 5442b56
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 116 deletions.
84 changes: 51 additions & 33 deletions com/williamfiset/datastructures/suffixarray/SuffixArray.java
Original file line number Diff line number Diff line change
@@ -1,52 +1,72 @@
/**
* Abstract class that captures the behavior of a suffix array.
*
* @author William Fiset, william.alexandre.fiset@gmail.com
*/

package com.williamfiset.datastructures.suffixarray;

public abstract class SuffixArray {

protected static final int DEFAULT_ALPHABET_SHIFT = 0;
protected static final int DEFAULT_ALPHABET_SIZE = 256;

// Length of the suffix array
public final int N;

protected int shift = DEFAULT_ALPHABET_SHIFT;

protected int alphabetSize = DEFAULT_ALPHABET_SIZE;
protected final int N;

// T is the text
public int[] T;
protected int[] T;

// The sorted suffix array values.
public int[] sa;
protected int[] sa;

// Longest Common Prefix array
public int [] lcp;
protected int [] lcp;

private boolean constructedSa = false;
private boolean constructedLcpArray = false;

// Designated constructor
public SuffixArray(int[] text, int shift, int alphabetSize) {

if (text == null || alphabetSize <= 0)
throw new IllegalArgumentException();

public SuffixArray(int[] text) {
if (text == null)
throw new IllegalArgumentException("Text cannot be null.");
this.T = text;
this.N = text.length;

this.shift = shift;
this.alphabetSize = alphabetSize;

// Build suffix array
}

// Returns the length of the text array T that this suffix array was created with.
public int getTextLength() {
return T.length;
}

// Returns the suffix array, building it first if it has not been constructed yet.
// The value returned is the internal `sa` field itself, not a copy.
public int[] getSa() {
buildSuffixArray();
return sa;
}

// Returns the LCP array, building it (and the suffix array it depends on) first if
// it has not been constructed yet. The value returned is the internal `lcp` field
// itself, not a copy.
public int[] getLcpArray() {
buildLcpArray();
return lcp;
}

// Lazily builds the suffix array: construct() runs on the first call only, and the
// constructedSa flag is set once it has returned so later calls are no-ops.
private void buildSuffixArray() {
  if (!constructedSa) {
    construct();
    constructedSa = true;
  }
}

// Lazily builds the LCP array: makes sure the suffix array exists, then runs the
// kasai algorithm. The constructedLcpArray flag turns repeat calls into no-ops.
private void buildLcpArray() {
  if (!constructedLcpArray) {
    buildSuffixArray();
    kasai();
    constructedLcpArray = true;
  }
}

protected static int[] toIntArray(String s) {
if (s == null) return null;
int[] text = new int[s.length()];
int[] t = new int[s.length()];
for(int i = 0; i < s.length(); i++)
text[i] = s.charAt(i);
return text;
t[i] = s.charAt(i);
return t;
}

// The suffix array construction algorithm is left undefined
Expand All @@ -69,23 +89,21 @@ private void kasai() {
}
}

@Override public String toString() {

@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("-----i-----SA-----LCP---Suffix\n");

for(int i = 0; i < N; i++) {
int suffixLen = N - sa[i];
char[] suffixArray = new char[suffixLen];
for (int j = sa[i], k = 0; j < N; j++, k++)
suffixArray[k] = (char)(T[j] - shift);
suffixArray[k] = (char) T[j];
String suffix = new String(suffixArray);
String formattedStr = String.format("% 7d % 7d % 7d %s\n", i, sa[i], lcp[i], suffix);
sb.append(formattedStr);
}

return sb.toString();

}

}
25 changes: 10 additions & 15 deletions com/williamfiset/datastructures/suffixarray/SuffixArrayFast.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,27 @@

public class SuffixArrayFast extends SuffixArray {

private static final int DEFAULT_ALPHABET_SIZE = 256;

int alphabetSize;
int[] sa2, rank, tmp, c;

public SuffixArrayFast(String text) {
super(toIntArray(text), DEFAULT_ALPHABET_SHIFT, DEFAULT_ALPHABET_SIZE);
this(toIntArray(text), DEFAULT_ALPHABET_SIZE);
}

public SuffixArrayFast(int[] text) {
super(text, DEFAULT_ALPHABET_SHIFT, DEFAULT_ALPHABET_SIZE);
}

// TODO(williamfiset): Get rid of these constructors in favor of
// automatically detecting the alphabet size shift required
public SuffixArrayFast(String text, int shift) {
super(toIntArray(text), shift, DEFAULT_ALPHABET_SHIFT);
}
public SuffixArrayFast(int[] text, int shift) {
super(text, shift, DEFAULT_ALPHABET_SIZE);
this(text, DEFAULT_ALPHABET_SIZE);
}

// Designated constructor
/**
 * Creates a suffix array over {@code text} using the given alphabet size.
 *
 * @param text the input text as an int array; the superclass constructor rejects null
 * @param alphabetSize the alphabet size to use; must be positive
 * @throws IllegalArgumentException if {@code alphabetSize} is not positive
 */
public SuffixArrayFast(int[] text, int alphabetSize) {
  super(text);
  // The base class used to reject non-positive alphabet sizes; now that the field
  // lives in this class, the check has to be made here.
  if (alphabetSize <= 0) {
    throw new IllegalArgumentException("Alphabet size must be positive, got: " + alphabetSize);
  }
  this.alphabetSize = alphabetSize;
}

@Override protected void construct() {
@Override
protected void construct() {
sa = new int[N];
sa2 = new int[N];
rank = new int[N];
Expand Down
34 changes: 10 additions & 24 deletions com/williamfiset/datastructures/suffixarray/SuffixArrayMed.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,49 +10,36 @@

public class SuffixArrayMed extends SuffixArray {

// Helper class which sorts suffix ranks
static class SuffixRankTuple implements Comparable <SuffixRankTuple> {
// Wrapper class to help sort suffix ranks
static class SuffixRankTuple implements Comparable<SuffixRankTuple> {

int firstHalf, secondHalf, originalIndex;

// Sort Suffix ranks first on the first half then the second half
@Override public int compareTo(SuffixRankTuple other) {
@Override
public int compareTo(SuffixRankTuple other) {
int cmp = Integer.compare(firstHalf, other.firstHalf);
if (cmp == 0) return Integer.compare(secondHalf, other.secondHalf);
return cmp;
}

@Override public String toString() {
@Override
public String toString() {
return originalIndex + " -> (" + firstHalf + ", " + secondHalf + ")";
}

}

public SuffixArrayMed(String text) {
super(toIntArray(text), DEFAULT_ALPHABET_SHIFT, DEFAULT_ALPHABET_SIZE);
super(toIntArray(text));
}

public SuffixArrayMed(int[] text) {
super(text, DEFAULT_ALPHABET_SHIFT, DEFAULT_ALPHABET_SIZE);
}

// TODO(williamfiset): Get rid of these constructors in favor of
// automatically detecting the alphabet size shift required
public SuffixArrayMed(String text, int shift) {
super(toIntArray(text), shift, DEFAULT_ALPHABET_SHIFT);
}
public SuffixArrayMed(int[] text, int shift) {
super(text, shift, DEFAULT_ALPHABET_SIZE);
}

// Designated constructor
public SuffixArrayMed(int[] text, int shift, int alphabetSize) {
super(text, shift, alphabetSize);
super(text);
}

// Construct a suffix array in O(nlog^2(n))
@Override protected void construct() {

@Override
protected void construct() {
sa = new int[N];

// Maintain suffix ranks in both a matrix with two rows containing the
Expand Down Expand Up @@ -114,7 +101,6 @@ public SuffixArrayMed(int[] text, int shift, int alphabetSize) {
suffixRanks[0] = suffixRanks[1] = null;
suffixRanks = null;
ranks = null;

}

public static void main(String[] args) {
Expand Down
39 changes: 11 additions & 28 deletions com/williamfiset/datastructures/suffixarray/SuffixArraySlow.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@

public class SuffixArraySlow extends SuffixArray {

static class Suffix implements Comparable <Suffix> {

private static class Suffix implements Comparable<Suffix> {
// Starting position of suffix in text
final int index, len;
final int[] text;
Expand All @@ -22,7 +21,8 @@ public Suffix(int[] text, int index) {
}

// Compare the two suffixes inspired by Robert Sedgewick and Kevin Wayne
@Override public int compareTo(Suffix other) {
@Override
public int compareTo(Suffix other) {
if (this == other) return 0;
int min_len = Math.min(len, other.len);
for (int i = 0; i < min_len; i++) {
Expand All @@ -32,42 +32,27 @@ public Suffix(int[] text, int index) {
return len - other.len;
}

@Override public String toString() {
@Override
public String toString() {
return new String(text, index, len);
}

}

// Contains all the suffixes of the SuffixArray
Suffix[] suffixes;

public SuffixArraySlow(String text) {
super(toIntArray(text), DEFAULT_ALPHABET_SHIFT, DEFAULT_ALPHABET_SIZE);
super(toIntArray(text));
}

public SuffixArraySlow(int[] text) {
super(text, DEFAULT_ALPHABET_SHIFT, DEFAULT_ALPHABET_SIZE);
}

// TODO(williamfiset): Get rid of these constructors in favor of
// automatically detecting the alphabet size shift required
public SuffixArraySlow(String text, int shift) {
super(toIntArray(text), shift, DEFAULT_ALPHABET_SHIFT);
}
public SuffixArraySlow(int[] text, int shift) {
super(text, shift, DEFAULT_ALPHABET_SIZE);
super(text);
}

// Designated constructor
public SuffixArraySlow(int[] text, int shift, int alphabetSize) {
super(text, shift, alphabetSize);
}

// Suffix array construction. This acutally takes O(n^2log(n))
// time since sorting takes on average O(nlog(n)) and each String
// comparision takes O(n)
@Override protected void construct() {

// Suffix array construction. This actually takes O(n^2log(n)) time since sorting takes on
// average O(nlog(n)) and each String comparison takes O(n).
@Override
protected void construct() {
sa = new int[N];
suffixes = new Suffix[N];

Expand All @@ -83,12 +68,10 @@ public SuffixArraySlow(int[] text, int shift, int alphabetSize) {
}

suffixes = null;

}

// Demo entry point: prints the string form of the suffix array of a sample text.
public static void main(String[] args) {
  System.out.println(new SuffixArraySlow("ABBABAABAA"));
}

}
Loading

0 comments on commit 5442b56

Please sign in to comment.