diff --git a/src/main/java/edu/princeton/cs/algs4/LempelZivWelch.java b/src/main/java/edu/princeton/cs/algs4/LempelZivWelch.java
new file mode 100644
index 0000000..ae74ee9
--- /dev/null
+++ b/src/main/java/edu/princeton/cs/algs4/LempelZivWelch.java
@@ -0,0 +1,156 @@
+ * Compilation: javac LempelZivWelch.java
+ * Execution: java LempelZivWelch - < input.txt (compress)
+ * Execution: java LempelZivWelch + < input.txt (expand)
+ * Dependencies: BinaryIn.java BinaryOut.java
+ * Data files: https://algs4.cs.princeton.edu/55compression/abraLZW.txt
+ * https://algs4.cs.princeton.edu/55compression/ababLZW.txt
+ *
+ * Compress or expand binary input from standard input using LZW.
+ *
+ *
+ ******************************************************************************/
+package edu.princeton.cs.algs4;
+import edu.princeton.cs.algs4.TernarySearchTrie;
+import edu.princeton.cs.algs4.BinaryStdIn;
+import edu.princeton.cs.algs4.BinaryStdOut;
+ * The {@code LempelZivWelch} class provides static methods for compressing and expanding a binary
+ * input using LempelZivWelch compression over the 8-bit extended ASCII alphabet with 12-bit
+ * codewords.
+ *
+ * <p>Starting with Oracle Java 7u6, the substring method takes time and space linear in the length
+ * of the extracted substring (instead of constant time and space as in earlier versions). As a
+ * result, compression takes quadratic time in the original {@code LZW} class. See this article for more
+ * details.
+ *
+ * <p>This class, along with {@code TernarySearchTrie} - the enhanced version of {@code TST}, fixes
+ * the above issue. The key to the fix is to use the new method {@code
+ * TernarySearchTrie.longestPrefixOf(String query, int startIndex)} in {@code compress()}.
+ *
+ * <p>For additional documentation, see Section 5.5 of Algorithms, 4th
+ * Edition by Robert Sedgewick and Kevin Wayne.
+ *
+ * @author Robert Sedgewick
+ * @author Kevin Wayne
+ */
+public class LempelZivWelch {
+ private static final int R = 256; // alphabet size: number of distinct input chars
+ private static final int W = 12; // codeword width in bits
+ private static final int L = 1 << W; // number of codewords = 2^W = 4096
+ // This class exposes only static methods; the private constructor prevents instantiation.
+ private LempelZivWelch() {}
+ /**
+  * Reads a sequence of 8-bit bytes from standard input; compresses them using LempelZivWelch
+  * compression with 12-bit codewords; and writes the results to standard output.
+  *
+  * <p>The whole input is read into one string and scanned by advancing an index, rather than by
+  * re-slicing the remaining input after every codeword.
+  */
+ public static void compress() {
+   String input = BinaryStdIn.readString();
+   TernarySearchTrie st = new TernarySearchTrie();
+   // since TernarySearchTrie is not balanced, it would be better to insert in a different order
+   for (int i = 0; i < R; i++) {
+     st.put("" + (char) i, i);
+   }
+   int code = R + 1; // R is codeword for EOF
+   int index = 0;
+   while (index < input.length()) {
+     String s = st.longestPrefixOf(input, index); // Find max prefix match s at index.
+     BinaryStdOut.write(st.get(s), W); // Print s's encoding.
+     int end = index + s.length(); // position of the lookahead character, if any
+     // A new entry needs one lookahead character after s. The bound has to be measured from
+     // the current scan position: comparing s.length() alone against input.length() lets the
+     // substring below run one past the end of the input on the final match.
+     if (end < input.length() && code < L) {
+       st.put(input.substring(index, end + 1), code++); // Add s + lookahead char.
+     }
+     index = end; // Scan past s in input.
+   }
+   BinaryStdOut.write(R, W);
+   BinaryStdOut.close();
+ }
+ /**
+  * Reads a sequence of 12-bit LempelZivWelch codewords from standard input, expands them, and
+  * writes the decoded strings to standard output. Decoding stops at the EOF codeword {@code R}.
+  */
+ public static void expand() {
+   String[] table = new String[L];
+   // seed the table with every 1-character string
+   int next = 0; // next available codeword value
+   while (next < R) {
+     table[next] = "" + (char) next;
+     next++;
+   }
+   table[next++] = ""; // (unused) slot taken by the EOF codeword
+   int codeword = BinaryStdIn.readInt(W);
+   if (codeword == R) {
+     return; // expanded message is empty string
+   }
+   String current = table[codeword];
+   for (;;) {
+     BinaryStdOut.write(current);
+     codeword = BinaryStdIn.readInt(W);
+     if (codeword == R) {
+       break;
+     }
+     // Special case: the codeword names the entry that is about to be added, so its string
+     // is the current one followed by the current one's first character.
+     String entry = (next == codeword) ? current + current.charAt(0) : table[codeword];
+     if (next < L) {
+       table[next++] = current + entry.charAt(0);
+     }
+     current = entry;
+   }
+   BinaryStdOut.close();
+ }
+ /**
+ * Sample client that calls {@code compress()} if the command-line argument is "-" an {@code
+ * expand()} if it is "+".
+ *
+ * @param args the command-line arguments
+ */
+ public static void main(String[] args) {
+ if (args[0].equals("-")) {
+ compress();
+ } else if (args[0].equals("+")) {
+ expand();
+ } else {
+ throw new IllegalArgumentException("Illegal command line argument");
+ }
+ }
+ * Copyright 2002-2020, Robert Sedgewick and Kevin Wayne.
+ *
+ * This file is part of algs4.jar, which accompanies the textbook
+ *
+ * Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne,
+ * Addison-Wesley Professional, 2011, ISBN 0-321-57351-X.
+ * http://algs4.cs.princeton.edu
+ *
+ *
+ * algs4.jar is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * algs4.jar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with algs4.jar. If not, see http://www.gnu.org/licenses.
+ ******************************************************************************/
diff --git a/src/main/java/edu/princeton/cs/algs4/TernarySearchTrie.java b/src/main/java/edu/princeton/cs/algs4/TernarySearchTrie.java
new file mode 100644
index 0000000..d56fb5d
--- /dev/null
+++ b/src/main/java/edu/princeton/cs/algs4/TernarySearchTrie.java
@@ -0,0 +1,381 @@
+ * Compilation: javac TernarySearchTrie.java
+ * Execution: java TernarySearchTrie < words.txt
+ * Dependencies: StdIn.java
+ * Data files: https://algs4.cs.princeton.edu/52trie/shellsST.txt
+ *
+ * Symbol table with string keys, implemented using a ternary search
+ * trie (TernarySearchTrie).
+ *
+ *
+ * % java TernarySearchTrie < shellsST.txt
+ * keys(""):
+ * by 4
+ * sea 6
+ * sells 1
+ * she 0
+ * shells 3
+ * shore 7
+ * the 5
+ *
+ * longestPrefixOf("shellsort"):
+ * shells
+ *
+ * keysWithPrefix("shor"):
+ * shore
+ *
+ * keysThatMatch(".he.l."):
+ * shells
+ *
+ * % java TernarySearchTrie
+ * theory the now is the time for all good men
+ *
+ * Remarks
+ * --------
+ * - can't use a key that is the empty string ""
+ *
+ ******************************************************************************/
+package edu.princeton.cs.algs4;
+import edu.princeton.cs.algs4.Queue;
+import edu.princeton.cs.algs4.StdIn;
+import edu.princeton.cs.algs4.StdOut;
+ * The {@code TernarySearchTrie} class represents a symbol table of key-value pairs, with string
+ * keys and generic values. It supports the usual put, get, contains,
+ * delete, size, and is-empty methods. It also provides character-based
+ * methods for finding the string in the symbol table that is the longest prefix of a given
+ * query string, finding all strings in the symbol table that start with a given prefix, and
+ * finding all strings in the symbol table that match a given pattern. A symbol table
+ * implements the associative array abstraction: when associating a value with a key that
+ * is already in the symbol table, the convention is to replace the old value with the new value.
+ * Unlike {@link java.util.Map}, this class uses the convention that values cannot be {@code
+ * null}—setting the value associated with a key to {@code null} is equivalent to deleting the key
+ * from the symbol table.
+ *
+ * This implementation uses a ternary search trie.
+ *
+ * <p>For additional documentation, see Section
+ * 5.2 of Algorithms, 4th Edition by Robert Sedgewick and Kevin Wayne.
+ */
+public class TernarySearchTrie {
+ private int size; // number of key-value pairs, adjusted by put()
+ private Node root; // root of TernarySearchTrie
+ // A trie node: one character plus links to the subtries for smaller (left), equal (mid) and
+ // larger (right) characters.
+ private static class Node {
+ private char ch; // character
+ private Node left; // subtrie searched when the query character is less than ch
+ private Node mid; // subtrie searched for the next character once ch has matched
+ private Node right; // subtrie searched when the query character is greater than ch
+ private T val; // value associated with string; null when no key ends at this node
+ }
+ /** Initializes an empty string symbol table: no root node and a size of zero. */
+ public TernarySearchTrie() {}
+ /**
+  * Reports how many key-value pairs this symbol table currently holds.
+  *
+  * @return the number of key-value pairs in this symbol table
+  */
+ public int size() {
+   return this.size;
+ }
+ /**
+  * Checks whether this symbol table contains the given key, i.e. whether {@code get(key)}
+  * yields a non-null value.
+  *
+  * @param key the key
+  * @return {@code true} if this symbol table contains {@code key} and {@code false} otherwise
+  * @throws IllegalArgumentException if {@code key} is {@code null}, or (via {@code get}) if it
+  *     is the empty string
+  */
+ public boolean contains(String key) {
+   if (key != null) {
+     return get(key) != null;
+   }
+   throw new IllegalArgumentException("argument to contains() is null");
+ }
+ /**
+  * Looks up the value stored under the given key.
+  *
+  * @param key the key
+  * @return the value associated with the given key if the key is in the symbol table and {@code
+  *     null} if the key is not in the symbol table
+  * @throws IllegalArgumentException if {@code key} is {@code null} or the empty string
+  */
+ public T get(String key) {
+   if (key == null) {
+     throw new IllegalArgumentException("calls get() with null argument");
+   }
+   if (key.isEmpty()) {
+     throw new IllegalArgumentException("key must have length >= 1");
+   }
+   Node hit = get(root, key, 0);
+   return (hit == null) ? null : hit.val;
+ }
+ // Walks down from x, matching key from position d onward, and returns the node reached by
+ // the last character of key (or null if the walk falls off the trie).
+ private Node get(Node x, String key, int d) {
+   Node node = x;
+   int pos = d;
+   while (node != null) {
+     if (key.length() == 0) {
+       throw new IllegalArgumentException("key must have length >= 1");
+     }
+     char c = key.charAt(pos);
+     if (c < node.ch) {
+       node = node.left;
+     } else if (c > node.ch) {
+       node = node.right;
+     } else if (pos < key.length() - 1) {
+       node = node.mid; // character matched; continue with the next one
+       pos++;
+     } else {
+       return node; // last character matched
+     }
+   }
+   return null;
+ }
+ /**
+ * Inserts the key-value pair into the symbol table, overwriting the old value with the new value
+ * if the key is already in the symbol table. If the value is {@code null}, this effectively
+ * deletes the key from the symbol table.
+ *
+ * @param key the key
+ * @param val the value
+ * @throws IllegalArgumentException if {@code key} is {@code null}
+ */
+ public void put(String key, T val) {
+ if (key == null) {
+ throw new IllegalArgumentException("calls put() with null key");
+ }
+ if (!contains(key)) {
+ size++;
+ } else if (val == null) { // delete existing key
+ size--;
+ }
+ root = put(root, key, val, 0);
+ }
+ // Stores val under key in the subtrie rooted at x, starting at character d of key; creates
+ // nodes as needed and returns the (possibly new) root of that subtrie.
+ private Node put(Node x, String key, T val, int d) {
+   char c = key.charAt(d);
+   Node node = x;
+   if (node == null) {
+     node = new Node();
+     node.ch = c;
+   }
+   if (c == node.ch) {
+     if (d + 1 < key.length()) {
+       node.mid = put(node.mid, key, val, d + 1);
+     } else {
+       node.val = val; // last character of key: the value lives here
+     }
+   } else if (c < node.ch) {
+     node.left = put(node.left, key, val, d);
+   } else {
+     node.right = put(node.right, key, val, d);
+   }
+   return node;
+ }
+ /**
+ * Returns the string in the symbol table that is the longest prefix of {@code query}. This
+ * overload delegates to {@code longestPrefixOf(query, 0)}, i.e. matching starts at the first
+ * character of {@code query}.
+ *
+ * @param query the query string
+ * @return the result of {@code longestPrefixOf(query, 0)}
+ * @throws IllegalArgumentException if {@code query} is {@code null} or empty (start index 0 is
+ * rejected by the two-argument overload when the query has length 0)
+ */
+ public String longestPrefixOf(String query) {
+ return longestPrefixOf(query, 0);
+ }
+ /**
+  * Returns the longest key in the symbol table that is a prefix of the part of {@code query}
+  * beginning at {@code startIndex}. If no key matches, the empty string is returned.
+  *
+  * @param query the query string
+  * @param startIndex the index in {@code query} at which matching starts
+  * @return the longest matching key, as a substring of {@code query} starting at {@code
+  *     startIndex}; the empty string if there is no match
+  * @throws IllegalArgumentException if {@code query} is {@code null}, or if {@code startIndex}
+  *     is negative or not less than {@code query.length()}
+  */
+ public String longestPrefixOf(String query, int startIndex) {
+   if (query == null || startIndex < 0 || startIndex >= query.length()) {
+     throw new IllegalArgumentException("calls longestPrefixOf() with wrong arguments");
+   }
+   int bestEnd = startIndex; // exclusive end of the longest key matched so far
+   int pos = startIndex; // next query position to match
+   Node node = root;
+   while (node != null && pos < query.length()) {
+     char c = query.charAt(pos);
+     if (c < node.ch) {
+       node = node.left;
+     } else if (c > node.ch) {
+       node = node.right;
+     } else {
+       pos++;
+       if (node.val != null) {
+         bestEnd = pos; // a key ends on this character
+       }
+       node = node.mid;
+     }
+   }
+   return query.substring(startIndex, bestEnd);
+ }
+ /**
+  * Returns all keys in the symbol table as an {@code Iterable}. To iterate over all of the keys in
+  * the symbol table named {@code st}, use the foreach notation: {@code for (String key :
+  * st.keys())}.
+  *
+  * @return all keys in the symbol table as an {@code Iterable}
+  */
+ public Iterable keys() {
+   Queue found = new Queue();
+   StringBuilder prefix = new StringBuilder();
+   collect(root, prefix, found);
+   return found;
+ }
+ /**
+  * Returns all of the keys in the set that start with {@code prefix}.
+  *
+  * @param prefix the prefix
+  * @return all of the keys in the set that start with {@code prefix}, as an iterable
+  * @throws IllegalArgumentException if {@code prefix} is {@code null}, or if it is the empty
+  *     string while the table has a root node (the internal lookup rejects empty keys)
+  */
+ public Iterable keysWithPrefix(String prefix) {
+   if (prefix == null) {
+     throw new IllegalArgumentException("calls keysWithPrefix() with null argument");
+   }
+   Queue matches = new Queue();
+   Node subtrie = get(root, prefix, 0);
+   if (subtrie != null) {
+     if (subtrie.val != null) {
+       matches.enqueue(prefix); // the prefix itself is a key
+     }
+     collect(subtrie.mid, new StringBuilder(prefix), matches);
+   }
+   return matches;
+ }
+ // Enqueues every key in the subtrie rooted at x, each prepended with the given prefix.
+ // The prefix builder is restored to its incoming contents before returning.
+ private void collect(Node x, StringBuilder prefix, Queue queue) {
+   if (x == null) {
+     return;
+   }
+   collect(x.left, prefix, queue);
+   prefix.append(x.ch);
+   if (x.val != null) {
+     queue.enqueue(prefix.toString());
+   }
+   collect(x.mid, prefix, queue);
+   prefix.setLength(prefix.length() - 1); // drop x.ch again
+   collect(x.right, prefix, queue);
+ }
+ // Enqueues every key in the subtrie rooted at x that matches pattern from position i onward,
+ // where '.' in the pattern matches any character. prefix holds the characters matched so far.
+ private void collect(Node x, StringBuilder prefix, int i, String pattern, Queue queue) {
+   if (x == null) {
+     return;
+   }
+   char c = pattern.charAt(i);
+   boolean wildcard = (c == '.');
+   boolean lastChar = (i == pattern.length() - 1);
+   if (wildcard || c < x.ch) {
+     collect(x.left, prefix, i, pattern, queue);
+   }
+   if (wildcard || c == x.ch) {
+     if (lastChar) {
+       if (x.val != null) {
+         queue.enqueue(prefix.toString() + x.ch);
+       }
+     } else {
+       prefix.append(x.ch);
+       collect(x.mid, prefix, i + 1, pattern, queue);
+       prefix.setLength(prefix.length() - 1);
+     }
+   }
+   if (wildcard || c > x.ch) {
+     collect(x.right, prefix, i, pattern, queue);
+   }
+ }
+ /**
+  * Returns all of the keys in the symbol table that match {@code pattern}, where the character '.'
+  * is interpreted as a wildcard character.
+  *
+  * @param pattern the pattern
+  * @return all of the keys in the symbol table that match {@code pattern}, as an iterable, where .
+  *     is treated as a wildcard character; empty if {@code pattern} is the empty string
+  * @throws IllegalArgumentException if {@code pattern} is {@code null}
+  */
+ public Iterable keysThatMatch(String pattern) {
+   if (pattern == null) {
+     throw new IllegalArgumentException("calls keysThatMatch() with null argument");
+   }
+   Queue queue = new Queue();
+   // Keys are never empty, so an empty pattern matches nothing; skipping the traversal also
+   // avoids reading pattern.charAt(0) on an empty string.
+   if (!pattern.isEmpty()) {
+     collect(root, new StringBuilder(), 0, pattern, queue);
+   }
+   return queue;
+ }
+ /**
+ * Unit tests the {@code TernarySearchTrie} data type.
+ *
+ * @param args the command-line arguments
+ */
+ public static void main(String[] args) {
+ // build symbol table from standard input
+ TernarySearchTrie st = new TernarySearchTrie();
+ for (int i = 0; !StdIn.isEmpty(); i++) {
+ String key = StdIn.readString();
+ st.put(key, i);
+ }
+ // print results
+ if (st.size() < 100) {
+ StdOut.println("keys(\"\"):");
+ for (String key : st.keys()) {
+ StdOut.println(key + " " + st.get(key));
+ }
+ StdOut.println();
+ }
+ StdOut.println("longestPrefixOf(\"shellsort\"):");
+ StdOut.println(st.longestPrefixOf("shellsort"));
+ StdOut.println();
+ StdOut.println("longestPrefixOf(\"shell\"):");
+ StdOut.println(st.longestPrefixOf("shell"));
+ StdOut.println();
+ StdOut.println("keysWithPrefix(\"shor\"):");
+ for (String s : st.keysWithPrefix("shor")) {
+ StdOut.println(s);
+ }
+ StdOut.println();
+ StdOut.println("keysThatMatch(\".he.l.\"):");
+ for (String s : st.keysThatMatch(".he.l.")) {
+ StdOut.println(s);
+ }
+ }
+ * Copyright 2002-2020, Robert Sedgewick and Kevin Wayne.
+ *
+ * This file is part of algs4.jar, which accompanies the textbook
+ *
+ * Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne,
+ * Addison-Wesley Professional, 2011, ISBN 0-321-57351-X.
+ * http://algs4.cs.princeton.edu
+ *
+ *
+ * algs4.jar is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * algs4.jar is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with algs4.jar. If not, see http://www.gnu.org/licenses.
+ ******************************************************************************/
diff --git a/tools/bwt_compress.sh b/tools/bwt_compress.sh
index 64ca37c..4e2b78b 100755
--- a/tools/bwt_compress.sh
+++ b/tools/bwt_compress.sh
@@ -1,4 +1,4 @@
-# !/bin/bash
cd `dirname $0`/..
diff --git a/tools/huf_compress.sh b/tools/huf_compress.sh
index 8a63310..f099fd4 100755
--- a/tools/huf_compress.sh
+++ b/tools/huf_compress.sh
@@ -1,4 +1,4 @@
-# !/bin/bash
cd `dirname $0`/..
diff --git a/tools/lzw.sh b/tools/lzw.sh
new file mode 100755
index 0000000..3e339cc
--- /dev/null
+++ b/tools/lzw.sh
@@ -0,0 +1,46 @@
+cd `dirname $0`/..
+mysize=$(stat -f%z "$FILE_NAME")
+echo "${FILE_NAME} size: ${mysize} bytes"
+start_ms=$(ruby -e 'puts (Time.now.to_f * 1000).to_i')
+if [ "$CLASS_NAME" == "" ]
+ CLASS_NAME=LempelZivWelch
+tools/run.sh edu.princeton.cs.algs4.$CLASS_NAME - < $FILE_NAME | \
+ tools/run.sh edu.princeton.cs.algs4.HexDump 64 | tail -1
+end_ms=$(ruby -e 'puts (Time.now.to_f * 1000).to_i')
+elapsed_ms=$((end_ms - start_ms))
+echo "$elapsed_ms ms used"
+# based on improved LZW ==> LempelZivWelch
+# tools/lzw.sh src/test/data/burrows/dickens_512K.txt
+# src/test/data/burrows/dickens_512K.txt size: 512000 bytes
+# 2018344 bits
+# 1825 ms used
+# based on original LZW of algs4 book
+# tools/lzw.sh src/test/data/burrows/dickens_512K.txt LZW
+# src/test/data/burrows/dickens_512K.txt size: 512000 bytes
+# 2018344 bits
+# 4723 ms used
+# irb(main):001:0> 2018344.0/512000
+# => 3.942078125
+# based on improved LZW ==> LempelZivWelch
+# algs4 % tools/lzw.sh src/test/data/burrows/dickens.txt
+# src/test/data/burrows/dickens.txt size: 28965453 bytes
+# 118099584 bits
+# 39963 ms used
+# irb(main):002:0> 118099584.0/28965453
+# => 4.077256585629785
diff --git a/tools/lzw_compress.sh b/tools/lzw_compress.sh
index 4a04156..ab90f5f 100755
--- a/tools/lzw_compress.sh
+++ b/tools/lzw_compress.sh
@@ -1,4 +1,4 @@
-# !/bin/bash
cd `dirname $0`/..