Skip to content
This repository has been archived by the owner on Nov 27, 2023. It is now read-only.

Commit

Permalink
Refactor data pipeline, add Caesar algorithm as IV
Browse files Browse the repository at this point in the history
* Refactor data analysis pipeline to use DataFileReader as helper class
* Add CaesarDecryptionMethod enum and parameter for data analysis and Vigenere class (with Kerckhoff/Chi square as default/fallback)
* Remove cipher interface to simplify package
* Output folders now use the format "KEYLENGTHMETHOD_CAESARMETHOD"
  • Loading branch information
varunsingh87 committed Mar 7, 2023
1 parent 12db59a commit d08a532
Show file tree
Hide file tree
Showing 10 changed files with 72 additions and 56 deletions.
11 changes: 7 additions & 4 deletions src/main/java/dataanalysis/DataCollector.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import java.io.FileWriter;
import java.io.IOException;

import frequencyanalysissimulator.crypto.CaesarDecryptionMethod;
import frequencyanalysissimulator.crypto.KeyLengthMethod;
import frequencyanalysissimulator.crypto.Vigenere;

Expand All @@ -15,10 +16,11 @@ public class DataCollector {
* arg[0]: Input must be one line
* arg[1]: Trial Id for output file
* arg[2]: (Optional) key length calculation algorithm
* arg[3]: (Optional): key
* arg[3]: (Optional) Caesar decryption algorithm
* arg[4]: (Optional): key
*/
public static void main(String[] args) {
final String key = args.length > 3 ? args[3] : "DONQUIXOTECOYOTEWILL";
final String key = args.length > 4 ? args[4] : "DONQUIXOTECOYOTEWILL";
String expectedText = args[0].toUpperCase();
String output = String.format("Len,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,Avg(%s)\n", args[1]);

Expand All @@ -31,7 +33,7 @@ public static void main(String[] args) {
String ciphertext = Vigenere.encrypt(input, subKey);
Vigenere v = args[2] != null ? new Vigenere(ciphertext, KeyLengthMethod.valueOf(args[2].toUpperCase()))
: new Vigenere(ciphertext);
String decryptedText = v.decrypt();
String decryptedText = v.decrypt(CaesarDecryptionMethod.valueOf(args[3]));
double accuracy = percentageSimilarity(decryptedText, input);

output += Math.round(accuracy) + ",";
Expand All @@ -42,7 +44,8 @@ public static void main(String[] args) {
}

try (FileWriter writer = new FileWriter(
new File(String.format("data/outputs/%s/Trial %s.csv", args[2], args[1])))) {
new File(String.format("data/outputs/%s_%s/Trial %s.csv", args[2].toLowerCase(), args[3].toLowerCase(),
args[1])))) {
writer.append(output);
} catch (IOException e) {
e.printStackTrace();
Expand Down
17 changes: 3 additions & 14 deletions src/main/java/dataanalysis/DataFileReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,8 @@
import java.nio.file.Path;

public class DataFileReader {
/**
* Reads from data/inputs/arg[0].txt
*
* @param args
* arg[0] = trial id
*/
public static void main(String... args) {
try {
String expectedText = Files.readString(Path.of(String.format("data/inputs/%s.txt", args[0])));
expectedText = expectedText.replace(System.getProperty("line.separator"), " ");
DataCollector.main(new String[] { expectedText, args[0], args[1] });
} catch (IOException e) {
e.printStackTrace();
}
/**
 * Reads data/inputs/{id}.txt and flattens its contents onto a single line.
 *
 * Every line break is replaced by one space, whichever convention the file was written with
 * (LF, CRLF or a lone CR), so the result does not depend on the line separator of the
 * machine running the analysis.
 *
 * @param id
 *            name of the input file without the .txt extension
 * @return the file contents with each line break replaced by a space
 * @throws IOException
 *             if the file cannot be read
 */
static String readInput(String id) throws IOException {
    String expectedText = Files.readString(Path.of(String.format("data/inputs/%s.txt", id)));
    // \R matches any line-break sequence and treats CRLF as a single break
    return expectedText.replaceAll("\\R", " ");
}
}
26 changes: 18 additions & 8 deletions src/main/java/dataanalysis/DataPopulater.java
Original file line number Diff line number Diff line change
@@ -1,25 +1,32 @@
package dataanalysis;

import java.io.File;
import java.io.IOException;

import frequencyanalysissimulator.crypto.CaesarDecryptionMethod;
import frequencyanalysissimulator.crypto.KeyLengthMethod;

/**
* Runs the data collection for every input file: reads each file's contents with DataFileReader and
* feeds them to DataCollector
* Used for populating all data, takes ~20 seconds to run and collects 5000 data points
* Used for populating all data, takes ~10 seconds to run and collects 5000 data points
*/
public class DataPopulater {
DataPopulater(String method) {
DataPopulater(String keyLenAlg, String caesarAlg) {
File inputFolder = new File("data/inputs");
File[] allInputs = inputFolder.listFiles();

for (int i = 0; i < allInputs.length; i++) {
System.out.printf("Collecting data on %s...%-20s", allInputs[i].getName(), ' ');

String fileNameWithoutExtension = allInputs[i].getName().substring(0,
allInputs[i].getName().lastIndexOf("."));
DataFileReader.main(fileNameWithoutExtension, method);
try {
String fileNameWithoutExtension = allInputs[i].getName().substring(0,
allInputs[i].getName().lastIndexOf("."));
String inputText = DataFileReader.readInput(fileNameWithoutExtension);
DataCollector.main(new String[] { inputText, fileNameWithoutExtension, keyLenAlg, caesarAlg });
} catch (IOException e) {
e.printStackTrace();
}

System.out.print("\rData collection complete. Moving to next file...\n");
}
Expand All @@ -29,15 +36,18 @@ public class DataPopulater {
*
* @param args
* args[0]: key length calculation algorithm
* args[1]: Caesar decryption algorithm
* @throws InterruptedException
*/
public static void main(String[] args) throws InterruptedException {
if (args[0] == null || args[0].equals("all")) {
for (KeyLengthMethod method : KeyLengthMethod.values()) {
new DataPopulater(method.name());
for (KeyLengthMethod keyLenAlg : KeyLengthMethod.values()) {
for (CaesarDecryptionMethod keyAlg : CaesarDecryptionMethod.values()) {
new DataPopulater(keyLenAlg.name(), keyAlg.name());
}
}
} else
new DataPopulater(args[0]);
new DataPopulater(args[0], args[1]);

System.out.print("\rData population complete");
}
Expand Down
28 changes: 20 additions & 8 deletions src/main/java/frequencyanalysissimulator/crypto/Caesar.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package frequencyanalysissimulator.crypto;

public class Caesar implements Cipher {
public class Caesar {
private String ciphertext;
private int[] ciphertextAsNumbers;

Expand All @@ -9,6 +9,25 @@ public Caesar(String t) {
ciphertextAsNumbers = new int[t.length()];
}

/**
 * Decrypts a Caesar cipher using the passed method of frequency analysis.
 *
 * @param m
 *            The CaesarDecryptionMethod (enum); KERCKHOFF and {@code null} both select the
 *            chi-square approach, which is the default
 * @return The decrypted text
 */
public String decrypt(CaesarDecryptionMethod m) {
    // Compared with == instead of switched on: a switch over a null enum reference throws
    // NullPointerException, and a caller with no preference may pass null
    if (m == CaesarDecryptionMethod.KASISKI) {
        return decryptByKasiski();
    }

    char keyLet = getKeyByChiSquare();
    // keyLet - 64 maps 'A' to 1; the result is handed to encrypt to undo the key
    // (presumably the inverse shift; confirm against Caesar.encrypt)
    int keyLetAsNum = 27 - (keyLet - 64);
    return Caesar.encrypt(ciphertext, keyLetAsNum);
}

/**
* Use most common occurrence, assume it's e, and shift the rest using this (Kasiski's original
* method of finding the key)
Expand Down Expand Up @@ -81,13 +100,6 @@ public char getKeyByChiSquare() {
return letter;
}

@Override
public String decrypt() {
char keyLet = getKeyByChiSquare();
int keyLetAsNum = 27 - (keyLet - 64);
return Caesar.encrypt(ciphertext, keyLetAsNum);
}

/**
* Encrypts a text with the key where A = key with Caesar Shift
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package frequencyanalysissimulator.crypto;

/**
 * Frequency-analysis strategies available for breaking a single Caesar shift.
 */
public enum CaesarDecryptionMethod {
    /** Recovers the key letter with a chi-square test; Caesar uses this as its default. */
    KERCKHOFF,

    /** Kasiski's approach: assume the most common ciphertext letter stands for 'e'. */
    KASISKI
}
5 changes: 0 additions & 5 deletions src/main/java/frequencyanalysissimulator/crypto/Cipher.java

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@

public enum KeyLengthMethod {
// KASISKI,
INDEX_OF_COINCIDENCE, FRIEDMAN
IOC, FRIEDMAN
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package frequencyanalysissimulator.crypto;

/** Empty placeholder: declares no fields or methods yet. */
class Possibility {
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,10 @@
*
* @author Varun Singh on 2/20/2023
*/
public class SimpleSubstitutionCipher implements Cipher {
public class SimpleSubstitutionCipher {
private String ciphertext;
private double[] frequencies;

@Override
public String decrypt() {
return null;
}
Expand Down
26 changes: 12 additions & 14 deletions src/main/java/frequencyanalysissimulator/crypto/Vigenere.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import java.util.Map;
import java.util.TreeMap;

public class Vigenere implements Cipher {
public class Vigenere {
private String cipherText;
private String letterOnlyCipherText;
private int keylength;
Expand All @@ -17,22 +17,20 @@ public Vigenere(String cipher, KeyLengthMethod m) {
cipherText = cipher.replaceAll("\\s+", " ");
letterOnlyCipherText = removeNonLetters();
method = m;
if (m.equals(KeyLengthMethod.INDEX_OF_COINCIDENCE)) {
if (m.equals(KeyLengthMethod.IOC)) {
keylength = this.calculateKeyLengthByIndexOfCoincidence();
} else if (m.equals(KeyLengthMethod.FRIEDMAN)) {
keylength = this.calculateKeyLengthByFriedmanTest();
}
}

public Vigenere(int keylen, String cipher) {
cipherText = cipher.replaceAll("\\s+", " ");
letterOnlyCipherText = removeNonLetters();
setCipherText(cipher);
keylength = keylen;
}

public Vigenere(String cipher) {
cipherText = cipher.replaceAll("\\s+", " ");
letterOnlyCipherText = removeNonLetters();
setCipherText(cipher);
keylength = this.calculateKeyLengthByIndexOfCoincidence();
}

Expand All @@ -41,7 +39,7 @@ public String getCipherText() {
}

public void setCipherText(String newCipher) {
cipherText = newCipher;
cipherText = newCipher.replaceAll("\\s+", " ");
letterOnlyCipherText = removeNonLetters();
}

Expand Down Expand Up @@ -103,7 +101,7 @@ int calculateKeyLengthByIndexOfCoincidence() {

}

int calculateKeylengthByKasiskiExamination() {
int calculateKeyLengthByKasiskiExamination() {
Map<String, Integer> ngrams = new HashMap<>();
Map<String, Integer> repeatedNGrams = new HashMap<>();
Map<Integer, Integer> possibleKeyLengths = new TreeMap<>();
Expand Down Expand Up @@ -222,14 +220,13 @@ public String getKey() {
return key.toString();
}

@Override
public String decrypt() {
public String decrypt(CaesarDecryptionMethod keyAlg) {
StringBuilder plaintext = new StringBuilder("");
String[] cosets = this.distributeCiphertextIntoCosets();

for (int i = 0; i < keylength; i++) {
Caesar coset = new Caesar(cosets[i]);
cosets[i] = coset.decrypt();
cosets[i] = coset.decrypt(keyAlg);
}

for (int i = 0; i < letterOnlyCipherText.length(); i++) {
Expand All @@ -240,15 +237,16 @@ public String decrypt() {

for (int i = 0; i < nonLetterLocations.size(); i++) {
int index = nonLetterLocations.get(i);
// System.out.print("\r" + plaintext.toString());
// System.out.println("Inserting " + cipherText.charAt(index) + " at index " + index);
// System.out.println("New length of plaintext: " + plaintext.length());
plaintext.insert(index, cipherText.charAt(index));
}

return plaintext.toString();
}

/**
 * Decrypts the ciphertext using the default Caesar decryption method (KERCKHOFF).
 *
 * @return the decrypted text
 */
public String decrypt() {
    // Pass the default explicitly: null is not a CaesarDecryptionMethod value and must not
    // be relied on to mean "default"
    return decrypt(CaesarDecryptionMethod.KERCKHOFF);
}

public static String encrypt(String plaintext, String key) {
StringBuilder ciphertext = new StringBuilder("");
ArrayList<Integer> nonLetters = new ArrayList<>();
Expand Down

0 comments on commit d08a532

Please sign in to comment.