Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds option for user to use their own, "custom" word list #14

Merged
merged 14 commits into from
Oct 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "phraze"
description = "Passphrase generator"
version = "0.2.1"
version = "0.3.0"
edition = "2021"
license = "MPL-2.0"
readme = "readme.markdown"
Expand All @@ -10,6 +10,7 @@ authors = ["sts10 <sschlinkert@gmail.com>"]
[dependencies]
rand = "0.8.5"
clap = { version = "4.4.7", features = ["derive"] }
unicode-normalization = "0.1.22"

[dev-dependencies]
criterion = "0.5.1"
Expand Down
13 changes: 10 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub mod unicode_normalization_check;
use rand::{seq::SliceRandom, thread_rng, Rng};

// Pull in the wordlists as constants for us to use later.
Expand Down Expand Up @@ -78,11 +79,14 @@ pub fn fetch_list(list_choice: List) -> &'static [&'static str] {
}

/// Actually generate the passphrase, given a couple necessary parameters.
pub fn generate_passphrase(
/// This function uses some Rust magic to be able to accept a word list as
/// either a &[&str] (built-in word lists) or as a &[String] if user provides a file
/// as word list.
pub fn generate_passphrase<T: AsRef<str> + std::fmt::Display>(
number_of_words_to_put_in_passphrase: usize,
separator: &str,
title_case: bool,
list: &'static [&'static str],
list: &[T], // Either type!
) -> String {
let mut rng = thread_rng();
// Create a blank String to put words into to create our passphrase
Expand Down Expand Up @@ -139,7 +143,10 @@ fn get_random_number(rng: &mut impl Rng) -> String {

/// Give an array of words, pick a random element and make it a String for
/// simplicity's sake.
fn get_random_element(rng: &mut impl Rng, word_list: &[&str]) -> String {
fn get_random_element<T: AsRef<str>>(rng: &mut impl Rng, word_list: &[T]) -> String
where
T: std::fmt::Display,
{
match word_list.choose(rng) {
Some(word) => word.to_string(),
None => panic!("Couldn't pick a random word"),
Expand Down
111 changes: 100 additions & 11 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
use crate::unicode_normalization_check::uniform_unicode_normalization;
use clap::Parser;
use phraze::*;
use std::fs::File;
use std::io;
use std::io::BufRead;
use std::io::BufReader;
use std::path::Path;
use std::path::PathBuf;
use std::str::FromStr;

/// Generate random passphrases
#[derive(Parser, Debug)]
Expand Down Expand Up @@ -69,6 +77,12 @@ struct Args {
#[clap(short = 'l', long = "list", value_parser=parse_list_choice, default_value="m")]
list_choice: List,

/// Provide a text file with a list of words to randomly generate passphrase from.
///
/// Should be a text file with one word per line.
#[clap(short = 'c', long = "custom-list", conflicts_with = "list_choice")]
custom_list_file_path: Option<PathBuf>,

/// Use Title Case for words in generated usernames
#[clap(short = 't', long = "title-case")]
title_case: bool,
Expand All @@ -81,16 +95,35 @@ struct Args {
fn main() {
let opt = Args::parse();

// Fetch requested word list
let list = fetch_list(opt.list_choice);
if opt.custom_list_file_path.is_some() && opt.separator.is_empty() && !opt.title_case {
panic!("Must use a separator or title case when using a custom word list");
}

// We need two different variables here, one for a user-inputted list and another for
// the built-in list (whether chosen or the default). This is because we use different
// variable types for each case.
let (custom_list, built_in_list) = match opt.custom_list_file_path {
Some(custom_list_file_path) => (Some(read_in_custom_list(&custom_list_file_path)), None),
None => (None, Some(fetch_list(opt.list_choice))),
};

// If a "custom_list" was given by the user, we're going to use that list.
// Otherwise we use the built-in list (a default list if the user didn't choose one).

// To get the length of the list we're going to use, we need to check if a
// custom_list was given.
let list_length = match custom_list {
Some(ref custom_list) => custom_list.len(),
None => built_in_list.unwrap().len(), // pretty sure we're safe to unwrap here...
};

// Since user can define a minimum entropy, we might have to do a little math to
// figure out how many words we need to include in this passphrase.
let number_of_words_to_put_in_passphrase = calculate_number_words_needed(
opt.number_of_words,
opt.minimum_entropy,
opt.strength_count,
list.len(),
list_length,
);

// If user enabled verbose option
Expand All @@ -99,27 +132,38 @@ fn main() {
// to the terminal
print_entropy(
number_of_words_to_put_in_passphrase,
list.len(),
list_length,
opt.n_passphrases,
);
}

// Now we can (finally) generate and print some number of passphrases
for _ in 0..opt.n_passphrases {
// Generate and print passphrase
println!(
"{}",
generate_passphrase(
// Again, we have more code than we should because of this pesky list type situation...
let passphrase = match (&custom_list, built_in_list) {
(Some(ref custom_list), _) => generate_passphrase(
number_of_words_to_put_in_passphrase,
&opt.separator,
opt.title_case,
list,
)
);
custom_list,
),
(None, Some(built_in_list)) => generate_passphrase(
number_of_words_to_put_in_passphrase,
&opt.separator,
opt.title_case,
built_in_list,
),
(None, None) => panic!("List selection error!"),
};
println!("{}", passphrase);
}
}

/// Print the calculated (estimated) entropy of a passphrase, based on three variables
fn print_entropy(number_of_words: usize, list_length: usize, n_passphrases: usize) {
let passphrase_entropy = (list_length as f64).log2() * number_of_words as f64;
// Depending on how many different passphrases the user wants printed, change the printed text
// accordingly
if n_passphrases == 1 {
eprintln!(
"Passphrase has an estimated {:.2} bits of entropy.",
Expand Down Expand Up @@ -149,3 +193,48 @@ fn parse_list_choice(list_choice: &str) -> Result<List, String> {
)),
}
}

/// Load a user-supplied word list from the text file at `file_path`.
///
/// Each line is stripped of leading and trailing whitespace, and lines that
/// are empty after stripping are discarded. The surviving words are sorted
/// and de-duplicated, since duplicate words would undermine entropy
/// estimates. If the resulting list fails the uniform Unicode normalization
/// check, a warning is printed to stderr; the list is returned either way.
///
/// # Panics
///
/// Panics if the file cannot be read.
fn read_in_custom_list(file_path: &Path) -> Vec<String> {
    let raw_lines: Vec<String> = read_by_line(file_path.to_path_buf())
        .unwrap_or_else(|e| panic!("Error reading word list file: {}", e));

    // Trim first, then drop anything that turned out to be blank.
    let mut words: Vec<String> = raw_lines
        .iter()
        .map(|raw| raw.trim())
        .filter(|trimmed| !trimmed.is_empty())
        .map(String::from)
        .collect();

    // Sorting puts equal words next to each other so `dedup` removes them all.
    words.sort();
    words.dedup();

    let is_uniform = uniform_unicode_normalization(&words);
    if !is_uniform {
        eprintln!("WARNING: Custom word list has multiple Unicode normalizations. Consider normalizing the Unicode of all words on the list before making a passphrase.");
    }
    words
}

/// Generic function that reads a file in, line by line, parsing every line
/// into a `T`.
///
/// Lines are parsed exactly as read (no trimming, blank lines included), in
/// file order.
///
/// # Errors
///
/// Returns the underlying `io::Error` if the file cannot be opened or a line
/// cannot be read. If a line fails to parse as `T`, returns an error of kind
/// `io::ErrorKind::InvalidData` whose message contains the `Debug` rendering
/// of the parse error, rather than panicking, so the caller decides how to
/// report bad input.
fn read_by_line<T: FromStr>(file_path: PathBuf) -> io::Result<Vec<T>>
where
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    let reader = BufReader::new(File::open(file_path)?);
    reader
        .lines()
        .map(|line| {
            line.and_then(|text| {
                text.parse::<T>().map_err(|e| {
                    io::Error::new(
                        io::ErrorKind::InvalidData,
                        format!("Error parsing line from file: {:?}", e),
                    )
                })
            })
        })
        // Collecting into a `Result` stops at the first failing line.
        .collect()
}
59 changes: 59 additions & 0 deletions src/unicode_normalization_check.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
use std::collections::HashSet;
use unicode_normalization::is_nfc_quick;
use unicode_normalization::is_nfd_quick;
use unicode_normalization::is_nfkc_quick;
use unicode_normalization::is_nfkd_quick;
use unicode_normalization::IsNormalized;

/// Given a slice of Strings, this function checks whether the whole list can
/// be described by a single Unicode normalization form.
///
/// There are 4 different Unicode normalizations: NFC, NFD, NFKC, NFKD. Which
/// one a list uses isn't a concern. What IS a concern is if one list uses
/// MORE THAN ONE normalization.
///
/// A single word can satisfy several forms at once (plain ASCII passes all
/// four quick checks), so words are not each given one label. Instead the
/// function keeps the set of forms that EVERY word seen so far has passed.
/// The list has "uniform Unicode normalization" when at least one form
/// survives to the end. Elsewhere, we warn the user when this returns false.
///
/// Only a quick-check verdict of `IsNormalized::Yes` counts as passing; any
/// other verdict is conservatively treated as "not in that form".
///
/// Returns `true` for an empty list, since there is nothing to conflict.
pub fn uniform_unicode_normalization(list: &[String]) -> bool {
    // Forms still satisfied by every word examined so far.
    let mut forms_shared_by_all_words: HashSet<&'static str> =
        HashSet::from(["NFC", "NFD", "NFKC", "NFKD"]);
    for word in list {
        forms_shared_by_all_words.retain(|form| {
            let verdict = match *form {
                "NFC" => is_nfc_quick(word.chars()),
                "NFD" => is_nfd_quick(word.chars()),
                "NFKC" => is_nfkc_quick(word.chars()),
                _ => is_nfkd_quick(word.chars()),
            };
            verdict == IsNormalized::Yes
        });
        // If no single form covers all words so far, we can skip the rest of
        // the list and return false
        if forms_shared_by_all_words.is_empty() {
            return false;
        }
    }
    true
}

/// Checks that a list mixing two spellings of the same visible word is
/// reported as non-uniform, while ASCII-only lists and lists using a single
/// spelling are reported as uniform.
#[test]
fn can_detect_non_uniform_unicode_normalization_in_a_given_list() {
    // The same visible word, "sécréter", written two different ways. The
    // code points are spelled out with escapes because the two forms render
    // identically and an editor or tool that normalizes text would otherwise
    // silently make them equal.
    let version1 = "s\u{e9}cr\u{e9}ter"; // precomposed U+00E9
    let version2 = "se\u{301}cre\u{301}ter"; // 'e' followed by combining U+0301
    assert_ne!(version1, version2);

    let non_uniform_list = vec![version1.to_string(), version2.to_string()];
    assert!(!uniform_unicode_normalization(&non_uniform_list));

    let uniform_list = vec![
        "alpha".to_string(),
        "beta".to_string(),
        "charlie".to_string(),
    ];
    assert!(uniform_unicode_normalization(&uniform_list));

    let uniform_list2 = vec![
        "alpha".to_string(),
        "beta".to_string(),
        version1.to_string(), // add one word with an accented character
        "charlie".to_string(),
        version1.to_string(), // twice
    ];
    // Should still be detected as uniform
    assert!(uniform_unicode_normalization(&uniform_list2));
}