Skip to content

Commit

Permalink
Prepare unicode_names2_generator for initial release
Browse files — browse the repository at this point in the history
It now needs to be published, as it is a build-dependency of
unicode_names2 itself.
  • Loading branch information
progval committed Aug 13, 2023
1 parent e0d668f commit 88c8b30
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 13 deletions.
6 changes: 4 additions & 2 deletions build.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
use std::{env, path::PathBuf};
use unicode_names2_generator as generator;

const UNICODE_DATA: &str = include_str!("data/UnicodeData.txt");

fn main() {
let out_dir = PathBuf::from(env::var_os("OUT_DIR").unwrap());
{
let mut generated_path = out_dir.clone();
generated_path.push("generated.rs");
generator::generate(Some(&generated_path), None);
generator::generate(UNICODE_DATA, Some(&generated_path), None);
}
{
let mut generated_phf_path = out_dir;
generated_phf_path.push("generated_phf.rs");
generator::generate_phf(Some(&generated_phf_path), None, 3, 2);
generator::generate_phf(UNICODE_DATA, Some(&generated_phf_path), None, 3, 2);
}
}
10 changes: 9 additions & 1 deletion generator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,16 @@

name = "unicode_names2_generator"
edition = "2018"
version = "0.0.1"
rust-version = "1.63.0"
version = "1.0.0"
authors = ["Huon Wilson <dbau.pp@gmail.com>"]
homepage = "https://github.com/progval/unicode_names2"
repository = "https://github.com/progval/unicode_names2"
documentation = "https://docs.rs/unicode_names2/"
license = "MIT/Apache-2.0"
description = """
Generates the perfect-hash function used by `unicode_names2`.
"""

[features]
unstable = []
Expand Down
18 changes: 8 additions & 10 deletions generator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,14 @@ mod phf;
mod trie;
mod util;

const UNICODE_DATA: &str = include_str!("../../data/UnicodeData.txt");

const SPLITTERS: &[u8] = b"-";

struct TableData {
codepoint_names: Vec<(char, &'static str)>,
cjk_ideograph_ranges: Vec<(char, char)>,
}

fn get_table_data() -> TableData {
fn get_table_data(unicode_data: &'static str) -> TableData {
fn extract(line: &'static str) -> Option<(char, &'static str)> {
let splits: Vec<_> = line.splitn(15, ';').collect();
assert_eq!(splits.len(), 15);
Expand All @@ -41,7 +39,7 @@ fn get_table_data() -> TableData {
Some((c, name))
}

let mut iter = UNICODE_DATA.split('\n');
let mut iter = unicode_data.split('\n');

let mut codepoint_names = vec![];
let mut cjk_ideograph_ranges = vec![];
Expand Down Expand Up @@ -362,20 +360,20 @@ fn make_context(path: Option<&Path>) -> Context {

#[allow(clippy::type_complexity)]
fn get_truncated_table_data(
truncate: Option<usize>,
unicode_data: &'static str, truncate: Option<usize>,
) -> (Vec<(char, &'static str)>, Vec<(char, char)>) {
let TableData {
mut codepoint_names,
cjk_ideograph_ranges: cjk,
} = get_table_data();
} = get_table_data(unicode_data);
if let Some(n) = truncate {
codepoint_names.truncate(n)
}
(codepoint_names, cjk)
}

pub fn generate_phf(path: Option<&Path>, truncate: Option<usize>, lambda: usize, tries: usize) {
let (codepoint_names, _) = get_truncated_table_data(truncate);
pub fn generate_phf(unicode_data: &'static str, path: Option<&Path>, truncate: Option<usize>, lambda: usize, tries: usize) {
let (codepoint_names, _) = get_truncated_table_data(unicode_data, truncate);

let mut ctxt = make_context(path);
let (n, disps, data) = phf::create_phf(&codepoint_names, lambda, tries);
Expand All @@ -390,8 +388,8 @@ pub fn generate_phf(path: Option<&Path>, truncate: Option<usize>, lambda: usize,
}
}

pub fn generate(path: Option<&Path>, truncate: Option<usize>) {
let (codepoint_names, cjk) = get_truncated_table_data(truncate);
pub fn generate(unicode_data: &'static str, path: Option<&Path>, truncate: Option<usize>) {
let (codepoint_names, cjk) = get_truncated_table_data(unicode_data, truncate);
let mut ctxt = make_context(path);

write_cjk_ideograph_ranges(&mut ctxt, &cjk);
Expand Down

0 comments on commit 88c8b30

Please sign in to comment.