Skip to content

Commit

Permalink
Find line breaks according to the Unicode line breaking algorithm
Browse files Browse the repository at this point in the history
This adds a new optional dependency on the unicode-linebreak crate,
which implements the line breaking algorithm from [Unicode Standard
Annex #14](https://www.unicode.org/reports/tr14/).

The new dependency is enabled by default since these line breaks are
more correct than what you get by splitting on whitespace.

This should help address #220 and #80, though I’m no expert on
non-Western languages. More feedback from the community would be
needed here.
  • Loading branch information
mgeisler committed Apr 14, 2021
1 parent 3b77e9c commit fdfa47f
Show file tree
Hide file tree
Showing 10 changed files with 623 additions and 206 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,13 @@ harness = false
path = "benches/linear.rs"

[features]
default = ["unicode-width", "smawk"]
default = ["unicode-linebreak", "unicode-width", "smawk"]

[dependencies]
hyphenation = { version = "0.8", optional = true, features = ["embed_en-us"] }
smawk = { version = "0.3", optional = true }
terminal_size = { version = "0.1", optional = true }
unicode-linebreak = { version = "0.1", optional = true }
unicode-width = { version= "0.1", optional = true }

[dev-dependencies]
Expand Down
19 changes: 17 additions & 2 deletions benches/linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,25 @@ pub fn benchmark(c: &mut Criterion) {

#[cfg(feature = "smawk")]
{
#[cfg(feature = "unicode-linebreak")]
{
let options = textwrap::Options::new(LINE_LENGTH)
.wrap_algorithm(textwrap::core::WrapAlgorithm::OptimalFit)
.line_breaks(textwrap::UnicodeLineBreaks);
group.bench_with_input(
BenchmarkId::new("fill_optimal_fit_unicode", length),
&text,
|b, text| {
b.iter(|| textwrap::fill(text, &options));
},
);
}

let options = textwrap::Options::new(LINE_LENGTH)
.wrap_algorithm(textwrap::core::WrapAlgorithm::OptimalFit);
.wrap_algorithm(textwrap::core::WrapAlgorithm::OptimalFit)
.line_breaks(textwrap::AsciiSpaceLineBreaks);
group.bench_with_input(
BenchmarkId::new("fill_optimal_fit", length),
BenchmarkId::new("fill_optimal_fit_ascii_space", length),
&text,
|b, text| {
b.iter(|| textwrap::fill(text, &options));
Expand Down
4 changes: 2 additions & 2 deletions examples/interactive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ mod unix_only {

fn draw_text<'a>(
text: &str,
options: &Options<'a>,
options: &Options<'a, textwrap::DefaultLineBreakAlgorithm>,
splitter_label: &str,
stdout: &mut RawTerminal<io::Stdout>,
) -> Result<(), io::Error> {
Expand Down Expand Up @@ -256,7 +256,7 @@ mod unix_only {
}

let mut label = labels.pop().unwrap();
let mut options: Options = Options::new(35).splitter(Box::new(HyphenSplitter));
let mut options: Options<_> = Options::new(35).splitter(Box::new(HyphenSplitter));
options.break_words = false;
options.splitter = splitters.pop().unwrap();

Expand Down
2 changes: 1 addition & 1 deletion examples/layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ fn main() {
Zero-cost abstractions.";
let mut prev_lines = vec![];

let mut options: Options = Options::new(0).splitter(Box::new(HyphenSplitter));
let mut options = Options::new(0).splitter(Box::new(HyphenSplitter));
#[cfg(feature = "hyphenation")]
{
use hyphenation::Load;
Expand Down
17 changes: 13 additions & 4 deletions examples/multi-layouts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,16 @@ use hyphenation::{Language, Load, Standard};
mod library {
use textwrap::{Options, WordSplitter};

/// Default line breaking algorithm, selected by the `unicode-linebreak` feature.
#[cfg(feature = "unicode-linebreak")]
pub type DefaultLineBreakAlgorithm = textwrap::UnicodeLineBreaks;
#[cfg(not(feature = "unicode-linebreak"))]
pub type DefaultLineBreakAlgorithm = textwrap::AsciiSpaceLineBreaks;

#[derive(Debug, Default)]
pub struct Layout<'a> {
// Use trait-objects which can be easily converted from any concrete `Options`
styles: Vec<Box<Options<'a, dyn WordSplitter + 'a>>>,
styles: Vec<Box<Options<'a, DefaultLineBreakAlgorithm, dyn WordSplitter + 'a>>>,
}

impl<'a> Layout<'a> {
Expand All @@ -19,7 +25,10 @@ mod library {
}

// Signature similar to that of `wrap`, so it accepts (nearly) everything that `wrap` takes.
pub fn add<S: WordSplitter + 'a, T: Into<Options<'a, S>>>(&mut self, option: T) {
pub fn add<S: WordSplitter + 'a, T: Into<Options<'a, DefaultLineBreakAlgorithm, S>>>(
&mut self,
option: T,
) {
self.styles.push(Box::new(option.into()));
}

Expand All @@ -35,7 +44,7 @@ mod library {

// Just use the textwrap functions as usual.
// However, we have to first coerce it into a trait-object
let dyn_opt: &Options<'a, dyn WordSplitter> = opt;
let dyn_opt: &Options<'a, DefaultLineBreakAlgorithm, dyn WordSplitter> = opt;
println!("{}", textwrap::fill(text, dyn_opt));
}
}
Expand Down Expand Up @@ -65,7 +74,7 @@ pub fn main() {
layout.add(opt.clone()); // notice, here we pass opt by-value instead of by-reference

// We can use boxed splitters too (however, we have to coerce the Options)
let opt: Options = opt.splitter(Box::new(NoHyphenation));
let opt: Options<_> = opt.splitter(Box::new(NoHyphenation));
layout.add(opt);

// We can also pass-in references, however, those need to outlive the local
Expand Down
44 changes: 44 additions & 0 deletions examples/wasm/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions examples/wasm/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use wasm_bindgen::prelude::*;
use wasm_bindgen::JsCast;

use textwrap::core;
use textwrap::{core, LineBreakAlgorithm};

#[wasm_bindgen]
extern "C" {
Expand Down Expand Up @@ -160,7 +160,7 @@ pub fn draw_wrapped_text(

let mut lineno = 0;
for line in text.split('\n') {
let words = core::find_words(line);
let words = options.line_breaks.break_line(line);
let split_words = core::split_words(words, &options);

let canvas_words = split_words
Expand Down
Loading

0 comments on commit fdfa47f

Please sign in to comment.