Skip to content

Commit

Permalink
feat(text) wrap/fill can break long words, honours width
Browse files Browse the repository at this point in the history
  • Loading branch information
Tieske committed Jan 5, 2022
1 parent 0d94139 commit b60cf6a
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 57 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,22 @@ see [CONTRIBUTING.md](CONTRIBUTING.md#release-instructions-for-a-new-version) fo
[#396](https://github.com/lunarmodules/Penlight/pull/396)
- fix: `text.dedent` didn't handle declining indents nor empty lines
[#402](https://github.com/lunarmodules/Penlight/pull/402)
- fix: `dir.getfiles`, `dir.getdirectories`, and `dir.getallfiles` now have the
directory optional, as was already documented
[#405](https://github.com/lunarmodules/Penlight/pull/405)
- feat: `array2d.default_range` now also takes a spreadsheet range, which means
also other functions now take a range. [#404](https://github.com/lunarmodules/Penlight/pull/404)
- fix: `lapp` enums allow [pattern magic characters](https://www.lua.org/pil/20.2.html)
[#393](https://github.com/lunarmodules/Penlight/pull/393)
- fix: `text.wrap` and `text.fill` numerous fixes for handling whitespace,
accented characters, honouring width, etc.
[#400](https://github.com/lunarmodules/Penlight/pull/400)
- feat: `text.wrap` and `text.fill` have a new parameter to forcefully break words
longer than the width given.
[#400](https://github.com/lunarmodules/Penlight/pull/400)
## 1.11.0 (2021-08-18)

Expand Down
81 changes: 55 additions & 26 deletions lua/pl/text.lua
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
-- @module pl.text

local gsub = string.gsub
local concat,append = table.concat,table.insert
local concat, t_remove = table.concat, table.remove
local utils = require 'pl.utils'
local bind1,usplit,assert_arg = utils.bind1,utils.split,utils.assert_arg
local is_callable = require 'pl.types'.is_callable
Expand Down Expand Up @@ -88,39 +88,68 @@ function text.dedent (s)
return concat(lst,'\n')..'\n'
end

do
  -- Build one output line from the front of `words`, consuming what it uses.
  --
  -- `words` is the list of remaining words (modified in place); the caller
  -- must guarantee that `words[1]` exists. `size` is the maximum line length.
  -- If the first word is longer than `size` it is either emitted on a line of
  -- its own (overflowing the width), or, when `breaklong` is truthy, chopped
  -- so that only the first `size` characters are emitted and the remainder
  -- stays at the front of `words`.
  --
  -- Returns the stripped line and the (same) `words` table.
  local buildline = function(words, size, breaklong)
    local line = {}
    if #words[1] > size then
      -- the first word on its own is longer than a full line
      if not breaklong then
        -- let it overflow on a line of its own
        line[1] = words[1]
        t_remove(words, 1)
      else
        -- emit one line-length piece, keep the rest for the next call
        line[1] = words[1]:sub(1, size)
        words[1] = words[1]:sub(size + 1, -1)
      end
    else
      -- `len` is the length taken so far, counting one separator after each
      -- word, so `len + #words[1]` is the line length if the word is added.
      local len = 0
      -- Note: a word that exactly fills an empty line is already covered by
      -- `<=`; no extra `or` clause is needed (and one would be evaluated even
      -- when `words[1]` is nil, because `and` binds tighter than `or`).
      while words[1] and (len + #words[1] <= size) do
        -- skip empty entries (presumably produced by consecutive whitespace
        -- in the split), but still consume them
        if words[1] ~= "" then
          line[#line+1] = words[1]
          len = len + #words[1] + 1
        end
        t_remove(words, 1)
      end
    end
    return strip(concat(line, " ")), words
  end

  --- format a paragraph into lines so that they fit into a line width.
  -- It will not break long words by default, so lines can be over the length
  -- to that extent. Line breaks in the input are treated as spaces, and
  -- leading/trailing whitespace is removed. An empty (or whitespace-only)
  -- input results in a list holding a single empty string.
  -- @tparam string s the string to format
  -- @tparam[opt=70] integer width the margin width
  -- @tparam[opt=false] boolean breaklong if truthy, words longer than the width given will be forced split.
  -- @return a list of lines (List object), use `fill` to return a string instead of a `List`.
  -- @raise if `s` is not a string, or if `breaklong` is set and `width` is less than 1.
  -- @see pl.List
  -- @see fill
  text.wrap = function(s, width, breaklong)
    assert_arg(1, s, 'string')
    width = width or 70
    if breaklong and width < 1 then
      -- a piece of zero (or negative) length never shortens the word, so
      -- splitting would never terminate
      error("width must be at least 1 when breaking long words", 2)
    end
    s = s:gsub('\n',' ') -- remove line breaks
    s = strip(s) -- remove leading/trailing whitespace
    if s == "" then
      return makelist({ "" })
    end
    local out = {}
    local words = split(s, "%s")
    while words[1] do
      local line
      line, words = buildline(words, width, breaklong)
      out[#out+1] = line
    end
    return makelist(out)
  end
end

--- format a paragraph so that it fits into a line width.
-- The lines produced by `wrap` are joined with newlines, and the result
-- always ends with a trailing newline.
-- @tparam string s the string to format
-- @tparam[opt=70] integer width the margin width
-- @tparam[opt=false] boolean breaklong if truthy, words longer than the width given will be forced split.
-- @return a string, use `wrap` to return a list of lines instead of a string.
-- @see wrap
function text.fill (s,width,breaklong)
  return concat(text.wrap(s,width,breaklong),'\n') .. '\n'
end


Expand Down
136 changes: 105 additions & 31 deletions spec/text_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -165,34 +165,6 @@ three

describe("fill()/wrap()", function()

it("word-wraps a text", function()
assert.equal([[
It is often said of Lua
that it does not include
batteries. That is because
the goal of Lua is to
produce a lean expressive
language that will be
used on all sorts of
machines, (some of which
don't even have hierarchical
filesystems). The Lua
language is the equivalent
of an operating system
kernel; the creators
of Lua do not see it
as their responsibility
to create a full software
ecosystem around the
language. That is the
role of the community.
]], text.fill("It is often said of Lua that it does not include batteries. That is because the goal of Lua is to produce a lean expressive language that will be used on all sorts of machines, (some of which don't even have hierarchical filesystems). The Lua language is the equivalent of an operating system kernel; the creators of Lua do not see it as their responsibility to create a full software ecosystem around the language. That is the role of the community.", 20))
end)

it("wraps single letters", function()
assert.same({"a"}, text.wrap("a"))
end)

it("wraps width over limit", function()
assert.same({
"abc",
Expand All @@ -205,34 +177,136 @@ role of the community.
"abc",
"def"
}, text.wrap("abc def", 3))
assert.same({
"a c",
"d f"
}, text.wrap("a c d f", 3))
end)

it("wraps single letters", function()
assert.same({"a"}, text.wrap("a"))
end)

it("wraps empty strings", function()
assert.same({""}, text.wrap(""))
assert.same({""}, text.wrap(" "))
end)

it("handles leading/trailing whitespace", function()
assert.same({"hello"}, text.wrap(" hello ", 10))
assert.same({"hello"}, text.wrap(" hello ", 2))
assert.same({"he", "ll", "o"}, text.wrap(" hello ", 2, true))
end)

it("handles line-breaks", function()
assert.same({"Hello", "Dolly"}, text.wrap("Hello\nDolly", 10))
assert.same({"Hello Dolly"}, text.wrap("Hello\nDolly", 20))
end)

it("doesn't split on accented characters", function()
assert.same({"àbcdéfghîj"}, text.wrap("àbcdéfghîj"))
end)

it("word-wraps a text", function()
  -- Show strings as binary dumps in failure output, which makes whitespace
  -- differences visible. The formatter is registered on the shared `assert`
  -- object, so remove it again when this test finishes to keep it from
  -- affecting the failure output of later tests.
  local binstring = require("luassert.formatters.binarystring")
  assert:add_formatter(binstring)
  finally(function()
    assert:remove_formatter(binstring)
  end)

  assert.equal([[
It is often said of
Lua that it does not
include batteries.
That is because the
goal of Lua is to
produce a lean
expressive language
that will be used on
all sorts of
machines, (some of
which don't even
have hierarchical
filesystems). The
Lua language is the
equivalent of an
operating system
kernel; the creators
of Lua do not see it
as their
responsibility to
create a full
software ecosystem
around the language.
That is the role of
the community.
]], text.fill("It is often said of Lua that it does not include batteries. That is because the goal of Lua is to produce a lean expressive language that will be used on all sorts of machines, (some of which don't even have hierarchical filesystems). The Lua language is the equivalent of an operating system kernel; the creators of Lua do not see it as their responsibility to create a full software ecosystem around the language. That is the role of the community.", 20))
end)


it("generic wrap test", function()
local t = [[
hello "world" 'this' -is- a bb ccc dddd test... but wouldn't it pass??? final. word-that-can-be-broken
hello "world" 'this' -is- a bb ccc dddd test... but wouldn't it pass??? final. word-that-can-be-broken
]]

assert.same({
"hello",
'"world"',
"'this'",
"-is-",
"a bb",
"a",
"bb",
"ccc",
"dddd",
"test...",
"but",
"wouldn't",
"it pass???",
"it",
"pass???",
"final.",
"word-that-can-be-broken",
}, text.wrap(t, 3))
end)

it("generic wrap test, with overflow breaking", function()
local t = [[
hello "world" 'this' -is- a bb ccc dddd test... but wouldn't it pass??? final. word-that-can-be-broken
]]

assert.same({
"hel",
"lo",
'"wo',
'rld',
'"',
"'th",
"is'",
"-is",
"- a",
"bb",
"ccc",
"ddd",
"d",
"tes",
"t..",
".",
"but",
"wou",
"ldn",
"'t",
"it",
"pas",
"s??",
"?",
"fin",
"al.",
"wor",
"d-t",
"hat",
"-ca",
"n-b",
"e-b",
"rok",
"en",
}, text.wrap(t, 3, true))
end)

end)

end)
Expand Down

0 comments on commit b60cf6a

Please sign in to comment.