diff --git a/lua/hlchunk/mods/chunk/init.lua b/lua/hlchunk/mods/chunk/init.lua index 064ddd0..3b1329e 100644 --- a/lua/hlchunk/mods/chunk/init.lua +++ b/lua/hlchunk/mods/chunk/init.lua @@ -78,46 +78,117 @@ function ChunkMod:get_chunk_data(range, virt_text_list, row_list, virt_text_win_ local start_col = math.max(math.min(beg_blank_len, end_blank_len) - self.meta.shiftwidth, 0) if beg_blank_len > 0 then - local virt_text_len = beg_blank_len - start_col - local beg_virt_text = self.conf.chars.left_top - .. self.conf.chars.horizontal_line:rep(virt_text_len - 2) - .. self.conf.chars.left_arrow - local virt_text, virt_text_win_col = chunkHelper.calc(beg_virt_text, start_col, self.meta.leftcol) - local char_list = fn.reverse(utf8Split(virt_text)) - vim.list_extend(virt_text_list, char_list) - vim.list_extend(row_list, vim.fn["repeat"]({ range.start }, #char_list)) - vim.list_extend(virt_text_win_col_list, rangeFromTo(virt_text_win_col + #char_list - 1, virt_text_win_col, -1)) + local virt_text_width = beg_blank_len - start_col + local left_top_width = chunkHelper.virtTextStrWidth(self.conf.chars.left_top, self.meta.shiftwidth) + local left_arrow_width = chunkHelper.virtTextStrWidth(self.conf.chars.left_arrow, self.meta.shiftwidth) + ---@type string + local beg_virt_text + if left_top_width + left_arrow_width <= virt_text_width then + -- ╭─>` to indent, we should make sure the line would not encounter the indent char - for i = 1, mid_char_nums do - local char = Pos.get_char_at_pos(Pos(range.bufnr, range.start + i, start_col), self.meta.shiftwidth) - if not char:match("%s") and #char ~= 0 then + for i = 1, mid_row_nums do + local line = cFunc.get_line(range.bufnr, range.start + i) + local vertical_line_width = + -- here we need to stop virtTextStrWidth at NULL; + -- "mid" virtual texts are not separated and they will be terminated on NULL + chunkHelper.virtTextStrWidth(self.conf.chars.vertical_line, self.meta.shiftwidth, true) + local end_col = start_col + vertical_line_width + if not chunkHelper.checkCellsBlank(line, start_col + 1, end_col, self.meta.shiftwidth) then chars[i] = "" end end end - vim.list_extend(virt_text_list, chars) + chunkHelper.list_extend(virt_text_list, chars) if end_blank_len > 0 then - local virt_text_len = end_blank_len - start_col - local end_virt_text = self.conf.chars.left_bottom - .. self.conf.chars.horizontal_line:rep(virt_text_len - 2) - .. self.conf.chars.right_arrow - local virt_text, virt_text_win_col = chunkHelper.calc(end_virt_text, start_col, self.meta.leftcol) + local virt_text_width = end_blank_len - start_col + local left_bottom_width = chunkHelper.virtTextStrWidth(self.conf.chars.left_bottom, self.meta.shiftwidth) + local right_arrow_width = chunkHelper.virtTextStrWidth(self.conf.chars.right_arrow, self.meta.shiftwidth) + ---@type string + local end_virt_text + if left_bottom_width + right_arrow_width <= virt_text_width then + -- │ + -- ╰─>} + -- │ + -- ╰>} + end_virt_text = self.conf.chars.left_bottom + .. chunkHelper.repeatToWidth( + self.conf.chars.horizontal_line, + virt_text_width - left_bottom_width - right_arrow_width, + self.meta.shiftwidth + ) + .. self.conf.chars.right_arrow + elseif left_bottom_width <= virt_text_width then + -- │ + -- ╰─} + -- │ + -- ╰} + end_virt_text = self.conf.chars.left_bottom + .. chunkHelper.repeatToWidth( + self.conf.chars.horizontal_line, + virt_text_width - left_bottom_width, + self.meta.shiftwidth + ) + else + -- │ + -- } + end_virt_text = string.rep(" ", virt_text_width) + end + local virt_text, virt_text_win_col = + chunkHelper.calc(end_virt_text, start_col, self.meta.leftcol, self.meta.shiftwidth) local char_list = utf8Split(virt_text) - vim.list_extend(virt_text_list, char_list) - vim.list_extend(row_list, vim.fn["repeat"]({ range.finish }, virt_text_len)) - vim.list_extend(virt_text_win_col_list, rangeFromTo(virt_text_win_col, virt_text_win_col + virt_text_len - 1)) + chunkHelper.list_extend(virt_text_list, char_list) + chunkHelper.list_extend(row_list, chunkHelper.repeated(range.finish, #char_list)) + chunkHelper.list_extend( + virt_text_win_col_list, + chunkHelper.getColList(char_list, virt_text_win_col, self.meta.shiftwidth) + ) end end diff --git a/lua/hlchunk/utils/chunkHelper.lua b/lua/hlchunk/utils/chunkHelper.lua index e4415af..6f217ce 100644 --- a/lua/hlchunk/utils/chunkHelper.lua +++ b/lua/hlchunk/utils/chunkHelper.lua @@ -87,6 +87,67 @@ local function get_chunk_range_by_treesitter(pos) return chunkHelper.CHUNK_RANGE_RET.NO_CHUNK, Scope(pos.bufnr, -1, -1) end +---@param char string +---@param shiftwidth integer +---@return integer +local function virt_text_char_width(char, shiftwidth) + local b1 = char:byte(1) + if b1 == 0x00 then + -- NULL is treated as a terminator when used in virtual text + return 0 + elseif b1 == 0x09 then + return shiftwidth + elseif b1 <= 0x1F or b1 == 0x7F then + -- ASCII control chars other than NULL and TAB are two cells wide + return 2 + elseif b1 <= 0x7F then + -- other ASCII chars are single cell wide + return 1 + else + return vim.api.nvim_strwidth(char) + end +end + +---faster alternative to `vim.fn.reverse()` +---unlike the original, this only supports lists +---@generic T +---@param list T[] +---@return T[] +function chunkHelper.listReverse(list) + local dst = {} + for i, v in ipairs(list) do + dst[#list + 1 - i] = v + end + return dst +end + +---faster alternative to `vim.fn.repeat()` +---unlike the original, the input will be repeated as-is and the output will always be a list +---@generic T +---@param input T +---@param count integer +---@return T[] +function chunkHelper.repeated(input, count) + local dst = {} + for i = 1, count do + dst[i] = input + end + return dst +end + +---faster alternative to `vim.list_extend()` (mutates dst!) +---unlike the original, this function lacks validation and range support +---@generic T +---@param dst T[] +---@param src T[] +---@return T[] dst +function chunkHelper.list_extend(dst, src) + for i = 1, #src do + dst[#dst + 1] = src[i] + end + return dst +end + ---@param opts? {pos: Pos, use_treesitter: boolean} ---@return CHUNK_RANGE_RETCODE enum ---@return Scope @@ -100,8 +161,13 @@ function chunkHelper.get_chunk_range(opts) end end -function chunkHelper.calc(str, col, leftcol) - local len = vim.api.nvim_strwidth(str) +---@param str string +---@param col integer +---@param leftcol integer +---@param shiftwidth integer +---@return string, integer +function chunkHelper.calc(str, col, leftcol, shiftwidth) + local len = chunkHelper.virtTextStrWidth(str, shiftwidth) if col < leftcol then local byte_idx = math.min(leftcol - col, len) local utf_beg = vim.str_byteindex(str, byte_idx) @@ -113,10 +179,12 @@ function chunkHelper.calc(str, col, leftcol) return str, col end +---@param inputstr string +---@return string[] function chunkHelper.utf8Split(inputstr) local list = {} for uchar in string.gmatch(inputstr, "[^\128-\191][\128-\191]*") do - table.insert(list, uchar) + list[#list + 1] = uchar end return list end @@ -128,11 +196,81 @@ function chunkHelper.rangeFromTo(i, j, step) local t = {} step = step or 1 for x = i, j, step do - table.insert(t, x) + t[#t + 1] = x + end + return t +end + +---@param char_list string[] +---@param leftcol integer +---@param shiftwidth integer +---@return integer[] +function chunkHelper.getColList(char_list, leftcol, shiftwidth) + local t = {} + local next_col = leftcol + for i = 1, #char_list do + t[#t + 1] = next_col + next_col = next_col + virt_text_char_width(char_list[i], shiftwidth) end return t end +---@param str string +---@param width integer +---@param shiftwidth integer +function chunkHelper.repeatToWidth(str, width, shiftwidth) + local str_width = chunkHelper.virtTextStrWidth(str, shiftwidth) + + -- "1" -> "1111" + if str_width == 1 then + return str:rep(width) + end + + -- "12" -> "1212" + if width % str_width == 0 then + return str:rep(width / str_width) + end + + -- "12" -> "12121" + -- "1" -> "11 " + -- "⏻ " -> "⏻ ⏻ " + local repeatable_len = math.floor(width / str_width) + local s = str:rep(repeatable_len) + local chars = chunkHelper.utf8Split(str) + local current_width = str_width * repeatable_len + local i = 1 + while i <= #chars do + local char_width = virt_text_char_width(chars[i], shiftwidth) + ---if true, the char is assumed to be an out-of-bounds char (like in nerd fonts), followed by a whitespace + local likely_oob_char = + -- single-cell + char_width == 1 + -- followed by a whitespace + and chars[i + 1] == " " + -- non-ASCII + and chars[i]:byte(1) > 0x7F + local char = likely_oob_char and chars[i] .. " " or chars[i] + local next_width = current_width + (likely_oob_char and 2 or char_width) + if next_width < width then + s = s .. char + current_width = next_width + elseif next_width == width then + s = s .. char + break + else + s = s .. string.rep(" ", width - current_width) + break + end + if likely_oob_char then + -- skip the whitespace part of out-of-bounds char + " " + i = i + 2 + else + i = i + 1 + end + end + return s +end + function chunkHelper.shallowCmp(t1, t2) if #t1 ~= #t2 then return false @@ -147,4 +285,129 @@ function chunkHelper.shallowCmp(t1, t2) return flag end +---@param line string +---@param start_col integer +---@param end_col integer +---@param shiftwidth integer +---@return boolean +function chunkHelper.checkCellsBlank(line, start_col, end_col, shiftwidth) + local current_col = 1 + local current_char = 1 + local chars = chunkHelper.utf8Split(line) + while current_char <= #chars and current_col <= end_col do + local char = chars[current_char] + local b1, b2, b3 = char:byte(1, 3) + ---@type integer + local next_col + local next_char = current_char + 1 + if char == " " then + next_col = current_col + 1 + elseif char == "\t" then + next_col = current_col + shiftwidth + elseif b1 <= 0x1F or char == "\127" then + -- despite nvim_strwidth returning 0 or 1, ASCII control chars are 2 cells wide + next_col = current_col + 2 + elseif b1 <= 0x7F then + -- other ASCII chars are single cell wide + next_col = current_col + 1 + else + local char_width = vim.api.nvim_strwidth(char) + if char_width == 1 and chars[current_char + 1] == " " then + -- the char is assumed to be an out-of-bounds char (like in nerd fonts), + -- followed by a whitespace + next_col = current_col + 2 + -- skip the whitespace part of out-of-bounds char + " " + next_char = next_char + 1 + else + next_col = current_col + char_width + end + end + -- we're going to match these characters manually + -- as we can't use "%s" to check blank cells + -- (e.g. "%s" matches to "\v" but it will be printed as ^K) + if + (current_col >= start_col or next_col - 1 >= start_col) + -- Indent characters + -- + -- Unicode Scripts Z* + -- 0020 - SPACE + and char ~= " " + -- + -- Unicode Scripts C* + -- 0009 - TAB + -- control characters except TAB should be rendered like "^[" or "<200b>" + and char ~= " " + -- + -- Non indent characters + -- + -- Unicode Scripts Z* + -- 00A0 - NO-BREAK SPACE + and char ~= " " + --[[ + -- 1680 - OGHAM SPACE MARK + -- usually rendered as "-" + -- see https://www.unicode.org/charts/PDF/U1680.pdf + and char ~= " " + ]] + -- 2000..200A - EN QUAD..HAIR SPACE + -- " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " " + and not (b1 == 0xe2 and b2 == 0x80 and b3 >= 0x80 and b3 <= 0x8a) + -- 202F - NARROW NO-BREAK SPACE + and char ~= " " + -- 205F - MEDIUM MATHEMATICAL SPACE + and char ~= " " + -- 3000 - IDEOGRAPHIC SPACE + and char ~= " " + --[[ + -- 2028 - LINE SEPARATOR + -- some fonts lacks this and may render it as "?" or "█" + -- as this character is usually treated as a line-break + and char ~= "
" + ]] + --[[ + -- 2029 - PARAGRAPH SEPARATOR + -- some fonts lacks this and may render it as "?" or "█" + -- as this character is usually treated as a line-break + and char ~= "
" + ]] + -- + -- Others + -- + -- 2800 - BRAILLE PATTERN BLANK + and char ~= "⠀" + --[[ + -- 3164 - HANGUL FILLER + -- technically "blank" but can easily break the rendering + and "\227\133\164" -- do not replace this with a literal notation + ]] + --[[ + -- FFA0 - HALFWIDTH HANGUL FILLER + -- technically "blank" but can easily break the rendering + and "\239\190\160" -- do not replace this with a literal notation + ]] + then + return false + end + current_col = next_col + current_char = next_char + end + return true +end + +---@param str string +---@param shiftwidth integer +---@param stop_on_null? boolean +---@return integer +function chunkHelper.virtTextStrWidth(str, shiftwidth, stop_on_null) + local current_width = 0 + for _, char in ipairs(chunkHelper.utf8Split(str)) do + if stop_on_null and char == "\0" then + -- NULL is treated as a terminator when used in virtual text + return current_width + end + current_width = current_width + virt_text_char_width(char, shiftwidth) + end + return current_width +end + return chunkHelper diff --git a/test/features/chunkHelper_spec.lua b/test/features/chunkHelper_spec.lua index 9288110..7c39031 100644 --- a/test/features/chunkHelper_spec.lua +++ b/test/features/chunkHelper_spec.lua @@ -23,7 +23,7 @@ describe("indentHelper", function() expect_res = testCase.render_res expect_offset = testCase.offset - local render_res, render_offset = chunkHelper.calc(str, col, leftcol) + local render_res, render_offset = chunkHelper.calc(str, col, leftcol, 1) assert.equals(render_res, expect_res) assert.equals(render_offset, expect_offset) end @@ -84,4 +84,177 @@ describe("indentHelper", function() assert.equals(res, testCase.res) end end) + + it("getColList happy path", function() + local inputList = { + { char_list = { "a", "b", "c" }, text_width = 3, leftcol = 0, res = { 0, 1, 2 } }, + { char_list = { "a", "b", "c" }, text_width = 3, leftcol = 2, res = { 2, 3, 4 } }, + -- ascii gt + { char_list = { "╰", "─", "─", ">" }, text_width = 4, leftcol = 0, res = { 0, 1, 2, 3 } }, + -- unicode box drawings light left + { char_list = { "╰", "─", "─", "╴" }, text_width = 4, leftcol = 2, res = { 2, 3, 4, 5 } }, + -- nerdfont nf-fa-arrow_circle_right + whitespace + { char_list = { "╰", "─", "", " " }, text_width = 4, leftcol = 4, res = { 4, 5, 6, 7 } }, + -- cjk + { char_list = { "你", "好" }, text_width = 4, leftcol = 0, res = { 0, 2 } }, + -- emoji + { char_list = { ">", "⏩", ">" }, text_width = 4, leftcol = 2, res = { 2, 3, 5 } }, + { + char_list = { "1", "⏫", "-", "2", "3" }, + text_width = 5, + leftcol = 0, + res = { 0, 1, 3, 4, 5 }, + }, + } + + for _, testCase in ipairs(inputList) do + local res = chunkHelper.getColList(testCase.char_list, testCase.leftcol, 1) + assert.same(res, testCase.res) + end + end) + + it("repeatToWidth happy path", function() + local inputList = { + { str = "1", width = 4, res = "1111" }, + { str = "12", width = 4, res = "1212" }, + { str = "12", width = 9, res = "121212121" }, + { str = "1", width = 1, res = " " }, + { str = "1", width = 9, res = "1111 " }, + { str = "12", width = 9, res = "121212" }, + { str = "12", width = 9, res = "121212" }, + { str = "12", width = 10, res = "121212 " }, + { str = "12", width = 10, res = "1212121" }, + { str = "⏻ ", width = 8, res = "⏻ ⏻ ⏻ ⏻ " }, + { str = "⏻ ", width = 9, res = "⏻ ⏻ ⏻ ⏻ " }, + } + + for _, testCase in ipairs(inputList) do + local res = chunkHelper.repeatToWidth(testCase.str, testCase.width, 1) + assert.same(res, testCase.res) + end + end) + + it("listReverse happy path", function() + local inputList = { + { t = {}, res = {} }, + { t = { 1 }, res = { 1 } }, + { t = { 1, 2, 3 }, res = { 3, 2, 1 } }, + { t = { 1, 2, 3, 4 }, res = { 4, 3, 2, 1 } }, + } + + for _, testCase in ipairs(inputList) do + local res = chunkHelper.listReverse(testCase.t) + assert.same(res, testCase.res) + end + end) + + it("repeated happy path", function() + local inputList = { + { input = 1, repeat_to = 1, res = { 1 } }, + { input = 1, repeat_to = 3, res = { 1, 1, 1 } }, + } + + for _, testCase in ipairs(inputList) do + local res = chunkHelper.repeated(testCase.input, testCase.repeat_to) + assert.same(res, testCase.res) + end + end) + + it("checkCellsBlank happy path", function() + -- bunch of edge cases + local inputList = { + { line = "", start_col = 1, end_col = 4, shiftwidth = 4, res = true }, + { line = "", start_col = 3, end_col = 3, shiftwidth = 4, res = true }, + { line = " ", start_col = 1, end_col = 4, shiftwidth = 4, res = true }, + { line = " ", start_col = 3, end_col = 3, shiftwidth = 4, res = true }, + { line = " a", start_col = 1, end_col = 4, shiftwidth = 4, res = true }, + { line = " a", start_col = 3, end_col = 3, shiftwidth = 4, res = true }, + { line = "a ", start_col = 2, end_col = 5, shiftwidth = 4, res = true }, + { line = "a a", start_col = 2, end_col = 5, shiftwidth = 4, res = true }, + { line = "  a", start_col = 1, end_col = 5, shiftwidth = 4, res = true }, + { line = "  a", start_col = 1, end_col = 6, shiftwidth = 4, res = false }, + { line = "a  a", start_col = 2, end_col = 5, shiftwidth = 4, res = true }, + { line = "a  a", start_col = 2, end_col = 6, shiftwidth = 4, res = false }, + { line = "     a", start_col = 1, end_col = 5, shiftwidth = 4, res = true }, + { line = "     a", start_col = 1, end_col = 6, shiftwidth = 4, res = false }, + { line = "a    a", start_col = 2, end_col = 5, shiftwidth = 4, res = true }, + { line = "a    a", start_col = 2, end_col = 6, shiftwidth = 4, res = false }, + { line = "aaaa a", start_col = 5, end_col = 5, shiftwidth = 4, res = true }, + { line = "aaaa a", start_col = 5, end_col = 6, shiftwidth = 4, res = false }, + { line = "a你a a", start_col = 5, end_col = 5, shiftwidth = 4, res = true }, + { line = "a你a a", start_col = 5, end_col = 6, shiftwidth = 4, res = false }, + { line = "aa  a", start_col = 2, end_col = 5, shiftwidth = 4, res = false }, + { line = "aa  a", start_col = 3, end_col = 3, shiftwidth = 4, res = true }, + { line = "aa  a", start_col = 3, end_col = 5, shiftwidth = 4, res = true }, + { line = "aa  a", start_col = 3, end_col = 6, shiftwidth = 4, res = false }, + { line = "aa  a", start_col = 4, end_col = 4, shiftwidth = 4, res = true }, + { line = "\ta ", start_col = 1, end_col = 4, shiftwidth = 4, res = true }, + { line = "\ta ", start_col = 1, end_col = 4, shiftwidth = 3, res = false }, + { line = " \ta", start_col = 1, end_col = 4, shiftwidth = 3, res = true }, + { line = "\0 a", start_col = 1, end_col = 5, shiftwidth = 4, res = false }, + { line = "\0 a", start_col = 2, end_col = 5, shiftwidth = 4, res = false }, + { line = "\0 a", start_col = 3, end_col = 5, shiftwidth = 4, res = true }, + { line = "你  a", start_col = 1, end_col = 5, shiftwidth = 4, res = false }, + { line = "你  a", start_col = 2, end_col = 5, shiftwidth = 4, res = false }, + { line = "你  a", start_col = 3, end_col = 5, shiftwidth = 4, res = true }, + { line = "   a", start_col = 1, end_col = 5, shiftwidth = 4, res = false }, + { line = "   a", start_col = 2, end_col = 5, shiftwidth = 4, res = false }, + { line = "   a", start_col = 3, end_col = 5, shiftwidth = 4, res = true }, + { line = "你  好", start_col = 3, end_col = 5, shiftwidth = 4, res = true }, + { line = "你  好", start_col = 2, end_col = 5, shiftwidth = 4, res = false }, + { line = "你  好", start_col = 3, end_col = 6, shiftwidth = 4, res = false }, + { line = "    ", start_col = 3, end_col = 5, shiftwidth = 4, res = true }, + { line = "    ", start_col = 2, end_col = 5, shiftwidth = 4, res = false }, + { line = "    ", start_col = 3, end_col = 6, shiftwidth = 4, res = false }, + } + + for _, testCase in ipairs(inputList) do + local res = + chunkHelper.checkCellsBlank(testCase.line, testCase.start_col, testCase.end_col, testCase.shiftwidth) + assert.same(res, testCase.res) + end + end) + + it("virtTextStrWidth happy path", function() + local inputList = { + { input = "\0", shiftwidth = 4, res = 0 }, + { input = "\1", shiftwidth = 4, res = 2 }, + { input = "\127", shiftwidth = 4, res = 2 }, + { input = " ", shiftwidth = 4, res = 1 }, + { input = "\t", shiftwidth = 4, res = 4 }, + { input = "a", shiftwidth = 4, res = 1 }, + { input = "A", shiftwidth = 4, res = 1 }, + { input = "你", shiftwidth = 4, res = 2 }, + { input = " ", shiftwidth = 4, res = 2 }, + { input = "\0\0", shiftwidth = 4, res = 0 }, + { input = "\1\1", shiftwidth = 4, res = 4 }, + { input = "\127\127", shiftwidth = 4, res = 4 }, + { input = " ", shiftwidth = 4, res = 2 }, + { input = "\t\t", shiftwidth = 4, res = 8 }, + { input = "ab", shiftwidth = 4, res = 2 }, + { input = "AB", shiftwidth = 4, res = 2 }, + { input = "你好", shiftwidth = 4, res = 4 }, + { input = "  ", shiftwidth = 4, res = 4 }, + { input = "a\0b", shiftwidth = 4, stop_on_null = false, res = 2 }, + { input = "a\0b", shiftwidth = 4, stop_on_null = true, res = 1 }, + } + + for _, testCase in ipairs(inputList) do + local res = chunkHelper.virtTextStrWidth(testCase.input, testCase.shiftwidth, testCase.stop_on_null) + assert.same(res, testCase.res) + end + end) + + it("list_extend happy path", function() + local inputList = { + { dst = { 1, 2, 3 }, src = {}, res = { 1, 2, 3 } }, + { dst = {}, src = { 4, 5, 6 }, res = { 4, 5, 6 } }, + { dst = { 1, 2, 3 }, src = { 4, 5, 6 }, res = { 1, 2, 3, 4, 5, 6 } }, + } + + for _, testCase in ipairs(inputList) do + chunkHelper.list_extend(testCase.dst, testCase.src) + assert.same(testCase.dst, testCase.res) + end + end) end)