From ed2d32f09d949eb43673a3681ee3ce1b0d6e694b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 27 Jun 2017 12:57:21 +0200 Subject: [PATCH 1/4] Make `getindex` for `String` check if all indices are valid Closes #22548 fixes a bug with use of prevind in dates/io.jl --- NEWS.md | 3 +++ base/dates/io.jl | 2 +- base/repl/LineEdit.jl | 2 +- base/repl/REPL.jl | 2 +- base/strings/string.jl | 13 +++++++++---- test/strings/basic.jl | 5 +++-- 6 files changed, 18 insertions(+), 9 deletions(-) diff --git a/NEWS.md b/NEWS.md index 28cd3d2cfd618..7a82d02ca9224 100644 --- a/NEWS.md +++ b/NEWS.md @@ -100,6 +100,9 @@ Breaking changes This section lists changes that do not have deprecation warnings. + * `getindex(s::String, r::UnitRange{Int})` now throws `UnicodeError` if `last(r)` + is not a valid index into `s` ([#22572]). + * `ntuple(f, n::Integer)` throws `ArgumentError` if `n` is negative. Previously an empty tuple was returned ([#21697]). diff --git a/base/dates/io.jl b/base/dates/io.jl index 78f9ef4bb5346..fdff3d9204386 100644 --- a/base/dates/io.jl +++ b/base/dates/io.jl @@ -330,7 +330,7 @@ function DateFormat(f::AbstractString, locale::DateLocale=ENGLISH) letters = String(collect(keys(CONVERSION_SPECIFIERS))) for m in eachmatch(Regex("(?= 0 ? llength : strwidth(l[1:(line_pos + slength)])) + num_chars = (line_pos >= 0 ? llength : strwidth(l[1:prevind(l, line_pos + slength + 1)])) curs_row, curs_pos = divrem(lindent + num_chars - 1, cols) curs_row += cur_row curs_pos += 1 diff --git a/base/repl/REPL.jl b/base/repl/REPL.jl index ed793924fa0c8..b14a8a9c1427a 100644 --- a/base/repl/REPL.jl +++ b/base/repl/REPL.jl @@ -882,7 +882,7 @@ function setup_interface( end # Check if input line starts with "julia> ", remove it if we are in prompt paste mode jl_prompt_len = 7 - if (firstline || isprompt_paste) && (oldpos + jl_prompt_len <= sizeof(input) && input[oldpos:oldpos+jl_prompt_len-1] == JULIA_PROMPT) + if (firstline || isprompt_paste) && startswith(SubString(input, oldpos), JULIA_PROMPT) isprompt_paste = true oldpos += jl_prompt_len # If we are prompt pasting and current statement does not begin with julia> , skip to next line diff --git a/base/strings/string.jl b/base/strings/string.jl index efccebda556d0..eedf01bc86241 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -235,8 +235,8 @@ isvalid(s::String, i::Integer) = function getindex(s::String, r::UnitRange{Int}) isempty(r) && return "" - i, j = first(r), last(r) l = sizeof(s) + i = first(r) if i < 1 || i > l throw(BoundsError(s, i)) end @@ -244,11 +244,16 @@ function getindex(s::String, r::UnitRange{Int}) if is_valid_continuation(si) throw(UnicodeError(UTF_ERR_INVALID_INDEX, i, si)) end + j = last(r) if j > l - throw(BoundsError()) + throw(BoundsError(s, j)) end - j = nextind(s,j)-1 - unsafe_string(pointer(s,i), j-i+1) + @inbounds sj = codeunit(s, j) + if is_valid_continuation(sj) + throw(UnicodeError(UTF_ERR_INVALID_INDEX, j, sj)) + end + j = nextind(s,j) + unsafe_string(pointer(s,i), j-i) end function search(s::String, c::Char, i::Integer = 1) diff --git a/test/strings/basic.jl b/test/strings/basic.jl index f8ce84d56c402..9ca4d9e7f8386 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -159,9 +159,10 @@ end @test first('\x00':'\x7f') === '\x00' @test last('\x00':'\x7f') === '\x7f' -# make sure substrings handle last code unit even if not start of codepoint +# make sure substrings do not accept code unit if it is not start of codepoint let s = "x\u0302" - @test s[1:3] == s + @test_throws UnicodeError s[1:3] + @test s[1:2]==s end # issue #9781 From 8fb07dd2f8b56cf923baedd96f2119ae3ea511c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 19 Sep 2017 10:13:38 +0200 Subject: [PATCH 2/4] correct string indexing in REPLCompletions --- base/repl/REPLCompletions.jl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/base/repl/REPLCompletions.jl b/base/repl/REPLCompletions.jl index 1dfd0690b6375..71ba0bd523d5a 100644 --- a/base/repl/REPLCompletions.jl +++ b/base/repl/REPLCompletions.jl @@ -482,9 +482,11 @@ function completions(string, pos) paths, r, success = complete_path(replace(string[r], r"\\ ", " "), pos) if inc_tag == :string && - length(paths) == 1 && # Only close if there's a single choice, - !isdir(expanduser(replace(string[startpos:start(r)-1] * paths[1], r"\\ ", " "))) && # except if it's a directory - (length(string) <= pos || string[pos+1] != '"') # or there's already a " at the cursor. + length(paths) == 1 && # Only close if there's a single choice, + !isdir(expanduser(replace(string[startpos:prevind(string, start(r))] * paths[1], + r"\\ ", " "))) && # except if it's a directory + (length(string) <= pos || + string[nextind(string,pos)] != '"') # or there's already a " at the cursor. paths[1] *= "\"" end @@ -534,10 +536,11 @@ function completions(string, pos) # /src/.jl # .jl/src/.jl if isfile(joinpath(dir, pname)) - endswith(pname, ".jl") && push!(suggestions, pname[1:end-3]) + endswith(pname, ".jl") && push!(suggestions, + pname[1:prevind(pname, end-2)]) else mod_name = if endswith(pname, ".jl") - pname[1:end - 3] + pname[1:prevind(pname, end-2)] else pname end From fcc68dfb6b96073bdc6214f23d64f26c51284d29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 19 Sep 2017 16:23:15 +0200 Subject: [PATCH 3/4] properly handle sizeof(needle) --- base/repl/REPLCompletions.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/base/repl/REPLCompletions.jl b/base/repl/REPLCompletions.jl index 71ba0bd523d5a..cac5384764560 100644 --- a/base/repl/REPLCompletions.jl +++ b/base/repl/REPLCompletions.jl @@ -11,7 +11,11 @@ function completes_global(x, name) end function appendmacro!(syms, macros, needle, endchar) - append!(syms, s[2:end-sizeof(needle)]*endchar for s in filter(x -> endswith(x, needle), macros)) + r = Regex("^.(.*)$needle\$") + for s in macros + m = match(r, s) + m === nothing || push!(syms, m[1]*endchar) + end end function filtered_mod_names(ffunc::Function, mod::Module, name::AbstractString, all::Bool=false, imported::Bool=false) From 9ccb13c639a68d3a9a3347d8eda38e5b07e8bdd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 19 Sep 2017 21:01:13 +0200 Subject: [PATCH 4/4] fixed unescaped regexp --- base/repl/REPLCompletions.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/base/repl/REPLCompletions.jl b/base/repl/REPLCompletions.jl index cac5384764560..6ba64e7a2152e 100644 --- a/base/repl/REPLCompletions.jl +++ b/base/repl/REPLCompletions.jl @@ -11,10 +11,12 @@ function completes_global(x, name) end function appendmacro!(syms, macros, needle, endchar) - r = Regex("^.(.*)$needle\$") for s in macros - m = match(r, s) - m === nothing || push!(syms, m[1]*endchar) + if endswith(s, needle) + from = nextind(s, start(s)) + to = prevind(s, sizeof(s)-sizeof(needle)+1) + push!(syms, s[from:to]*endchar) + end end end