Skip to content

Commit

Permalink
Add Compat.readuntil
Browse files Browse the repository at this point in the history
This is to allow for the keep keyword argument.
  • Loading branch information
carlobaldassi committed Jun 15, 2018
1 parent b374312 commit 588c846
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 0 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,8 @@ Currently, the `@compat` macro supports the following syntaxes:

* `Compat.eachline` with `keep` keyword argument ([#25646])

* `Compat.readuntil` with `keep` keyword argument ([#25646])

* `take!` method for `Task`s since some functions now return `Channel`s instead of `Task`s ([#19841])

* The `isabstract`, `parameter_upper_bound`, `typename` reflection methods were added in Julia 0.6. This package re-exports these from the `Compat.TypeUtils` submodule. On earlier versions of julia, that module contains the same functions, but operating on the pre-0.6 type system representation.
Expand Down
155 changes: 155 additions & 0 deletions src/Compat.jl
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,161 @@ end
# chomp parameter preserved for compatibility with earlier Compat versions
function readline(s::IO=STDIN; chomp::Bool=true, keep::Bool=!chomp)
    # `chomp` only supplies the default for `keep`; the value handed to Base
    # is always the negation of `keep`.
    drop_delim = !keep
    return Base.readline(s; chomp=drop_delim)
end
function eachline(s; keep::Bool=false)
    # Translate the `keep` keyword into the `chomp` keyword that is passed to Base.
    drop_delim = !keep
    return Base.eachline(s; chomp=drop_delim)
end

# NOTE: the readuntil code is copy-pasted from 0.7 Base code.
# Back-porting changes:
# AbstractChar->Char
# added Base prefix where needed
# a small change in readuntil_vector! to avoid using @isdefined

# Reading from a pipe-like object is forwarded to its reader end
# (`Base.pipe_reader(io)`). One forwarding method is generated per delimiter
# type: UInt8, Char, AbstractString and AbstractVector.
for D in (:UInt8, :Char, :AbstractString, :AbstractVector)
    @eval function readuntil(io::Base.AbstractPipe, arg::$D; kw...)
        return readuntil(Base.pipe_reader(io), arg; kw...)
    end
end

# Read from `s` up to the byte `delim` and return the result as a `String`.
# `keep` is positional here and is forwarded as the `keep` keyword of `readuntil`.
# readuntil_string is useful below since it has
# an optimized method for s::IOStream
function readuntil_string(s::IO, delim::UInt8, keep::Bool)
    bytes = readuntil(s, delim, keep=keep)
    return String(bytes)
end

# Read characters from `s` until `delim` is found or the stream ends, and return
# what was read as a `String`.
#   s     - stream to read from
#   delim - character that terminates the read
#   keep  - when true the delimiter is included at the end of the result;
#           when false (default) it is consumed but not returned
function readuntil(s::IO, delim::Char; keep::Bool=false)
    # A delimiter in the ASCII range is a single byte, so the byte-based
    # helper can do the search.
    if delim <= '\x7f'
        return readuntil_string(s, delim % UInt8, keep)
    end
    # Otherwise decode one character at a time and collect the result.
    out = IOBuffer()
    while !eof(s)
        c = read(s, Char)
        if c == delim
            keep && write(out, c)
            break
        end
        write(out, c)
    end
    return String(take!(out))
end

# Read values of type `T` from `s` until one equals `delim` or the stream ends,
# and return them as a vector. With `keep=true` the delimiter is the last
# element of the result; otherwise it is consumed but left out.
function readuntil(s::IO, delim::T; keep::Bool=false) where T
    # Byte results use `StringVector`, every other element type a `Vector{T}`.
    collected = if T === UInt8
        StringVector(0)
    else
        Vector{T}()
    end
    while !eof(s)
        item = read(s, T)
        if !(item == delim)
            push!(collected, item)
            continue
        end
        # Delimiter found: optionally record it, then stop reading.
        keep && push!(collected, item)
        break
    end
    return collected
end

# Open the file named `filename`, run `readuntil` on the opened stream with the
# remaining positional and keyword arguments, and return its result.
function readuntil(filename::AbstractString, args...; kw...)
    return open(filename) do io
        readuntil(io, args...; kw...)
    end
end

# requires that indices for target are the integer unit range from firstindex to lastindex
# returns whether the delimiter was matched
# uses the Knuth–Morris–Pratt algorithm, with the first and second cache entries unrolled
# For longer targets, the cache improves the big-O efficiency of scanning of sequences
# with repeated patterns
# Each cache entry tells us which index we should start the search at.
# We assume this is unlikely, so we only lazy-initialize as much of the cache as we need to use
# When we allocate the cache, we initialize it to 0 (and offset by the first index afterwards).
# Suppose target is:
# Index: 1245689
# Value: "aδcaδcx"
# We would set the cache to
# 0 0 0 1 2 3 4 0
# So if we mismatch after the second aδc sequence,
# we can immediately jump back to index 5 (4 + 1).
#
# Arguments:
#   io     - stream that elements are read from, one `read(io, T)` at a time
#   target - the delimiter sequence to search for
#   keep   - if true, every element read (the delimiter included) is emitted to `out`;
#            if false, the elements of a completed match are not emitted
#   out    - sink for the result; fed with `write` if it is an IO, with `push!` otherwise
# Returns true if the whole target was matched (or the target is empty),
# false if the end of the stream was reached first.
function readuntil_vector!(io::IO, target::AbstractVector{T}, keep::Bool, out) where {T}
# NOTE: `first` and `last` are local index values in this function
first = firstindex(target)
last = lastindex(target)
len = last - first + 1
# an empty target counts as matched; nothing is read from io
if len < 1
return true
end
# number of leading target elements matched so far
pos = 0 # array-offset
# cache entries 1:max_pos are computed (entry 1 is the initial 0)
max_pos = 1 # array-offset in cache
# empty until the first time it is needed, see isempty(cache) below
cache = Int[] # changed from Base code
output! = (isa(out, IO) ? write : push!)
while !eof(io)
c = read(io, T)
# Backtrack until the next target character matches what was found
while true
c1 = target[pos + first]
if c == c1
# c extends the current partial match
pos += 1
break
elseif pos == 0
# c does not start a match either; it is emitted after this loop
break
elseif pos == 1
# unrolled case: only the first target element was pending;
# emit it (unless keep already emitted it) and retry c from the start
if !keep
output!(out, target[first])
end
pos = 0
else
# grow cache to contain up to `pos` (changed from Base code)
if isempty(cache)
resize!(cache, len)
fill!(cache, 0)
end
while max_pos < pos
ci = target[max_pos + first]
b = max_pos
max_pos += 1
# follow the chain of fallback positions until one can be extended by ci;
# if none can, the entry keeps its initial value 0
while b != 0
b = cache[b]
cb = target[b + first]
if ci == cb
cache[max_pos] = b + 1
break
end
end
end
# read new position from cache
pos1 = cache[pos]
if !keep
# and add the removed prefix from the target to the output
# if not always keeping the match
for b in 1:(pos - pos1)
output!(out, target[b - 1 + first])
end
end
pos = pos1
end
end
# with keep every element read is emitted; without it, only an element
# that is not part of a partial match
if keep || pos == 0
output!(out, c)
end
# the whole target has been matched
pos == len && return true
end
if !keep
# failed early without finishing the match,
# add the partial match to the output
# if not always keeping the match
for b in 1:pos
output!(out, target[b - 1 + first])
end
end
return false
end

# Read from `io` until the string `target` is found or the stream ends, and
# return what was read as a `String`. `keep` is forwarded to the byte-level
# `readuntil` methods that do the reading.
function readuntil(io::IO, target::AbstractString; keep::Bool=false)
    # An empty delimiter returns "" before anything is read.
    if isempty(target)
        return ""
    end
    # A delimiter made of exactly one ASCII character is a single-byte search.
    head = first(target)
    tail = Iterators.drop(target, 1)
    if isempty(tail) && head <= '\x7f'
        return readuntil_string(io, head % UInt8, keep)
    end
    # Anything else is searched for as a sequence of code units; other
    # string types are converted to `String` first.
    plain = (target isa String || target isa SubString{String}) ? target : String(target)
    units = codeunits(plain)::AbstractVector
    return String(readuntil(io, units, keep=keep))
end

# Read elements of type `T` from `io` until the sequence `target` is found or
# the stream ends, and return the collected elements. The search itself is done
# by `readuntil_vector!`; whether it found the target is not reported to the caller.
function readuntil(io::IO, target::AbstractVector{T}; keep::Bool=false) where T
    # Byte results use `StringVector`, every other element type a `Vector{T}`.
    sink = if T === UInt8
        StringVector(0)
    else
        Vector{T}()
    end
    readuntil_vector!(io, target, keep, sink)
    return sink
end
end

# https://github.com/JuliaLang/julia/pull/18727
Expand Down
33 changes: 33 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,39 @@ end
@test collect(Compat.eachline(IOBuffer("x\ny"), keep=false)) == ["x", "y"]
@test collect(Compat.eachline(IOBuffer("x\ny"), keep=true)) == ["x\n", "y"]

# PR 25646
# Table-driven checks of Compat.readuntil. Each row is (t, s, m, kept):
#   t    - text to read from
#   s    - delimiter to search for
#   m    - expected result without the keep keyword
#   kept - expected result with keep=true
for (t, s, m, kept) in [
("a", "ab", "a", "a"),
("b", "ab", "b", "b"),
("α", "αγ", "α", "α"),
("ab", "abc", "ab", "ab"),
("bc", "abc", "bc", "bc"),
("αβ", "αβγ", "αβ", "αβ"),
("aaabc", "ab", "aa", "aaab"),
("aaabc", "ac", "aaabc", "aaabc"),
("aaabc", "aab", "a", "aaab"),
("aaabc", "aac", "aaabc", "aaabc"),
("αααβγ", "αβ", "αα", "αααβ"),
("αααβγ", "ααβ", "α", "αααβ"),
("αααβγ", "αγ", "αααβγ", "αααβγ"),
("barbarbarians", "barbarian", "bar", "barbarbarian"),
("abcaabcaabcxl", "abcaabcx", "abca", "abcaabcaabcx"),
("abbaabbaabbabbaax", "abbaabbabbaax", "abba", "abbaabbaabbabbaax"),
("abbaabbabbaabbaabbabbaax", "abbaabbabbaax", "abbaabbabba", "abbaabbabbaabbaabbabbaax"),
]
local t, s, m, kept
# delimiter passed as the string itself
@test Compat.readuntil(IOBuffer(t), s) == m
@test Compat.readuntil(IOBuffer(t), s, keep=true) == kept
# delimiter passed as a SubString covering all of s
@test Compat.readuntil(IOBuffer(t), SubString(s, firstindex(s))) == m
@test Compat.readuntil(IOBuffer(t), SubString(s, firstindex(s)), keep=true) == kept
# delimiter wrapped in GenericString
@test Compat.readuntil(IOBuffer(t), GenericString(s)) == m
@test Compat.readuntil(IOBuffer(t), GenericString(s), keep=true) == kept
# delimiter passed as a byte vector; the result is compared as bytes
@test Compat.readuntil(IOBuffer(t), Vector{UInt8}(codeunits(s))) == Vector{UInt8}(codeunits(m))
@test Compat.readuntil(IOBuffer(t), Vector{UInt8}(codeunits(s)), keep=true) == Vector{UInt8}(codeunits(kept))
# delimiter passed as a Char vector; the result is compared as Chars
@test Compat.readuntil(IOBuffer(t), collect(s)::Vector{Char}) == Vector{Char}(m)
@test Compat.readuntil(IOBuffer(t), collect(s)::Vector{Char}, keep=true) == Vector{Char}(kept)
end

# PR 18727
let
iset = Set([17, 4711])
Expand Down

0 comments on commit 588c846

Please sign in to comment.