Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Char::Reader#current_char?, #next_char?, #previous_char? #14012

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions spec/std/char/reader_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,55 @@ describe "Char::Reader" do
reader.current_char.should eq('語')
end

it "#current_char?" do
reader = Char::Reader.new("há日本語")
reader.current_char?.should eq('h')
reader.next_char
reader.current_char?.should eq('á')
reader.next_char
reader.current_char?.should eq('日')
reader.next_char
reader.current_char?.should eq('本')
reader.next_char
reader.current_char?.should eq('語')
reader.next_char
reader.current_char?.should be_nil
reader.previous_char
reader.current_char?.should eq('語')
end

it "#next_char?" do
reader = Char::Reader.new("há日本語")
reader.next_char?.should eq('á')
reader.pos.should eq(1)
reader.next_char?.should eq('日')
reader.pos.should eq(3)
reader.next_char?.should eq('本')
reader.pos.should eq(6)
reader.next_char?.should eq('語')
reader.pos.should eq(9)
reader.next_char?.should be_nil
reader.pos.should eq(12)
reader.next_char?.should be_nil
reader.pos.should eq(12)
end

it "#previous_char?" do
reader = Char::Reader.new("há日本語", pos: 12)
reader.previous_char?.should eq('語')
reader.pos.should eq(9)
reader.previous_char?.should eq('本')
reader.pos.should eq(6)
reader.previous_char?.should eq('日')
reader.pos.should eq(3)
reader.previous_char?.should eq('á')
reader.pos.should eq(1)
reader.previous_char?.should eq('h')
reader.pos.should eq(0)
reader.previous_char?.should be_nil
reader.pos.should eq(0)
end

it "errors if 0x80 <= first_byte < 0xC2" do
assert_invalid_byte_sequence Bytes[0x80]
assert_invalid_byte_sequence Bytes[0xC1]
Expand Down
119 changes: 93 additions & 26 deletions src/char/reader.cr
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,16 @@ struct Char
# Returns the reader's String.
getter string : String

# Returns the current character.
# Returns the current character, or `'\0'` if the reader is at the end of
# the string.
#
# ```
# reader = Char::Reader.new("ab")
# reader.current_char # => 'a'
# reader.next_char
# reader.current_char # => 'b'
# reader.next_char
# reader.current_char # => '\0'
# ```
getter current_char : Char

Expand All @@ -37,7 +40,7 @@ struct Char
# ```
getter current_char_width : Int32

# Returns the position of the current character.
# Returns the byte position of the current character.
#
# ```
# reader = Char::Reader.new("ab")
Expand Down Expand Up @@ -71,40 +74,81 @@ struct Char
decode_previous_char
end

# Returns `true` if there is a character left to read.
# The terminating byte `'\0'` is considered a valid character
# by this method.
# Returns the current character.
#
# Returns `nil` if the reader is at the end of the string.
def current_char? : Char?
if has_next?
current_char
end
end

# Returns `true` if the reader is not at the end of the string.
#
# NOTE: This only means `#next_char` will successfully increment `#pos`; if
# the reader is already at the last character, `#next_char` will return the
# terminating null byte because there isn't really a next character.
#
# ```
# reader = Char::Reader.new("a")
# reader.has_next? # => true
# reader.peek_next_char # => '\0'
# reader = Char::Reader.new("ab")
# reader.has_next? # => true
# reader.next_char # => 'b'
# reader.has_next? # => true
# reader.next_char # => '\0'
# reader.has_next? # => false
# ```
def has_next? : Bool
@pos < @string.bytesize
end

# Reads the next character in the string,
# `#pos` is incremented. Raises `IndexError` if the reader is
# at the end of the `#string`.
# Tries to read the next character in the string.
#
# If the reader is at the end of the string before or after incrementing
# `#pos`, returns `nil`.
#
# ```
# reader = Char::Reader.new("ab")
# reader = Char::Reader.new("abc")
# reader.next_char? # => 'b'
# reader.next_char? # => 'c'
# reader.next_char? # => nil
# reader.current_char # => '\0'
# ```
def next_char? : Char?
next_pos = @pos + @current_char_width
if next_pos <= @string.bytesize
@pos = next_pos
decode_current_char
current_char?
end
end

# Reads the next character in the string.
#
# If the reader is at the end of the string after incrementing `#pos`,
# returns `'\0'`. If the reader is already at the end beforehand, raises
# `IndexError`.
#
# ```
# reader = Char::Reader.new("abc")
# reader.next_char # => 'b'
# reader.next_char # => 'c'
# reader.next_char # => '\0'
# reader.next_char # raise IndexError
# ```
def next_char : Char
@pos += @current_char_width
if @pos > @string.bytesize
next_pos = @pos + @current_char_width
if next_pos <= @string.bytesize
@pos = next_pos
decode_current_char
else
raise IndexError.new
end

decode_current_char
end

# Returns the next character in the `#string`
# without incrementing `#pos`.
# Raises `IndexError` if the reader is at
# the end of the `#string`.
# Returns the next character in the `#string` without incrementing `#pos`.
#
# Returns `'\0'` if the reader is at the last character of the string.
# Raises `IndexError` if the reader is at the end.
#
# ```
# reader = Char::Reader.new("ab")
Expand All @@ -123,16 +167,39 @@ struct Char
end
end

# Returns `true` if there are characters before
# the current one.
# Returns `true` if the reader is not at the beginning of the string.
def has_previous? : Bool
@pos > 0
end

# Returns the previous character, `#pos`
# is decremented.
# Raises `IndexError` if the reader is at the beginning of
# the `#string`
# Tries to read the previous character in the string.
#
# Returns `nil` if the reader is already at the beginning of the string.
# Otherwise decrements `#pos`.
#
# ```
# reader = Char::Reader.new(at_end: "abc")
# reader.previous_char? # => 'b'
# reader.previous_char? # => 'a'
# reader.previous_char? # => nil
# ```
def previous_char? : Char?
if has_previous?
decode_previous_char
end
end

# Reads the previous character in the string.
#
# Raises `IndexError` if the reader is already at the beginning of the
# string. Otherwise decrements `#pos`.
#
# ```
# reader = Char::Reader.new(at_end: "abc")
# reader.previous_char # => 'b'
# reader.previous_char # => 'a'
# reader.previous_char # raises IndexError
# ```
def previous_char : Char
unless has_previous?
raise IndexError.new
Expand Down
Loading