From abd7e471a1a3c01845c6cf8979f4e156121d860c Mon Sep 17 00:00:00 2001 From: "http://jneen.net/" Date: Mon, 3 Jun 2019 13:42:13 +0900 Subject: [PATCH 1/3] replace the :continue option with a #continue_lex method This has the following advantages: * reduces calls to #assert_utf8! (was previously called for every delegation!) * avoids the hash object creation * avoids the control coupling code smell of a special boolean parameter to avoid some parts of the processing. --- lib/rouge/lexer.rb | 23 +++++++++++++++++------ lib/rouge/lexers/console.rb | 4 ++-- lib/rouge/regex_lexer.rb | 10 +++++----- spec/lexers/markdown_spec.rb | 2 +- 4 files changed, 25 insertions(+), 14 deletions(-) diff --git a/lib/rouge/lexer.rb b/lib/rouge/lexer.rb index f32bfa88db..4bf50857f2 100644 --- a/lib/rouge/lexer.rb +++ b/lib/rouge/lexer.rb @@ -23,6 +23,14 @@ def lex(stream, opts={}, &b) new(opts).lex(stream, &b) end + # In case #continue_lex is called statically, we simply + # begin a new lex from the beginning, since there is no state. + # + # @see #continue_lex + def continue_lex(*a, &b) + lex(*a, &b) + end + # Given a name in string, return the correct lexer class. # @param [String] name # @return [Class,nil] @@ -405,15 +413,18 @@ def reset! # Given a string, yield [token, chunk] pairs. If no block is given, # an enumerator is returned. - # - # @option opts :continue - # Continue the lex from the previous state (i.e. don't call #reset!) - def lex(string, opts={}, &b) - return enum_for(:lex, string, opts) unless block_given? + def lex(string, &b) + return enum_for(:lex, string) unless block_given? Lexer.assert_utf8!(string) + reset! + + continue_lex(string, &b) + end - reset! unless opts[:continue] + # Continue the lex from the current state without resetting + def continue_lex(string, &b) + return enum_for(:continue_lex, string, &b) unless block_given? 
# consolidate consecutive tokens of the same type last_token = nil diff --git a/lib/rouge/lexers/console.rb b/lib/rouge/lexers/console.rb index 30ab082ca9..e7fbaec6ee 100644 --- a/lib/rouge/lexers/console.rb +++ b/lib/rouge/lexers/console.rb @@ -118,7 +118,7 @@ def process_line(input, &output) $' =~ /\A\s*/ yield Text, $& unless $&.empty? - lang_lexer.lex($', continue: true, &output) + lang_lexer.continue_lex($', &output) elsif comment_regex =~ input[0].strip puts "console: matched comment #{input[0].inspect}" if @debug output_lexer.reset! @@ -129,7 +129,7 @@ def process_line(input, &output) puts "console: matched output #{input[0].inspect}" if @debug lang_lexer.reset! - output_lexer.lex(input[0], continue: true, &output) + output_lexer.continue_lex(input[0], &output) end end end diff --git a/lib/rouge/regex_lexer.rb b/lib/rouge/regex_lexer.rb index 1f5157e8ad..8288a2ade4 100644 --- a/lib/rouge/regex_lexer.rb +++ b/lib/rouge/regex_lexer.rb @@ -352,10 +352,10 @@ def groups(*tokens) end end - # Delegate the lex to another lexer. The #lex method will be called - # with `:continue` set to true, so that #reset! will not be called. - # In this way, a single lexer can be repeatedly delegated to while - # maintaining its own internal state stack. + # Delegate the lex to another lexer. We use the `continue_lex` method + # so that #reset! will not be called. In this way, a single lexer + # can be repeatedly delegated to while maintaining its own internal + # state stack. 
# # @param [#lex] lexer # The lexer or lexer class to delegate to @@ -365,7 +365,7 @@ def delegate(lexer, text=nil) puts " delegating to #{lexer.inspect}" if @debug text ||= @current_stream[0] - lexer.lex(text, :continue => true) do |tok, val| + lexer.continue_lex(text) do |tok, val| puts " delegated token: #{tok.inspect}, #{val.inspect}" if @debug yield_token(tok, val) end diff --git a/spec/lexers/markdown_spec.rb b/spec/lexers/markdown_spec.rb index fbc53dfeb3..33a67e913f 100644 --- a/spec/lexers/markdown_spec.rb +++ b/spec/lexers/markdown_spec.rb @@ -31,7 +31,7 @@ it 'recognizes code block when lexer is continued' do subject.lex("```ruby\n").to_a - actual = subject.lex("@foo\n```\n",continue: true).map { |token, value| [ token.qualname, value ] } + actual = subject.continue_lex("@foo\n```\n").map { |token, value| [ token.qualname, value ] } assert { ["Name.Variable.Instance", "@foo"] == actual.first } end From 4de367d1b8ff400ad535c7ec2e423f1e60a8e688 Mon Sep 17 00:00:00 2001 From: "http://jneen.net/" Date: Mon, 3 Jun 2019 17:15:07 +0900 Subject: [PATCH 2/3] properly deprecate the options hash for Formatter#lex --- lib/rouge/lexer.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/rouge/lexer.rb b/lib/rouge/lexer.rb index 4bf50857f2..87b04e0205 100644 --- a/lib/rouge/lexer.rb +++ b/lib/rouge/lexer.rb @@ -413,7 +413,11 @@ def reset! # Given a string, yield [token, chunk] pairs. If no block is given, # an enumerator is returned. - def lex(string, &b) + def lex(string, opts=nil, &b) + if opts + warn 'the :continue option to Formatter#lex is deprecated, use #continue_lex instead.' + end + return enum_for(:lex, string) unless block_given? 
Lexer.assert_utf8!(string) From becb9fba3feed5950c786b97b7bd34637e02aeb1 Mon Sep 17 00:00:00 2001 From: "http://jneen.net/" Date: Mon, 3 Jun 2019 17:48:38 +0900 Subject: [PATCH 3/3] properly default to old behavior when warning about :continue --- lib/rouge/lexer.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/rouge/lexer.rb b/lib/rouge/lexer.rb index 87b04e0205..ce79d65e0c 100644 --- a/lib/rouge/lexer.rb +++ b/lib/rouge/lexer.rb @@ -416,6 +416,7 @@ def reset! def lex(string, opts=nil, &b) if opts warn 'the :continue option to Formatter#lex is deprecated, use #continue_lex instead.' + return continue_lex(string, &b) end return enum_for(:lex, string) unless block_given?