diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index 46f6130357..6cfcecd9e5 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -225,66 +225,6 @@ def state end end - # Ripper doesn't include the rest of the token in the event, so we need to - # trim it down to just the content on the first line when comparing. - class EndContentToken < Token - def ==(other) # :nodoc: - [self[0], self[1], self[2][0..self[2].index("\n")], self[3]] == other - end - end - - # Tokens where state should be ignored - # used for :on_comment, :on_heredoc_end, :on_embexpr_end - class IgnoreStateToken < Token - def ==(other) # :nodoc: - self[0...-1] == other[0...-1] - end - end - - # Ident tokens for the most part are exactly the same, except sometimes we - # know an ident is a local when ripper doesn't (when they are introduced - # through named captures in regular expressions). In that case we don't - # compare the state. - class IdentToken < Token - def ==(other) # :nodoc: - (self[0...-1] == other[0...-1]) && ( - (other[3] == Translation::Ripper::EXPR_LABEL | Translation::Ripper::EXPR_END) || - (other[3] & (Translation::Ripper::EXPR_ARG | Translation::Ripper::EXPR_CMDARG) != 0) - ) - end - end - - # Ignored newlines can occasionally have a LABEL state attached to them, so - # we compare the state differently here. - class IgnoredNewlineToken < Token - def ==(other) # :nodoc: - return false unless self[0...-1] == other[0...-1] - - if self[3] == Translation::Ripper::EXPR_ARG | Translation::Ripper::EXPR_LABELED - other[3] & Translation::Ripper::EXPR_ARG | Translation::Ripper::EXPR_LABELED != 0 - else - self[3] == other[3] - end - end - end - - # If we have an identifier that follows a method name like: - # - # def foo bar - # - # then Ripper will mark bar as END|LABEL if there is a local in a parent - # scope named bar because it hasn't pushed the local table yet. We do this - # more accurately, so we need to allow comparing against both END and - # END|LABEL. - class ParamToken < Token - def ==(other) # :nodoc: - (self[0...-1] == other[0...-1]) && ( - (other[3] == Translation::Ripper::EXPR_END) || - (other[3] == Translation::Ripper::EXPR_END | Translation::Ripper::EXPR_LABEL) - ) - end - end - # A heredoc in this case is a list of tokens that belong to the body of the # heredoc that should be appended onto the list of tokens when the heredoc # closes. @@ -679,42 +619,9 @@ def result token = case event - when :on___end__ - EndContentToken.new([[lineno, column], event, value, lex_state]) - when :on_comment - IgnoreStateToken.new([[lineno, column], event, value, lex_state]) when :on_heredoc_end - # Heredoc end tokens can be emitted in an odd order, so we don't - # want to bother comparing the state on them. last_heredoc_end = token.location.end_offset - IgnoreStateToken.new([[lineno, column], event, value, lex_state]) - when :on_ident - if lex_state == Translation::Ripper::EXPR_END - # If we have an identifier that follows a method name like: - # - # def foo bar - # - # then Ripper will mark bar as END|LABEL if there is a local in a - # parent scope named bar because it hasn't pushed the local table - # yet. We do this more accurately, so we need to allow comparing - # against both END and END|LABEL. - ParamToken.new([[lineno, column], event, value, lex_state]) - elsif lex_state == Translation::Ripper::EXPR_END | Translation::Ripper::EXPR_LABEL - # In the event that we're comparing identifiers, we're going to - # allow a little divergence. Ripper doesn't account for local - # variables introduced through named captures in regexes, and we - # do, which accounts for this difference. - IdentToken.new([[lineno, column], event, value, lex_state]) - else - Token.new([[lineno, column], event, value, lex_state]) - end - when :on_embexpr_end - IgnoreStateToken.new([[lineno, column], event, value, lex_state]) - when :on_ignored_nl - # Ignored newlines can occasionally have a LABEL state attached to - # them which doesn't actually impact anything. We don't mirror that - # state so we ignored it. - IgnoredNewlineToken.new([[lineno, column], event, value, lex_state]) + Token.new([[lineno, column], event, value, lex_state]) when :on_regexp_end # On regex end, Ripper scans and then sets end state, so the ripper # lexed output is begin, when it should be end. prism sets lex state diff --git a/test/prism/lex_test.rb b/test/prism/lex_test.rb index 68e47a0964..bdf0366f2e 100644 --- a/test/prism/lex_test.rb +++ b/test/prism/lex_test.rb @@ -6,43 +6,6 @@ module Prism class LexTest < TestCase - except = [ - # https://bugs.ruby-lang.org/issues/21756 - "spanning_heredoc.txt", - # Prism emits a single string in some cases when ripper splits them up - "whitequark/dedenting_heredoc.txt", - "heredocs_with_fake_newlines.txt", - # Prism emits BEG for `on_regexp_end` - "spanning_heredoc_newlines.txt", - ] - - if RUBY_VERSION < "3.3.0" - # This file has changed behavior in Ripper in Ruby 3.3, so we skip it if - # we're on an earlier version. - except << "seattlerb/pct_w_heredoc_interp_nested.txt" - - # Ruby < 3.3.0 cannot parse heredocs where there are leading whitespace - # characters in the heredoc start. - # Example: <<~' EOF' or <<-' EOF' - # https://bugs.ruby-lang.org/issues/19539 - except << "heredocs_leading_whitespace.txt" - except << "whitequark/ruby_bug_19539.txt" - - # https://bugs.ruby-lang.org/issues/19025 - except << "whitequark/numparam_ruby_bug_19025.txt" - # https://bugs.ruby-lang.org/issues/18878 - except << "whitequark/ruby_bug_18878.txt" - # https://bugs.ruby-lang.org/issues/19281 - except << "whitequark/ruby_bug_19281.txt" - end - - # https://bugs.ruby-lang.org/issues/21168#note-5 - except << "command_method_call_2.txt" - - Fixture.each_for_current_ruby(except: except) do |fixture| - define_method(fixture.test_name) { assert_lex(fixture) } - end - def test_lex_file assert_nothing_raised do Prism.lex_file(__FILE__) @@ -82,18 +45,5 @@ def test_parse_lex_file Prism.parse_lex_file(nil) end end - - private - - def assert_lex(fixture) - source = fixture.read - - result = Prism.lex_compat(source, version: "current") - assert_equal [], result.errors - - Prism.lex_ripper(source).zip(result.value).each do |(ripper, prism)| - assert_equal ripper, prism - end - end end end