Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 1 addition & 94 deletions lib/prism/lex_compat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -225,66 +225,6 @@ def state
end
end

# Token used for content following __END__. Ripper only reports the first
# line of that content in its event, so before comparing we cut our own
# value down to everything up to and including the first newline.
class EndContentToken < Token
  def ==(other) # :nodoc:
    content = self[2]
    # When there is no newline, index returns nil and the range is endless,
    # so the whole content is kept.
    first_line = content[0..content.index("\n")]

    [self[0], self[1], first_line, self[3]] == other
  end
end

# Token whose lex state is left out of equality checks.
# Used for :on_comment, :on_heredoc_end, and :on_embexpr_end.
class IgnoreStateToken < Token
  def ==(other) # :nodoc:
    # Drop the trailing element (the state) from both sides before comparing.
    ours = self[0...-1]
    theirs = other[0...-1]

    ours == theirs
  end
end

# Identifier tokens almost always match ripper exactly. The exception is
# when we know an identifier is a local and ripper does not (locals that are
# introduced through named captures in regular expressions). For that case
# the state is not compared for equality; instead the other token's state
# has to be either exactly LABEL|END or carry one of the ARG/CMDARG bits.
class IdentToken < Token
  def ==(other) # :nodoc:
    # Everything except the trailing state has to be identical.
    return false unless self[0...-1] == other[0...-1]

    state = other[3]

    (state == Translation::Ripper::EXPR_LABEL | Translation::Ripper::EXPR_END) ||
      (state & (Translation::Ripper::EXPR_ARG | Translation::Ripper::EXPR_CMDARG) != 0)
  end
end

# Ignored newlines can occasionally have a LABEL state attached to them, so
# we compare the state differently here.
#
# All elements except the trailing state must match exactly. If our own
# state is EXPR_ARG|EXPR_LABELED, the other token's state only has to share
# at least one of those two bits; in every other case the two states must be
# equal.
class IgnoredNewlineToken < Token
  def ==(other) # :nodoc:
    return false unless self[0...-1] == other[0...-1]

    if self[3] == Translation::Ripper::EXPR_ARG | Translation::Ripper::EXPR_LABELED
      # The mask has to be parenthesized. `&` binds tighter than `|`, which
      # binds tighter than `!=`, so the unparenthesized form is evaluated as
      # `((state & ARG) | LABELED) != 0` and can never be false as long as
      # EXPR_LABELED is a nonzero flag.
      other[3] & (Translation::Ripper::EXPR_ARG | Translation::Ripper::EXPR_LABELED) != 0
    else
      self[3] == other[3]
    end
  end
end

# Used for an identifier that follows a method name, as in:
#
#     def foo bar
#
# Ripper marks bar as END|LABEL when a parent scope has a local named bar,
# because it has not pushed the local table yet. We are more accurate here,
# so the other token is accepted with either END or END|LABEL as its state.
class ParamToken < Token
  def ==(other) # :nodoc:
    # Everything except the trailing state has to be identical.
    return false unless self[0...-1] == other[0...-1]

    state = other[3]

    (state == Translation::Ripper::EXPR_END) ||
      (state == Translation::Ripper::EXPR_END | Translation::Ripper::EXPR_LABEL)
  end
end

# A heredoc in this case is a list of tokens that belong to the body of the
# heredoc that should be appended onto the list of tokens when the heredoc
# closes.
Expand Down Expand Up @@ -679,42 +619,9 @@ def result

token =
case event
when :on___end__
EndContentToken.new([[lineno, column], event, value, lex_state])
when :on_comment
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
when :on_heredoc_end
# Heredoc end tokens can be emitted in an odd order, so we don't
# want to bother comparing the state on them.
last_heredoc_end = token.location.end_offset
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
when :on_ident
if lex_state == Translation::Ripper::EXPR_END
# If we have an identifier that follows a method name like:
#
# def foo bar
#
# then Ripper will mark bar as END|LABEL if there is a local in a
# parent scope named bar because it hasn't pushed the local table
# yet. We do this more accurately, so we need to allow comparing
# against both END and END|LABEL.
ParamToken.new([[lineno, column], event, value, lex_state])
elsif lex_state == Translation::Ripper::EXPR_END | Translation::Ripper::EXPR_LABEL
# In the event that we're comparing identifiers, we're going to
# allow a little divergence. Ripper doesn't account for local
# variables introduced through named captures in regexes, and we
# do, which accounts for this difference.
IdentToken.new([[lineno, column], event, value, lex_state])
else
Token.new([[lineno, column], event, value, lex_state])
end
when :on_embexpr_end
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
when :on_ignored_nl
# Ignored newlines can occasionally have a LABEL state attached to
# them which doesn't actually impact anything. We don't mirror that
# state so we ignored it.
IgnoredNewlineToken.new([[lineno, column], event, value, lex_state])
Token.new([[lineno, column], event, value, lex_state])
when :on_regexp_end
# On regex end, Ripper scans and then sets end state, so the ripper
# lexed output is begin, when it should be end. prism sets lex state
Expand Down
50 changes: 0 additions & 50 deletions test/prism/lex_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,43 +6,6 @@

module Prism
class LexTest < TestCase
# Fixtures excluded from the ripper comparison on every Ruby version.
except = [
  # https://bugs.ruby-lang.org/issues/21756
  "spanning_heredoc.txt",
  # Prism emits a single string in some cases when ripper splits them up
  "whitequark/dedenting_heredoc.txt",
  "heredocs_with_fake_newlines.txt",
  # Prism emits BEG for `on_regexp_end`
  "spanning_heredoc_newlines.txt",
]

if RUBY_VERSION < "3.3.0"
  except.push(
    # This file has changed behavior in Ripper in Ruby 3.3, so we skip it if
    # we're on an earlier version.
    "seattlerb/pct_w_heredoc_interp_nested.txt",

    # Ruby < 3.3.0 cannot parse heredocs where there are leading whitespace
    # characters in the heredoc start.
    # Example: <<~' EOF' or <<-' EOF'
    # https://bugs.ruby-lang.org/issues/19539
    "heredocs_leading_whitespace.txt",
    "whitequark/ruby_bug_19539.txt",

    # https://bugs.ruby-lang.org/issues/19025
    "whitequark/numparam_ruby_bug_19025.txt",
    # https://bugs.ruby-lang.org/issues/18878
    "whitequark/ruby_bug_18878.txt",
    # https://bugs.ruby-lang.org/issues/19281
    "whitequark/ruby_bug_19281.txt"
  )
end

# https://bugs.ruby-lang.org/issues/21168#note-5
except.push("command_method_call_2.txt")

# Define one test method per remaining fixture; each one runs assert_lex on
# its fixture.
Fixture.each_for_current_ruby(except: except) do |fixture|
  define_method(fixture.test_name) do
    assert_lex(fixture)
  end
end

def test_lex_file
assert_nothing_raised do
Prism.lex_file(__FILE__)
Expand Down Expand Up @@ -82,18 +45,5 @@ def test_parse_lex_file
Prism.parse_lex_file(nil)
end
end

private

# Lexes the fixture's source with Prism.lex_compat and asserts that it
# reports no errors, then walks the tokens returned by Prism.lex_ripper and
# asserts that each one equals the lex_compat token at the same position.
#
# The pairing is driven by the ripper token list, so a missing lex_compat
# token is compared as nil.
def assert_lex(fixture)
  source = fixture.read

  lexed = Prism.lex_compat(source, version: "current")
  assert_equal([], lexed.errors)

  ripper_tokens = Prism.lex_ripper(source)
  token_pairs = ripper_tokens.zip(lexed.value)

  token_pairs.each do |ripper_token, prism_token|
    assert_equal(ripper_token, prism_token)
  end
end
end
end
Loading