From 74bb12c8254b8fb9684e7395e0409084542e3a01 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Fri, 16 Jan 2026 10:47:18 +0100 Subject: [PATCH] Make the ripper shim work with rdoc The filter class is a 1:1 copy of ruby. rdoc has 32 test failures. It seems to expect `on_sp` in some cases to render code as written. --- lib/prism/translation/ripper.rb | 1 + lib/prism/translation/ripper/filter.rb | 53 ++++++++++++++++++++++++++ lib/prism/translation/ripper/lexer.rb | 16 +++----- prism.gemspec | 1 + rakelib/typecheck.rake | 1 + test/prism/ruby/ripper_test.rb | 32 +++++++++++----- 6 files changed, 84 insertions(+), 20 deletions(-) create mode 100644 lib/prism/translation/ripper/filter.rb diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index a901a72692..f70eb889dd 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -424,6 +424,7 @@ def self.sexp_raw(src, filename = "-", lineno = 1, raise_errors: false) end end + autoload :Filter, "prism/translation/ripper/filter" autoload :Lexer, "prism/translation/ripper/lexer" autoload :SexpBuilder, "prism/translation/ripper/sexp" autoload :SexpBuilderPP, "prism/translation/ripper/sexp" diff --git a/lib/prism/translation/ripper/filter.rb b/lib/prism/translation/ripper/filter.rb new file mode 100644 index 0000000000..19deef2d37 --- /dev/null +++ b/lib/prism/translation/ripper/filter.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +module Prism + module Translation + class Ripper + class Filter # :nodoc: + # :stopdoc: + def initialize(src, filename = '-', lineno = 1) + @__lexer = Lexer.new(src, filename, lineno) + @__line = nil + @__col = nil + @__state = nil + end + + def filename + @__lexer.filename + end + + def lineno + @__line + end + + def column + @__col + end + + def state + @__state + end + + def parse(init = nil) + data = init + @__lexer.lex.each do |pos, event, tok, state| + @__line, @__col = *pos + @__state = state + data = if respond_to?(event, true) + then __send__(event, tok, data) + else on_default(event, tok, data) + end + end + data + end + + private + + def on_default(event, token, data) + data + end + # :startdoc: + end + end + end +end diff --git a/lib/prism/translation/ripper/lexer.rb b/lib/prism/translation/ripper/lexer.rb index 787181b5a7..bd40fb4c5a 100644 --- a/lib/prism/translation/ripper/lexer.rb +++ b/lib/prism/translation/ripper/lexer.rb @@ -100,21 +100,17 @@ def to_a end end - def initialize(...) - super - @lex_compat = Prism.lex_compat(@source, filepath: filename, line: lineno) + # Pretty much just the same as Prism.lex_compat. + def lex(raise_errors: false) + Ripper.lex(@source, filename, lineno, raise_errors: raise_errors) end # Returns the lex_compat result wrapped in `Elem`. Errors are omitted. # Since ripper is a streaming parser, tokens are expected to be emitted in the order # that the parser encounters them. This is not implemented. - def parse(raise_errors: false) - if @lex_compat.failure? && raise_errors - raise SyntaxError, @lex_compat.errors.first.message - else - @lex_compat.value.map do |position, event, token, state| - Elem.new(position, event, token, state.to_int) - end + def parse(...) + lex(...).map do |position, event, token, state| + Elem.new(position, event, token, state.to_int) end end diff --git a/prism.gemspec b/prism.gemspec index 463387e55c..283c7b04aa 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -104,6 +104,7 @@ Gem::Specification.new do |spec| "lib/prism/translation/parser/compiler.rb", "lib/prism/translation/parser/lexer.rb", "lib/prism/translation/ripper.rb", + "lib/prism/translation/ripper/filter.rb", "lib/prism/translation/ripper/lexer.rb", "lib/prism/translation/ripper/sexp.rb", "lib/prism/translation/ripper/shim.rb", diff --git a/rakelib/typecheck.rake b/rakelib/typecheck.rake index 439af9a8fa..67e2d4ed56 100644 --- a/rakelib/typecheck.rake +++ b/rakelib/typecheck.rake @@ -26,6 +26,7 @@ namespace :typecheck do - ./lib/prism/visitor.rb - ./lib/prism/translation/parser/lexer.rb - ./lib/prism/translation/ripper.rb + - ./lib/prism/translation/ripper/filter.rb - ./lib/prism/translation/ripper/lexer.rb - ./lib/prism/translation/ripper/sexp.rb - ./lib/prism/translation/ruby_parser.rb diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb index 2bd9c2fe4a..8f7ffa45bf 100644 --- a/test/prism/ruby/ripper_test.rb +++ b/test/prism/ruby/ripper_test.rb @@ -59,7 +59,7 @@ class RipperTest < TestCase "whitequark/slash_newline_in_heredocs.txt" ] - omitted_lexer_parse = [ + omitted_lex = [ "comments.txt", "heredoc_percent_q_newline_delimiter.txt", "heredoc_with_escaped_newline_at_start.txt", @@ -80,8 +80,20 @@ class RipperTest < TestCase define_method("#{fixture.test_name}_sexp_raw") { assert_ripper_sexp_raw(fixture.read) } end - Fixture.each_for_current_ruby(except: incorrect | omitted_lexer_parse) do |fixture| - define_method("#{fixture.test_name}_lexer_parse") { assert_ripper_lexer_parse(fixture.read) } + Fixture.each_for_current_ruby(except: incorrect | omitted_lex) do |fixture| + define_method("#{fixture.test_name}_lex") { assert_ripper_lex(fixture.read) } + end + + def test_lexer + lexer = Translation::Ripper::Lexer.new("foo") + expected = [[1, 0], :on_ident, "foo", Translation::Ripper::EXPR_CMDARG] + + assert_equal([expected], lexer.lex) + assert_equal(expected, lexer.parse[0].to_a) + assert_equal(lexer.parse[0].to_a, lexer.scan[0].to_a) + + assert_equal(%i[on_int on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event)) + assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) } end # Check that the hardcoded values don't change without us noticing. @@ -101,15 +113,15 @@ def assert_ripper_sexp_raw(source) assert_equal Ripper.sexp_raw(source), Prism::Translation::Ripper.sexp_raw(source) end - def assert_ripper_lexer_parse(source) - prism = Translation::Ripper::Lexer.new(source).parse - ripper = Ripper::Lexer.new(source).parse - ripper.reject! { |elem| elem.event == :on_sp } # Prism doesn't emit on_sp - ripper.sort_by!(&:pos) # Prism emits tokens by their order in the code, not in parse order + def assert_ripper_lex(source) + prism = Translation::Ripper.lex(source) + ripper = Ripper.lex(source) + ripper.reject! { |elem| elem[1] == :on_sp } # Prism doesn't emit on_sp + ripper.sort_by! { |elem| elem[0] } # Prism emits tokens by their order in the code, not in parse order [prism.size, ripper.size].max.times do |i| - expected = ripper[i].to_a - actual = prism[i].to_a + expected = ripper[i] + actual = prism[i] # Since tokens related to heredocs are not emitted in the same order, # the state also doesn't line up. if expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end