From 74bb12c8254b8fb9684e7395e0409084542e3a01 Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Fri, 16 Jan 2026 10:47:18 +0100
Subject: [PATCH] Make the ripper shim work with rdoc

The `Filter` class is a 1:1 copy of the one in ruby.

rdoc has 32 test failures with the shim. It seems to expect `on_sp` tokens in some cases so that it can render code as written.
---
 lib/prism/translation/ripper.rb        |  1 +
 lib/prism/translation/ripper/filter.rb | 53 ++++++++++++++++++++++++++
 lib/prism/translation/ripper/lexer.rb  | 16 +++-----
 prism.gemspec                          |  1 +
 rakelib/typecheck.rake                 |  1 +
 test/prism/ruby/ripper_test.rb         | 32 +++++++++++-----
 6 files changed, 84 insertions(+), 20 deletions(-)
 create mode 100644 lib/prism/translation/ripper/filter.rb

diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb
index a901a72692..f70eb889dd 100644
--- a/lib/prism/translation/ripper.rb
+++ b/lib/prism/translation/ripper.rb
@@ -424,6 +424,7 @@ def self.sexp_raw(src, filename = "-", lineno = 1, raise_errors: false)
         end
       end
 
+      autoload :Filter, "prism/translation/ripper/filter"
       autoload :Lexer, "prism/translation/ripper/lexer"
       autoload :SexpBuilder, "prism/translation/ripper/sexp"
       autoload :SexpBuilderPP, "prism/translation/ripper/sexp"
diff --git a/lib/prism/translation/ripper/filter.rb b/lib/prism/translation/ripper/filter.rb
new file mode 100644
index 0000000000..19deef2d37
--- /dev/null
+++ b/lib/prism/translation/ripper/filter.rb
@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+
+module Prism
+  module Translation
+    class Ripper
+      class Filter # :nodoc:
+        # :stopdoc:
+        def initialize(src, filename = '-', lineno = 1)
+          @__lexer = Lexer.new(src, filename, lineno)
+          @__line = nil
+          @__col = nil
+          @__state = nil
+        end
+
+        def filename
+          @__lexer.filename
+        end
+
+        def lineno
+          @__line
+        end
+
+        def column
+          @__col
+        end
+
+        def state
+          @__state
+        end
+
+        def parse(init = nil)
+          data = init
+          @__lexer.lex.each do |pos, event, tok, state|
+            @__line, @__col = *pos
+            @__state = state
+            data = if respond_to?(event, true)
+                  then __send__(event, tok, data)
+                  else on_default(event, tok, data)
+                  end
+          end
+          data
+        end
+
+        private
+
+        def on_default(event, token, data)
+          data
+        end
+        # :startdoc:
+      end
+    end
+  end
+end
diff --git a/lib/prism/translation/ripper/lexer.rb b/lib/prism/translation/ripper/lexer.rb
index 787181b5a7..bd40fb4c5a 100644
--- a/lib/prism/translation/ripper/lexer.rb
+++ b/lib/prism/translation/ripper/lexer.rb
@@ -100,21 +100,17 @@ def to_a
           end
         end
 
-        def initialize(...)
-          super
-          @lex_compat = Prism.lex_compat(@source, filepath: filename, line: lineno)
+        # Delegates to Ripper.lex; pretty much just the same as Prism.lex_compat.
+        def lex(raise_errors: false)
+          Ripper.lex(@source, filename, lineno, raise_errors: raise_errors)
         end
 
         # Returns the lex_compat result wrapped in `Elem`. Errors are omitted.
         # Since ripper is a streaming parser, tokens are expected to be emitted in the order
         # that the parser encounters them. This is not implemented.
-        def parse(raise_errors: false)
-          if @lex_compat.failure? && raise_errors
-            raise SyntaxError, @lex_compat.errors.first.message
-          else
-            @lex_compat.value.map do |position, event, token, state|
-              Elem.new(position, event, token, state.to_int)
-            end
+        def parse(...)
+          lex(...).map do |position, event, token, state|
+            Elem.new(position, event, token, state.to_int)
           end
         end
 
diff --git a/prism.gemspec b/prism.gemspec
index 463387e55c..283c7b04aa 100644
--- a/prism.gemspec
+++ b/prism.gemspec
@@ -104,6 +104,7 @@ Gem::Specification.new do |spec|
     "lib/prism/translation/parser/compiler.rb",
     "lib/prism/translation/parser/lexer.rb",
     "lib/prism/translation/ripper.rb",
+    "lib/prism/translation/ripper/filter.rb",
     "lib/prism/translation/ripper/lexer.rb",
     "lib/prism/translation/ripper/sexp.rb",
     "lib/prism/translation/ripper/shim.rb",
diff --git a/rakelib/typecheck.rake b/rakelib/typecheck.rake
index 439af9a8fa..67e2d4ed56 100644
--- a/rakelib/typecheck.rake
+++ b/rakelib/typecheck.rake
@@ -26,6 +26,7 @@ namespace :typecheck do
         - ./lib/prism/visitor.rb
         - ./lib/prism/translation/parser/lexer.rb
         - ./lib/prism/translation/ripper.rb
+        - ./lib/prism/translation/ripper/filter.rb
         - ./lib/prism/translation/ripper/lexer.rb
         - ./lib/prism/translation/ripper/sexp.rb
         - ./lib/prism/translation/ruby_parser.rb
diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb
index 2bd9c2fe4a..8f7ffa45bf 100644
--- a/test/prism/ruby/ripper_test.rb
+++ b/test/prism/ruby/ripper_test.rb
@@ -59,7 +59,7 @@ class RipperTest < TestCase
       "whitequark/slash_newline_in_heredocs.txt"
     ]
 
-    omitted_lexer_parse = [
+    omitted_lex = [
       "comments.txt",
       "heredoc_percent_q_newline_delimiter.txt",
       "heredoc_with_escaped_newline_at_start.txt",
@@ -80,8 +80,20 @@ class RipperTest < TestCase
       define_method("#{fixture.test_name}_sexp_raw") { assert_ripper_sexp_raw(fixture.read) }
     end
 
-    Fixture.each_for_current_ruby(except: incorrect | omitted_lexer_parse) do |fixture|
-      define_method("#{fixture.test_name}_lexer_parse") { assert_ripper_lexer_parse(fixture.read) }
+    Fixture.each_for_current_ruby(except: incorrect | omitted_lex) do |fixture|
+      define_method("#{fixture.test_name}_lex") { assert_ripper_lex(fixture.read) }
+    end
+
+    def test_lexer
+      lexer = Translation::Ripper::Lexer.new("foo")
+      expected = [[1, 0], :on_ident, "foo", Translation::Ripper::EXPR_CMDARG]
+
+      assert_equal([expected], lexer.lex)
+      assert_equal(expected, lexer.parse[0].to_a)
+      assert_equal(lexer.parse[0].to_a, lexer.scan[0].to_a)
+
+      assert_equal(%i[on_int on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event))
+      assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) }
     end
 
     # Check that the hardcoded values don't change without us noticing.
@@ -101,15 +113,15 @@ def assert_ripper_sexp_raw(source)
       assert_equal Ripper.sexp_raw(source), Prism::Translation::Ripper.sexp_raw(source)
     end
 
-    def assert_ripper_lexer_parse(source)
-      prism = Translation::Ripper::Lexer.new(source).parse
-      ripper = Ripper::Lexer.new(source).parse
-      ripper.reject! { |elem| elem.event == :on_sp } # Prism doesn't emit on_sp
-      ripper.sort_by!(&:pos) # Prism emits tokens by their order in the code, not in parse order
+    def assert_ripper_lex(source)
+      prism = Translation::Ripper.lex(source)
+      ripper = Ripper.lex(source)
+      ripper.reject! { |elem| elem[1] == :on_sp } # Prism doesn't emit on_sp
+      ripper.sort_by! { |elem| elem[0] } # Prism emits tokens by their order in the code, not in parse order
 
       [prism.size, ripper.size].max.times do |i|
-        expected = ripper[i].to_a
-        actual = prism[i].to_a
+        expected = ripper[i]
+        actual = prism[i]
         # Since tokens related to heredocs are not emitted in the same order,
         # the state also doesn't line up.
         if expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end