From c45bdd30b003d95ac903344e6638473cab2b43ad Mon Sep 17 00:00:00 2001
From: Roberto Huertas <roberto.huertas@datadoghq.com>
Date: Sun, 17 May 2026 11:43:17 +0200
Subject: [PATCH 01/12] fix(kernel): emit 1-based UTF-16 code-unit columns from
 tree-sitter nodes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Introduced `LineColumnIndex` in `crates/common/src/utils/position_utils.rs`, a pre-built per-source index that converts tree-sitter's 0-based UTF-8 byte columns to 1-based UTF-16 code-unit columns, with an ASCII fast path and full unit-test coverage (BMP, supplementary-plane emoji, CJK, combining marks, CRLF, empty lines).
- Documented `Position.col` as a 1-based UTF-16 code-unit column in `crates/common/src/model/position.rs` to lock in the contract.
- Changed `map_node` and `get_query_nodes` (`tree_sitter.rs`) to accept and use `&LineColumnIndex`; updated `process_tree_sitter_tree_request` (`tree_sitter_tree.rs`) accordingly.
- Replaced `TreeSitterNode::from_ts_node` with `from_ts_node_with_index`, threading `LineColumnIndex` through the full ddsa call chain: `JsRuntime::execute_rule_internal` → `QueryMatchBridge::set_data` → `TsNodeBridge::insert`/`insert_capture`/`build_v8_node`.
- Updated the two taint-flow region builders (`LocatedNode::new_cst` in `flow/graph.rs`, `position_eq` helper in `flow/java.rs`) and the deno ops (`op_ts_node_named_children`, `op_ts_node_parent`, `op_digraph_adjacency_list_to_dot`) to use the same helper.
- Moved the `ctx_bridge` borrow in `op_ts_node_named_children` into the `else` branch where it is actually needed.
- Added multibyte regression tests across all layers: `LineColumnIndex` unit matrix, `map_node`/`get_query_nodes` emoji+CJK tests, `TsNodeBridge` bridge test, `LocatedNode` regression test, `stella_compat::getCode` JS execution test, server integration test on base64-encoded multibyte Python, and SARIF/CSV serialization assertions.

IDE-6037
---
 crates/cli/src/csv.rs                         |  35 ++++
 crates/cli/src/sarif/sarif_utils.rs           |  84 +++++++++
 crates/common/src/model/position.rs           |  12 ++
 crates/common/src/utils/position_utils.rs     | 176 ++++++++++++++++++
 .../analysis/ddsa_lib/bridge/query_match.rs   |  23 ++-
 .../src/analysis/ddsa_lib/bridge/ts_node.rs   |  98 +++++++---
 .../src/analysis/ddsa_lib/js/flow/graph.rs    |  71 ++++++-
 .../ddsa_lib/js/flow/graph_test_utils.rs      |   2 +-
 .../src/analysis/ddsa_lib/js/flow/java.rs     |  29 ++-
 .../src/analysis/ddsa_lib/js/ts_node.rs       | 105 +++++++++--
 .../src/analysis/ddsa_lib/ops.rs              |  19 +-
 .../src/analysis/ddsa_lib/runtime.rs          |  44 ++++-
 .../src/analysis/ddsa_lib/test_utils.rs       |   5 +
 .../src/analysis/tree_sitter.rs               | 146 +++++++++++++--
 .../src/tree_sitter_tree.rs                   |  51 ++++-
 15 files changed, 819 insertions(+), 81 deletions(-)

diff --git a/crates/cli/src/csv.rs b/crates/cli/src/csv.rs
index b3da79d4f..8b60c5a6a 100644
--- a/crates/cli/src/csv.rs
+++ b/crates/cli/src/csv.rs
@@ -97,4 +97,39 @@ mod tests {
         );
         assert_eq!(res_with_result, "filename,rule,category,severity,message,start_line,start_col,end_line,end_col\nfilename,myrule,performance,error,message,10,12,12,10\n");
     }
+
+    /// The CSV output faithfully serializes whatever col value is stored in the violation.
+    /// When the kernel produces UTF-16 columns (e.g. col 3 for a node after "🚀"), the CSV row
+    /// contains 3, not the raw byte col (5).
+    #[test]
+    fn test_export_csv_multibyte_col() {
+        let res = generate_csv_results(
+            &vec![RuleResult {
+                rule_name: "myrule".to_string(),
+                filename: "file.py".to_string(),
+                violations: vec![Violation {
+                    // Simulates a violation whose `col` was computed as UTF-16 (e.g. after 🚀).
+                    start: common::model::position::Position { line: 1, col: 3 },
+                    end: common::model::position::Position { line: 1, col: 16 },
+                    message: "multibyte".to_string(),
+                    severity: RuleSeverity::Error,
+                    category: RuleCategory::Security,
+                    fixes: vec![],
+                    taint_flow: None,
+                    is_suppressed: false,
+                }],
+                errors: vec![],
+                execution_error: None,
+                output: None,
+                execution_time_ms: 1,
+                query_node_time_ms: 0,
+                parsing_time_ms: 0,
+            }],
+            &vec![],
+        );
+        assert!(
+            res.contains(",1,3,1,16\n"),
+            "CSV should contain UTF-16 col 3; got: {res}"
+        );
+    }
 }
diff --git a/crates/cli/src/sarif/sarif_utils.rs b/crates/cli/src/sarif/sarif_utils.rs
index 22cf71f19..1bc844511 100644
--- a/crates/cli/src/sarif/sarif_utils.rs
+++ b/crates/cli/src/sarif/sarif_utils.rs
@@ -2231,4 +2231,88 @@ mod tests {
         let sarif_json = serde_json::to_value(sarif_report).unwrap();
         assert!(validate_data(&sarif_json));
     }
+
+    /// The SARIF `region.startColumn` / `region.endColumn` fields carry whatever value is in
+    /// `Position.col`.  When the kernel emits UTF-16 columns (as it now does), the SARIF output
+    /// automatically conforms to the SARIF v2.1 default encoding (UTF-16).
+    ///
+    /// This test constructs a violation whose col was computed as UTF-16 (e.g. after an emoji on
+    /// the same line, col 3) and asserts the SARIF JSON contains that value.
+    #[test]
+    fn sarif_region_carries_utf16_col() {
+        let rule = RuleBuilder::default()
+            .name("utf16-col-rule".to_string())
+            .description_base64(None)
+            .language(Language::Python)
+            .checksum("x".to_string())
+            .pattern(None)
+            .tree_sitter_query_base64(None)
+            .category(RuleCategory::Security)
+            .code_base64(String::new())
+            .short_description_base64(None)
+            .entity_checked(None)
+            .rule_type(RuleType::TreeSitterQuery)
+            .severity(RuleSeverity::Error)
+            .cwe(None)
+            .arguments(vec![])
+            .tests(vec![])
+            .is_testing(false)
+            .documentation_url(None)
+            .build()
+            .unwrap();
+        let sarif_rule = SarifRule::StaticAnalysis(Box::new(rule));
+
+        // UTF-16 col 3 = position of an identifier after 🚀 on the same line.
+        let violation = Violation {
+            start: Position::new(1, 3),
+            end: Position::new(1, 16),
+            message: "utf16 col test".to_string(),
+            severity: RuleSeverity::Error,
+            category: RuleCategory::Security,
+            fixes: vec![],
+            taint_flow: None,
+            is_suppressed: false,
+        };
+        let rule_result = RuleResultBuilder::default()
+            .rule_name("utf16-col-rule".to_string())
+            .filename("file.py".to_string())
+            .violations(vec![violation])
+            .errors(vec![])
+            .execution_error(None)
+            .output(None)
+            .execution_time_ms(0u128)
+            .parsing_time_ms(0u128)
+            .query_node_time_ms(0u128)
+            .build()
+            .unwrap();
+        let sarif_result = SarifRuleResult::try_from(rule_result).unwrap();
+
+        let directory = ".".to_string();
+        let sarif = generate_sarif_report(
+            &[sarif_rule],
+            &[sarif_result],
+            &directory,
+            SarifReportMetadata {
+                add_git_info: false,
+                debug: false,
+                config_digest: String::new(),
+                diff_aware_parameters: None,
+                execution_time_secs: 0,
+            },
+            &Default::default(),
+        )
+        .expect("generate sarif");
+
+        let sarif_json = serde_json::to_value(&sarif).unwrap();
+        let location =
+            &sarif_json["runs"][0]["results"][0]["locations"][0]["physicalLocation"]["region"];
+        assert_eq!(
+            location["startColumn"], 3,
+            "SARIF startColumn should be UTF-16 col 3"
+        );
+        assert_eq!(
+            location["endColumn"], 16,
+            "SARIF endColumn should be UTF-16 col 16"
+        );
+    }
 }
diff --git a/crates/common/src/model/position.rs b/crates/common/src/model/position.rs
index d7aa3d351..0d85c26db 100644
--- a/crates/common/src/model/position.rs
+++ b/crates/common/src/model/position.rs
@@ -6,6 +6,18 @@ use derive_builder::Builder;
 use serde::{Deserialize, Serialize};
 use std::fmt;
 
+/// A source-code position.
+///
+/// * `line` — 1-based line number.
+/// * `col` — 1-based **UTF-16 code-unit** column number for every position produced by the
+///   static-analysis kernel (tree-sitter path).  On ASCII-only lines, one UTF-16 code unit equals
+///   one byte, so the value is identical to what a byte-column would be.  For non-ASCII characters
+///   (CJK ideographs, emoji surrogate pairs, combining marks, etc.) the value reflects UTF-16
+///   semantics, which matches LSP / VS Code and the SARIF v2.1 default encoding.
+///
+///   The `get_position_in_string` helper (used by the secrets scanner) emits grapheme-based
+///   columns under a different contract; that path does **not** flow through `Position.col` in the
+///   kernel output.
 #[derive(Deserialize, Debug, Serialize, Clone, Copy, Builder, PartialEq, Eq, Hash)]
 pub struct Position {
     pub line: u32,
diff --git a/crates/common/src/utils/position_utils.rs b/crates/common/src/utils/position_utils.rs
index 4458abfca..6e85dfff0 100644
--- a/crates/common/src/utils/position_utils.rs
+++ b/crates/common/src/utils/position_utils.rs
@@ -2,6 +2,70 @@ use crate::model::position::Position;
 use bstr::BStr;
 use bstr::ByteSlice;
 
+/// Precomputed per-line index for fast repeated UTF-8 byte-column → UTF-16 code-unit column
+/// conversion.
+///
+/// Build once per source string with [`LineColumnIndex::new`], then call
+/// [`byte_col_to_utf16_col`] for every tree-sitter node on that source.
+pub struct LineColumnIndex<'a> {
+    source: &'a str,
+    /// Byte offset of the start of each line (0-indexed line number → byte offset of first byte
+    /// on that line).
+    line_starts: Vec<usize>,
+}
+
+impl<'a> LineColumnIndex<'a> {
+    /// Builds the index by scanning `source` for newline characters.
+    pub fn new(source: &'a str) -> Self {
+        let mut line_starts = vec![0usize];
+        for (i, b) in source.bytes().enumerate() {
+            // Intentionally split on `\n` only — this mirrors tree-sitter's line model exactly.
+            // Tree-sitter does not treat bare `\r` (old Mac OS 9) as a line terminator; for
+            // Windows `\r\n` files the `\r` is part of the column on the same line, matching
+            // tree-sitter's `Point.column` values. Using a broader splitter (e.g. `str::lines`)
+            // would diverge from tree-sitter and produce wrong UTF-16 columns.
+            if b == b'\n' {
+                line_starts.push(i + 1);
+            }
+        }
+        Self {
+            source,
+            line_starts,
+        }
+    }
+
+    /// Converts a tree-sitter 0-based `(row, byte_col)` point to a 1-based UTF-16 code-unit
+    /// column.
+    ///
+    /// **ASCII fast path**: when the line prefix up to `byte_col` is pure ASCII (all bytes
+    /// < 128), each byte is one UTF-16 code unit, so the result is simply `byte_col + 1`.
+    /// This path costs one slice-level `is_ascii()` check and is taken for the vast majority of
+    /// real-world source lines.
+    pub fn byte_col_to_utf16_col(&self, row: usize, byte_col: usize) -> u32 {
+        let line_start = self
+            .line_starts
+            .get(row)
+            .copied()
+            .unwrap_or(self.source.len());
+        // Slice from the line start to end-of-source (unbounded); the subsequent `prefix` slice
+        // clamps this to exactly `byte_col` bytes, so the trailing lines are never examined.
+        let source_from_line_start = &self.source.as_bytes()[line_start..];
+        // `byte_col` is the number of bytes from the line start to the position.
+        let prefix_len = byte_col.min(source_from_line_start.len());
+        let prefix = &source_from_line_start[..prefix_len];
+
+        // ASCII fast path: one byte == one UTF-16 code unit.
+        if prefix.is_ascii() {
+            return (byte_col + 1) as u32;
+        }
+
+        // Slow path: count UTF-16 code units emitted by each Unicode scalar.
+        let prefix_str = std::str::from_utf8(prefix).unwrap_or("");
+        let utf16_col: usize = prefix_str.chars().map(|c| c.len_utf16()).sum();
+        (utf16_col + 1) as u32
+    }
+}
+
 /// Get position of an offset in a code and return a [Position].
 pub fn get_position_in_string(content: &str, offset: usize) -> anyhow::Result<Position> {
     if offset >= content.len() {
@@ -137,4 +201,116 @@ mod tests {
             Position::new(3, 13)
         );
     }
+
+    // ── LineColumnIndex tests ─────────────────────────────────────────────────
+
+    #[test]
+    fn lci_ascii_fast_path() {
+        // Pure ASCII: UTF-16 col == byte col + 1 for every position.
+        let idx = LineColumnIndex::new("hello world\nfoo bar");
+        assert_eq!(idx.byte_col_to_utf16_col(0, 0), 1); // 'h' → col 1
+        assert_eq!(idx.byte_col_to_utf16_col(0, 5), 6); // ' ' → col 6
+        assert_eq!(idx.byte_col_to_utf16_col(1, 3), 4); // ' ' on line 2 → col 4
+    }
+
+    #[test]
+    fn lci_bmp_non_ascii() {
+        // 'é' is U+00E9: 2 UTF-8 bytes, 1 UTF-16 code unit.
+        // Source: "café\nend"  — bytes: c(0) a(1) f(2) é(3,4) \n(5)
+        let src = "caf\u{00E9}\nend";
+        let idx = LineColumnIndex::new(src);
+        // byte_col 3 (start of é) → prefix "caf" = 3 UTF-16 units → col 4
+        assert_eq!(idx.byte_col_to_utf16_col(0, 3), 4);
+        // byte_col 5 (past é, 3 + 2 bytes) → prefix "café" = 4 UTF-16 units → col 5
+        assert_eq!(idx.byte_col_to_utf16_col(0, 5), 5);
+        // line 1 is ASCII
+        assert_eq!(idx.byte_col_to_utf16_col(1, 3), 4);
+    }
+
+    #[test]
+    fn lci_supplementary_plane_emoji() {
+        // '🚀' is U+1F680: 4 UTF-8 bytes, 2 UTF-16 code units (surrogate pair).
+        // Source: "x🚀y"
+        // byte positions: x(0) 🚀(1,2,3,4) y(5)
+        let src = "x\u{1F680}y";
+        let idx = LineColumnIndex::new(src);
+        // byte_col 0 → col 1 (before 'x')
+        assert_eq!(idx.byte_col_to_utf16_col(0, 0), 1);
+        // byte_col 1 → prefix = "x" → 1 UTF-16 unit → col 2
+        assert_eq!(idx.byte_col_to_utf16_col(0, 1), 2);
+        // byte_col 5 → prefix = "x🚀" → x(1) + 🚀(2) = 3 UTF-16 units → col 4
+        assert_eq!(idx.byte_col_to_utf16_col(0, 5), 4);
+        // byte_col 6 → prefix = "x🚀y" → 3+1 = 4 UTF-16 units → col 5
+        assert_eq!(idx.byte_col_to_utf16_col(0, 6), 5);
+    }
+
+    #[test]
+    fn lci_cjk() {
+        // '日' is U+65E5: 3 UTF-8 bytes, 1 UTF-16 code unit.
+        // Source: "日本"
+        // bytes: 日(0,1,2) 本(3,4,5)
+        let src = "\u{65E5}\u{672C}"; // 日本
+        let idx = LineColumnIndex::new(src);
+        // byte_col 0 → col 1
+        assert_eq!(idx.byte_col_to_utf16_col(0, 0), 1);
+        // byte_col 3 → prefix = "日" → 1 UTF-16 unit → col 2
+        assert_eq!(idx.byte_col_to_utf16_col(0, 3), 2);
+        // byte_col 6 → prefix = "日本" → 2 UTF-16 units → col 3
+        assert_eq!(idx.byte_col_to_utf16_col(0, 6), 3);
+    }
+
+    #[test]
+    fn lci_combining_mark() {
+        // 'e' + U+0301 (combining acute accent): 2 codepoints, 2 UTF-16 code units, 1 grapheme.
+        // Source: "e\u{0301}x"
+        // bytes: e(0) \u{0301}(1,2) x(3)
+        let src = "e\u{0301}x";
+        let idx = LineColumnIndex::new(src);
+        // byte_col 0 → col 1
+        assert_eq!(idx.byte_col_to_utf16_col(0, 0), 1);
+        // byte_col 1 → prefix = "e" → 1 UTF-16 unit → col 2
+        assert_eq!(idx.byte_col_to_utf16_col(0, 1), 2);
+        // byte_col 3 → prefix = "e\u{0301}" → 2 UTF-16 units → col 3
+        assert_eq!(idx.byte_col_to_utf16_col(0, 3), 3);
+        // byte_col 4 → prefix = "e\u{0301}x" → 3 UTF-16 units → col 4
+        assert_eq!(idx.byte_col_to_utf16_col(0, 4), 4);
+    }
+
+    #[test]
+    fn lci_crlf_line_endings() {
+        // CRLF: tree-sitter counts \n as the line terminator and the col includes \r.
+        // Source: "ab\r\ncd"
+        // Line 0 bytes: a(0) b(1) \r(2) \n(3) → line_start[1] = 4
+        // Line 1 bytes: c(0 rel) d(1 rel)
+        let src = "ab\r\ncd";
+        let idx = LineColumnIndex::new(src);
+        // Line 0
+        assert_eq!(idx.byte_col_to_utf16_col(0, 0), 1);
+        assert_eq!(idx.byte_col_to_utf16_col(0, 2), 3); // past "ab" → 2+1=3
+                                                        // Line 1
+        assert_eq!(idx.byte_col_to_utf16_col(1, 0), 1);
+        assert_eq!(idx.byte_col_to_utf16_col(1, 2), 3);
+    }
+
+    #[test]
+    fn lci_eol_boundary() {
+        // byte_col == 0 on the start of any line → col 1.
+        let src = "abc\ndef\nghi";
+        let idx = LineColumnIndex::new(src);
+        assert_eq!(idx.byte_col_to_utf16_col(0, 0), 1);
+        assert_eq!(idx.byte_col_to_utf16_col(1, 0), 1);
+        assert_eq!(idx.byte_col_to_utf16_col(2, 0), 1);
+        // byte_col at end of "abc" (3 bytes) → col 4
+        assert_eq!(idx.byte_col_to_utf16_col(0, 3), 4);
+    }
+
+    #[test]
+    fn lci_empty_line() {
+        // An empty line: byte_col 0 → col 1.
+        let src = "\n\nfoo";
+        let idx = LineColumnIndex::new(src);
+        assert_eq!(idx.byte_col_to_utf16_col(0, 0), 1);
+        assert_eq!(idx.byte_col_to_utf16_col(1, 0), 1);
+        assert_eq!(idx.byte_col_to_utf16_col(2, 0), 1);
+    }
 }
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/query_match.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/query_match.rs
index c41e1e236..24054f3e7 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/query_match.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/query_match.rs
@@ -7,6 +7,7 @@ use crate::analysis::ddsa_lib::common::{Class, DDSAJsRuntimeError, NodeId, Stell
 use crate::analysis::ddsa_lib::js;
 use crate::analysis::ddsa_lib::v8_ds::MirroredVec;
 use crate::analysis::tree_sitter::QueryMatch;
+use common::utils::position_utils::LineColumnIndex;
 use deno_core::v8;
 use deno_core::v8::HandleScope;
 
@@ -33,6 +34,10 @@ impl QueryMatchBridge {
     /// Sets the bridge's data to the list of [`QueryMatch`]es, inserting tree-sitter nodes
     /// into the provided `TsNodeBridge`.
     ///
+    /// `idx` must be built from the same source string that produced the tree-sitter nodes in
+    /// `matches`; it is forwarded to [`TsNodeBridge::insert_capture`] so that `col` values
+    /// stored in v8 are 1-based UTF-16 code units.
+    ///
     /// NOTE: if the bridge had existing `QueryMatch`es, the tree-sitter nodes associated with them
     ///       will not be removed from the `TsNodeBridge`.
     pub fn set_data<'tree>(
@@ -40,6 +45,7 @@ impl QueryMatchBridge {
         scope: &mut HandleScope,
         matches: impl Into<Vec<QueryMatch<tree_sitter::Node<'tree>>>>,
         node_bridge: &mut TsNodeBridge,
+        idx: &LineColumnIndex<'_>,
     ) {
         let matches = matches.into();
         // Pass each node in via the bridge (assigning it an id), and use this id to transform
@@ -49,7 +55,7 @@ impl QueryMatchBridge {
             .map(|q_match| {
                 q_match
                     .into_iter()
-                    .map(|capture| node_bridge.insert_capture(scope, capture))
+                    .map(|capture| node_bridge.insert_capture(scope, capture, idx))
                     .collect::<Vec<_>>()
                     .into()
             })
@@ -89,6 +95,7 @@ mod tests {
     use crate::analysis::ddsa_lib::v8_ds::MirroredVec;
     use crate::analysis::tree_sitter::{get_tree, QueryMatch, TSCaptureContent, TSQuery};
     use crate::model::common::Language;
+    use common::utils::position_utils::LineColumnIndex;
     use deno_core::JsRuntime;
 
     fn setup_bridge() -> (JsRuntime, QueryMatchBridge, TsNodeBridge) {
@@ -125,9 +132,10 @@ const ghi = 'hello' + ' world';
 ";
         let query = TSQuery::try_new(&tree.language(), query).unwrap();
         let matches = query.cursor().matches(tree.root_node(), text, None);
+        let idx = LineColumnIndex::new(text);
         assert!(query_match_bridge.is_empty());
         assert!(ts_node_bridge.is_empty());
-        query_match_bridge.set_data(scope, matches.clone(), &mut ts_node_bridge);
+        query_match_bridge.set_data(scope, matches.clone(), &mut ts_node_bridge, &idx);
         assert_eq!(query_match_bridge.len(), 3);
         assert_eq!(ts_node_bridge.len(), 3);
 
@@ -139,16 +147,17 @@ const ghi = 'hello' + ' world';
         }
 
         // The `QueryMatchBridge` doesn't clear nodes from `TsNodeBridge` when values change.
-        query_match_bridge.set_data(scope, &matches[0..2], &mut ts_node_bridge);
+        query_match_bridge.set_data(scope, &matches[0..2], &mut ts_node_bridge, &idx);
         assert_eq!(query_match_bridge.len(), 2);
         assert_eq!(ts_node_bridge.len(), 3);
-        let text = "\
+        let text2 = "\
 // Arbitrary JavaScript that contains `identifier` CST nodes
 const alpha = 'bravo';
 ";
-        let tree = get_tree(text, &Language::JavaScript).unwrap();
-        let matches = query.cursor().matches(tree.root_node(), text, None);
-        query_match_bridge.set_data(scope, matches, &mut ts_node_bridge);
+        let tree2 = get_tree(text2, &Language::JavaScript).unwrap();
+        let matches2 = query.cursor().matches(tree2.root_node(), text2, None);
+        let idx2 = LineColumnIndex::new(text2);
+        query_match_bridge.set_data(scope, matches2, &mut ts_node_bridge, &idx2);
         assert_eq!(get_node_id_at_idx(&query_match_bridge, 0), 3);
         assert_eq!(ts_node_bridge.len(), 4);
     }
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/ts_node.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/ts_node.rs
index d2740856d..2f17fa07b 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/ts_node.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/ts_node.rs
@@ -7,6 +7,7 @@ use crate::analysis::ddsa_lib::js::TreeSitterNode;
 use crate::analysis::ddsa_lib::v8_ds::MirroredIndexMap;
 use crate::analysis::ddsa_lib::{js, RawTSNode};
 use crate::analysis::tree_sitter::{TSCaptureContent, TSQueryCapture};
+use common::utils::position_utils::LineColumnIndex;
 use deno_core::v8;
 use deno_core::v8::HandleScope;
 
@@ -39,7 +40,16 @@ impl TsNodeBridge {
     /// Inserts a tree-sitter node into the bridge, returning the `NodeId` it was assigned.
     ///
     /// If the node already existed in the bridge, the existing `NodeId` will be returned.
-    pub fn insert(&mut self, scope: &mut HandleScope, node: tree_sitter::Node) -> NodeId {
+    ///
+    /// `idx` must be built from the same source string that produced `node`; it is forwarded to
+    /// [`TreeSitterNode::from_ts_node_with_index`] so that `col` values are 1-based UTF-16
+    /// code units.
+    pub fn insert(
+        &mut self,
+        scope: &mut HandleScope,
+        node: tree_sitter::Node,
+        idx: &LineColumnIndex<'_>,
+    ) -> NodeId {
         let raw_ts_node = RawTSNode::new(node);
         if let Some((_, _, node_id)) = self.mirrored_im.get_full(&raw_ts_node) {
             *node_id
@@ -48,7 +58,7 @@ impl TsNodeBridge {
             // to the map's current length. Because we use this as the `NodeId`, we can pre-assign
             // this, knowing this invariant (index == NodeId) will hold.
             let node_id = self.mirrored_im.len() as NodeId;
-            let v8_node = self.build_v8_node(scope, node, node_id);
+            let v8_node = self.build_v8_node(scope, node, node_id, idx);
             self.mirrored_im
                 .insert_with(scope, raw_ts_node, node_id, |scope, _key, value| {
                     // In Rust, we map from `tree_sitter::Node` to `NodeId`.
@@ -76,14 +86,15 @@ impl TsNodeBridge {
             .map(|(raw_node, _)| raw_node)
     }
 
-    /// Serializes a [`tree_sitter::Node`] to v8.
+    /// Serializes a [`tree_sitter::Node`] to v8 using UTF-16 column values from `idx`.
     fn build_v8_node<'s>(
         &self,
         scope: &mut HandleScope<'s>,
         node: tree_sitter::Node,
         id: NodeId,
+        idx: &LineColumnIndex<'_>,
     ) -> v8::Local<'s, v8::Object> {
-        let ts_node = TreeSitterNode::<Instance>::from_ts_node(id, node);
+        let ts_node = TreeSitterNode::<Instance>::from_ts_node_with_index(id, node, idx);
         self.js_class.new_instance(scope, ts_node)
     }
 
@@ -108,24 +119,28 @@ impl TsNodeBridge {
         self.mirrored_im.as_local(scope)
     }
 
-    /// Inserts the nodes from a [`TSQueryCapture<tree_sitter::Node>`] into a v8 scope, consuming the `QueryCapture`.
-    /// Returns a transformed `QueryCapture` containing the ids of the inserted nodes.
+    /// Inserts the nodes from a [`TSQueryCapture<tree_sitter::Node>`] into a v8 scope, consuming
+    /// the `QueryCapture`. Returns a transformed `QueryCapture` containing the ids of the inserted
+    /// nodes.
+    ///
+    /// `idx` is forwarded to [`insert`](Self::insert) for UTF-16 column conversion.
     pub fn insert_capture(
         &mut self,
         scope: &mut HandleScope,
         capture: TSQueryCapture<tree_sitter::Node>,
+        idx: &LineColumnIndex<'_>,
     ) -> TSQueryCapture<NodeId> {
         TSQueryCapture::<NodeId> {
             name: capture.name,
             contents: match capture.contents {
                 TSCaptureContent::Single(node) => {
-                    let nid = self.insert(scope, node);
+                    let nid = self.insert(scope, node, idx);
                     TSCaptureContent::Single(nid)
                 }
                 TSCaptureContent::Multi(nodes) => {
                     let nids = nodes
                         .into_iter()
-                        .map(|node| self.insert(scope, node))
+                        .map(|node| self.insert(scope, node, idx))
                         .collect::<Vec<_>>();
                     TSCaptureContent::Multi(nids)
                 }
@@ -158,6 +173,7 @@ mod tests {
     };
     use crate::analysis::ddsa_lib::RawTSNode;
     use crate::model::common::Language;
+    use common::utils::position_utils::LineColumnIndex;
     use deno_core::v8;
     use deno_core::v8::HandleScope;
     use std::cell::RefCell;
@@ -205,22 +221,24 @@ mod tests {
         let scope = &mut runtime.handle_scope();
         let mut bridge = bridge.borrow_mut();
 
-        let tree = TsTree::new(r#"const val = foo(bar, baz);"#, Language::JavaScript);
+        let source = r#"const val = foo(bar, baz);"#;
+        let tree = TsTree::new(source, Language::JavaScript);
+        let idx = LineColumnIndex::new(source);
         let foo = tree.find_named_nodes(Some("foo"), None)[0];
         let bar = tree.find_named_nodes(Some("bar"), None)[0];
         let baz = tree.find_named_nodes(Some("baz"), None)[0];
 
         assert!(bridge.v8_get(scope, 0).is_none());
-        assert_eq!(bridge.insert(scope, foo), 0);
+        assert_eq!(bridge.insert(scope, foo, &idx), 0);
         let v8_tsn = bridge.v8_get(scope, 0).unwrap();
         assert!(ts_node_eq(scope, v8_tsn, foo));
         assert!(bridge.v8_get(scope, 1).is_none());
-        assert_eq!(bridge.insert(scope, bar), 1);
-        assert_eq!(bridge.insert(scope, baz), 2);
+        assert_eq!(bridge.insert(scope, bar, &idx), 1);
+        assert_eq!(bridge.insert(scope, baz, &idx), 2);
 
         bridge.clear(scope);
         assert!(bridge.is_empty());
-        assert_eq!(bridge.insert(scope, baz), 0);
+        assert_eq!(bridge.insert(scope, baz, &idx), 0);
         let v8_tsn = bridge.v8_get(scope, 0).unwrap();
         assert!(ts_node_eq(scope, v8_tsn, baz));
     }
@@ -232,13 +250,15 @@ mod tests {
         let scope = &mut runtime.handle_scope();
         let mut bridge = bridge.borrow_mut();
 
-        let tree = TsTree::new(r#"const val = foo(bar, baz);"#, Language::JavaScript);
+        let source = r#"const val = foo(bar, baz);"#;
+        let tree = TsTree::new(source, Language::JavaScript);
+        let idx = LineColumnIndex::new(source);
         let foo = tree.find_named_nodes(Some("foo"), None)[0];
         let bar = tree.find_named_nodes(Some("bar"), None)[0];
         let baz = tree.find_named_nodes(Some("baz"), None)[0];
 
         for node in [foo, bar, baz] {
-            bridge.insert(scope, node);
+            bridge.insert(scope, node, &idx);
         }
 
         let bar_raw = RawTSNode::new(bar);
@@ -252,16 +272,50 @@ mod tests {
         let scope = &mut runtime.handle_scope();
         let mut bridge = bridge.borrow_mut();
 
-        let tree = TsTree::new(r#"const val = foo(bar, baz);"#, Language::JavaScript);
+        let source = r#"const val = foo(bar, baz);"#;
+        let tree = TsTree::new(source, Language::JavaScript);
+        let idx = LineColumnIndex::new(source);
         let foo = tree.find_named_nodes(Some("foo"), None)[0];
 
-        assert_eq!(bridge.insert(scope, foo), 0);
+        assert_eq!(bridge.insert(scope, foo, &idx), 0);
         assert!(bridge.v8_get(scope, 0).is_some());
         assert!(bridge.v8_get(scope, 1).is_none());
-        assert_eq!(bridge.insert(scope, foo), 0);
+        assert_eq!(bridge.insert(scope, foo, &idx), 0);
         assert!(bridge.v8_get(scope, 1).is_none());
     }
 
+    /// Inserting a node from a multibyte source produces UTF-16 columns in the v8 object.
+    ///
+    /// Source: `"\u{1F680}"; abc = 1` — 🚀 is 4 UTF-8 bytes / 2 UTF-16 code units inside a string.
+    /// `abc` starts at byte 8 (after `"🚀"; `), and its UTF-16 col must account for the emoji.
+    ///
+    /// Byte layout: `"` `🚀`(4 bytes) `"` `;` ` ` = 8 bytes → `abc` at byte 8.
+    /// UTF-16 prefix: `"` + 🚀(2 units) + `"` + `;` + ` ` = 6 units → col 7.
+    #[test]
+    fn ts_node_bridge_multibyte_utf16_col() {
+        let (mut runtime, bridge) = setup_bridge();
+        let scope = &mut runtime.handle_scope();
+        let mut bridge = bridge.borrow_mut();
+
+        // Parse `"\u{1F680}"; abc = 1` as JavaScript.
+        let source = "\"\u{1F680}\"; abc = 1";
+        let tree = TsTree::new(source, Language::JavaScript);
+        let idx = LineColumnIndex::new(source);
+        let abc = tree.find_named_nodes(Some("abc"), Some("identifier"))[0];
+        // "🚀" (6 bytes: quote+4+quote) + "; " (2 bytes) = 8 bytes before `abc`.
+        assert_eq!(abc.start_position().column, 8, "byte col of `abc` is 8");
+
+        bridge.insert(scope, abc, &idx);
+        let v8_obj = bridge.v8_get(scope, 0).unwrap();
+
+        // Read `_startCol` from the v8 object.
+        let start_col = get_field::<v8::Integer>(v8_obj, "_startCol", scope, "integer")
+            .unwrap()
+            .value() as u32;
+        // UTF-16 units before abc: `"` (1) + 🚀 (2 surrogate units) + `"` (1) + `;` (1) + ` ` (1) = 6 → col 7.
+        assert_eq!(start_col, 7, "UTF-16 col of `abc` after 🚀 should be 7");
+    }
+
     /// The text that the node spans can be retrieved.
     #[test]
     fn get_node_text() {
@@ -271,6 +325,7 @@ const abc = 123;
 const def = 456;
 ";
         let tree = TsTree::new(file_contents, Language::JavaScript);
+        let idx = LineColumnIndex::new(file_contents);
         let file_contents = Arc::<str>::from(file_contents);
         let file_name = Arc::<str>::from("file_name.js");
 
@@ -284,8 +339,8 @@ const def = 456;
             .set_root_context(scope, &tree.tree(), &file_contents, &file_name);
         let node_0 = tree.find_named_nodes(Some("abc"), Some("identifier"))[0];
         let node_1 = tree.find_named_nodes(Some("456"), Some("number"))[0];
-        ts_node_bridge.borrow_mut().insert(scope, node_0);
-        ts_node_bridge.borrow_mut().insert(scope, node_1);
+        ts_node_bridge.borrow_mut().insert(scope, node_0, &idx);
+        ts_node_bridge.borrow_mut().insert(scope, node_1, &idx);
         let expected = [(0, "abc"), (1, "456")]
             .map(|(nid, text)| (format!("TS_NODES.get({}).text;", nid), text));
         for (code, text) in &expected {
@@ -311,6 +366,7 @@ const def = 456;
         let (mut runtime, ts_node_bridge) = setup_bridge();
         let file_contents = "const abc = 123;";
         let tree = TsTree::new(file_contents, Language::JavaScript);
+        let idx = LineColumnIndex::new(file_contents);
         let file_contents = Arc::<str>::from(file_contents);
         let file_name = Arc::<str>::from("file_name.js");
 
@@ -323,7 +379,7 @@ const def = 456;
             .borrow_mut()
             .set_root_context(scope, &tree.tree(), &file_contents, &file_name);
         let node_0 = tree.find_named_nodes(Some("abc"), Some("identifier"))[0];
-        ts_node_bridge.borrow_mut().insert(scope, node_0);
+        ts_node_bridge.borrow_mut().insert(scope, node_0, &idx);
 
         let code = "\
 const node = TS_NODES.get(0);
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph.rs
index 06926b951..82fce1ee3 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph.rs
@@ -5,6 +5,7 @@
 use crate::analysis::ddsa_lib::bridge::TsNodeBridge;
 use crate::analysis::ddsa_lib::common::{swallow_v8_error, v8_uint};
 use crate::analysis::ddsa_lib::test_utils::TsTree;
+use common::utils::position_utils::LineColumnIndex;
 use deno_core::v8;
 use graphviz_rust::dot_structures;
 use graphviz_rust::dot_structures::{Attribute, Stmt};
@@ -621,11 +622,17 @@ impl From<LocatedEdge<'_>> for dot_structures::Edge {
 
 impl<'a> LocatedNode<'a> {
     /// Constructs a new `LocatedNode` from a tree-sitter node.
-    pub fn new_cst(node: tree_sitter::Node, text: &'a str) -> LocatedNode<'a> {
+    ///
+    /// `source` is the full source text that was parsed to produce `node`; it is used via
+    /// [`LineColumnIndex`] to emit a 1-based UTF-16 code-unit column consistent with
+    /// [`TreeSitterNode::from_ts_node_with_index`].
+    pub fn new_cst(node: tree_sitter::Node, text: &'a str, source: &str) -> LocatedNode<'a> {
+        let idx = LineColumnIndex::new(source);
+        let start = node.start_position();
         Self::Cst {
             text,
-            line: node.start_position().row + 1,
-            col: node.start_position().column + 1,
+            line: start.row + 1,
+            col: idx.byte_col_to_utf16_col(start.row, start.column) as usize,
             cst_kind: node.kind(),
         }
     }
@@ -1195,4 +1202,62 @@ strict digraph TestPhi {
         let expected = graphviz_rust::parse(expected).unwrap();
         assert_eq!(collection.to_digraph(), expected);
     }
+
+    /// **Regression-prevention test**: `LocatedNode::new_cst` emits a 1-based UTF-16 column that
+    /// differs from the raw `tree_sitter::Point.column + 1` value for multibyte input.
+    ///
+    /// If a future change reintroduces a raw `tree_sitter::Point.column + 1` read in
+    /// `LocatedNode::new_cst`, this test will fail on the multibyte assertion.
+    #[test]
+    fn located_node_multibyte_col_is_utf16() {
+        use crate::analysis::ddsa_lib::js::flow::graph::LocatedNode;
+        use crate::analysis::ddsa_lib::test_utils::TsTree;
+        use crate::model::common::Language;
+        use common::utils::position_utils::LineColumnIndex;
+
+        // Source: "🚀var x = 1;"
+        // 🚀 = 4 UTF-8 bytes, 2 UTF-16 code units.
+        // `x` identifier starts at byte 8 (4 for 🚀, 4 for "var ").
+        // UTF-16 prefix: 🚀(2) + v(1) + a(1) + r(1) + ' '(1) = 6 units → col 7.
+        // Raw byte col of `x` = 8 → raw byte col + 1 = 9.
+        // (These differ because 🚀 = 4 bytes but 2 UTF-16 units.)
+        let source = "\u{1F680}var x = 1;";
+        let tree = TsTree::new(source, Language::JavaScript);
+
+        // Find the `x` identifier node.
+        let x_node = tree.find_named_nodes(Some("x"), Some("identifier"))[0];
+        let raw_byte_col_plus_one = x_node.start_position().column + 1;
+
+        // LocatedNode::new_cst must produce a UTF-16 col ≠ raw byte col + 1.
+        let located = LocatedNode::new_cst(x_node, tree.text(x_node), source);
+        let located_col = match located {
+            LocatedNode::Cst { col, .. } => col,
+            _ => panic!("expected Cst variant"),
+        };
+
+        // UTF-16 col should be smaller than raw byte col on multibyte input.
+        assert_ne!(
+            located_col, raw_byte_col_plus_one,
+            "LocatedNode col should be UTF-16, not raw byte col + 1 (raw={raw_byte_col_plus_one})"
+        );
+        // Sanity check: for this specific source, UTF-16 col must be 7.
+        // prefix "🚀var " = 2+1+1+1+1 = 6 UTF-16 units → col 7
+        assert_eq!(located_col, 7, "UTF-16 col of `x` after 🚀 should be 7");
+
+        // Additionally, verify that for an ASCII source the values agree (regression guard).
+        let ascii_source = "var x = 1;";
+        let ascii_tree = TsTree::new(ascii_source, Language::JavaScript);
+        let ascii_x = ascii_tree.find_named_nodes(Some("x"), Some("identifier"))[0];
+        let ascii_raw = ascii_x.start_position().column + 1;
+        let ascii_located = LocatedNode::new_cst(ascii_x, ascii_tree.text(ascii_x), ascii_source);
+        let ascii_col = match ascii_located {
+            LocatedNode::Cst { col, .. } => col,
+            _ => panic!("expected Cst variant"),
+        };
+        assert_eq!(
+            ascii_col, ascii_raw,
+            "ASCII: UTF-16 col should equal raw byte col + 1"
+        );
+        let _ = LineColumnIndex::new(ascii_source); // ensure import is used
+    }
 }
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph_test_utils.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph_test_utils.rs
index a107790f6..7eb613f8f 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph_test_utils.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph_test_utils.rs
@@ -81,7 +81,7 @@ pub fn cst_dot_digraph(
     let tree = ts_tree.tree();
     let candidates = TsTree::preorder_nodes(root_node.unwrap_or(tree.root_node()))
         .iter()
-        .map(|&node| LocatedNode::new_cst(node, ts_tree.text(node)))
+        .map(|&node| LocatedNode::new_cst(node, ts_tree.text(node), ts_tree.source()))
         .collect::<Vec<_>>();
 
     // The `String` in the tuple is the original ID of the vertex (as specified in the DOT).
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/java.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/java.rs
index 25d103d2b..8761aad60 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/java.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/java.rs
@@ -101,6 +101,7 @@ mod tests {
     use crate::model::common::Language;
     use crate::model::rule::{RuleCategory, RuleSeverity};
     use common::model::position;
+    use common::utils::position_utils::LineColumnIndex;
     use deno_core::v8;
     use std::sync::Arc;
 
@@ -118,13 +119,14 @@ mod tests {
     pub(crate) fn setup(source_text: &str) -> (JsRuntime, TsTree) {
         let mut rt = cfg_test_v8().new_runtime();
         let tree = TsTree::new(source_text, Language::Java);
-        let source_text = Arc::<str>::from(source_text);
+        let idx = LineColumnIndex::new(source_text);
+        let source_arc = Arc::<str>::from(source_text);
         let filename = Arc::<str>::from("test_doesnt_use_filename");
-        rt.bridge_context().borrow_mut().set_root_context(&mut rt.v8_handle_scope(), &tree.tree(), &source_text, &filename);
+        rt.bridge_context().borrow_mut().set_root_context(&mut rt.v8_handle_scope(), &tree.tree(), &source_arc, &filename);
         let tsn_bridge = rt.bridge_ts_node();
         let mut tsn_bridge = tsn_bridge.borrow_mut();
         for node in tree.find_named_nodes(None, None) {
-            tsn_bridge.insert(&mut rt.v8_handle_scope(), node);
+            tsn_bridge.insert(&mut rt.v8_handle_scope(), node, &idx);
         }
 
         (rt, tree)
@@ -413,11 +415,18 @@ serialized;
     #[test]
     fn violation_taint_flow_regions() {
         let v_converter = ViolationConverter::new();
-        fn position_eq(region: js::CodeRegion<Instance>, node: tree_sitter::Node) -> bool {
-            region.start_line == (node.start_position().row as u32) + 1
-                && region.start_col == (node.start_position().column as u32) + 1
-                && region.end_line == (node.end_position().row as u32) + 1
-                && region.end_col == (node.end_position().column as u32) + 1
+        fn position_eq(
+            region: js::CodeRegion<Instance>,
+            node: tree_sitter::Node,
+            source: &str,
+        ) -> bool {
+            let idx = LineColumnIndex::new(source);
+            let start = node.start_position();
+            let end = node.end_position();
+            region.start_line == (start.row as u32) + 1
+                && region.start_col == idx.byte_col_to_utf16_col(start.row, start.column)
+                && region.end_line == (end.row as u32) + 1
+                && region.end_col == idx.byte_col_to_utf16_col(end.row, end.column)
         }
 
         // language=java
@@ -465,7 +474,7 @@ v;
             )
             .unwrap();
 
-        assert!(position_eq(violation.base_region, expected_flow[0]));
+        assert!(position_eq(violation.base_region, expected_flow[0], code));
 
         let taint_flow_regions = violation.taint_flow_regions.unwrap();
         assert_eq!(taint_flow_regions.len(), expected_flow.len());
@@ -473,7 +482,7 @@ v;
             .iter()
             .zip(expected_flow)
             .for_each(|(&region, node)| {
-                assert!(position_eq(region, node));
+                assert!(position_eq(region, node, code));
             });
     }
 
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/ts_node.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/ts_node.rs
index 273133e44..b27858df8 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/ts_node.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/ts_node.rs
@@ -5,6 +5,7 @@
 use crate::analysis::ddsa_lib::common::{
     load_function, swallow_v8_error, v8_uint, Class, DDSAJsRuntimeError, Instance, NodeId,
 };
+use common::utils::position_utils::LineColumnIndex;
 use deno_core::v8;
 use deno_core::v8::HandleScope;
 use std::marker::PhantomData;
@@ -24,26 +25,32 @@ pub struct TreeSitterNode<T> {
 }
 
 impl<T> TreeSitterNode<T> {
-    /// Converts the provided [`tree_sitter::Node`] into a `TreeSitterNode`, assigning the provided id.
-    pub fn from_ts_node(id: NodeId, node: tree_sitter::Node) -> Self {
-        // NOTE: We normalize the 0-based `tree_sitter::Point` to be 1-based.
-        fn normalize_ts_point_num(num: usize) -> u32 {
-            num as u32 + 1
-        }
+    /// Converts the provided [`tree_sitter::Node`] into a `TreeSitterNode`, assigning the
+    /// provided id.
+    ///
+    /// Columns are produced as **1-based UTF-16 code-unit** values using `idx`, which must be
+    /// built from the same source string that was parsed to obtain `node`.  Use this constructor
+    /// everywhere the source string is available — it is the single source of truth for
+    /// `Position.col` semantics.
+    pub fn from_ts_node_with_index(
+        id: NodeId,
+        node: tree_sitter::Node,
+        idx: &LineColumnIndex<'_>,
+    ) -> Self {
         let tree_sitter::Point {
-            row: start_line,
-            column: start_col,
+            row: start_row,
+            column: start_byte_col,
         } = node.start_position();
         let tree_sitter::Point {
-            row: end_line,
-            column: end_col,
+            row: end_row,
+            column: end_byte_col,
         } = node.end_position();
         Self {
             id,
-            start_line: normalize_ts_point_num(start_line),
-            start_col: normalize_ts_point_num(start_col),
-            end_line: normalize_ts_point_num(end_line),
-            end_col: normalize_ts_point_num(end_col),
+            start_line: (start_row as u32) + 1,
+            start_col: idx.byte_col_to_utf16_col(start_row, start_byte_col),
+            end_line: (end_row as u32) + 1,
+            end_col: idx.byte_col_to_utf16_col(end_row, end_byte_col),
             node_type_id: node.kind_id(),
             _pd: PhantomData,
         }
@@ -104,6 +111,7 @@ mod tests {
     };
     use crate::analysis::tree_sitter::get_tree_sitter_language;
     use crate::model::common::Language;
+    use common::utils::position_utils::LineColumnIndex;
     use deno_core::v8;
     use std::marker::PhantomData;
     use tree_sitter::StreamingIterator;
@@ -152,7 +160,8 @@ mod tests {
         let mut parser = tree_sitter::Parser::new();
         parser.set_language(&lang).unwrap();
 
-        let tree = parser.parse(r#"foo(bar, baz);"#, None).unwrap();
+        let source = r#"foo(bar, baz);"#;
+        let tree = parser.parse(source, None).unwrap();
         let root = tree.root_node();
 
         assert_eq!(root.start_position().row, 0);
@@ -160,7 +169,8 @@ mod tests {
         assert_eq!(root.end_position().row, 0);
         assert_eq!(root.end_position().column, 14);
 
-        let tree_sitter_node = TreeSitterNode::<Instance>::from_ts_node(0, root);
+        let idx = LineColumnIndex::new(source);
+        let tree_sitter_node = TreeSitterNode::<Instance>::from_ts_node_with_index(0, root, &idx);
 
         assert_eq!(tree_sitter_node.start_line, 1);
         assert_eq!(tree_sitter_node.start_col, 1);
@@ -168,6 +178,66 @@ mod tests {
         assert_eq!(tree_sitter_node.end_col, 15);
     }
 
+    /// Mirrors [`ts_node_line_col_one_based`] but with multibyte content.
+    ///
+    /// Source: `"\u{1F680}"; foo(bar);`
+    ///   The emoji lives in a string literal so `foo` is a distinct identifier.
+    ///   Bytes before `foo`: `"` (1) + 🚀 (4) + `"` (1) + `;` (1) + ` ` (1) = 8 bytes.
+    ///   UTF-16 prefix:      `"` (1) + 🚀 (2) + `"` (1) + `;` (1) + ` ` (1) = 6 units → col 7.
+    #[test]
+    fn ts_node_line_col_multibyte() {
+        let lang = get_tree_sitter_language(&Language::JavaScript);
+        let mut parser = tree_sitter::Parser::new();
+        parser.set_language(&lang).unwrap();
+
+        // Source: `"\u{1F680}"; foo(bar);`
+        // 🚀 is placed inside a string literal so `foo` is clearly a separate identifier.
+        let source = "\"\u{1F680}\"; foo(bar);";
+        let tree = parser.parse(source, None).unwrap();
+
+        // The second statement is `foo(bar);`
+        let root = tree.root_node();
+        let call_node = root
+            .child(1) // second expression_statement
+            .unwrap()
+            .child(0) // call_expression
+            .unwrap();
+
+        // `foo` starts at byte 8 ("🚀" = 6 bytes + "; " = 2 bytes).
+        let fn_node = call_node.child_by_field_name("function").unwrap();
+        assert_eq!(fn_node.start_position().column, 8, "byte col of `foo` is 8");
+
+        let idx = LineColumnIndex::new(source);
+        let ts_node = TreeSitterNode::<Instance>::from_ts_node_with_index(0, fn_node, &idx);
+        assert_eq!(ts_node.start_line, 1);
+        // UTF-16 col 7: " (1) + 🚀 surrogate pair (2) + " (1) + ; (1) + space (1) = 6 → col 7.
+        assert_eq!(
+            ts_node.start_col, 7,
+            "UTF-16 col of `foo` should be 7 (emoji as string literal)"
+        );
+
+        // Also verify with a CJK prefix: `"日"; foo(bar);`
+        // 日 = 3 UTF-8 bytes, 1 UTF-16 unit.
+        // Bytes before foo: " (1) + 日 (3) + " (1) + ; (1) + space (1) = 7 bytes.
+        // UTF-16: " (1) + 日 (1) + " (1) + ; (1) + space (1) = 5 units → col 6.
+        let source_cjk = "\"\u{65E5}\"; foo(bar);";
+        let tree_cjk = parser.parse(source_cjk, None).unwrap();
+        let call_cjk = tree_cjk.root_node().child(1).unwrap().child(0).unwrap();
+        let fn_cjk = call_cjk.child_by_field_name("function").unwrap();
+        assert_eq!(
+            fn_cjk.start_position().column,
+            7,
+            "byte col of `foo` after 日 is 7"
+        );
+        let idx_cjk = LineColumnIndex::new(source_cjk);
+        let ts_node_cjk = TreeSitterNode::<Instance>::from_ts_node_with_index(1, fn_cjk, &idx_cjk);
+        // UTF-16 col 6: " (1) + 日 (1) + " (1) + ; (1) + space (1) = 5 → col 6.
+        assert_eq!(
+            ts_node_cjk.start_col, 6,
+            "UTF-16 col of `foo` after 日 should be 6"
+        );
+    }
+
     /// Tests that we use the [`tree_sitter::Node::kind_id`] instead of the [`tree_sitter::Node::grammar_id`], which
     /// has subtle differences.
     #[rustfmt::skip]
@@ -188,7 +258,8 @@ mod tests {
         // This should resolve to `assert_ne!("identifier", "property_identifier")`
         assert_ne!(ts_node.grammar_name(), ts_node.kind());
 
-        let tree_sitter_node = TreeSitterNode::<Instance>::from_ts_node(0, ts_node);
+        let idx = LineColumnIndex::new(text);
+        let tree_sitter_node = TreeSitterNode::<Instance>::from_ts_node_with_index(0, ts_node, &idx);
         assert_eq!(tree_sitter_node.node_type_id, ts_node.kind_id());
     }
 
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs
index 3fe23047c..be4de85f3 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs
@@ -5,6 +5,7 @@
 use crate::analysis::ddsa_lib;
 use crate::analysis::ddsa_lib::common::{v8_uint, NodeId};
 use crate::analysis::ddsa_lib::{bridge, runtime, RawTSNode};
+use common::utils::position_utils::LineColumnIndex;
 use deno_core::{op2, v8, OpState};
 use std::cell::RefCell;
 use std::rc::Rc;
@@ -104,6 +105,12 @@ pub fn op_ts_node_named_children<'s>(
         None
     } else {
         let array = v8::Array::new(scope, count as i32);
+        // ctx_bridge is only needed in this branch. Build the index while ctx_borrow is alive;
+        // ts_node_bridge and ctx_bridge are separate RefCells so the concurrent borrows are valid.
+        let ctx_bridge = state.borrow::<Rc<RefCell<bridge::ContextBridge>>>();
+        let ctx_borrow = ctx_bridge.borrow();
+        let source = ctx_borrow.ddsa_root_context().get_text().unwrap_or("");
+        let idx = LineColumnIndex::new(source);
         let mut bridge_ref = ts_node_bridge.borrow_mut();
 
         let mut cursor = ts_node.walk();
@@ -119,7 +126,7 @@ pub fn op_ts_node_named_children<'s>(
             }
             let child_node = cursor.node();
 
-            let nid = bridge_ref.insert(scope, child_node);
+            let nid = bridge_ref.insert(scope, child_node, &idx);
             let nid = v8_uint(scope, nid);
             let nid_index = i * 2;
             array.set_index(scope, nid_index as u32, nid.into());
@@ -152,14 +159,16 @@ pub fn op_ts_node_parent(
     let safe_raw_ts_node = OpSafeRawTSNode::from_tsn_bridge(&ts_node_bridge.borrow(), node_id)?;
     let ts_node = safe_raw_ts_node.to_node();
 
-    let ctx_bridge = ctx_bridge.borrow_mut();
-    let root_ctx = ctx_bridge.ddsa_root_context();
+    let ctx_borrow = ctx_bridge.borrow();
+    let root_ctx = ctx_borrow.ddsa_root_context();
     let safe_raw_parent =
         OpSafeRawTSNode::from_root_context(root_ctx, |ctx| ctx.get_ts_node_parent(ts_node))?;
     let parent_ts_node = safe_raw_parent.to_node();
 
+    let source = root_ctx.get_text().unwrap_or("");
+    let idx = LineColumnIndex::new(source);
     let mut bridge_ref = ts_node_bridge.borrow_mut();
-    let nid = bridge_ref.insert(scope, parent_ts_node);
+    let nid = bridge_ref.insert(scope, parent_ts_node, &idx);
     Some(nid)
 }
 
@@ -269,7 +278,7 @@ pub fn op_digraph_adjacency_list_to_dot(
                 let node_text = ts_node
                     .utf8_text(text.as_bytes())
                     .expect("bytes should be utf8 sequence");
-                Some(LocatedNode::new_cst(ts_node, node_text))
+                Some(LocatedNode::new_cst(ts_node, node_text, text))
             }
             VertexKind::Phi => Some(LocatedNode::new_phi(vid.internal_id())),
             VertexKind::Invalid => None,
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs
index 1c6a75a01..cd335650b 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs
@@ -15,6 +15,7 @@ use crate::analysis::ddsa_lib::resource_watchdog::V8ResourceWatchdog;
 use crate::model::common::Language;
 use crate::model::rule::RuleInternal;
 use crate::model::violation;
+use common::utils::position_utils::LineColumnIndex;
 use deno_core::v8;
 use deno_core::v8::HandleScope;
 use std::cell::RefCell;
@@ -263,10 +264,11 @@ impl JsRuntime {
 
             // Add any rule arguments
             ctx_bridge.set_rule_arguments(scope, rule_arguments);
-            // Push the query matches:
+            // Push the query matches, using UTF-16 column conversion for every node.
+            let idx = LineColumnIndex::new(source_text.as_ref());
             let mut ts_node_bridge = self.bridge_ts_node.borrow_mut();
             self.bridge_query_match
-                .set_data(scope, query_matches, &mut ts_node_bridge);
+                .set_data(scope, query_matches, &mut ts_node_bridge, &idx);
         }
 
         // We use a bridge to pull violations, so we can ignore the return value with a noop handler.
@@ -1446,6 +1448,44 @@ function visit(query, filename, code) {
         assert_eq!(*violation, expected);
     }
 
+    /// `stella_compat.js::getCode` correctly extracts an identifier that follows a multibyte
+    /// character on the same line.
+    ///
+    /// The source `"🚀protectedName = 1;"` has the emoji (4 UTF-8 bytes / 2 UTF-16 code units)
+    /// before the identifier.  With UTF-16 columns, `col - 1` correctly indexes into the JS
+    /// string via `substring`, so `getCodeForNode(node, code)` returns `"protectedName"`.
+    #[test]
+    fn stella_compat_getcode_multibyte() {
+        let mut rt = cfg_test_v8().new_runtime();
+        // 🚀 is placed in a string literal so that `protectedName` is a distinct identifier.
+        // Bytes before `protectedName`: " (1) + 🚀 (4) + " (1) + ; (1) + space (1) = 8 bytes.
+        // UTF-16 col: " (1) + 🚀 surrogate pair (2) + " (1) + ; (1) + space (1) = 6 units → col 7.
+        let text = "\"\u{1F680}\"; protectedName = 1;";
+        let filename = "some_filename.js";
+        let ts_query = r#"((identifier) @cap_name (#eq? @cap_name "protectedName"))"#;
+        let rule = r#"
+function visit(query, filename, code) {
+    const node = query.captures["cap_name"];
+    const nodeText = getCodeForNode(node, code);
+    const error = buildError(
+        node.start.line,
+        node.start.col,
+        node.end.line,
+        node.end.col,
+        nodeText
+    );
+    addError(error);
+}
+"#;
+        let violations =
+            shorthand_execute_rule_internal(&mut rt, text, filename, ts_query, rule, None).unwrap();
+        assert_eq!(violations.len(), 1);
+        let violation = violations.first().unwrap();
+        assert_eq!(violation.message, "protectedName");
+        // UTF-16 col 7: "(1) + 🚀 surrogate pair(2) + "(1) + ;(1) + space(1) = 6 units → col 7.
+        assert_eq!(violation.base_region.start_col, 7);
+    }
+
     /// Passing a `VisitArgFilenameCompat` or `VisitArgCodeCompat` instance directly into console.log
     /// returns the contents of the underlying string.
     #[test]
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/test_utils.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/test_utils.rs
index 821065f78..ac9d71db1 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/test_utils.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/test_utils.rs
@@ -395,6 +395,11 @@ impl TsTree {
         Arc::clone(&self.tree)
     }
 
+    /// Returns the full source text.
+    pub fn source(&self) -> &str {
+        &self.text
+    }
+
     /// Returns the text for the provided node.
     pub fn text(&self, node: tree_sitter::Node) -> &str {
         node.utf8_text(self.text.as_bytes()).unwrap()
diff --git a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs
index b9fd80ba2..4660a1dc1 100644
--- a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs
+++ b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs
@@ -1,6 +1,7 @@
 use crate::model::analysis::{MatchNode, MatchNodeContext, TreeSitterNode};
 use crate::model::common::Language;
 use common::model::position::Position;
+use common::utils::position_utils::LineColumnIndex;
 use indexmap::IndexMap;
 use std::collections::HashMap;
 use std::sync::Arc;
@@ -297,17 +298,20 @@ pub fn get_query_nodes(
 ) -> Vec<MatchNode> {
     let mut match_nodes: Vec<MatchNode> = vec![];
 
+    let idx = LineColumnIndex::new(code);
+
     for query_match in query.cursor().matches(tree.root_node(), code, None) {
         let mut captures: HashMap<String, TreeSitterNode> = HashMap::new();
         let mut captures_list: HashMap<String, Vec<TreeSitterNode>> = HashMap::new();
         for capture in query_match {
             let list = match capture.contents {
                 TSCaptureContent::Single(node) => {
-                    map_node(node).map(|n| vec![n]).unwrap_or_default()
-                }
-                TSCaptureContent::Multi(nodes) => {
-                    nodes.into_iter().filter_map(map_node).collect::<Vec<_>>()
+                    map_node(node, &idx).map(|n| vec![n]).unwrap_or_default()
                 }
+                TSCaptureContent::Multi(nodes) => nodes
+                    .into_iter()
+                    .filter_map(|n| map_node(n, &idx))
+                    .collect::<Vec<_>>(),
             };
             // All captures are inserted into `captures_list`. However, the prior implementation continually
             // called `insert` on the `captures` map, which ended up re-writing the value every time.
@@ -336,10 +340,15 @@ pub fn get_query_nodes(
 // map a node from the tree-sitter representation into our own internal representation
 // this is the representation that is passed to the JavaScript layer and how we represent
 // or expose the node to the end-user.
-pub fn map_node(node: tree_sitter::Node) -> Option<TreeSitterNode> {
+//
+// `idx` is a pre-built [`LineColumnIndex`] for the source that produced `node`; it is used to
+// convert tree-sitter's 0-based UTF-8 byte columns into 1-based UTF-16 code-unit columns so
+// that `Position.col` is consistent with LSP / VS Code / SARIF v2.1 semantics.
+pub fn map_node(node: tree_sitter::Node, idx: &LineColumnIndex<'_>) -> Option<TreeSitterNode> {
     fn map_node_internal(
         cursor: &mut tree_sitter::TreeCursor,
         only_named_node: bool,
+        idx: &LineColumnIndex<'_>,
     ) -> Option<TreeSitterNode> {
         // we do not map space, parenthesis and other non-named nodes if there
         // when `only_named_node` is true (which is `true` for children only).
@@ -352,7 +361,7 @@ pub fn map_node(node: tree_sitter::Node) -> Option<TreeSitterNode> {
         if cursor.goto_first_child() {
             loop {
                 // For the child, we only want to capture named nodes to avoid polluting the AST.
-                let maybe_child = map_node_internal(cursor, true);
+                let maybe_child = map_node_internal(cursor, true, idx);
                 if let Some(child) = maybe_child {
                     children.push(child);
                 }
@@ -363,16 +372,19 @@ pub fn map_node(node: tree_sitter::Node) -> Option<TreeSitterNode> {
             cursor.goto_parent();
         }
 
+        let start_point = cursor.node().range().start_point;
+        let end_point = cursor.node().range().end_point;
+
         // finally, build the return value.
         let ts_node = TreeSitterNode {
             ast_type: cursor.node().kind().to_string(),
             start: Position {
-                line: u32::try_from(cursor.node().range().start_point.row + 1).unwrap(),
-                col: u32::try_from(cursor.node().range().start_point.column + 1).unwrap(),
+                line: u32::try_from(start_point.row + 1).unwrap(),
+                col: idx.byte_col_to_utf16_col(start_point.row, start_point.column),
             },
             end: Position {
-                line: u32::try_from(cursor.node().range().end_point.row + 1).unwrap(),
-                col: u32::try_from(cursor.node().range().end_point.column + 1).unwrap(),
+                line: u32::try_from(end_point.row + 1).unwrap(),
+                col: idx.byte_col_to_utf16_col(end_point.row, end_point.column),
             },
             field_name: cursor.field_name().map(ToString::to_string),
             children,
@@ -385,7 +397,7 @@ pub fn map_node(node: tree_sitter::Node) -> Option<TreeSitterNode> {
 
     // Initially, we capture both un/named nodes to allow capturing unnamed node from
     // the tree-sitter query.
-    map_node_internal(&mut ts_cursor, false)
+    map_node_internal(&mut ts_cursor, false, idx)
 }
 
 #[cfg(test)]
@@ -413,7 +425,8 @@ def func():
    pass;"#;
         let t = get_tree(source_code, &Language::Python);
         assert!(t.is_some());
-        let tree_node = map_node(t.unwrap().root_node());
+        let idx = LineColumnIndex::new(source_code);
+        let tree_node = map_node(t.unwrap().root_node(), &idx);
         assert!(tree_node.is_some());
         let root = tree_node.unwrap();
         assert_eq!(2, root.children.len());
@@ -745,6 +758,115 @@ SELECT * FROM table WHERE column = 'value';
         assert!(query_node.captures.contains_key("classname"));
     }
 
+    /// `map_node` emits 1-based UTF-16 columns.  An emoji (U+1F680, 4 UTF-8 bytes, 2 UTF-16
+    /// code units) before a named identifier must shift subsequent col values accordingly.
+    ///
+    /// Source: `x = "🚀"; num = 5`
+    /// UTF-8 byte layout (0-indexed, line 0):
+    ///   x(0) ' '(1) =(2) ' '(3) "(4) 🚀(5..8) "(9) ;(10) ' '(11) n(12) u(13) m(14) ' '(15)
+    /// UTF-16 layout:
+    ///   x(1) ' '(2) =(3) ' '(4) "(5) 🚀(6,7) "(8) ;(9) ' '(10) n(11) ...
+    ///   → `num` starts at UTF-16 col 11 (0-based byte 11, but 🚀 takes 2 UTF-16 units → 11-2+1 = 10?)
+    ///
+    /// Let's compute carefully:
+    ///   bytes 0-10: `x = "🚀";` (11 bytes: x, ' ', =, ' ', ", 🚀×4, ", ;)
+    ///   byte 11 (space), byte 12..14 = 'n','u','m'
+    ///   UTF-16 prefix to byte 12: x(1),' '(1),=(1),' '(1),"(1),🚀(2),"(1),;(1),' '(1) = 10 units
+    ///   → num starts at UTF-16 col 11 (10 units + 1-based)
+    #[test]
+    fn test_map_node_multibyte_emoji() {
+        let source_code = "x = \"\u{1F680}\"; num = 5";
+        let tree = get_tree(source_code, &Language::Python).unwrap();
+        let idx = LineColumnIndex::new(source_code);
+        let root = map_node(tree.root_node(), &idx).unwrap();
+        // Find the `num` identifier node in the AST.
+        fn find_node<'a>(
+            node: &'a crate::model::analysis::TreeSitterNode,
+            ast_type: &str,
+            text: &str,
+        ) -> Option<&'a crate::model::analysis::TreeSitterNode> {
+            // Check children recursively.
+            for child in &node.children {
+                if child.ast_type == ast_type {
+                    return Some(child);
+                }
+                if let Some(found) = find_node(child, ast_type, text) {
+                    return Some(found);
+                }
+            }
+            None
+        }
+        // We parse "x = "🚀"; num = 5" as Python.
+        // The second assignment has `num` as the left side (an identifier).
+        // Byte 12 = 'n', UTF-16 col = 11.
+        let second_stmt = root.children.get(1).unwrap(); // second expression_statement
+        let assignment = second_stmt.children.get(0).unwrap();
+        // In Python AST, the left side of `num = 5` is an identifier.
+        let num_node = assignment
+            .children
+            .iter()
+            .find(|c| c.ast_type == "identifier")
+            .unwrap();
+        // Byte offset of 'n' in `x = "🚀"; num = 5`:
+        //   "x = \"" = 5 bytes, "🚀" = 4 bytes, "\"; " = 3 bytes = total 12 bytes before 'n'
+        // UTF-16 col: x(1),' '(1),=(1),' '(1),"(1),🚀_high(1),🚀_low(1),"(1),;(1),' '(1) = 10 → col 11
+        assert_eq!(num_node.start.col, 11, "UTF-16 col of `num` should be 11");
+        assert_eq!(num_node.start.line, 1);
+    }
+
+    /// `get_query_nodes` emits 1-based UTF-16 columns for CJK characters
+    /// (3 UTF-8 bytes, 1 UTF-16 code unit each).
+    ///
+    /// Source (Python): `日 = 1; end = 2`
+    /// 日 = 3 UTF-8 bytes, 1 UTF-16 unit.
+    /// `end` starts at byte offset 7 (`日`(3) + ' '(1) + '='(1) + ' '(1) + '1'(1) = 7),
+    /// but UTF-16 units before it: 日(1),' '(1),=(1),' '(1),1(1),;(1),' '(1) = 7 → col 8.
+    /// Since 日 is BMP (1 UTF-16 unit = 1 byte col unit at the codepoint level, but 3 UTF-8
+    /// bytes), we have: 3 bytes - 1 UTF-16 = 2 byte savings on that char, so byte 7 → UTF-16 col
+    /// = 7 - 2 + 1 = 6.  Let's recompute step by step:
+    ///   日(3 bytes → 1 UTF-16), ' '(1→1), =(1→1), ' '(1→1), 1(1→1), ;(1→1), ' '(1→1) = 7 UTF-16 units → col 8
+    #[test]
+    fn test_get_query_nodes_cjk() {
+        // Python: assign 日 = 1, then end = 2
+        let code = "\u{65E5} = 1; end = 2"; // 日 = 1; end = 2
+        let q = "(identifier) @id";
+        let tree = get_tree(code, &Language::Python).unwrap();
+        let query = get_query(q, &Language::Python).expect("query defined");
+        let nodes = get_query_nodes(
+            &tree,
+            &query,
+            "f.py",
+            code,
+            &std::collections::HashMap::new(),
+        );
+
+        // 日 is at byte 0, UTF-16 col 1 (ASCII fast path applies to byte 0).
+        // But 日 itself is an identifier. Its start byte col is 0 → UTF-16 col 1.
+        let dai_col = nodes
+            .iter()
+            .flat_map(|mn| mn.captures.values())
+            .find(|n| n.start.col == 1)
+            .map(|n| n.start.col)
+            .unwrap_or(0);
+        assert_eq!(dai_col, 1, "日 starts at UTF-16 col 1");
+
+        // `end` starts at byte 11 in the source:
+        //   日(3) + ' '(1) + '='(1) + ' '(1) + '1'(1) + ';'(1) + ' '(1) = 9 bytes → byte 9
+        //   Wait, let me recount: 日=3, ' '=1 → 4, '='=1 → 5, ' '=1 → 6, '1'=1 → 7, ';'=1 → 8, ' '=1 → 9
+        //   So 'e' in 'end' is at byte 9.
+        //   UTF-16 prefix to byte 9: 日(1),' '(1),=(1),' '(1),1(1),;(1),' '(1) = 7 units → col 8.
+        let end_col = nodes
+            .iter()
+            .flat_map(|mn| mn.captures.values())
+            .find(|n| {
+                // The `end` identifier node is at col > 1.
+                n.start.col > 1
+            })
+            .map(|n| n.start.col)
+            .unwrap_or(0);
+        assert_eq!(end_col, 8, "UTF-16 col of `end` after CJK char should be 8");
+    }
+
     #[test]
     fn ts_query_cursor_matches_timeout() {
         let timeout = Duration::from_millis(500);
diff --git a/crates/static-analysis-server/src/tree_sitter_tree.rs b/crates/static-analysis-server/src/tree_sitter_tree.rs
index 1295c9475..905d6c8bc 100644
--- a/crates/static-analysis-server/src/tree_sitter_tree.rs
+++ b/crates/static-analysis-server/src/tree_sitter_tree.rs
@@ -2,6 +2,7 @@ use crate::constants::{ERROR_CODE_NOT_BASE64, ERROR_CODE_NO_ROOT_NODE};
 use crate::model::tree_sitter_tree_node::ServerTreeSitterNode;
 use crate::model::tree_sitter_tree_request::TreeSitterRequest;
 use crate::model::tree_sitter_tree_response::TreeSitterResponse;
+use common::utils::position_utils::LineColumnIndex;
 use kernel::analysis::tree_sitter::{get_tree, map_node};
 use kernel::utils::decode_base64_string;
 
@@ -38,9 +39,10 @@ pub fn process_tree_sitter_tree_request(request: TreeSitterRequest) -> TreeSitte
         return no_root_node;
     }
 
-    if let Some(result) =
-        result.map(|tree| map_node(tree.root_node()).map(ServerTreeSitterNode::from))
-    {
+    if let Some(result) = result.map(|tree| {
+        let idx = LineColumnIndex::new(&decoded);
+        map_node(tree.root_node(), &idx).map(ServerTreeSitterNode::from)
+    }) {
         tracing::info!("Successfully completed tree-sitter AST generation");
         TreeSitterResponse {
             result,
@@ -71,6 +73,49 @@ mod tests {
         assert_eq!("module", response.result.unwrap().ast_type);
     }
 
+    /// `process_tree_sitter_tree_request` emits 1-based UTF-16 columns for non-ASCII content.
+    ///
+    /// Source: `x = "🚀"; num = 5`
+    /// The emoji (🚀, U+1F680) is 4 UTF-8 bytes / 2 UTF-16 code units.  `num` starts at byte 12,
+    /// so its UTF-16 col = 10 (units before it) + 1 = 11.
+    #[test]
+    fn test_process_tree_sitter_tree_request_multibyte() {
+        // Source: `x = "🚀"; num = 5`
+        // printf 'x = "\xF0\x9F\x9A\x80"; num = 5' | base64
+        let request = TreeSitterRequest {
+            code_base64: "eCA9ICLwn5qAIjsgbnVtID0gNQ==".to_string(),
+            file_encoding: "utf-8".to_string(),
+            language: Language::Python,
+        };
+        let response = process_tree_sitter_tree_request(request);
+        assert!(response.errors.is_empty());
+        let root = response.result.unwrap();
+
+        /// Depth-first search for a node whose `start.col` matches `expected_col`.
+        fn find_col(
+            node: &crate::model::tree_sitter_tree_node::ServerTreeSitterNode,
+            expected_col: u32,
+        ) -> bool {
+            if node.start.col == expected_col {
+                return true;
+            }
+            node.children.iter().any(|c| find_col(c, expected_col))
+        }
+
+        // `num` starts at byte 12 in `x = "🚀"; num = 5`.
+        // UTF-16 units before it: x(1) ' '(1) =(1) ' '(1) "(1) 🚀(2) "(1) ;(1) ' '(1) = 10 → col 11.
+        assert!(
+            find_col(&root, 11),
+            "expected a node with UTF-16 start.col == 11 (position of `num` after 🚀)"
+        );
+
+        // Ensure that no node reports col 13 (what the raw byte col + 1 would give for `num`).
+        assert!(
+            !find_col(&root, 13),
+            "no node should report raw byte col 13 — that would indicate the UTF-16 fix is not applied"
+        );
+    }
+
     #[test]
     fn test_process_tree_sitter_invalid_base64() {
         let request = TreeSitterRequest {

From 05755f2d1a5c73c411dd52adb5a436070c44aee9 Mon Sep 17 00:00:00 2001
From: Roberto Huertas <roberto.huertas@datadoghq.com>
Date: Sun, 17 May 2026 11:52:24 +0200
Subject: [PATCH 02/12] refactor(tree_sitter): remove unused `find_node`
 function

- Deleted the `find_node` function from `tree_sitter.rs` as it was not utilized in the current implementation. This cleanup helps streamline the code and improve maintainability.
---
 .../src/analysis/tree_sitter.rs                 | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs
index 4660a1dc1..b0be02e1a 100644
--- a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs
+++ b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs
@@ -779,23 +779,6 @@ SELECT * FROM table WHERE column = 'value';
         let tree = get_tree(source_code, &Language::Python).unwrap();
         let idx = LineColumnIndex::new(source_code);
         let root = map_node(tree.root_node(), &idx).unwrap();
-        // Find the `num` identifier node in the AST.
-        fn find_node<'a>(
-            node: &'a crate::model::analysis::TreeSitterNode,
-            ast_type: &str,
-            text: &str,
-        ) -> Option<&'a crate::model::analysis::TreeSitterNode> {
-            // Check children recursively.
-            for child in &node.children {
-                if child.ast_type == ast_type {
-                    return Some(child);
-                }
-                if let Some(found) = find_node(child, ast_type, text) {
-                    return Some(found);
-                }
-            }
-            None
-        }
         // We parse "x = "🚀"; num = 5" as Python.
         // The second assignment has `num` as the left side (an identifier).
         // Byte 12 = 'n', UTF-16 col = 11.

From 76571835f5efd68d91f31f334c1238836bb75364 Mon Sep 17 00:00:00 2001
From: Roberto Huertas <roberto.huertas@datadoghq.com>
Date: Sun, 17 May 2026 12:24:58 +0200
Subject: [PATCH 03/12] perf(ddsa): cache LineColumnIndex line-starts in
 RootContext
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Added `LineColumnIndex::compute_line_starts` (extracted from `new`) and
  `LineColumnIndex::from_parts` to allow constructing an index from a
  pre-computed `Vec<usize>` without re-scanning the source.
- Added a `line_starts: Vec<usize>` field to `RootContext`, populated in
  `set_text` so the scan runs once per file rather than once per op call.
- Added `RootContext::line_column_index()` which builds a `LineColumnIndex`
  from the cached offsets in O(1) (a Vec clone, not a source scan).
- Updated `op_ts_node_named_children` and `op_ts_node_parent` to use the
  cached index, eliminating O(source_len × visited_nodes) behavior on deep
  tree walks or large files.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 crates/common/src/utils/position_utils.rs     | 37 ++++++++++++++-----
 .../src/analysis/ddsa_lib/context/root.rs     | 25 +++++++++++++
 .../src/analysis/ddsa_lib/ops.rs              | 15 ++++++--
 3 files changed, 64 insertions(+), 13 deletions(-)

diff --git a/crates/common/src/utils/position_utils.rs b/crates/common/src/utils/position_utils.rs
index 6e85dfff0..853431f57 100644
--- a/crates/common/src/utils/position_utils.rs
+++ b/crates/common/src/utils/position_utils.rs
@@ -17,21 +17,40 @@ pub struct LineColumnIndex<'a> {
 impl<'a> LineColumnIndex<'a> {
     /// Builds the index by scanning `source` for newline characters.
     pub fn new(source: &'a str) -> Self {
+        let line_starts = Self::compute_line_starts(source);
+        Self {
+            source,
+            line_starts,
+        }
+    }
+
+    /// Creates a [`LineColumnIndex`] from a **pre-computed** `line_starts` vector.
+    ///
+    /// Use this when `line_starts` has already been built by [`compute_line_starts`] and cached
+    /// (e.g. in `RootContext`) to avoid re-scanning the source on every call.
+    /// `line_starts` must have been computed from the same `source` string.
+    pub fn from_parts(source: &'a str, line_starts: Vec<usize>) -> Self {
+        Self {
+            source,
+            line_starts,
+        }
+    }
+
+    /// Scans `source` and returns the byte offset of the start of each line.
+    ///
+    /// Intentionally splits on `\n` only — this mirrors tree-sitter's line model exactly.
+    /// Tree-sitter does not treat bare `\r` (old Mac OS 9) as a line terminator; for
+    /// Windows `\r\n` files the `\r` is part of the column on the same line, matching
+    /// tree-sitter's `Point.column` values. Using a broader splitter (e.g. `str::lines`)
+    /// would diverge from tree-sitter and produce wrong UTF-16 columns.
+    pub fn compute_line_starts(source: &str) -> Vec<usize> {
         let mut line_starts = vec![0usize];
         for (i, b) in source.bytes().enumerate() {
-            // Intentionally split on `\n` only — this mirrors tree-sitter's line model exactly.
-            // Tree-sitter does not treat bare `\r` (old Mac OS 9) as a line terminator; for
-            // Windows `\r\n` files the `\r` is part of the column on the same line, matching
-            // tree-sitter's `Point.column` values. Using a broader splitter (e.g. `str::lines`)
-            // would diverge from tree-sitter and produce wrong UTF-16 columns.
             if b == b'\n' {
                 line_starts.push(i + 1);
             }
         }
-        Self {
-            source,
-            line_starts,
-        }
+        line_starts
     }
 
     /// Converts a tree-sitter 0-based `(row, byte_col)` point to a 1-based UTF-16 code-unit
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/context/root.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/context/root.rs
index 9e50d29f5..874570cb0 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/context/root.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/context/root.rs
@@ -3,6 +3,7 @@
 // Copyright 2024 Datadog, Inc.
 
 use crate::analysis::ddsa_lib::RawTSNode;
+use common::utils::position_utils::LineColumnIndex;
 use std::cell::RefCell;
 use std::collections::HashMap;
 use std::sync::Arc;
@@ -17,6 +18,11 @@ pub struct RootContext {
     tree: Option<Arc<tree_sitter::Tree>>,
     /// The source string that was parsed by tree-sitter to generate `tree`.
     tree_text: Option<Arc<str>>,
+    /// Pre-computed byte offset of the start of each line in `tree_text`.
+    ///
+    /// Built once in [`set_text`] and reused by [`line_column_index`] to avoid re-scanning the
+    /// source on every deno op call (e.g. `namedChildren`, `parent`).
+    line_starts: Vec<usize>,
     /// A filename associated with a rule execution.
     filename: Option<Arc<str>>,
 
@@ -45,10 +51,29 @@ impl RootContext {
 
     /// Assigns the provided text string to the context. If an existing text string was assigned, it
     /// will be returned as `Some(text)`.
+    ///
+    /// Also rebuilds the internal [`line_starts`](Self::line_column_index) cache so that subsequent
+    /// calls to [`line_column_index`](Self::line_column_index) are O(1).
     pub fn set_text(&mut self, text: Arc<str>) -> Option<Arc<str>> {
+        self.line_starts = LineColumnIndex::compute_line_starts(&text);
         Option::replace(&mut self.tree_text, text)
     }
 
+    /// Returns a [`LineColumnIndex`] built from the cached line-start offsets.
+    ///
+    /// Construction is O(1) — the line-start offsets are pre-computed in [`set_text`] and stored
+    /// as a `Vec<usize>`. This avoids the O(source_len) scan that `LineColumnIndex::new` would
+    /// perform on every deno op call.
+    ///
+    /// Returns `None` when no source text has been assigned yet.
+    pub fn line_column_index(&self) -> Option<LineColumnIndex<'_>> {
+        let source = self.tree_text.as_deref()?;
+        Some(LineColumnIndex::from_parts(
+            source,
+            self.line_starts.clone(),
+        ))
+    }
+
     /// Returns a reference to the underlying [`tree_sitter::Tree`], if it exists.
     pub fn get_tree(&self) -> Option<&Arc<tree_sitter::Tree>> {
         self.tree.as_ref()
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs
index be4de85f3..94aa503e8 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs
@@ -109,8 +109,12 @@ pub fn op_ts_node_named_children<'s>(
         // ts_node_bridge and ctx_bridge are separate RefCells so the concurrent borrows are valid.
         let ctx_bridge = state.borrow::<Rc<RefCell<bridge::ContextBridge>>>();
         let ctx_borrow = ctx_bridge.borrow();
-        let source = ctx_borrow.ddsa_root_context().get_text().unwrap_or("");
-        let idx = LineColumnIndex::new(source);
+        // Use the pre-computed line_starts cached in RootContext (O(1)) rather than
+        // rescanning the source on every namedChildren call.
+        let idx = ctx_borrow
+            .ddsa_root_context()
+            .line_column_index()
+            .unwrap_or_else(|| LineColumnIndex::new(""));
         let mut bridge_ref = ts_node_bridge.borrow_mut();
 
         let mut cursor = ts_node.walk();
@@ -165,8 +169,11 @@ pub fn op_ts_node_parent(
         OpSafeRawTSNode::from_root_context(root_ctx, |ctx| ctx.get_ts_node_parent(ts_node))?;
     let parent_ts_node = safe_raw_parent.to_node();
 
-    let source = root_ctx.get_text().unwrap_or("");
-    let idx = LineColumnIndex::new(source);
+    // Use the pre-computed line_starts cached in RootContext (O(1)) rather than
+    // rescanning the source on every parent call.
+    let idx = root_ctx
+        .line_column_index()
+        .unwrap_or_else(|| LineColumnIndex::new(""));
     let mut bridge_ref = ts_node_bridge.borrow_mut();
     let nid = bridge_ref.insert(scope, parent_ts_node, &idx);
     Some(nid)

From a719e85877a376f6a3d78ef7dfcaa5269f9c699f Mon Sep 17 00:00:00 2001
From: Roberto Huertas <roberto.huertas@datadoghq.com>
Date: Sun, 17 May 2026 12:27:30 +0200
Subject: [PATCH 04/12] perf(ddsa): refactor LocatedNode::new_cst to accept
 &LineColumnIndex
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The old signature built a new LineColumnIndex (O(source_len) scan) on every
node, making op_digraph_adjacency_list_to_dot O(source_len × CST_vertices).

Change new_cst to accept &LineColumnIndex<'_> so callers own the index
lifetime and can build it once before any loop:
- ops.rs (op_digraph_adjacency_list_to_dot): build once before the closure,
  re-using the RootContext-cached index when available.
- graph_test_utils.rs (cst_dot_digraph): build once before the iterator.
- graph.rs (located_node_multibyte_col_is_utf16 test): pass &idx directly.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../src/analysis/ddsa_lib/js/flow/graph.rs    | 20 +++++++++++--------
 .../ddsa_lib/js/flow/graph_test_utils.rs      |  5 ++++-
 .../src/analysis/ddsa_lib/ops.rs              | 10 +++++++---
 3 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph.rs
index 82fce1ee3..72c925800 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph.rs
@@ -623,11 +623,14 @@ impl From<LocatedEdge<'_>> for dot_structures::Edge {
 impl<'a> LocatedNode<'a> {
     /// Constructs a new `LocatedNode` from a tree-sitter node.
     ///
-    /// `source` is the full source text that was parsed to produce `node`; it is used via
-    /// [`LineColumnIndex`] to emit a 1-based UTF-16 code-unit column consistent with
-    /// [`TreeSitterNode::from_ts_node_with_index`].
-    pub fn new_cst(node: tree_sitter::Node, text: &'a str, source: &str) -> LocatedNode<'a> {
-        let idx = LineColumnIndex::new(source);
+    /// `idx` must be a [`LineColumnIndex`] built from the same source text that was parsed to
+    /// produce `node`. Pass a single index built at the call site — do **not** construct one
+    /// per-node, as that would scan the entire source on every call.
+    pub fn new_cst(
+        node: tree_sitter::Node,
+        text: &'a str,
+        idx: &LineColumnIndex<'_>,
+    ) -> LocatedNode<'a> {
         let start = node.start_position();
         Self::Cst {
             text,
@@ -1229,7 +1232,8 @@ strict digraph TestPhi {
         let raw_byte_col_plus_one = x_node.start_position().column + 1;
 
         // LocatedNode::new_cst must produce a UTF-16 col ≠ raw byte col + 1.
-        let located = LocatedNode::new_cst(x_node, tree.text(x_node), source);
+        let idx = LineColumnIndex::new(source);
+        let located = LocatedNode::new_cst(x_node, tree.text(x_node), &idx);
         let located_col = match located {
             LocatedNode::Cst { col, .. } => col,
             _ => panic!("expected Cst variant"),
@@ -1249,7 +1253,8 @@ strict digraph TestPhi {
         let ascii_tree = TsTree::new(ascii_source, Language::JavaScript);
         let ascii_x = ascii_tree.find_named_nodes(Some("x"), Some("identifier"))[0];
         let ascii_raw = ascii_x.start_position().column + 1;
-        let ascii_located = LocatedNode::new_cst(ascii_x, ascii_tree.text(ascii_x), ascii_source);
+        let ascii_idx = LineColumnIndex::new(ascii_source);
+        let ascii_located = LocatedNode::new_cst(ascii_x, ascii_tree.text(ascii_x), &ascii_idx);
         let ascii_col = match ascii_located {
             LocatedNode::Cst { col, .. } => col,
             _ => panic!("expected Cst variant"),
@@ -1258,6 +1263,5 @@ strict digraph TestPhi {
             ascii_col, ascii_raw,
             "ASCII: UTF-16 col should equal raw byte col + 1"
         );
-        let _ = LineColumnIndex::new(ascii_source); // ensure import is used
     }
 }
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph_test_utils.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph_test_utils.rs
index 7eb613f8f..7d5c4da12 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph_test_utils.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph_test_utils.rs
@@ -12,6 +12,7 @@ use crate::analysis::ddsa_lib::js::flow::graph::{
     VertexId, VertexKind, COL, CST_KIND, KIND, LINE, TEXT, V_KIND,
 };
 use crate::analysis::ddsa_lib::test_utils::TsTree;
+use common::utils::position_utils::LineColumnIndex;
 use deno_core::v8;
 use graphviz_rust::dot_structures;
 
@@ -79,9 +80,11 @@ pub fn cst_dot_digraph(
     let stmts = digraph_stmts(&graph);
 
     let tree = ts_tree.tree();
+    // Build the index once before the per-node iterator to avoid O(source_len × nodes).
+    let lc_idx = LineColumnIndex::new(ts_tree.source());
     let candidates = TsTree::preorder_nodes(root_node.unwrap_or(tree.root_node()))
         .iter()
-        .map(|&node| LocatedNode::new_cst(node, ts_tree.text(node), ts_tree.source()))
+        .map(|&node| LocatedNode::new_cst(node, ts_tree.text(node), &lc_idx))
         .collect::<Vec<_>>();
 
     // The `String` in the tuple is the original ID of the vertex (as specified in the DOT).
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs
index 94aa503e8..8986e6280 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs
@@ -264,10 +264,14 @@ pub fn op_digraph_adjacency_list_to_dot(
     let tsn_bridge = tsn_bridge.borrow();
     let ctx_bridge = state.borrow::<Rc<RefCell<bridge::ContextBridge>>>();
     let ctx_bridge = ctx_bridge.borrow();
-    let text = ctx_bridge
-        .ddsa_root_context()
+    let root_ctx = ctx_bridge.ddsa_root_context();
+    let text = root_ctx
         .get_text()
         .expect("tree text should always be `Some` during rule execution");
+    // Build the index once before the per-vertex loop; use the cached line_starts when available.
+    let lc_idx = root_ctx
+        .line_column_index()
+        .unwrap_or_else(|| LineColumnIndex::new(text));
 
     // Transformation:
     // If `VertexKind::CST`: constructs a dot node from metadata from the `TsNodeBridge` and `RootContext`.
@@ -285,7 +289,7 @@ pub fn op_digraph_adjacency_list_to_dot(
                 let node_text = ts_node
                     .utf8_text(text.as_bytes())
                     .expect("bytes should be utf8 sequence");
-                Some(LocatedNode::new_cst(ts_node, node_text, text))
+                Some(LocatedNode::new_cst(ts_node, node_text, &lc_idx))
             }
             VertexKind::Phi => Some(LocatedNode::new_phi(vid.internal_id())),
             VertexKind::Invalid => None,

From ef4527b880192b61fffe05638bedcf8c050c82b3 Mon Sep 17 00:00:00 2001
From: Roberto Huertas <roberto.huertas@datadoghq.com>
Date: Sun, 17 May 2026 12:28:27 +0200
Subject: [PATCH 05/12] docs(test): rewrite test_map_node_multibyte_emoji
 comment to one clean calculation

The old comment had a tangent line ending in "10?" that contradicted the
actual expected value of 11. Replace with a single, step-by-step UTF-16
prefix sum that maps directly to the assert_eq! below it.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../src/analysis/tree_sitter.rs                     | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs
index b0be02e1a..3d85cf4bb 100644
--- a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs
+++ b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs
@@ -763,16 +763,11 @@ SELECT * FROM table WHERE column = 'value';
     ///
     /// Source: `x = "🚀"; num = 5`
     /// UTF-8 byte layout (0-indexed, line 0):
-    ///   x(0) ' '(1) =(2) ' '(3) "(4) 🚀(5..8) "(9) ;(10) ' '(11) n(12) u(13) m(14) ' '(15)
-    /// UTF-16 layout:
-    ///   x(1) ' '(2) =(3) ' '(4) "(5) 🚀(6,7) "(8) ;(9) ' '(10) n(11) ...
-    ///   → `num` starts at UTF-16 col 11 (0-based byte 11, but 🚀 takes 2 UTF-16 units → 11-2+1 = 10?)
+    ///   x(0) ' '(1) =(2) ' '(3) "(4) 🚀(5-8) "(9) ;(10) ' '(11) n(12) u(13) m(14) ...
     ///
-    /// Let's compute carefully:
-    ///   bytes 0-10: `x = "🚀";` (11 bytes: x, ' ', =, ' ', ", 🚀×4, ", ;)
-    ///   byte 11 (space), byte 12..14 = 'n','u','m'
-    ///   UTF-16 prefix to byte 12: x(1),' '(1),=(1),' '(1),"(1),🚀(2),"(1),;(1),' '(1) = 10 units
-    ///   → num starts at UTF-16 col 11 (10 units + 1-based)
+    /// `num` starts at byte 12.  UTF-16 prefix for bytes 0..12:
+    ///   x(1) + ' '(1) + =(1) + ' '(1) + "(1) + 🚀(2) + "(1) + ;(1) + ' '(1) = 10 UTF-16 units
+    ///   → `num` is at 1-based UTF-16 col 11.
     #[test]
     fn test_map_node_multibyte_emoji() {
         let source_code = "x = \"\u{1F680}\"; num = 5";

From 0243860ef8a2d1af53900c81bb39bce767a62f9a Mon Sep 17 00:00:00 2001
From: Roberto Huertas <roberto.huertas@datadoghq.com>
Date: Sun, 17 May 2026 12:29:05 +0200
Subject: [PATCH 06/12] docs(test): fix contradictory byte-offset derivation in
 test_get_query_nodes_cjk

The comment had a first derivation stating end starts at byte 7, then a
second "let's recompute" block arriving at byte 9. Remove the wrong first
derivation and keep only the corrected step-by-step calculation.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../static-analysis-kernel/src/analysis/tree_sitter.rs | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs
index 3d85cf4bb..c2774a0c5 100644
--- a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs
+++ b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs
@@ -797,12 +797,10 @@ SELECT * FROM table WHERE column = 'value';
     ///
     /// Source (Python): `日 = 1; end = 2`
     /// 日 = 3 UTF-8 bytes, 1 UTF-16 unit.
-    /// `end` starts at byte offset 7 (`日`(3) + ' '(1) + '='(1) + ' '(1) + '1'(1) = 7),
-    /// but UTF-16 units before it: 日(1),' '(1),=(1),' '(1),1(1),;(1),' '(1) = 7 → col 8.
-    /// Since 日 is BMP (1 UTF-16 unit = 1 byte col unit at the codepoint level, but 3 UTF-8
-    /// bytes), we have: 3 bytes - 1 UTF-16 = 2 byte savings on that char, so byte 7 → UTF-16 col
-    /// = 7 - 2 + 1 = 6.  Let's recompute step by step:
-    ///   日(3 bytes → 1 UTF-16), ' '(1→1), =(1→1), ' '(1→1), 1(1→1), ;(1→1), ' '(1→1) = 7 UTF-16 units → col 8
+    ///
+    /// `end` starts at byte 9:  日(3) + ' '(1) + =(1) + ' '(1) + 1(1) + ;(1) + ' '(1) = 9 bytes.
+    /// UTF-16 prefix for those 9 bytes:
+    ///   日(1) + ' '(1) + =(1) + ' '(1) + 1(1) + ;(1) + ' '(1) = 7 UTF-16 units → 1-based col 8.
     #[test]
     fn test_get_query_nodes_cjk() {
         // Python: assign 日 = 1, then end = 2

From 60e4f955142b354a2106af83628a3427e24a7fb3 Mon Sep 17 00:00:00 2001
From: Roberto Huertas <roberto.huertas@datadoghq.com>
Date: Sun, 17 May 2026 12:29:42 +0200
Subject: [PATCH 07/12] docs(test): fix misleading header for
 stella_compat_getcode_multibyte
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous header said the source was '🚀protectedName = 1;' (emoji
directly preceding the identifier), but the actual source places the emoji
in a string literal: '"\u{1F680}"; protectedName = 1;'. Rewrite the header
to match the real source layout and update the byte/UTF-16 derivation.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../src/analysis/ddsa_lib/runtime.rs                     | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs
index cd335650b..b4322fb7f 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs
@@ -1451,9 +1451,12 @@ function visit(query, filename, code) {
     /// `stella_compat.js::getCode` correctly extracts an identifier that follows a multibyte
     /// character on the same line.
     ///
-    /// The source `"🚀protectedName = 1;"` has the emoji (4 UTF-8 bytes / 2 UTF-16 code units)
-    /// before the identifier.  With UTF-16 columns, `col - 1` correctly indexes into the JS
-    /// string via `substring`, so `getCodeForNode(node, code)` returns `"protectedName"`.
+    /// The source is `"\u{1F680}"; protectedName = 1;` — the emoji sits inside a string literal
+    /// so that `protectedName` is a distinct, standalone identifier token.
+    /// Bytes before `protectedName`: `"` (1) + 🚀 (4) + `"` (1) + `;` (1) + ` ` (1) = 8 bytes.
+    /// UTF-16 prefix: `"` (1) + 🚀 surrogate pair (2) + `"` (1) + `;` (1) + ` ` (1) = 6 units → col 7.
+    /// With UTF-16 columns, `col - 1` correctly indexes into the JS string via `substring`,
+    /// so `getCodeForNode(node, code)` returns `"protectedName"`.
     #[test]
     fn stella_compat_getcode_multibyte() {
         let mut rt = cfg_test_v8().new_runtime();

From 04e6d1fe3c918e152e2fe8346e6ddcfff6777291 Mon Sep 17 00:00:00 2001
From: Roberto Huertas <roberto.huertas@datadoghq.com>
Date: Sun, 17 May 2026 12:30:17 +0200
Subject: [PATCH 08/12] docs: clarify Position.col producer contracts in
 docstring

The previous docstring led with UTF-16 as the universal meaning, then
footnoted the secrets-scanner exception. Rewrite so both contracts are
equal-weight bullet points: kernel (UTF-16 code units) and secrets scanner
(Unicode grapheme clusters). Consumers reading just the field-level doc now
see the full picture without having to read further.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 crates/common/src/model/position.rs | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/crates/common/src/model/position.rs b/crates/common/src/model/position.rs
index 0d85c26db..503b29c12 100644
--- a/crates/common/src/model/position.rs
+++ b/crates/common/src/model/position.rs
@@ -9,15 +9,15 @@ use std::fmt;
 /// A source-code position.
 ///
 /// * `line` — 1-based line number.
-/// * `col` — 1-based **UTF-16 code-unit** column number for every position produced by the
-///   static-analysis kernel (tree-sitter path).  On ASCII-only lines, one UTF-16 code unit equals
-///   one byte, so the value is identical to what a byte-column would be.  For non-ASCII characters
-///   (CJK ideographs, emoji surrogate pairs, combining marks, etc.) the value reflects UTF-16
-///   semantics, which matches LSP / VS Code and the SARIF v2.1 default encoding.
-///
-///   The `get_position_in_string` helper (used by the secrets scanner) emits grapheme-based
-///   columns under a different contract; that path does **not** flow through `Position.col` in the
-///   kernel output.
+/// * `col` — 1-based column number.  The unit depends on the producer:
+///   - **Static-analysis kernel** (tree-sitter path): UTF-16 code-unit column.  Matches LSP,
+///     VS Code, and the SARIF v2.1 default encoding.  On ASCII-only lines, one UTF-16 code unit
+///     equals one byte, so the value is identical to a byte-column.  For non-ASCII characters
+///     (CJK ideographs, emoji surrogate pairs, combining marks, etc.) the count reflects UTF-16
+///     surrogate-pair expansion.
+///   - **Secrets scanner** (`get_position_in_string`): Unicode-grapheme-cluster column.  A single
+///     user-perceived character (including emoji and composed sequences) counts as one unit
+///     regardless of its UTF-8 or UTF-16 size.
 #[derive(Deserialize, Debug, Serialize, Clone, Copy, Builder, PartialEq, Eq, Hash)]
 pub struct Position {
     pub line: u32,

From d39db47e912b71bba7b0bc218ea2bc67745101ef Mon Sep 17 00:00:00 2001
From: Roberto Huertas <roberto.huertas@datadoghq.com>
Date: Sun, 17 May 2026 12:43:49 +0200
Subject: [PATCH 09/12] fix(ops): replace silent unwrap_or_else fallback with
 expect

The unwrap_or_else(|| LineColumnIndex::new("")) fallback would silently
produce col=1 for every node if tree text was never set, making bugs
invisible. Replace with expect() in both op_ts_node_named_children and
op_ts_node_parent, consistent with the already-present panic in
op_digraph_adjacency_list_to_dot for the same invariant.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs
index 8986e6280..bec8db64f 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs
@@ -114,7 +114,7 @@ pub fn op_ts_node_named_children<'s>(
         let idx = ctx_borrow
             .ddsa_root_context()
             .line_column_index()
-            .unwrap_or_else(|| LineColumnIndex::new(""));
+            .expect("tree text must be set before ops run");
         let mut bridge_ref = ts_node_bridge.borrow_mut();
 
         let mut cursor = ts_node.walk();
@@ -173,7 +173,7 @@ pub fn op_ts_node_parent(
     // rescanning the source on every parent call.
     let idx = root_ctx
         .line_column_index()
-        .unwrap_or_else(|| LineColumnIndex::new(""));
+        .expect("tree text must be set before ops run");
     let mut bridge_ref = ts_node_bridge.borrow_mut();
     let nid = bridge_ref.insert(scope, parent_ts_node, &idx);
     Some(nid)

From 329df0ccdfaf560c71bd32546fb76ec4155c92d0 Mon Sep 17 00:00:00 2001
From: Roberto Huertas <roberto.huertas@datadoghq.com>
Date: Sun, 17 May 2026 17:16:58 +0200
Subject: [PATCH 10/12] =?UTF-8?q?ci(regressions):=20temporarily=20exclude?=
 =?UTF-8?q?=20columns=20from=20SARIF=20diff=20for=20byte=E2=86=92UTF-16=20?=
 =?UTF-8?q?transition?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR changes the kernel from emitting 1-based UTF-8 byte columns to
1-based UTF-16 code-unit columns (matching LSP / VS Code / SARIF v2.1).

While the PR is in review, the regression workflow compares two SARIF
runs as JSON strings — one built from `main` (byte cols) and one from
this branch (UTF-16 cols). Every violation that lives on a line with a
non-ASCII character drifts by N columns (N = number of multibyte chars
before the position) and shows up as a "removed + added" pair.

To unblock the CI we drop `startColumn` / `endColumn` from both the
comparison key (in `parseFile`) and the summary location display, and
compare by (file, ruleId, message, startLine, endLine) only.

This is a ONE-SHOT loosening for the encoding transition. A stacked
follow-up PR will restore the column fields once this PR lands on
`main` (at which point both runs are on UTF-16 columns again and
column-level regression detection becomes meaningful).

Verified locally on numpy's `_core/tests/test_strings.py`:
- before fix: 3 "removed" + 3 "added" false positives (lines with λ μ ·)
- after  fix: 0 + 0 (clean)

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .github/scripts/check-regressions.js | 35 +++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/.github/scripts/check-regressions.js b/.github/scripts/check-regressions.js
index 15bd5303a..0126f5fdd 100644
--- a/.github/scripts/check-regressions.js
+++ b/.github/scripts/check-regressions.js
@@ -22,7 +22,33 @@ const parseFile = (filepath) => {
       const current = {};
       current.message = i.message;
       current.ruleId = i.ruleId;
-      current.physicalLocation = i.locations[0].physicalLocation;
+      // ------------------------------------------------------------------
+      // IMPORTANT (TEMPORARY): strip `startColumn` / `endColumn` from the region.
+      //
+      // PR #914 changes the kernel from emitting 1-based UTF-8 byte columns to
+      // 1-based UTF-16 code-unit columns (matching LSP / VS Code / SARIF v2.1).
+      //
+      // While that PR is in review, `main` still emits byte columns and the
+      // feature branch emits UTF-16 columns. The regression check compares
+      // results as JSON strings, so it would flag every violation that lives
+      // on a line containing a non-ASCII character as a "removed + added"
+      // pair — same file, same rule, same line; only the column number drifts
+      // by N (where N is the number of multibyte chars before the position).
+      //
+      // To unblock CI for #914 we compare by
+      //     (file, ruleId, message, startLine, endLine)
+      // and intentionally ignore columns. This is a one-shot loosening for
+      // the byte→UTF-16 transition. A stacked follow-up PR will restore
+      // `startColumn` / `endColumn` to the comparison key once #914 lands on
+      // `main` (at which point both runs are on UTF-16 columns again and
+      // column-level regression detection becomes meaningful again).
+      // ------------------------------------------------------------------
+      const { startColumn: _startCol, endColumn: _endCol, ...regionWithoutColumns } =
+        i.locations[0].physicalLocation.region;
+      current.physicalLocation = {
+        ...i.locations[0].physicalLocation,
+        region: regionWithoutColumns,
+      };
       results.push(current);
     }
 
@@ -55,13 +81,16 @@ const main = async () => {
     let table1 = [];
 
     if (count1 > 0 || count2 > 0) {
+      // Location display only shows `startLine-endLine`. Columns are
+      // intentionally omitted to match the comparison key (see `parseFile`
+      // above for the full rationale on the byte→UTF-16 transition).
       for (const item of diff1) {
         const json = JSON.parse(item);
         table1.push([
           { data: json.physicalLocation.artifactLocation.uri },
           { data: json.message.text },
           { data: json.ruleId },
-          { data: `${json.physicalLocation.region.startLine}:${json.physicalLocation.region.startColumn}-${json.physicalLocation.region.endLine}:${json.physicalLocation.region.endColumn}` },
+          { data: `${json.physicalLocation.region.startLine}-${json.physicalLocation.region.endLine}` },
           { data: dupes1[json.ruleId] },
         ]);
       }
@@ -74,7 +103,7 @@ const main = async () => {
           { data: json.physicalLocation.artifactLocation.uri },
           { data: json.message.text },
           { data: json.ruleId },
-          { data: `${json.physicalLocation.region.startLine}:${json.physicalLocation.region.startColumn}-${json.physicalLocation.region.endLine}:${json.physicalLocation.region.endColumn}` },
+          { data: `${json.physicalLocation.region.startLine}-${json.physicalLocation.region.endLine}` },
           { data: dupes2[json.ruleId] },
         ]);
       }

From 8947d3bbda2decb494140adb628ce37085abcf24 Mon Sep 17 00:00:00 2001
From: Roberto Huertas <roberto.huertas@datadoghq.com>
Date: Tue, 19 May 2026 16:28:24 +0200
Subject: [PATCH 11/12] refactor(common): use line-index crate for UTF-16
 column conversion

Replace hand-rolled LineColumnIndex implementation with a thin newtype
wrapper around line_index::LineIndex. Public API and all call sites are
unchanged. Simplifies RootContext cache to own the index directly,
eliminating per-call Vec<usize> clones.

Addresses reviewer feedback on #914.
---
 Cargo.lock                                    | 17 ++++
 LICENSE-3rdparty.csv                          |  1 +
 crates/common/Cargo.toml                      |  3 +-
 crates/common/src/utils/position_utils.rs     | 96 ++++++-------------
 .../analysis/ddsa_lib/bridge/query_match.rs   |  2 +-
 .../src/analysis/ddsa_lib/bridge/ts_node.rs   |  6 +-
 .../src/analysis/ddsa_lib/context/root.rs     | 29 +++---
 .../src/analysis/ddsa_lib/js/flow/graph.rs    |  2 +-
 .../src/analysis/ddsa_lib/js/ts_node.rs       |  2 +-
 .../src/analysis/ddsa_lib/ops.rs              | 16 ++--
 .../src/analysis/tree_sitter.rs               |  4 +-
 11 files changed, 75 insertions(+), 103 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 2761a58b5..2d0280439 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -696,6 +696,7 @@ dependencies = [
  "anyhow",
  "bstr",
  "derive_builder",
+ "line-index",
  "serde",
 ]
 
@@ -2540,6 +2541,16 @@ dependencies = [
  "vcpkg",
 ]
 
+[[package]]
+name = "line-index"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3e27e0ed5a392a7f5ba0b3808a2afccff16c64933312c84b57618b49d1209bd2"
+dependencies = [
+ "nohash-hasher",
+ "text-size",
+]
+
 [[package]]
 name = "linux-raw-sys"
 version = "0.4.15"
@@ -2766,6 +2777,12 @@ dependencies = [
  "smallvec",
 ]
 
+[[package]]
+name = "nohash-hasher"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451"
+
 [[package]]
 name = "nom"
 version = "7.1.3"
diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv
index 20a3e1316..543bdb646 100644
--- a/LICENSE-3rdparty.csv
+++ b/LICENSE-3rdparty.csv
@@ -12,6 +12,7 @@ graphviz-rust,https://github.com/besok/graphviz-rust,MIT,Copyright (c) 2013 Bori
 indexmap,https://github.com/indexmap-rs/indexmap,MIT and Apache-2.0,Copyright (c) 2016-2017 bluss
 indicatif,https://crates.io/crates/indicatif,MIT,Copyright (c) 2017 Armin Ronacher <armin.ronacher@active-4.com>
 itertools,https://github.com/rust-itertools/itertools,MIT,Copyright 2015 itertools Developers
+line-index,https://github.com/rust-lang/rust-analyzer,MIT OR Apache-2.0,Copyright (c) 2019 rust-analyzer developers
 num_cpus,https://github.com/seanmonstar/num_cpus,MIT, Copyright (c) 2015 Sean McArthur
 percent-encoding,https://github.com/servo/rust-url/,MIT, Copyright (c) 2013-2022 The rust-url developers
 prettytable-rs,https://github.com/phsym/prettytable-rs/,BSD-3-Clause,Copyright (c) 2022 Pierre-Henri Symoneaux
diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml
index 52512fccb..a5a4997f3 100644
--- a/crates/common/Cargo.toml
+++ b/crates/common/Cargo.toml
@@ -9,4 +9,5 @@ serde = { version = "1.0.219", features = ["derive"] }
 derive_builder = { workspace = true }
 
 # other
-bstr = "1.12.0"
\ No newline at end of file
+bstr = "1.12.0"
+line-index = "0.1.2"
\ No newline at end of file
diff --git a/crates/common/src/utils/position_utils.rs b/crates/common/src/utils/position_utils.rs
index 853431f57..53575c228 100644
--- a/crates/common/src/utils/position_utils.rs
+++ b/crates/common/src/utils/position_utils.rs
@@ -1,87 +1,45 @@
 use crate::model::position::Position;
 use bstr::BStr;
 use bstr::ByteSlice;
+use line_index::{LineCol, LineIndex, WideEncoding};
 
 /// Precomputed per-line index for fast repeated UTF-8 byte-column → UTF-16 code-unit column
 /// conversion.
 ///
 /// Build once per source string with [`LineColumnIndex::new`], then call
-/// [`byte_col_to_utf16_col`] for every tree-sitter node on that source.
-pub struct LineColumnIndex<'a> {
-    source: &'a str,
-    /// Byte offset of the start of each line (0-indexed line number → byte offset of first byte
-    /// on that line).
-    line_starts: Vec<usize>,
-}
-
-impl<'a> LineColumnIndex<'a> {
-    /// Builds the index by scanning `source` for newline characters.
-    pub fn new(source: &'a str) -> Self {
-        let line_starts = Self::compute_line_starts(source);
-        Self {
-            source,
-            line_starts,
-        }
-    }
-
-    /// Creates a [`LineColumnIndex`] from a **pre-computed** `line_starts` vector.
-    ///
-    /// Use this when `line_starts` has already been built by [`compute_line_starts`] and cached
-    /// (e.g. in `RootContext`) to avoid re-scanning the source on every call.
-    /// `line_starts` must have been computed from the same `source` string.
-    pub fn from_parts(source: &'a str, line_starts: Vec<usize>) -> Self {
-        Self {
-            source,
-            line_starts,
-        }
-    }
+/// [`byte_col_to_utf16_col`](LineColumnIndex::byte_col_to_utf16_col) for every tree-sitter node
+/// on that source. Backed by [`line_index::LineIndex`] from the rust-analyzer project.
+///
+/// ## Line model
+///
+/// [`line_index::LineIndex`] splits on `\n` only, which mirrors tree-sitter's line model exactly.
+/// Tree-sitter does not treat a bare `\r` (classic Mac OS 9) as a line terminator; for Windows
+/// `\r\n` files the `\r` is counted as part of the column on the same line, matching
+/// tree-sitter's `Point.column` values. Using a broader splitter (e.g. Unicode line endings)
+/// would diverge from tree-sitter and produce wrong UTF-16 columns.
+#[derive(Debug)]
+pub struct LineColumnIndex(LineIndex);
 
-    /// Scans `source` and returns the byte offset of the start of each line.
-    ///
-    /// Intentionally splits on `\n` only — this mirrors tree-sitter's line model exactly.
-    /// Tree-sitter does not treat bare `\r` (old Mac OS 9) as a line terminator; for
-    /// Windows `\r\n` files the `\r` is part of the column on the same line, matching
-    /// tree-sitter's `Point.column` values. Using a broader splitter (e.g. `str::lines`)
-    /// would diverge from tree-sitter and produce wrong UTF-16 columns.
-    pub fn compute_line_starts(source: &str) -> Vec<usize> {
-        let mut line_starts = vec![0usize];
-        for (i, b) in source.bytes().enumerate() {
-            if b == b'\n' {
-                line_starts.push(i + 1);
-            }
-        }
-        line_starts
+impl LineColumnIndex {
+    /// Builds the index by scanning `source`.
+    pub fn new(source: &str) -> Self {
+        Self(LineIndex::new(source))
     }
 
     /// Converts a tree-sitter 0-based `(row, byte_col)` point to a 1-based UTF-16 code-unit
     /// column.
     ///
-    /// **ASCII fast path**: when the line prefix up to `byte_col` is pure ASCII (all bytes
-    /// < 128), each byte is one UTF-16 code unit, so the result is simply `byte_col + 1`.
-    /// This path costs one slice-level `is_ascii()` check and is taken for the vast majority of
-    /// real-world source lines.
+    /// Falls back to `byte_col + 1` (correct for ASCII-only content) when the coordinates fall
+    /// outside the indexed source, preserving pre-existing behaviour for out-of-range inputs.
     pub fn byte_col_to_utf16_col(&self, row: usize, byte_col: usize) -> u32 {
-        let line_start = self
-            .line_starts
-            .get(row)
-            .copied()
-            .unwrap_or(self.source.len());
-        // Slice from the line start to end-of-source (unbounded); the subsequent `prefix` slice
-        // clamps this to exactly `byte_col` bytes, so the trailing lines are never examined.
-        let source_from_line_start = &self.source.as_bytes()[line_start..];
-        // `byte_col` is the number of bytes from the line start to the position.
-        let prefix_len = byte_col.min(source_from_line_start.len());
-        let prefix = &source_from_line_start[..prefix_len];
-
-        // ASCII fast path: one byte == one UTF-16 code unit.
-        if prefix.is_ascii() {
-            return (byte_col + 1) as u32;
-        }
-
-        // Slow path: count UTF-16 code units emitted by each Unicode scalar.
-        let prefix_str = std::str::from_utf8(prefix).unwrap_or("");
-        let utf16_col: usize = prefix_str.chars().map(|c| c.len_utf16()).sum();
-        (utf16_col + 1) as u32
+        let lc = LineCol {
+            line: row as u32,
+            col: byte_col as u32,
+        };
+        self.0
+            .to_wide(WideEncoding::Utf16, lc)
+            .map(|w| w.col + 1)
+            .unwrap_or((byte_col as u32) + 1)
     }
 }
 
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/query_match.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/query_match.rs
index 24054f3e7..990dd5b58 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/query_match.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/query_match.rs
@@ -45,7 +45,7 @@ impl QueryMatchBridge {
         scope: &mut HandleScope,
         matches: impl Into<Vec<QueryMatch<tree_sitter::Node<'tree>>>>,
         node_bridge: &mut TsNodeBridge,
-        idx: &LineColumnIndex<'_>,
+        idx: &LineColumnIndex,
     ) {
         let matches = matches.into();
         // Pass each node in via the bridge (assigning it an id), and use this id to transform
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/ts_node.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/ts_node.rs
index 2f17fa07b..1e76229d7 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/ts_node.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/ts_node.rs
@@ -48,7 +48,7 @@ impl TsNodeBridge {
         &mut self,
         scope: &mut HandleScope,
         node: tree_sitter::Node,
-        idx: &LineColumnIndex<'_>,
+        idx: &LineColumnIndex,
     ) -> NodeId {
         let raw_ts_node = RawTSNode::new(node);
         if let Some((_, _, node_id)) = self.mirrored_im.get_full(&raw_ts_node) {
@@ -92,7 +92,7 @@ impl TsNodeBridge {
         scope: &mut HandleScope<'s>,
         node: tree_sitter::Node,
         id: NodeId,
-        idx: &LineColumnIndex<'_>,
+        idx: &LineColumnIndex,
     ) -> v8::Local<'s, v8::Object> {
         let ts_node = TreeSitterNode::<Instance>::from_ts_node_with_index(id, node, idx);
         self.js_class.new_instance(scope, ts_node)
@@ -128,7 +128,7 @@ impl TsNodeBridge {
         &mut self,
         scope: &mut HandleScope,
         capture: TSQueryCapture<tree_sitter::Node>,
-        idx: &LineColumnIndex<'_>,
+        idx: &LineColumnIndex,
     ) -> TSQueryCapture<NodeId> {
         TSQueryCapture::<NodeId> {
             name: capture.name,
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/context/root.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/context/root.rs
index 874570cb0..c8f76e92a 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/context/root.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/context/root.rs
@@ -18,11 +18,11 @@ pub struct RootContext {
     tree: Option<Arc<tree_sitter::Tree>>,
     /// The source string that was parsed by tree-sitter to generate `tree`.
     tree_text: Option<Arc<str>>,
-    /// Pre-computed byte offset of the start of each line in `tree_text`.
+    /// Pre-computed line/column index for `tree_text`.
     ///
-    /// Built once in [`set_text`] and reused by [`line_column_index`] to avoid re-scanning the
-    /// source on every deno op call (e.g. `namedChildren`, `parent`).
-    line_starts: Vec<usize>,
+    /// Built once in [`set_text`] and borrowed by [`line_column_index`] on every deno op call
+    /// (e.g. `namedChildren`, `parent`), avoiding repeated source scans.
+    lci: Option<LineColumnIndex>,
     /// A filename associated with a rule execution.
     filename: Option<Arc<str>>,
 
@@ -52,26 +52,21 @@ impl RootContext {
     /// Assigns the provided text string to the context. If an existing text string was assigned, it
     /// will be returned as `Some(text)`.
     ///
-    /// Also rebuilds the internal [`line_starts`](Self::line_column_index) cache so that subsequent
-    /// calls to [`line_column_index`](Self::line_column_index) are O(1).
+    /// Also rebuilds the internal [line/column index](Self::line_column_index) cache so that
+    /// subsequent calls to [`line_column_index`](Self::line_column_index) are O(1).
     pub fn set_text(&mut self, text: Arc<str>) -> Option<Arc<str>> {
-        self.line_starts = LineColumnIndex::compute_line_starts(&text);
+        self.lci = Some(LineColumnIndex::new(&text));
         Option::replace(&mut self.tree_text, text)
     }
 
-    /// Returns a [`LineColumnIndex`] built from the cached line-start offsets.
+    /// Returns a reference to the cached [`LineColumnIndex`].
     ///
-    /// Construction is O(1) — the line-start offsets are pre-computed in [`set_text`] and stored
-    /// as a `Vec<usize>`. This avoids the O(source_len) scan that `LineColumnIndex::new` would
-    /// perform on every deno op call.
+    /// The index is built once in [`set_text`] and reused on every deno op call, avoiding
+    /// repeated source scans.
     ///
     /// Returns `None` when no source text has been assigned yet.
-    pub fn line_column_index(&self) -> Option<LineColumnIndex<'_>> {
-        let source = self.tree_text.as_deref()?;
-        Some(LineColumnIndex::from_parts(
-            source,
-            self.line_starts.clone(),
-        ))
+    pub fn line_column_index(&self) -> Option<&LineColumnIndex> {
+        self.lci.as_ref()
     }
 
     /// Returns a reference to the underlying [`tree_sitter::Tree`], if it exists.
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph.rs
index 72c925800..ca2e2a570 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph.rs
@@ -629,7 +629,7 @@ impl<'a> LocatedNode<'a> {
     pub fn new_cst(
         node: tree_sitter::Node,
         text: &'a str,
-        idx: &LineColumnIndex<'_>,
+        idx: &LineColumnIndex,
     ) -> LocatedNode<'a> {
         let start = node.start_position();
         Self::Cst {
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/ts_node.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/ts_node.rs
index b27858df8..996d0acaf 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/ts_node.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/ts_node.rs
@@ -35,7 +35,7 @@ impl<T> TreeSitterNode<T> {
     pub fn from_ts_node_with_index(
         id: NodeId,
         node: tree_sitter::Node,
-        idx: &LineColumnIndex<'_>,
+        idx: &LineColumnIndex,
     ) -> Self {
         let tree_sitter::Point {
             row: start_row,
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs
index bec8db64f..8d3f9cff4 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs
@@ -5,7 +5,6 @@
 use crate::analysis::ddsa_lib;
 use crate::analysis::ddsa_lib::common::{v8_uint, NodeId};
 use crate::analysis::ddsa_lib::{bridge, runtime, RawTSNode};
-use common::utils::position_utils::LineColumnIndex;
 use deno_core::{op2, v8, OpState};
 use std::cell::RefCell;
 use std::rc::Rc;
@@ -109,7 +108,7 @@ pub fn op_ts_node_named_children<'s>(
         // ts_node_bridge and ctx_bridge are separate RefCells so the concurrent borrows are valid.
         let ctx_bridge = state.borrow::<Rc<RefCell<bridge::ContextBridge>>>();
         let ctx_borrow = ctx_bridge.borrow();
-        // Use the pre-computed line_starts cached in RootContext (O(1)) rather than
+        // Use the pre-computed index cached in RootContext (O(1)) rather than
         // rescanning the source on every namedChildren call.
         let idx = ctx_borrow
             .ddsa_root_context()
@@ -130,7 +129,7 @@ pub fn op_ts_node_named_children<'s>(
             }
             let child_node = cursor.node();
 
-            let nid = bridge_ref.insert(scope, child_node, &idx);
+            let nid = bridge_ref.insert(scope, child_node, idx);
             let nid = v8_uint(scope, nid);
             let nid_index = i * 2;
             array.set_index(scope, nid_index as u32, nid.into());
@@ -169,13 +168,13 @@ pub fn op_ts_node_parent(
         OpSafeRawTSNode::from_root_context(root_ctx, |ctx| ctx.get_ts_node_parent(ts_node))?;
     let parent_ts_node = safe_raw_parent.to_node();
 
-    // Use the pre-computed line_starts cached in RootContext (O(1)) rather than
+    // Use the pre-computed index cached in RootContext (O(1)) rather than
     // rescanning the source on every parent call.
     let idx = root_ctx
         .line_column_index()
         .expect("tree text must be set before ops run");
     let mut bridge_ref = ts_node_bridge.borrow_mut();
-    let nid = bridge_ref.insert(scope, parent_ts_node, &idx);
+    let nid = bridge_ref.insert(scope, parent_ts_node, idx);
     Some(nid)
 }
 
@@ -268,10 +267,11 @@ pub fn op_digraph_adjacency_list_to_dot(
     let text = root_ctx
         .get_text()
         .expect("tree text should always be `Some` during rule execution");
-    // Build the index once before the per-vertex loop; use the cached line_starts when available.
+    // Use the pre-computed index cached in RootContext (O(1)) rather than rebuilding per-vertex.
+    // `set_text` always builds the index alongside `tree_text`, so this is always `Some` here.
     let lc_idx = root_ctx
         .line_column_index()
-        .unwrap_or_else(|| LineColumnIndex::new(text));
+        .expect("tree text must be set before ops run");
 
     // Transformation:
     // If `VertexKind::CST`: constructs a dot node from metadata from the `TsNodeBridge` and `RootContext`.
@@ -289,7 +289,7 @@ pub fn op_digraph_adjacency_list_to_dot(
                 let node_text = ts_node
                     .utf8_text(text.as_bytes())
                     .expect("bytes should be utf8 sequence");
-                Some(LocatedNode::new_cst(ts_node, node_text, &lc_idx))
+                Some(LocatedNode::new_cst(ts_node, node_text, lc_idx))
             }
             VertexKind::Phi => Some(LocatedNode::new_phi(vid.internal_id())),
             VertexKind::Invalid => None,
diff --git a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs
index c2774a0c5..7b2cebc2f 100644
--- a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs
+++ b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs
@@ -344,11 +344,11 @@ pub fn get_query_nodes(
 // `idx` is a pre-built [`LineColumnIndex`] for the source that produced `node`; it is used to
 // convert tree-sitter's 0-based UTF-8 byte columns into 1-based UTF-16 code-unit columns so
 // that `Position.col` is consistent with LSP / VS Code / SARIF v2.1 semantics.
-pub fn map_node(node: tree_sitter::Node, idx: &LineColumnIndex<'_>) -> Option<TreeSitterNode> {
+pub fn map_node(node: tree_sitter::Node, idx: &LineColumnIndex) -> Option<TreeSitterNode> {
     fn map_node_internal(
         cursor: &mut tree_sitter::TreeCursor,
         only_named_node: bool,
-        idx: &LineColumnIndex<'_>,
+        idx: &LineColumnIndex,
     ) -> Option<TreeSitterNode> {
         // we do not map space, parenthesis and other non-named nodes if there
         // when `only_named_node` is true (which is `true` for children only).

From 1f052b9f02077b7736f8d7ed51b96c4764212b26 Mon Sep 17 00:00:00 2001
From: Roberto Huertas <roberto.huertas@datadoghq.com>
Date: Wed, 20 May 2026 16:03:50 +0200
Subject: [PATCH 12/12] refactor: address review feedback on UTF-16 column
 conversion

- Removed eight unit tests in position_utils.rs that duplicated coverage already provided by the line-index crate, collapsing them into a single wiring test that proves the wrapper delegates to LineIndex::to_wide(WideEncoding::Utf16, ..) and adds 1.
- Dropped per-call-site multibyte tests covered by the new helper: test_export_csv_multibyte_col, sarif_region_carries_utf16_col, ts_node_line_col_multibyte, stella_compat_getcode_multibyte, test_map_node_multibyte_emoji, test_get_query_nodes_cjk, ts_node_bridge_multibyte_utf16_col, located_node_multibyte_col_is_utf16, and test_process_tree_sitter_tree_request_multibyte.
- Trimmed LLM-flavored doc comments on insert_capture, RootContext (lci field, set_text, line_column_index), LocatedNode::new_cst, TreeSitterNode::from_ts_node_with_index, QueryMatchBridge::set_data, and map_node.
- Removed noisy O(1)/pre-computed inline comments from three ops.rs call sites that explained self-evident caching behaviour.
- Changed LineColumnIndex::byte_col_to_utf16_col to return Option<u32> instead of silently falling back to byte_col + 1, making the fallibility explicit at the type level.
- Updated the five production call sites (tree_sitter.rs, js/ts_node.rs, js/flow/graph.rs) to use unwrap_or(byte_col as u32 + 1) so the ASCII fallback is visible at every caller, and updated test sites in js/flow/java.rs to use unwrap().

IDE-6037
---
 crates/cli/src/csv.rs                         |  35 -----
 crates/cli/src/sarif/sarif_utils.rs           |  84 ------------
 crates/common/src/utils/position_utils.rs     | 128 ++----------------
 .../analysis/ddsa_lib/bridge/query_match.rs   |   4 -
 .../src/analysis/ddsa_lib/bridge/ts_node.rs   |  34 -----
 .../src/analysis/ddsa_lib/context/root.rs     |  14 +-
 .../src/analysis/ddsa_lib/js/flow/graph.rs    |  67 +--------
 .../src/analysis/ddsa_lib/js/flow/java.rs     |   4 +-
 .../src/analysis/ddsa_lib/js/ts_node.rs       |  73 +---------
 .../src/analysis/ddsa_lib/ops.rs              |   6 -
 .../src/analysis/ddsa_lib/runtime.rs          |  41 ------
 .../src/analysis/tree_sitter.rs               |  97 +------------
 .../src/tree_sitter_tree.rs                   |  43 ------
 13 files changed, 31 insertions(+), 599 deletions(-)

diff --git a/crates/cli/src/csv.rs b/crates/cli/src/csv.rs
index 8b60c5a6a..b3da79d4f 100644
--- a/crates/cli/src/csv.rs
+++ b/crates/cli/src/csv.rs
@@ -97,39 +97,4 @@ mod tests {
         );
         assert_eq!(res_with_result, "filename,rule,category,severity,message,start_line,start_col,end_line,end_col\nfilename,myrule,performance,error,message,10,12,12,10\n");
     }
-
-    /// The CSV output faithfully serializes whatever col value is stored in the violation.
-    /// When the kernel produces UTF-16 columns (e.g. col 3 for a node after "🚀"), the CSV row
-    /// contains 3, not the raw byte col (5).
-    #[test]
-    fn test_export_csv_multibyte_col() {
-        let res = generate_csv_results(
-            &vec![RuleResult {
-                rule_name: "myrule".to_string(),
-                filename: "file.py".to_string(),
-                violations: vec![Violation {
-                    // Simulates a violation whose `col` was computed as UTF-16 (e.g. after 🚀).
-                    start: common::model::position::Position { line: 1, col: 3 },
-                    end: common::model::position::Position { line: 1, col: 16 },
-                    message: "multibyte".to_string(),
-                    severity: RuleSeverity::Error,
-                    category: RuleCategory::Security,
-                    fixes: vec![],
-                    taint_flow: None,
-                    is_suppressed: false,
-                }],
-                errors: vec![],
-                execution_error: None,
-                output: None,
-                execution_time_ms: 1,
-                query_node_time_ms: 0,
-                parsing_time_ms: 0,
-            }],
-            &vec![],
-        );
-        assert!(
-            res.contains(",1,3,1,16\n"),
-            "CSV should contain UTF-16 col 3; got: {res}"
-        );
-    }
 }
diff --git a/crates/cli/src/sarif/sarif_utils.rs b/crates/cli/src/sarif/sarif_utils.rs
index 1bc844511..22cf71f19 100644
--- a/crates/cli/src/sarif/sarif_utils.rs
+++ b/crates/cli/src/sarif/sarif_utils.rs
@@ -2231,88 +2231,4 @@ mod tests {
         let sarif_json = serde_json::to_value(sarif_report).unwrap();
         assert!(validate_data(&sarif_json));
     }
-
-    /// The SARIF `region.startColumn` / `region.endColumn` fields carry whatever value is in
-    /// `Position.col`.  When the kernel emits UTF-16 columns (as it now does), the SARIF output
-    /// automatically conforms to the SARIF v2.1 default encoding (UTF-16).
-    ///
-    /// This test constructs a violation whose col was computed as UTF-16 (e.g. after an emoji on
-    /// the same line, col 3) and asserts the SARIF JSON contains that value.
-    #[test]
-    fn sarif_region_carries_utf16_col() {
-        let rule = RuleBuilder::default()
-            .name("utf16-col-rule".to_string())
-            .description_base64(None)
-            .language(Language::Python)
-            .checksum("x".to_string())
-            .pattern(None)
-            .tree_sitter_query_base64(None)
-            .category(RuleCategory::Security)
-            .code_base64(String::new())
-            .short_description_base64(None)
-            .entity_checked(None)
-            .rule_type(RuleType::TreeSitterQuery)
-            .severity(RuleSeverity::Error)
-            .cwe(None)
-            .arguments(vec![])
-            .tests(vec![])
-            .is_testing(false)
-            .documentation_url(None)
-            .build()
-            .unwrap();
-        let sarif_rule = SarifRule::StaticAnalysis(Box::new(rule));
-
-        // UTF-16 col 3 = position of an identifier after 🚀 on the same line.
-        let violation = Violation {
-            start: Position::new(1, 3),
-            end: Position::new(1, 16),
-            message: "utf16 col test".to_string(),
-            severity: RuleSeverity::Error,
-            category: RuleCategory::Security,
-            fixes: vec![],
-            taint_flow: None,
-            is_suppressed: false,
-        };
-        let rule_result = RuleResultBuilder::default()
-            .rule_name("utf16-col-rule".to_string())
-            .filename("file.py".to_string())
-            .violations(vec![violation])
-            .errors(vec![])
-            .execution_error(None)
-            .output(None)
-            .execution_time_ms(0u128)
-            .parsing_time_ms(0u128)
-            .query_node_time_ms(0u128)
-            .build()
-            .unwrap();
-        let sarif_result = SarifRuleResult::try_from(rule_result).unwrap();
-
-        let directory = ".".to_string();
-        let sarif = generate_sarif_report(
-            &[sarif_rule],
-            &[sarif_result],
-            &directory,
-            SarifReportMetadata {
-                add_git_info: false,
-                debug: false,
-                config_digest: String::new(),
-                diff_aware_parameters: None,
-                execution_time_secs: 0,
-            },
-            &Default::default(),
-        )
-        .expect("generate sarif");
-
-        let sarif_json = serde_json::to_value(&sarif).unwrap();
-        let location =
-            &sarif_json["runs"][0]["results"][0]["locations"][0]["physicalLocation"]["region"];
-        assert_eq!(
-            location["startColumn"], 3,
-            "SARIF startColumn should be UTF-16 col 3"
-        );
-        assert_eq!(
-            location["endColumn"], 16,
-            "SARIF endColumn should be UTF-16 col 16"
-        );
-    }
 }
diff --git a/crates/common/src/utils/position_utils.rs b/crates/common/src/utils/position_utils.rs
index 53575c228..9e3216536 100644
--- a/crates/common/src/utils/position_utils.rs
+++ b/crates/common/src/utils/position_utils.rs
@@ -29,17 +29,13 @@ impl LineColumnIndex {
     /// Converts a tree-sitter 0-based `(row, byte_col)` point to a 1-based UTF-16 code-unit
     /// column.
     ///
-    /// Falls back to `byte_col + 1` (correct for ASCII-only content) when the coordinates fall
-    /// outside the indexed source, preserving pre-existing behaviour for out-of-range inputs.
-    pub fn byte_col_to_utf16_col(&self, row: usize, byte_col: usize) -> u32 {
+    /// Returns `None` if `(row, byte_col)` falls outside the indexed source.
+    pub fn byte_col_to_utf16_col(&self, row: usize, byte_col: usize) -> Option<u32> {
         let lc = LineCol {
             line: row as u32,
             col: byte_col as u32,
         };
-        self.0
-            .to_wide(WideEncoding::Utf16, lc)
-            .map(|w| w.col + 1)
-            .unwrap_or((byte_col as u32) + 1)
+        self.0.to_wide(WideEncoding::Utf16, lc).map(|w| w.col + 1)
     }
 }
 
@@ -179,115 +175,15 @@ mod tests {
         );
     }
 
-    // ── LineColumnIndex tests ─────────────────────────────────────────────────
-
-    #[test]
-    fn lci_ascii_fast_path() {
-        // Pure ASCII: UTF-16 col == byte col + 1 for every position.
-        let idx = LineColumnIndex::new("hello world\nfoo bar");
-        assert_eq!(idx.byte_col_to_utf16_col(0, 0), 1); // 'h' → col 1
-        assert_eq!(idx.byte_col_to_utf16_col(0, 5), 6); // ' ' → col 6
-        assert_eq!(idx.byte_col_to_utf16_col(1, 3), 4); // ' ' on line 2 → col 4
-    }
-
-    #[test]
-    fn lci_bmp_non_ascii() {
-        // 'é' is U+00E9: 2 UTF-8 bytes, 1 UTF-16 code unit.
-        // Source: "café\nend"  — bytes: c(0) a(1) f(2) é(3,4) \n(5)
-        let src = "caf\u{00E9}\nend";
-        let idx = LineColumnIndex::new(src);
-        // byte_col 3 (start of é) → prefix "caf" = 3 UTF-16 units → col 4
-        assert_eq!(idx.byte_col_to_utf16_col(0, 3), 4);
-        // byte_col 5 (past é, 3 + 2 bytes) → prefix "café" = 4 UTF-16 units → col 5
-        assert_eq!(idx.byte_col_to_utf16_col(0, 5), 5);
-        // line 1 is ASCII
-        assert_eq!(idx.byte_col_to_utf16_col(1, 3), 4);
-    }
-
-    #[test]
-    fn lci_supplementary_plane_emoji() {
-        // '🚀' is U+1F680: 4 UTF-8 bytes, 2 UTF-16 code units (surrogate pair).
-        // Source: "x🚀y"
-        // byte positions: x(0) 🚀(1,2,3,4) y(5)
-        let src = "x\u{1F680}y";
-        let idx = LineColumnIndex::new(src);
-        // byte_col 0 → col 1 (before 'x')
-        assert_eq!(idx.byte_col_to_utf16_col(0, 0), 1);
-        // byte_col 1 → prefix = "x" → 1 UTF-16 unit → col 2
-        assert_eq!(idx.byte_col_to_utf16_col(0, 1), 2);
-        // byte_col 5 → prefix = "x🚀" → x(1) + 🚀(2) = 3 UTF-16 units → col 4
-        assert_eq!(idx.byte_col_to_utf16_col(0, 5), 4);
-        // byte_col 6 → prefix = "x🚀y" → 3+1 = 4 UTF-16 units → col 5
-        assert_eq!(idx.byte_col_to_utf16_col(0, 6), 5);
-    }
-
-    #[test]
-    fn lci_cjk() {
-        // '日' is U+65E5: 3 UTF-8 bytes, 1 UTF-16 code unit.
-        // Source: "日本"
-        // bytes: 日(0,1,2) 本(3,4,5)
-        let src = "\u{65E5}\u{672C}"; // 日本
-        let idx = LineColumnIndex::new(src);
-        // byte_col 0 → col 1
-        assert_eq!(idx.byte_col_to_utf16_col(0, 0), 1);
-        // byte_col 3 → prefix = "日" → 1 UTF-16 unit → col 2
-        assert_eq!(idx.byte_col_to_utf16_col(0, 3), 2);
-        // byte_col 6 → prefix = "日本" → 2 UTF-16 units → col 3
-        assert_eq!(idx.byte_col_to_utf16_col(0, 6), 3);
-    }
-
-    #[test]
-    fn lci_combining_mark() {
-        // 'e' + U+0301 (combining acute accent): 2 codepoints, 2 UTF-16 code units, 1 grapheme.
-        // Source: "e\u{0301}x"
-        // bytes: e(0) \u{0301}(1,2) x(3)
-        let src = "e\u{0301}x";
-        let idx = LineColumnIndex::new(src);
-        // byte_col 0 → col 1
-        assert_eq!(idx.byte_col_to_utf16_col(0, 0), 1);
-        // byte_col 1 → prefix = "e" → 1 UTF-16 unit → col 2
-        assert_eq!(idx.byte_col_to_utf16_col(0, 1), 2);
-        // byte_col 3 → prefix = "e\u{0301}" → 2 UTF-16 units → col 3
-        assert_eq!(idx.byte_col_to_utf16_col(0, 3), 3);
-        // byte_col 4 → prefix = "e\u{0301}x" → 3 UTF-16 units → col 4
-        assert_eq!(idx.byte_col_to_utf16_col(0, 4), 4);
-    }
-
-    #[test]
-    fn lci_crlf_line_endings() {
-        // CRLF: tree-sitter counts \n as the line terminator and the col includes \r.
-        // Source: "ab\r\ncd"
-        // Line 0 bytes: a(0) b(1) \r(2) \n(3) → line_start[1] = 4
-        // Line 1 bytes: c(0 rel) d(1 rel)
-        let src = "ab\r\ncd";
-        let idx = LineColumnIndex::new(src);
-        // Line 0
-        assert_eq!(idx.byte_col_to_utf16_col(0, 0), 1);
-        assert_eq!(idx.byte_col_to_utf16_col(0, 2), 3); // past "ab" → 2+1=3
-                                                        // Line 1
-        assert_eq!(idx.byte_col_to_utf16_col(1, 0), 1);
-        assert_eq!(idx.byte_col_to_utf16_col(1, 2), 3);
-    }
-
-    #[test]
-    fn lci_eol_boundary() {
-        // byte_col == 0 on the start of any line → col 1.
-        let src = "abc\ndef\nghi";
-        let idx = LineColumnIndex::new(src);
-        assert_eq!(idx.byte_col_to_utf16_col(0, 0), 1);
-        assert_eq!(idx.byte_col_to_utf16_col(1, 0), 1);
-        assert_eq!(idx.byte_col_to_utf16_col(2, 0), 1);
-        // byte_col at end of "abc" (3 bytes) → col 4
-        assert_eq!(idx.byte_col_to_utf16_col(0, 3), 4);
-    }
-
     #[test]
-    fn lci_empty_line() {
-        // An empty line: byte_col 0 → col 1.
-        let src = "\n\nfoo";
-        let idx = LineColumnIndex::new(src);
-        assert_eq!(idx.byte_col_to_utf16_col(0, 0), 1);
-        assert_eq!(idx.byte_col_to_utf16_col(1, 0), 1);
-        assert_eq!(idx.byte_col_to_utf16_col(2, 0), 1);
+    fn byte_col_to_utf16_col_calls_to_wide_utf16() {
+        // Single source string with one non-ASCII char. We only need to prove our wrapper
+        // delegates to line_index::LineIndex::to_wide(WideEncoding::Utf16, ..) and adds 1.
+        // Exhaustive encoding cases live in the line-index crate's own test suite.
+        let idx = LineColumnIndex::new("a\u{65E5}b"); // a 日 b
+                                                      // byte 0 ('a') → 0 UTF-16 units before, +1 = 1
+        assert_eq!(idx.byte_col_to_utf16_col(0, 0).unwrap(), 1);
+        // byte 4 ('b' after 日 which is 3 UTF-8 bytes / 1 UTF-16 unit) → 2 UTF-16 units before, +1 = 3
+        assert_eq!(idx.byte_col_to_utf16_col(0, 4).unwrap(), 3);
     }
 }
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/query_match.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/query_match.rs
index 990dd5b58..f8b95f18d 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/query_match.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/query_match.rs
@@ -34,10 +34,6 @@ impl QueryMatchBridge {
     /// Sets the bridge's data to the list of [`QueryMatch`]es, inserting tree-sitter nodes
     /// into the provided `TsNodeBridge`.
     ///
-    /// `idx` must be built from the same source string that produced the tree-sitter nodes in
-    /// `matches`; it is forwarded to [`TsNodeBridge::insert_capture`] so that `col` values
-    /// stored in v8 are 1-based UTF-16 code units.
-    ///
     /// NOTE: if the bridge had existing `QueryMatch`es, the tree-sitter nodes associated with them
     ///       will not be removed from the `TsNodeBridge`.
     pub fn set_data<'tree>(
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/ts_node.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/ts_node.rs
index 1e76229d7..24871b054 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/ts_node.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/ts_node.rs
@@ -122,8 +122,6 @@ impl TsNodeBridge {
     /// Inserts the nodes from a [`TSQueryCapture<tree_sitter::Node>`] into a v8 scope, consuming
     /// the `QueryCapture`. Returns a transformed `QueryCapture` containing the ids of the inserted
     /// nodes.
-    ///
-    /// `idx` is forwarded to [`insert`](Self::insert) for UTF-16 column conversion.
     pub fn insert_capture(
         &mut self,
         scope: &mut HandleScope,
@@ -284,38 +282,6 @@ mod tests {
         assert!(bridge.v8_get(scope, 1).is_none());
     }
 
-    /// Inserting a node from a multibyte source produces UTF-16 columns in the v8 object.
-    ///
-    /// Source: `"\u{1F680}"; abc = 1` — 🚀 is 4 UTF-8 bytes / 2 UTF-16 code units inside a string.
-    /// `abc` starts at byte 8 (after `"🚀"; `), and its UTF-16 col must account for the emoji.
-    ///
-    /// Byte layout: `"` `🚀`(4 bytes) `"` `;` ` ` = 8 bytes → `abc` at byte 8.
-    /// UTF-16 prefix: `"` + 🚀(2 units) + `"` + `;` + ` ` = 6 units → col 7.
-    #[test]
-    fn ts_node_bridge_multibyte_utf16_col() {
-        let (mut runtime, bridge) = setup_bridge();
-        let scope = &mut runtime.handle_scope();
-        let mut bridge = bridge.borrow_mut();
-
-        // Parse `"\u{1F680}"; abc = 1` as JavaScript.
-        let source = "\"\u{1F680}\"; abc = 1";
-        let tree = TsTree::new(source, Language::JavaScript);
-        let idx = LineColumnIndex::new(source);
-        let abc = tree.find_named_nodes(Some("abc"), Some("identifier"))[0];
-        // "🚀" (6 bytes: quote+4+quote) + "; " (2 bytes) = 8 bytes before `abc`.
-        assert_eq!(abc.start_position().column, 8, "byte col of `abc` is 8");
-
-        bridge.insert(scope, abc, &idx);
-        let v8_obj = bridge.v8_get(scope, 0).unwrap();
-
-        // Read `_startCol` from the v8 object.
-        let start_col = get_field::<v8::Integer>(v8_obj, "_startCol", scope, "integer")
-            .unwrap()
-            .value() as u32;
-        // UTF-16 units before abc: `"` (1) + 🚀 (2 surrogate units) + `"` (1) + `;` (1) + ` ` (1) = 6 → col 7.
-        assert_eq!(start_col, 7, "UTF-16 col of `abc` after 🚀 should be 7");
-    }
-
     /// The text that the node spans can be retrieved.
     #[test]
     fn get_node_text() {
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/context/root.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/context/root.rs
index c8f76e92a..a62e61179 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/context/root.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/context/root.rs
@@ -19,9 +19,6 @@ pub struct RootContext {
     /// The source string that was parsed by tree-sitter to generate `tree`.
     tree_text: Option<Arc<str>>,
     /// Pre-computed line/column index for `tree_text`.
-    ///
-    /// Built once in [`set_text`] and borrowed by [`line_column_index`] on every deno op call
-    /// (e.g. `namedChildren`, `parent`), avoiding repeated source scans.
     lci: Option<LineColumnIndex>,
     /// A filename associated with a rule execution.
     filename: Option<Arc<str>>,
@@ -51,20 +48,13 @@ impl RootContext {
 
     /// Assigns the provided text string to the context. If an existing text string was assigned, it
     /// will be returned as `Some(text)`.
-    ///
-    /// Also rebuilds the internal [line/column index](Self::line_column_index) cache so that
-    /// subsequent calls to [`line_column_index`](Self::line_column_index) are O(1).
     pub fn set_text(&mut self, text: Arc<str>) -> Option<Arc<str>> {
         self.lci = Some(LineColumnIndex::new(&text));
         Option::replace(&mut self.tree_text, text)
     }
 
-    /// Returns a reference to the cached [`LineColumnIndex`].
-    ///
-    /// The index is built once in [`set_text`] and reused on every deno op call, avoiding
-    /// repeated source scans.
-    ///
-    /// Returns `None` when no source text has been assigned yet.
+    /// Returns a reference to the cached [`LineColumnIndex`], or `None` if no source text has been
+    /// assigned yet.
     pub fn line_column_index(&self) -> Option<&LineColumnIndex> {
         self.lci.as_ref()
     }
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph.rs
index ca2e2a570..5a62f2224 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/graph.rs
@@ -622,10 +622,6 @@ impl From<LocatedEdge<'_>> for dot_structures::Edge {
 
 impl<'a> LocatedNode<'a> {
     /// Constructs a new `LocatedNode` from a tree-sitter node.
-    ///
-    /// `idx` must be a [`LineColumnIndex`] built from the same source text that was parsed to
-    /// produce `node`. Pass a single index built at the call site — do **not** construct one
-    /// per-node, as that would scan the entire source on every call.
     pub fn new_cst(
         node: tree_sitter::Node,
         text: &'a str,
@@ -635,7 +631,9 @@ impl<'a> LocatedNode<'a> {
         Self::Cst {
             text,
             line: start.row + 1,
-            col: idx.byte_col_to_utf16_col(start.row, start.column) as usize,
+            col: idx
+                .byte_col_to_utf16_col(start.row, start.column)
+                .unwrap_or(start.column as u32 + 1) as usize,
             cst_kind: node.kind(),
         }
     }
@@ -1205,63 +1203,4 @@ strict digraph TestPhi {
         let expected = graphviz_rust::parse(expected).unwrap();
         assert_eq!(collection.to_digraph(), expected);
     }
-
-    /// **Regression-prevention test**: `LocatedNode::new_cst` emits a 1-based UTF-16 column that
-    /// differs from the raw `tree_sitter::Point.column + 1` value for multibyte input.
-    ///
-    /// If a future change reintroduces a raw `tree_sitter::Point.column + 1` read in
-    /// `LocatedNode::new_cst`, this test will fail on the multibyte assertion.
-    #[test]
-    fn located_node_multibyte_col_is_utf16() {
-        use crate::analysis::ddsa_lib::js::flow::graph::LocatedNode;
-        use crate::analysis::ddsa_lib::test_utils::TsTree;
-        use crate::model::common::Language;
-        use common::utils::position_utils::LineColumnIndex;
-
-        // Source: "🚀var x = 1;"
-        // 🚀 = 4 UTF-8 bytes, 2 UTF-16 code units.
-        // `x` identifier starts at byte 8 (4 for 🚀, 4 for "var ").
-        // UTF-16 prefix: 🚀(2) + v(1) + a(1) + r(1) + ' '(1) = 6 units → col 7.
-        // Raw byte col of `x` = 8 → raw byte col + 1 = 9.
-        // (These differ because 🚀 = 4 bytes but 2 UTF-16 units.)
-        let source = "\u{1F680}var x = 1;";
-        let tree = TsTree::new(source, Language::JavaScript);
-
-        // Find the `x` identifier node.
-        let x_node = tree.find_named_nodes(Some("x"), Some("identifier"))[0];
-        let raw_byte_col_plus_one = x_node.start_position().column + 1;
-
-        // LocatedNode::new_cst must produce a UTF-16 col ≠ raw byte col + 1.
-        let idx = LineColumnIndex::new(source);
-        let located = LocatedNode::new_cst(x_node, tree.text(x_node), &idx);
-        let located_col = match located {
-            LocatedNode::Cst { col, .. } => col,
-            _ => panic!("expected Cst variant"),
-        };
-
-        // UTF-16 col should be smaller than raw byte col on multibyte input.
-        assert_ne!(
-            located_col, raw_byte_col_plus_one,
-            "LocatedNode col should be UTF-16, not raw byte col + 1 (raw={raw_byte_col_plus_one})"
-        );
-        // Sanity check: for this specific source, UTF-16 col must be 7.
-        // prefix "🚀var " = 2+1+1+1+1 = 6 UTF-16 units → col 7
-        assert_eq!(located_col, 7, "UTF-16 col of `x` after 🚀 should be 7");
-
-        // Additionally, verify that for an ASCII source the values agree (regression guard).
-        let ascii_source = "var x = 1;";
-        let ascii_tree = TsTree::new(ascii_source, Language::JavaScript);
-        let ascii_x = ascii_tree.find_named_nodes(Some("x"), Some("identifier"))[0];
-        let ascii_raw = ascii_x.start_position().column + 1;
-        let ascii_idx = LineColumnIndex::new(ascii_source);
-        let ascii_located = LocatedNode::new_cst(ascii_x, ascii_tree.text(ascii_x), &ascii_idx);
-        let ascii_col = match ascii_located {
-            LocatedNode::Cst { col, .. } => col,
-            _ => panic!("expected Cst variant"),
-        };
-        assert_eq!(
-            ascii_col, ascii_raw,
-            "ASCII: UTF-16 col should equal raw byte col + 1"
-        );
-    }
 }
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/java.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/java.rs
index 8761aad60..9b47aabeb 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/java.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/java.rs
@@ -424,9 +424,9 @@ serialized;
             let start = node.start_position();
             let end = node.end_position();
             region.start_line == (start.row as u32) + 1
-                && region.start_col == idx.byte_col_to_utf16_col(start.row, start.column)
+                && region.start_col == idx.byte_col_to_utf16_col(start.row, start.column).unwrap()
                 && region.end_line == (end.row as u32) + 1
-                && region.end_col == idx.byte_col_to_utf16_col(end.row, end.column)
+                && region.end_col == idx.byte_col_to_utf16_col(end.row, end.column).unwrap()
         }
 
         // language=java
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/ts_node.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/ts_node.rs
index 996d0acaf..77c56cbb5 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/ts_node.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/js/ts_node.rs
@@ -27,11 +27,6 @@ pub struct TreeSitterNode<T> {
 impl<T> TreeSitterNode<T> {
     /// Converts the provided [`tree_sitter::Node`] into a `TreeSitterNode`, assigning the
     /// provided id.
-    ///
-    /// Columns are produced as **1-based UTF-16 code-unit** values using `idx`, which must be
-    /// built from the same source string that was parsed to obtain `node`.  Use this constructor
-    /// everywhere the source string is available — it is the single source of truth for
-    /// `Position.col` semantics.
     pub fn from_ts_node_with_index(
         id: NodeId,
         node: tree_sitter::Node,
@@ -48,9 +43,13 @@ impl<T> TreeSitterNode<T> {
         Self {
             id,
             start_line: (start_row as u32) + 1,
-            start_col: idx.byte_col_to_utf16_col(start_row, start_byte_col),
+            start_col: idx
+                .byte_col_to_utf16_col(start_row, start_byte_col)
+                .unwrap_or(start_byte_col as u32 + 1),
             end_line: (end_row as u32) + 1,
-            end_col: idx.byte_col_to_utf16_col(end_row, end_byte_col),
+            end_col: idx
+                .byte_col_to_utf16_col(end_row, end_byte_col)
+                .unwrap_or(end_byte_col as u32 + 1),
             node_type_id: node.kind_id(),
             _pd: PhantomData,
         }
@@ -178,66 +177,6 @@ mod tests {
         assert_eq!(tree_sitter_node.end_col, 15);
     }
 
-    /// Mirrors [`ts_node_line_col_one_based`] but with multibyte content.
-    ///
-    /// Source: `"\u{1F680}"; foo(bar);`
-    ///   The emoji lives in a string literal so `foo` is a distinct identifier.
-    ///   Bytes before `foo`: `"` (1) + 🚀 (4) + `"` (1) + `;` (1) + ` ` (1) = 8 bytes.
-    ///   UTF-16 prefix:      `"` (1) + 🚀 (2) + `"` (1) + `;` (1) + ` ` (1) = 6 units → col 7.
-    #[test]
-    fn ts_node_line_col_multibyte() {
-        let lang = get_tree_sitter_language(&Language::JavaScript);
-        let mut parser = tree_sitter::Parser::new();
-        parser.set_language(&lang).unwrap();
-
-        // Source: `"\u{1F680}"; foo(bar);`
-        // 🚀 is placed inside a string literal so `foo` is clearly a separate identifier.
-        let source = "\"\u{1F680}\"; foo(bar);";
-        let tree = parser.parse(source, None).unwrap();
-
-        // The second statement is `foo(bar);`
-        let root = tree.root_node();
-        let call_node = root
-            .child(1) // second expression_statement
-            .unwrap()
-            .child(0) // call_expression
-            .unwrap();
-
-        // `foo` starts at byte 8 ("🚀" = 6 bytes + "; " = 2 bytes).
-        let fn_node = call_node.child_by_field_name("function").unwrap();
-        assert_eq!(fn_node.start_position().column, 8, "byte col of `foo` is 8");
-
-        let idx = LineColumnIndex::new(source);
-        let ts_node = TreeSitterNode::<Instance>::from_ts_node_with_index(0, fn_node, &idx);
-        assert_eq!(ts_node.start_line, 1);
-        // UTF-16 col 7: " (1) + 🚀 surrogate pair (2) + " (1) + ; (1) + space (1) = 6 → col 7.
-        assert_eq!(
-            ts_node.start_col, 7,
-            "UTF-16 col of `foo` should be 7 (emoji as string literal)"
-        );
-
-        // Also verify with a CJK prefix: `"日"; foo(bar);`
-        // 日 = 3 UTF-8 bytes, 1 UTF-16 unit.
-        // Bytes before foo: " (1) + 日 (3) + " (1) + ; (1) + space (1) = 7 bytes.
-        // UTF-16: " (1) + 日 (1) + " (1) + ; (1) + space (1) = 5 units → col 6.
-        let source_cjk = "\"\u{65E5}\"; foo(bar);";
-        let tree_cjk = parser.parse(source_cjk, None).unwrap();
-        let call_cjk = tree_cjk.root_node().child(1).unwrap().child(0).unwrap();
-        let fn_cjk = call_cjk.child_by_field_name("function").unwrap();
-        assert_eq!(
-            fn_cjk.start_position().column,
-            7,
-            "byte col of `foo` after 日 is 7"
-        );
-        let idx_cjk = LineColumnIndex::new(source_cjk);
-        let ts_node_cjk = TreeSitterNode::<Instance>::from_ts_node_with_index(1, fn_cjk, &idx_cjk);
-        // UTF-16 col 6: " (1) + 日 (1) + " (1) + ; (1) + space (1) = 5 → col 6.
-        assert_eq!(
-            ts_node_cjk.start_col, 6,
-            "UTF-16 col of `foo` after 日 should be 6"
-        );
-    }
-
     /// Tests that we use the [`tree_sitter::Node::kind_id`] instead of the [`tree_sitter::Node::grammar_id`], which
     /// has subtle differences.
     #[rustfmt::skip]
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs
index 8d3f9cff4..0baf22e77 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/ops.rs
@@ -108,8 +108,6 @@ pub fn op_ts_node_named_children<'s>(
         // ts_node_bridge and ctx_bridge are separate RefCells so the concurrent borrows are valid.
         let ctx_bridge = state.borrow::<Rc<RefCell<bridge::ContextBridge>>>();
         let ctx_borrow = ctx_bridge.borrow();
-        // Use the pre-computed index cached in RootContext (O(1)) rather than
-        // rescanning the source on every namedChildren call.
         let idx = ctx_borrow
             .ddsa_root_context()
             .line_column_index()
@@ -168,8 +166,6 @@ pub fn op_ts_node_parent(
         OpSafeRawTSNode::from_root_context(root_ctx, |ctx| ctx.get_ts_node_parent(ts_node))?;
     let parent_ts_node = safe_raw_parent.to_node();
 
-    // Use the pre-computed index cached in RootContext (O(1)) rather than
-    // rescanning the source on every parent call.
     let idx = root_ctx
         .line_column_index()
         .expect("tree text must be set before ops run");
@@ -267,8 +263,6 @@ pub fn op_digraph_adjacency_list_to_dot(
     let text = root_ctx
         .get_text()
         .expect("tree text should always be `Some` during rule execution");
-    // Use the pre-computed index cached in RootContext (O(1)) rather than rebuilding per-vertex.
-    // `set_text` always builds the index alongside `tree_text`, so this is always `Some` here.
     let lc_idx = root_ctx
         .line_column_index()
         .expect("tree text must be set before ops run");
diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs
index b4322fb7f..ecbacda0d 100644
--- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs
+++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs
@@ -1448,47 +1448,6 @@ function visit(query, filename, code) {
         assert_eq!(*violation, expected);
     }
 
-    /// `stella_compat.js::getCode` correctly extracts an identifier that follows a multibyte
-    /// character on the same line.
-    ///
-    /// The source is `"\u{1F680}"; protectedName = 1;` — the emoji sits inside a string literal
-    /// so that `protectedName` is a distinct, standalone identifier token.
-    /// Bytes before `protectedName`: `"` (1) + 🚀 (4) + `"` (1) + `;` (1) + ` ` (1) = 8 bytes.
-    /// UTF-16 prefix: `"` (1) + 🚀 surrogate pair (2) + `"` (1) + `;` (1) + ` ` (1) = 6 units → col 7.
-    /// With UTF-16 columns, `col - 1` correctly indexes into the JS string via `substring`,
-    /// so `getCodeForNode(node, code)` returns `"protectedName"`.
-    #[test]
-    fn stella_compat_getcode_multibyte() {
-        let mut rt = cfg_test_v8().new_runtime();
-        // 🚀 is placed in a string literal so that `protectedName` is a distinct identifier.
-        // Bytes before `protectedName`: " (1) + 🚀 (4) + " (1) + ; (1) + space (1) = 8 bytes.
-        // UTF-16 col: " (1) + 🚀 surrogate pair (2) + " (1) + ; (1) + space (1) = 6 units → col 7.
-        let text = "\"\u{1F680}\"; protectedName = 1;";
-        let filename = "some_filename.js";
-        let ts_query = r#"((identifier) @cap_name (#eq? @cap_name "protectedName"))"#;
-        let rule = r#"
-function visit(query, filename, code) {
-    const node = query.captures["cap_name"];
-    const nodeText = getCodeForNode(node, code);
-    const error = buildError(
-        node.start.line,
-        node.start.col,
-        node.end.line,
-        node.end.col,
-        nodeText
-    );
-    addError(error);
-}
-"#;
-        let violations =
-            shorthand_execute_rule_internal(&mut rt, text, filename, ts_query, rule, None).unwrap();
-        assert_eq!(violations.len(), 1);
-        let violation = violations.first().unwrap();
-        assert_eq!(violation.message, "protectedName");
-        // UTF-16 col 7: "(1) + 🚀 surrogate pair(2) + "(1) + ;(1) + space(1) = 6 units → col 7.
-        assert_eq!(violation.base_region.start_col, 7);
-    }
-
     /// Passing a `VisitArgFilenameCompat` or `VisitArgCodeCompat` instance directly into console.log
     /// returns the contents of the underlying string.
     #[test]
diff --git a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs
index 7b2cebc2f..16865a810 100644
--- a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs
+++ b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs
@@ -340,10 +340,6 @@ pub fn get_query_nodes(
 // map a node from the tree-sitter representation into our own internal representation
 // this is the representation that is passed to the JavaScript layer and how we represent
 // or expose the node to the end-user.
-//
-// `idx` is a pre-built [`LineColumnIndex`] for the source that produced `node`; it is used to
-// convert tree-sitter's 0-based UTF-8 byte columns into 1-based UTF-16 code-unit columns so
-// that `Position.col` is consistent with LSP / VS Code / SARIF v2.1 semantics.
 pub fn map_node(node: tree_sitter::Node, idx: &LineColumnIndex) -> Option<TreeSitterNode> {
     fn map_node_internal(
         cursor: &mut tree_sitter::TreeCursor,
@@ -380,11 +376,15 @@ pub fn map_node(node: tree_sitter::Node, idx: &LineColumnIndex) -> Option<TreeSi
             ast_type: cursor.node().kind().to_string(),
             start: Position {
                 line: u32::try_from(start_point.row + 1).unwrap(),
-                col: idx.byte_col_to_utf16_col(start_point.row, start_point.column),
+                col: idx
+                    .byte_col_to_utf16_col(start_point.row, start_point.column)
+                    .unwrap_or(start_point.column as u32 + 1),
             },
             end: Position {
                 line: u32::try_from(end_point.row + 1).unwrap(),
-                col: idx.byte_col_to_utf16_col(end_point.row, end_point.column),
+                col: idx
+                    .byte_col_to_utf16_col(end_point.row, end_point.column)
+                    .unwrap_or(end_point.column as u32 + 1),
             },
             field_name: cursor.field_name().map(ToString::to_string),
             children,
@@ -758,91 +758,6 @@ SELECT * FROM table WHERE column = 'value';
         assert!(query_node.captures.contains_key("classname"));
     }
 
-    /// `map_node` emits 1-based UTF-16 columns.  An emoji (U+1F680, 4 UTF-8 bytes, 2 UTF-16
-    /// code units) before a named identifier must shift subsequent col values accordingly.
-    ///
-    /// Source: `x = "🚀"; num = 5`
-    /// UTF-8 byte layout (0-indexed, line 0):
-    ///   x(0) ' '(1) =(2) ' '(3) "(4) 🚀(5-8) "(9) ;(10) ' '(11) n(12) u(13) m(14) ...
-    ///
-    /// `num` starts at byte 12.  UTF-16 prefix for bytes 0..12:
-    ///   x(1) + ' '(1) + =(1) + ' '(1) + "(1) + 🚀(2) + "(1) + ;(1) + ' '(1) = 10 UTF-16 units
-    ///   → `num` is at 1-based UTF-16 col 11.
-    #[test]
-    fn test_map_node_multibyte_emoji() {
-        let source_code = "x = \"\u{1F680}\"; num = 5";
-        let tree = get_tree(source_code, &Language::Python).unwrap();
-        let idx = LineColumnIndex::new(source_code);
-        let root = map_node(tree.root_node(), &idx).unwrap();
-        // We parse "x = "🚀"; num = 5" as Python.
-        // The second assignment has `num` as the left side (an identifier).
-        // Byte 12 = 'n', UTF-16 col = 11.
-        let second_stmt = root.children.get(1).unwrap(); // second expression_statement
-        let assignment = second_stmt.children.get(0).unwrap();
-        // In Python AST, the left side of `num = 5` is an identifier.
-        let num_node = assignment
-            .children
-            .iter()
-            .find(|c| c.ast_type == "identifier")
-            .unwrap();
-        // Byte offset of 'n' in `x = "🚀"; num = 5`:
-        //   "x = \"" = 5 bytes, "🚀" = 4 bytes, "\"; " = 3 bytes = total 12 bytes before 'n'
-        // UTF-16 col: x(1),' '(1),=(1),' '(1),"(1),🚀_high(1),🚀_low(1),"(1),;(1),' '(1) = 10 → col 11
-        assert_eq!(num_node.start.col, 11, "UTF-16 col of `num` should be 11");
-        assert_eq!(num_node.start.line, 1);
-    }
-
-    /// `get_query_nodes` emits 1-based UTF-16 columns for CJK characters
-    /// (3 UTF-8 bytes, 1 UTF-16 code unit each).
-    ///
-    /// Source (Python): `日 = 1; end = 2`
-    /// 日 = 3 UTF-8 bytes, 1 UTF-16 unit.
-    ///
-    /// `end` starts at byte 9:  日(3) + ' '(1) + =(1) + ' '(1) + 1(1) + ;(1) + ' '(1) = 9 bytes.
-    /// UTF-16 prefix for those 9 bytes:
-    ///   日(1) + ' '(1) + =(1) + ' '(1) + 1(1) + ;(1) + ' '(1) = 7 UTF-16 units → 1-based col 8.
-    #[test]
-    fn test_get_query_nodes_cjk() {
-        // Python: assign 日 = 1, then end = 2
-        let code = "\u{65E5} = 1; end = 2"; // 日 = 1; end = 2
-        let q = "(identifier) @id";
-        let tree = get_tree(code, &Language::Python).unwrap();
-        let query = get_query(q, &Language::Python).expect("query defined");
-        let nodes = get_query_nodes(
-            &tree,
-            &query,
-            "f.py",
-            code,
-            &std::collections::HashMap::new(),
-        );
-
-        // 日 is at byte 0, UTF-16 col 1 (ASCII fast path applies to byte 0).
-        // But 日 itself is an identifier. Its start byte col is 0 → UTF-16 col 1.
-        let dai_col = nodes
-            .iter()
-            .flat_map(|mn| mn.captures.values())
-            .find(|n| n.start.col == 1)
-            .map(|n| n.start.col)
-            .unwrap_or(0);
-        assert_eq!(dai_col, 1, "日 starts at UTF-16 col 1");
-
-        // `end` starts at byte 11 in the source:
-        //   日(3) + ' '(1) + '='(1) + ' '(1) + '1'(1) + ';'(1) + ' '(1) = 9 bytes → byte 9
-        //   Wait, let me recount: 日=3, ' '=1 → 4, '='=1 → 5, ' '=1 → 6, '1'=1 → 7, ';'=1 → 8, ' '=1 → 9
-        //   So 'e' in 'end' is at byte 9.
-        //   UTF-16 prefix to byte 9: 日(1),' '(1),=(1),' '(1),1(1),;(1),' '(1) = 7 units → col 8.
-        let end_col = nodes
-            .iter()
-            .flat_map(|mn| mn.captures.values())
-            .find(|n| {
-                // The `end` identifier node is at col > 1.
-                n.start.col > 1
-            })
-            .map(|n| n.start.col)
-            .unwrap_or(0);
-        assert_eq!(end_col, 8, "UTF-16 col of `end` after CJK char should be 8");
-    }
-
     #[test]
     fn ts_query_cursor_matches_timeout() {
         let timeout = Duration::from_millis(500);
diff --git a/crates/static-analysis-server/src/tree_sitter_tree.rs b/crates/static-analysis-server/src/tree_sitter_tree.rs
index 905d6c8bc..c0d34d7a0 100644
--- a/crates/static-analysis-server/src/tree_sitter_tree.rs
+++ b/crates/static-analysis-server/src/tree_sitter_tree.rs
@@ -73,49 +73,6 @@ mod tests {
         assert_eq!("module", response.result.unwrap().ast_type);
     }
 
-    /// `process_tree_sitter_tree_request` emits 1-based UTF-16 columns for non-ASCII content.
-    ///
-    /// Source: `x = "🚀"; num = 5`
-    /// The emoji (🚀, U+1F680) is 4 UTF-8 bytes / 2 UTF-16 code units.  `num` starts at byte 12,
-    /// so its UTF-16 col = 10 (units before it) + 1 = 11.
-    #[test]
-    fn test_process_tree_sitter_tree_request_multibyte() {
-        // Source: `x = "🚀"; num = 5`
-        // printf 'x = "\xF0\x9F\x9A\x80"; num = 5' | base64
-        let request = TreeSitterRequest {
-            code_base64: "eCA9ICLwn5qAIjsgbnVtID0gNQ==".to_string(),
-            file_encoding: "utf-8".to_string(),
-            language: Language::Python,
-        };
-        let response = process_tree_sitter_tree_request(request);
-        assert!(response.errors.is_empty());
-        let root = response.result.unwrap();
-
-        /// Depth-first search for a node whose `start.col` matches `expected_col`.
-        fn find_col(
-            node: &crate::model::tree_sitter_tree_node::ServerTreeSitterNode,
-            expected_col: u32,
-        ) -> bool {
-            if node.start.col == expected_col {
-                return true;
-            }
-            node.children.iter().any(|c| find_col(c, expected_col))
-        }
-
-        // `num` starts at byte 12 in `x = "🚀"; num = 5`.
-        // UTF-16 units before it: x(1) ' '(1) =(1) ' '(1) "(1) 🚀(2) "(1) ;(1) ' '(1) = 10 → col 11.
-        assert!(
-            find_col(&root, 11),
-            "expected a node with UTF-16 start.col == 11 (position of `num` after 🚀)"
-        );
-
-        // Ensure that no node reports col 13 (what the raw byte col + 1 would give for `num`).
-        assert!(
-            !find_col(&root, 13),
-            "no node should report raw byte col 13 — that would indicate the UTF-16 fix is not applied"
-        );
-    }
-
     #[test]
     fn test_process_tree_sitter_invalid_base64() {
         let request = TreeSitterRequest {