From f795261f6dfd8336a72591e4f7420771e6c4f9d7 Mon Sep 17 00:00:00 2001 From: Philippe Granger <90652303+ppgranger@users.noreply.github.com> Date: Sat, 28 Mar 2026 16:10:46 +0100 Subject: [PATCH] fix: bugs and code deduplication in processors - Fix missing @ separator in Pipfile.lock package formatting - Fix overly permissive tree summary regex in file_listing - Fix extra newlines in generic truncation message - Deduplicate poetry.lock/Cargo.lock parsers into _compress_toml_lock - Extract shared Rust compiler regex patterns to utils.py --- src/processors/cargo.py | 40 ++++++++++++++++++---------------- src/processors/cargo_clippy.py | 25 +++++++++++---------- src/processors/file_content.py | 33 ++++++++-------------------- src/processors/file_listing.py | 2 +- src/processors/generic.py | 2 +- src/processors/utils.py | 8 +++++++ 6 files changed, 53 insertions(+), 57 deletions(-) diff --git a/src/processors/cargo.py b/src/processors/cargo.py index f40d52f..7a48513 100644 --- a/src/processors/cargo.py +++ b/src/processors/cargo.py @@ -5,17 +5,19 @@ from .. import config from .base import Processor +from .utils import ( + RUST_COMPILING_RE, + RUST_ERROR_START_RE, + RUST_FINISHED_RE, + RUST_SPAN_LINE_RE, + RUST_WARNING_START_RE, + RUST_WARNING_SUMMARY_RE, +) _CARGO_CMD_RE = re.compile(r"\bcargo\s+(build|check|doc|update|bench)\b") -_COMPILING_RE = re.compile(r"^\s*Compiling\s+\S+\s+v") _DOWNLOADING_RE = re.compile(r"^\s*Downloading\s+\S+\s+v") _DOCUMENTING_RE = re.compile(r"^\s*Documenting\s+\S+\s+v") _RUNNING_RE = re.compile(r"^\s*Running\s+") -_FINISHED_RE = re.compile(r"^\s*Finished\s+") -_WARNING_START_RE = re.compile(r"^warning(?:\[(\S+)\])?:\s+(.+)") -_ERROR_START_RE = re.compile(r"^error(?:\[(\S+)\])?:\s+(.+)") -_SPAN_LINE_RE = re.compile(r"^\s*(-->|\d+\s*\||=\s+)") -_WARNING_SUMMARY_RE = re.compile(r"^warning:\s+.+generated\s+\d+\s+warning") _UPDATE_LINE_RE = re.compile( r"^\s*(Updating|Removing|Adding)\s+(\S+)\s+v([\d.]+)(?:\s*->\s*v([\d.]+))?" ) @@ -89,7 +91,7 @@ def _process_cargo_build(self, output: str) -> str: for line in lines: stripped = line.strip() - if _COMPILING_RE.match(stripped): + if RUST_COMPILING_RE.match(stripped): compiling_count += 1 continue if _DOWNLOADING_RE.match(stripped): @@ -97,7 +99,7 @@ def _process_cargo_build(self, output: str) -> str: continue # Error start - if _ERROR_START_RE.match(stripped): + if RUST_ERROR_START_RE.match(stripped): # Flush current warning block if current_type and current_block: warnings_by_type[current_type].append(current_block) @@ -111,8 +113,8 @@ def _process_cargo_build(self, output: str) -> str: continue # Warning start - wm = _WARNING_START_RE.match(stripped) - if wm and not _WARNING_SUMMARY_RE.match(stripped): + wm = RUST_WARNING_START_RE.match(stripped) + if wm and not RUST_WARNING_SUMMARY_RE.match(stripped): # Flush previous if in_error and current_error: error_blocks.append(current_error) @@ -127,7 +129,7 @@ def _process_cargo_build(self, output: str) -> str: current_block = [line] continue - if _WARNING_SUMMARY_RE.match(stripped): + if RUST_WARNING_SUMMARY_RE.match(stripped): if current_type and current_block: warnings_by_type[current_type].append(current_block) current_block = [] @@ -139,7 +141,7 @@ def _process_cargo_build(self, output: str) -> str: warning_summary_lines.append(line) continue - if _FINISHED_RE.match(stripped): + if RUST_FINISHED_RE.match(stripped): if current_type and current_block: warnings_by_type[current_type].append(current_block) current_block = [] @@ -201,16 +203,16 @@ def _process_cargo_doc(self, output: str) -> str: for line in lines: stripped = line.strip() - if _COMPILING_RE.match(stripped): + if RUST_COMPILING_RE.match(stripped): compiling_count += 1 elif _DOCUMENTING_RE.match(stripped): documenting_count += 1 elif ( - _FINISHED_RE.match(stripped) + RUST_FINISHED_RE.match(stripped) or re.match(r"^\s*Generated\s+", stripped) or re.search(r"\bwarning\b", stripped) - or _ERROR_START_RE.match(stripped) - or (_SPAN_LINE_RE.match(stripped) and result) + or RUST_ERROR_START_RE.match(stripped) + or (RUST_SPAN_LINE_RE.match(stripped) and result) ): result.append(line) @@ -274,15 +276,15 @@ def _process_cargo_bench(self, output: str) -> str: for line in lines: stripped = line.strip() - if _COMPILING_RE.match(stripped): + if RUST_COMPILING_RE.match(stripped): compiling_count += 1 elif _RUNNING_RE.match(stripped): continue elif ( re.match(r"^test\s+.+\s+bench:", stripped) or re.match(r"^test result:", stripped) - or _FINISHED_RE.match(stripped) - or _ERROR_START_RE.match(stripped) + or RUST_FINISHED_RE.match(stripped) + or RUST_ERROR_START_RE.match(stripped) ): result.append(line) diff --git a/src/processors/cargo_clippy.py b/src/processors/cargo_clippy.py index cfe83fd..65f03c2 100644 --- a/src/processors/cargo_clippy.py +++ b/src/processors/cargo_clippy.py @@ -5,15 +5,16 @@ from .. import config from .base import Processor +from .utils import ( + RUST_COMPILING_RE, + RUST_ERROR_START_RE, + RUST_FINISHED_RE, + RUST_WARNING_START_RE, + RUST_WARNING_SUMMARY_RE, +) _CLIPPY_CMD_RE = re.compile(r"\bcargo\s+clippy\b") -_WARNING_START_RE = re.compile(r"^warning(?:\[(\S+)\])?:\s+(.+)") -_ERROR_START_RE = re.compile(r"^error(?:\[(\S+)\])?:\s+(.+)") -_SPAN_LINE_RE = re.compile(r"^\s*(-->|\d+\s*\||=\s+)") -_WARNING_SUMMARY_RE = re.compile(r"^warning:\s+.+generated\s+\d+\s+warning") -_FINISHED_RE = re.compile(r"^\s*Finished\s+") _CHECKING_RE = re.compile(r"^\s*Checking\s+\S+\s+v") -_COMPILING_RE = re.compile(r"^\s*Compiling\s+\S+\s+v") # Clippy lint categories _CLIPPY_CATEGORIES = { @@ -83,12 +84,12 @@ def process(self, command: str, output: str) -> str: if _CHECKING_RE.match(stripped): checking_count += 1 continue - if _COMPILING_RE.match(stripped): + if RUST_COMPILING_RE.match(stripped): compiling_count += 1 continue # Error start - if _ERROR_START_RE.match(stripped): + if RUST_ERROR_START_RE.match(stripped): # Flush current warning block if current_rule and current_block: warnings_by_rule[current_rule].append(current_block) @@ -102,8 +103,8 @@ def process(self, command: str, output: str) -> str: continue # Warning start - wm = _WARNING_START_RE.match(stripped) - if wm and not _WARNING_SUMMARY_RE.match(stripped): + wm = RUST_WARNING_START_RE.match(stripped) + if wm and not RUST_WARNING_SUMMARY_RE.match(stripped): # Flush previous if in_error and current_error: error_blocks.append(current_error) @@ -117,7 +118,7 @@ def process(self, command: str, output: str) -> str: current_block = [line] continue - if _WARNING_SUMMARY_RE.match(stripped): + if RUST_WARNING_SUMMARY_RE.match(stripped): if current_rule and current_block: warnings_by_rule[current_rule].append(current_block) current_block = [] @@ -129,7 +130,7 @@ def process(self, command: str, output: str) -> str: summary_lines.append(line) continue - if _FINISHED_RE.match(stripped): + if RUST_FINISHED_RE.match(stripped): if current_rule and current_block: warnings_by_rule[current_rule].append(current_block) current_block = [] diff --git a/src/processors/file_content.py b/src/processors/file_content.py index 52c5d39..e46043b 100644 --- a/src/processors/file_content.py +++ b/src/processors/file_content.py @@ -401,8 +401,8 @@ def _compress_yarn_lock(self, lines: list[str], total: int) -> str: result.append(f" ... ({len(deps) - 50} more)") return "\n".join(result) - def _compress_poetry_lock(self, lines: list[str], total: int) -> str: - """poetry.lock: extract [[package]] name and version.""" + def _compress_toml_lock(self, lines: list[str], total: int, label: str) -> str: + """Extract [[package]] name and version from TOML lock files (poetry.lock, Cargo.lock).""" deps = [] current_name = None for line in lines: @@ -417,35 +417,20 @@ def _compress_poetry_lock(self, lines: list[str], total: int) -> str: deps.append(f"{current_name}@{val}") current_name = None - result = [f"poetry.lock ({len(deps)} packages, {total} lines):"] + result = [f"{label} ({len(deps)} packages, {total} lines):"] for d in deps[:50]: result.append(f" {d}") if len(deps) > 50: result.append(f" ... ({len(deps) - 50} more)") return "\n".join(result) + def _compress_poetry_lock(self, lines: list[str], total: int) -> str: + """poetry.lock: extract [[package]] name and version.""" + return self._compress_toml_lock(lines, total, "poetry.lock") + def _compress_cargo_lock(self, lines: list[str], total: int) -> str: """Cargo.lock: extract [[package]] name and version.""" - deps = [] - current_name = None - for line in lines: - stripped = line.strip() - if stripped == "[[package]]": - current_name = None - elif stripped.startswith("name = "): - val = stripped.split('"')[1] if '"' in stripped else stripped.split("=")[1].strip() - current_name = val - elif stripped.startswith("version = ") and current_name: - val = stripped.split('"')[1] if '"' in stripped else stripped.split("=")[1].strip() - deps.append(f"{current_name}@{val}") - current_name = None - - result = [f"Cargo.lock ({len(deps)} packages, {total} lines):"] - for d in deps[:50]: - result.append(f" {d}") - if len(deps) > 50: - result.append(f" ... ({len(deps) - 50} more)") - return "\n".join(result) + return self._compress_toml_lock(lines, total, "Cargo.lock") def _compress_json_lock(self, raw: str, total: int) -> str: """composer.lock / Pipfile.lock: extract package names + versions from JSON.""" @@ -465,7 +450,7 @@ def _compress_json_lock(self, raw: str, total: int) -> str: for section in ("default", "develop"): for name, info in data.get(section, {}).items(): version = info.get("version", "?") if isinstance(info, dict) else "?" - deps.append(f"{name}{version}") + deps.append(f"{name}@{version}") result = [f"lock file ({len(deps)} packages, {total} lines):"] for d in deps[:50]: diff --git a/src/processors/file_listing.py b/src/processors/file_listing.py index b607877..9f71b6c 100644 --- a/src/processors/file_listing.py +++ b/src/processors/file_listing.py @@ -169,7 +169,7 @@ def _process_tree(self, output: str) -> str: # Find the summary line (usually last line like "X directories, Y files") summary = "" for line in reversed(lines): - if re.match(r"\d+\s+director", line): + if re.match(r"\d+\s+director(?:ies|y)\b", line): summary = line break diff --git a/src/processors/generic.py b/src/processors/generic.py index db20c07..9ae0179 100644 --- a/src/processors/generic.py +++ b/src/processors/generic.py @@ -209,6 +209,6 @@ def _truncate_middle(self, lines: list[str]) -> list[str]: removed = total - keep_head - keep_tail return [ *lines[:keep_head], - f"\n... ({removed} lines truncated, {total} total) ...\n", + f"... ({removed} lines truncated, {total} total) ...", *lines[-keep_tail:], ] diff --git a/src/processors/utils.py b/src/processors/utils.py index 619593f..b2a1d5d 100644 --- a/src/processors/utils.py +++ b/src/processors/utils.py @@ -3,6 +3,14 @@ import re from collections import defaultdict +# Shared Rust compiler output patterns (used by cargo and cargo_clippy processors) +RUST_WARNING_START_RE = re.compile(r"^warning(?:\[(\S+)\])?:\s+(.+)") +RUST_ERROR_START_RE = re.compile(r"^error(?:\[(\S+)\])?:\s+(.+)") +RUST_SPAN_LINE_RE = re.compile(r"^\s*(-->|\d+\s*\||=\s+)") +RUST_WARNING_SUMMARY_RE = re.compile(r"^warning:\s+.+generated\s+\d+\s+warning") +RUST_FINISHED_RE = re.compile(r"^\s*Finished\s+") +RUST_COMPILING_RE = re.compile(r"^\s*Compiling\s+\S+\s+v") + _DEFAULT_ERROR_RE = re.compile( r"\b(error|Error|ERROR|exception|Exception|EXCEPTION|" r"fatal|Fatal|FATAL|panic|Panic|PANIC|traceback|Traceback)\b"