Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions src/reviewer/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,6 @@ def cmd_review(args: argparse.Namespace) -> None:
method = args.method
print(f"Running method: {method}...")

if args.skip_nonsubstantial == "true":
skip = True
else:
skip = False

reasoning = getattr(args, "reasoning_effort", None)

if method == "zero_shot":
Expand All @@ -117,7 +112,6 @@ def cmd_review(args: argparse.Namespace) -> None:
slug, content,
model=args.model,
reasoning_effort=reasoning,
skip_nonsubstantial=skip,
ocr=was_ocr,
)
result = full if method == "progressive_full" else consolidated
Expand Down Expand Up @@ -521,12 +515,6 @@ def main() -> None:
default=None,
help="Reasoning effort level (default: adaptive/auto)",
)
review_parser.add_argument(
"--skip-nonsubstantial",
choices=["true", "false"],
default="false",
help="Skip non-substantial passages (default: false)",
)
review_parser.add_argument(
"--ocr",
choices=["mistral", "deepseek", "marker", "pymupdf"],
Expand Down
25 changes: 22 additions & 3 deletions src/reviewer/method_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,28 @@
from .client import chat
from .models import ReviewResult
from .prompts import DEEP_CHECK_PROMPT, OCR_CAVEAT, OVERALL_FEEDBACK_PROMPT
from .utils import count_tokens, locate_comments_in_window, parse_comments_from_list, split_into_paragraphs
from .utils import count_tokens, locate_comments_in_window, parse_comments_from_list


def split_into_paragraphs(text: str, min_chars: int = 100) -> list[str]:
    """Split *text* on blank lines into paragraphs.

    A paragraph shorter than *min_chars* is folded into the following
    paragraph (joined with a blank line); a trailing short remainder is
    appended to the last emitted paragraph, or returned on its own when
    nothing else was emitted.
    """
    result: list[str] = []
    pending = ""
    for piece in (seg.strip() for seg in text.split("\n\n")):
        if not piece:
            continue
        # Prepend any short text carried over from earlier paragraphs.
        merged = f"{pending}\n\n{piece}" if pending else piece
        if len(merged) < min_chars:
            pending = merged
        else:
            result.append(merged)
            pending = ""
    if pending:
        # Leftover short tail: attach to the previous paragraph if any.
        if result:
            result[-1] += "\n\n" + pending
        else:
            result.append(pending)
    return result


def merge_into_chunks(
Expand Down Expand Up @@ -45,10 +66,8 @@ def get_chunk_window_context(
"""Get surrounding passages as context (asymmetric: more before, less after)."""
before = window + 2
after = max(1, window - 1)

start = max(0, chunk_idx - before)
end = min(len(chunks), chunk_idx + after + 1)

context_parts = []
for i in range(start, end):
_, text = chunks[i]
Expand Down
49 changes: 35 additions & 14 deletions src/reviewer/method_progressive.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Processes the paper sequentially, maintaining a running summary of definitions,
equations, theorems, and key claims. For each passage:
1. (Optional) Pre-filter to skip non-substantial content
1. (Optional) Pre-filter to skip non-technical content
2. Deep-check: running summary + window context + passage → find errors
3. Summary update: current summary + passage → updated summary
4. Post-hoc consolidation: one final call to deduplicate and prune low-confidence issues
Expand All @@ -16,19 +16,40 @@
from .models import ReviewResult
from .prompts import (
CONSOLIDATION_PROMPT,
DEEP_CHECK_PROMPT,
DEEP_CHECK_PROGRESSIVE_PROMPT as DEEP_CHECK_PROMPT,
OCR_CAVEAT,
OVERALL_FEEDBACK_PROMPT,
SUMMARY_UPDATE_PROMPT,
SUBSTANTIAL_FILTER_PROMPT,
TECHNICAL_FILTER_PROMPT,
)
from .utils import count_tokens, locate_comments_in_window, parse_comments_from_list, split_into_paragraphs
from .utils import count_tokens, locate_comments_in_window, parse_comments_from_list


# ---------------------------------------------------------------------------
# Paragraph / passage helpers
# ---------------------------------------------------------------------------

def split_into_paragraphs(text: str, min_chars: int = 100) -> list[str]:
    """Break a document into blank-line-separated paragraphs.

    Paragraphs below *min_chars* characters accumulate and are prepended
    (blank-line joined) to the next paragraph; a short remainder at the
    end is attached to the last paragraph produced, or stands alone when
    the whole document was short.
    """
    chunks: list[str] = []
    buffer: list[str] = []
    for part in text.split("\n\n"):
        part = part.strip()
        if not part:
            continue
        buffer.append(part)
        candidate = "\n\n".join(buffer)
        # Emit once the accumulated text is long enough.
        if len(candidate) >= min_chars:
            chunks.append(candidate)
            buffer = []
    if buffer:
        leftover = "\n\n".join(buffer)
        if chunks:
            chunks[-1] = chunks[-1] + "\n\n" + leftover
        else:
            chunks.append(leftover)
    return chunks


def merge_into_passages(
paragraphs: list[str],
target_chars: int = 8000,
Expand Down Expand Up @@ -114,14 +135,14 @@ def update_running_summary(
return updated


def is_substantial_passage(
def is_technical_passage(
passage_text: str,
model: str,
result: ReviewResult,
reasoning_effort: str | None = None,
) -> bool:
"""Use the model to decide if a passage has substantial content worth checking."""
prompt = SUBSTANTIAL_FILTER_PROMPT.format(passage=passage_text[:2000])
"""Use the model to decide if a passage has technical content worth checking."""
prompt = TECHNICAL_FILTER_PROMPT.format(passage=passage_text[:2000])
response, usage = chat(
messages=[{"role": "user", "content": prompt}],
model=model,
Expand Down Expand Up @@ -185,14 +206,14 @@ def review_progressive(
document_content: str,
model: str = "anthropic/claude-opus-4-6",
reasoning_effort: str | None = None,
skip_nonsubstantial: bool = False,
skip_nontechnical: bool = False,
window_size: int = 3,
ocr: bool = False,
) -> tuple[ReviewResult, ReviewResult]:
"""Review a paper using progressive summary approach.

Processes the paper sequentially. For each passage:
1. (Optional) Pre-filter non-substantial content
1. (Optional) Pre-filter non-technical content
2. Deep-check with running summary + window context
3. Update the running summary
Then consolidate all comments in a final pass.
Expand Down Expand Up @@ -222,10 +243,10 @@ def review_progressive(
_, passage_text = passages[idx]

# Step 0: Optional pre-filter
if skip_nonsubstantial:
if not is_substantial_passage(passage_text, model, result, reasoning_effort):
if skip_nontechnical:
if not is_technical_passage(passage_text, model, result, reasoning_effort):
skipped += 1
print(f" Passage {idx+1}/{len(passages)}: SKIPPED (non-substantial)")
print(f" Passage {idx+1}/{len(passages)}: SKIPPED (non-technical)")
# Still update summary even for skipped passages (may have definitions)
running_summary = update_running_summary(
current_summary=running_summary,
Expand Down Expand Up @@ -296,8 +317,8 @@ def review_progressive(
max_summary_tokens=max_summary_tokens,
)

if skip_nonsubstantial:
print(f" Skipped {skipped}/{len(passages)} non-substantial passages")
if skip_nontechnical:
print(f" Skipped {skipped}/{len(passages)} non-technical passages")

# Generate overall feedback
paper_start = document_content[:8000]
Expand Down
4 changes: 2 additions & 2 deletions src/reviewer/method_zero_shot.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from .client import chat
from .models import ReviewResult
from .prompts import ZERO_SHOT_CHUNK_PROMPT, OCR_CAVEAT, ZERO_SHOT_PROMPT
from .prompts import LARGE_PAPER_CHUNK_PROMPT, OCR_CAVEAT, ZERO_SHOT_PROMPT
from .utils import assign_paragraph_indices, chunk_text, count_tokens, parse_review_response

MAX_TOKENS_SINGLE = 100_000 # use single prompt if paper fits
Expand Down Expand Up @@ -44,7 +44,7 @@ def review_zero_shot(
overall_parts = []
for i, chunk in enumerate(chunks):
ocr_caveat = OCR_CAVEAT if ocr else ""
prompt = ZERO_SHOT_CHUNK_PROMPT.format(
prompt = LARGE_PAPER_CHUNK_PROMPT.format(
chunk_num=i + 1,
total_chunks=len(chunks),
chunk_text=chunk,
Expand Down
Loading