Skip to content

Commit dec53b5

Browse files
authored
Enhance LFS error handling and logging in fetch and push operations (#8)
* Add error handling for LFS fetch and push operations with logging * Enhance LFS handling and review logging in absorb process * Fix LFS fetch references in absorb and publish methods
1 parent 77e7541 commit dec53b5

6 files changed

Lines changed: 157 additions & 20 deletions

File tree

SPEC.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ pubgate supports repositories that use Git LFS. LFS support is auto-detected via
150150

151151
**How it works:** LFS-tracked files are stored as pointer files in git. pubgate reads and writes these pointers as-is; they pass through the snapshot, stage, and publish pipelines without modification. When files are staged with `git add`, git's clean/smudge filters handle the LFS encoding automatically via `.gitattributes`.
152152

153-
**LFS object transfer:** pubgate runs `git lfs fetch` during command startups (absorb, publish) to ensure LFS objects are locally cached, and `git lfs push` after pushing branches to transfer LFS objects to the destination remote's LFS server.
153+
**LFS object transfer:** pubgate runs `git lfs fetch` before file operations that need blob content (absorb, publish), skipping the fetch when there is no work to do (e.g. already up to date) or during `--dry-run`. After pushing branches, `git lfs push` transfers LFS objects to the destination remote's LFS server.
154154

155155
**Limitations:**
156156
- **LFS files are treated as binary**: they are never merged during `absorb` (copied/overwritten instead) and never scrubbed for `BEGIN-INTERNAL`/`END-INTERNAL` markers during `stage`. Do not place internal markers inside LFS-tracked files; use ignore patterns in `pubgate.toml` to exclude sensitive LFS files from publication.

pubgate/absorb.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ def absorb_commit_message(
6767
last_absorbed: str,
6868
public_head: str,
6969
conflicted: list[str] | None = None,
70+
needs_review: list[str] | None = None,
7071
) -> str:
7172
subject = f"pubgate: absorb public changes {last_absorbed[:7]}..{public_head[:7]}"
7273
commits = git.log_oneline(last_absorbed, public_head)
@@ -80,6 +81,11 @@ def absorb_commit_message(
8081
lines.append("CONFLICTS (resolve before merging):")
8182
for path in conflicted:
8283
lines.append(f" {path}")
84+
if needs_review:
85+
lines.append("")
86+
lines.append("Review manually:")
87+
for item in needs_review:
88+
lines.append(f" {item}")
8389
return "\n".join(lines)
8490

8591

@@ -95,6 +101,40 @@ def _read_text_at_ref(git: GitRepo, ref: str, path: str) -> str | None:
95101
return data.decode("utf-8")
96102

97103

104+
def _log_review_diff(git: GitRepo, public_ref: str, path: str, local_path: Path) -> None:
105+
"""Log debug info comparing local vs public version of a file needing review."""
106+
try:
107+
public_bytes = git.read_file_at_ref_bytes(public_ref, path)
108+
local_bytes = local_path.read_bytes() if local_path.exists() else None
109+
pub_size = len(public_bytes) if public_bytes is not None else 0
110+
loc_size = len(local_bytes) if local_bytes is not None else 0
111+
same = public_bytes == local_bytes
112+
logger.debug(" public: %d bytes, local: %d bytes, identical: %s", pub_size, loc_size, same)
113+
if same:
114+
return
115+
# For text files, show a unified diff
116+
if public_bytes is not None and local_bytes is not None:
117+
try:
118+
pub_text = public_bytes.decode("utf-8")
119+
loc_text = local_bytes.decode("utf-8")
120+
except UnicodeDecodeError:
121+
return
122+
import difflib
123+
124+
diff = difflib.unified_diff(
125+
pub_text.splitlines(keepends=True),
126+
loc_text.splitlines(keepends=True),
127+
fromfile=f"public ({public_ref})",
128+
tofile="local (working tree)",
129+
)
130+
diff_text = "".join(diff)
131+
if diff_text:
132+
for line in diff_text.splitlines():
133+
logger.debug(" %s", line)
134+
except Exception:
135+
pass # best-effort debug logging
136+
137+
98138
def _apply_absorb_changes(
99139
git: GitRepo,
100140
base_sha: str,
@@ -116,11 +156,20 @@ def _apply_absorb_changes(
116156
kind = git.classify_at_ref(public_ref, change.path)
117157
if kind != "text":
118158
label = "LFS file" if kind == "lfs" else "binary"
119-
actions.append(f" {label} added on public (kept local version, review manually): {change.path}")
159+
public_bytes = git.read_file_at_ref_bytes(public_ref, change.path)
160+
local_bytes = local_path.read_bytes()
161+
if public_bytes == local_bytes:
162+
actions.append(f" {label} (identical): {change.path}")
163+
else:
164+
actions.append(
165+
f" {label} added on public (kept local version, review manually): {change.path}"
166+
)
167+
_log_review_diff(git, public_ref, change.path, local_path)
120168
else:
121169
theirs_content = _read_text_at_ref(git, public_ref, change.path)
122170
if theirs_content is None:
123171
actions.append(f" added on public (kept local version, review manually): {change.path}")
172+
_log_review_diff(git, public_ref, change.path, local_path)
124173
continue
125174
# Try to find the published base: the scrubbed version of
126175
# the file at the internal commit that was staged.
@@ -145,6 +194,7 @@ def _apply_absorb_changes(
145194
else:
146195
# No staged version; file wasn't published through pubgate
147196
actions.append(f" added on public (kept local, review manually): {change.path}")
197+
_log_review_diff(git, public_ref, change.path, local_path)
148198
else:
149199
is_binary = git.copy_file_from_ref(public_ref, change.path)
150200
if is_binary:
@@ -161,6 +211,7 @@ def _apply_absorb_changes(
161211
elif change.is_delete:
162212
if (git.repo_dir / change.path).exists():
163213
actions.append(f" deleted on public (kept locally, review manually): {change.path}")
214+
logger.debug(" local file exists at %s", git.repo_dir / change.path)
164215

165216
elif change.is_rename:
166217
old_path = change.old_path or ""
@@ -192,6 +243,7 @@ def _merge_file(
192243
git.write_file_and_stage_bytes(path, theirs_bytes)
193244
label = "LFS file" if is_lfs_pointer(theirs_bytes) else "binary"
194245
actions.append(f" {label} changed on public (replaced locally, review manually): {path}")
246+
_log_review_diff(git, public_ref, path, git.repo_dir / path)
195247
return
196248

197249
# Use the scrubbed staged content as merge base when available.

pubgate/core.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@ def _bootstrap_work() -> bool:
152152
)
153153
return
154154

155+
git.lfs_fetch(cfg.public_remote, public_head)
156+
155157
def _absorb_work() -> bool:
156158
actions = resolve_and_apply(cfg, git, last_absorbed, public_head)
157159
if actions:
@@ -163,7 +165,8 @@ def _absorb_work() -> bool:
163165
logger.info("%s", a)
164166
git.write_file_and_stage(cfg.absorb_state_file, public_head + "\n")
165167
conflicted = [a.split(": ", 1)[1] for a in actions if "CONFLICTS" in a]
166-
msg = absorb_commit_message(git, last_absorbed, public_head, conflicted)
168+
needs_review = [a.strip() for a in actions if "review manually" in a]
169+
msg = absorb_commit_message(git, last_absorbed, public_head, conflicted, needs_review)
167170
sha = git.commit(msg)
168171
logger.info("Committed on %s (%s %s)", cfg.internal_absorb_branch, sha[:7], msg.split("\n", 1)[0])
169172
return True
@@ -198,7 +201,9 @@ def stage(self, *, dry_run: bool = False, force: bool = False, no_pr: bool = Fal
198201
origin_preview_ref = f"origin/{cfg.internal_approved_branch}"
199202

200203
ignore_patterns = list(cfg.ignore)
201-
snapshot = build_stage_snapshot(git, cfg.internal_main_branch, ignore_patterns, frozenset({CONFIG_FILE}))
204+
snapshot, lfs_count = build_stage_snapshot(
205+
git, cfg.internal_main_branch, ignore_patterns, frozenset({CONFIG_FILE})
206+
)
202207

203208
unchanged_ref = snapshot_unchanged_ref(cfg, git, snapshot)
204209
if unchanged_ref is not None:
@@ -233,6 +238,9 @@ def stage(self, *, dry_run: bool = False, force: bool = False, no_pr: bool = Fal
233238
else:
234239
logger.info("Staging changes into %s", cfg.internal_approved_branch)
235240

241+
if lfs_count:
242+
logger.info("Snapshot includes %d LFS-tracked %s", lfs_count, "file" if lfs_count == 1 else "files")
243+
236244
if dry_run:
237245
logger.info("[dry-run] Would commit on %s", cfg.internal_stage_branch)
238246
logger.info("[dry-run] Would push %s to origin/%s", cfg.internal_stage_branch, cfg.internal_stage_branch)
@@ -383,6 +391,8 @@ def publish(self, *, dry_run: bool = False, force: bool = False, no_pr: bool = F
383391
)
384392
return
385393

394+
git.lfs_fetch("origin", origin_preview_ref)
395+
386396
def _publish_work() -> bool:
387397
existing = git.ls_tree("HEAD")
388398
for path in existing:
@@ -518,7 +528,6 @@ def _absorb_startup(self) -> AbsorbResult:
518528
logger.debug("Starting absorb startup")
519529
self._require_on_main()
520530
self.git.fetch(self.cfg.public_remote)
521-
self.git.lfs_fetch(self.cfg.public_remote, self.cfg.public_main_branch)
522531
self._prune_internal_pr_branches()
523532
self._prune_public_publish_branch()
524533
return check_absorb(self.cfg, self.git)
@@ -535,7 +544,6 @@ def _publish_startup(self) -> None:
535544
git, cfg = self.git, self.cfg
536545
git.ensure_clean_worktree()
537546
git.fetch("origin")
538-
git.lfs_fetch("origin", cfg.internal_approved_branch)
539547
git.fetch(cfg.public_remote)
540548
self._prune_public_publish_branch()
541549

pubgate/git.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -440,14 +440,22 @@ def lfs_fetch(self, remote: str, ref: str) -> None:
440440
if not self.is_lfs_available():
441441
return
442442
logger.info("Fetching LFS objects from %s for %s", remote, ref)
443-
result = self._run("lfs", "fetch", remote, ref, check=False, timeout=_TIMEOUT_NETWORK)
443+
try:
444+
result = self._run("lfs", "fetch", remote, ref, check=False, timeout=_TIMEOUT_NETWORK)
445+
except GitError as exc:
446+
logger.warning("LFS fetch failed: %s", exc)
447+
return
444448
if result.returncode != 0:
445449
logger.warning("LFS fetch failed (exit %d): %s", result.returncode, result.stderr.strip())
446450

447451
def lfs_push(self, remote: str, branch: str) -> None:
448452
if not self.is_lfs_available():
449453
return
450454
logger.info("Pushing LFS objects to %s for %s", remote, branch)
451-
result = self._run("lfs", "push", remote, branch, check=False, timeout=_TIMEOUT_NETWORK)
455+
try:
456+
result = self._run("lfs", "push", remote, branch, check=False, timeout=_TIMEOUT_NETWORK)
457+
except GitError as exc:
458+
logger.warning("LFS push failed: %s", exc)
459+
return
452460
if result.returncode != 0:
453461
logger.warning("LFS push failed (exit %d): %s", result.returncode, result.stderr.strip())

pubgate/stage_snapshot.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def build_stage_snapshot(
1515
ref: str,
1616
ignore_patterns: list[str],
1717
excluded: frozenset[str],
18-
) -> dict[str, str | bytes]:
18+
) -> tuple[dict[str, str | bytes], int]:
1919
all_files = git.ls_tree(ref)
2020
snapshot: dict[str, str | bytes] = {}
2121
lfs_files: list[str] = []
@@ -47,12 +47,10 @@ def build_stage_snapshot(
4747
raise PubGateError(f"Error: {exc}") from exc
4848
snapshot[path] = content
4949

50-
if lfs_files:
51-
logger.info("Snapshot includes %d LFS-tracked %s", len(lfs_files), "file" if len(lfs_files) == 1 else "files")
52-
for path in lfs_files:
53-
logger.debug(" LFS: %s", path)
50+
for path in lfs_files:
51+
logger.debug(" LFS: %s", path)
5452
logger.debug("Snapshot contains %d files", len(snapshot))
55-
return snapshot
53+
return snapshot, len(lfs_files)
5654

5755

5856
def snapshot_unchanged_ref(

tests/test_lfs.py

Lines changed: 77 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import logging
2+
from unittest.mock import patch
23

34
import pytest
45
from conftest import Topology
56

67
from pubgate.config import DEFAULT_IGNORE_PATTERNS
8+
from pubgate.errors import GitError
79
from pubgate.filtering import is_ignored
810
from pubgate.git import is_lfs_pointer
911
from pubgate.stage_snapshot import build_stage_snapshot, snapshot_unchanged_ref
@@ -91,7 +93,7 @@ def test_lfs_pointer_detected_as_binary(self, topo: Topology):
9193
class TestStageSnapshotLfs:
9294
def test_lfs_pointer_passes_through_without_scrub(self, topo: Topology):
9395
topo.commit_internal({"data.bin": SAMPLE_LFS_POINTER_STR})
94-
snapshot = build_stage_snapshot(
96+
snapshot, _ = build_stage_snapshot(
9597
topo.work_dir.git,
9698
"HEAD",
9799
ignore_patterns=[],
@@ -104,7 +106,7 @@ def test_lfs_pointer_not_scrubbed_for_internal_markers(self, topo: Topology):
104106
# Even though the pointer text is valid UTF-8, it should not be
105107
# passed through scrub_internal_blocks
106108
topo.commit_internal({"model.bin": SAMPLE_LFS_POINTER_STR})
107-
snapshot = build_stage_snapshot(
109+
snapshot, _ = build_stage_snapshot(
108110
topo.work_dir.git,
109111
"HEAD",
110112
ignore_patterns=[],
@@ -125,7 +127,7 @@ def test_gitattributes_not_matched_by_default_patterns(self):
125127

126128
def test_gitattributes_included_in_snapshot(self, topo: Topology):
127129
topo.commit_internal({".gitattributes": "*.bin filter=lfs diff=lfs merge=lfs -text\n"})
128-
snapshot = build_stage_snapshot(
130+
snapshot, _ = build_stage_snapshot(
129131
topo.work_dir.git,
130132
"HEAD",
131133
ignore_patterns=list(DEFAULT_IGNORE_PATTERNS),
@@ -186,7 +188,7 @@ def test_identical_lfs_pointer_detected_as_unchanged(self, topo: Topology):
186188
topo.work_dir.run("checkout", "main")
187189

188190
# Build the same snapshot again (no changes)
189-
snapshot = build_stage_snapshot(
191+
snapshot, _ = build_stage_snapshot(
190192
topo.work_dir.git,
191193
topo.cfg.internal_main_branch,
192194
ignore_patterns=list(topo.cfg.ignore),
@@ -318,5 +320,74 @@ def test_lfs_pointer_with_gitattributes_in_publish(self, topo: Topology, monkeyp
318320
assert attrs is not None
319321
assert "filter=lfs" in attrs
320322

321-
pub_content = topo.external_contributor.read_file_at_ref("HEAD", "data.bin")
322-
assert pub_content == SAMPLE_LFS_POINTER_STR
323+
324+
# ---------------------------------------------------------------------------
325+
# LFS fetch/push failure handling
326+
# ---------------------------------------------------------------------------
327+
328+
329+
class TestLfsFetchFailure:
330+
def test_nonzero_exit_logs_warning_and_continues(self, topo: Topology, caplog):
331+
git = topo.work_dir.git
332+
git._lfs_available = True # force LFS as available
333+
with caplog.at_level(logging.WARNING, logger="pubgate"):
334+
# Fetch from a nonexistent remote ref — should warn, not raise
335+
git.lfs_fetch("origin", "nonexistent-ref-that-will-fail")
336+
assert "LFS fetch failed" in caplog.text
337+
338+
def test_timeout_logs_warning_and_continues(self, topo: Topology, caplog):
339+
git = topo.work_dir.git
340+
git._lfs_available = True
341+
342+
original_run = git._run
343+
344+
def _timeout_run(*args, **kwargs):
345+
if args and args[0] == "lfs":
346+
raise GitError(list(args), -1, "timed out after 300s")
347+
return original_run(*args, **kwargs)
348+
349+
with patch.object(git, "_run", side_effect=_timeout_run):
350+
with caplog.at_level(logging.WARNING, logger="pubgate"):
351+
# Should not raise — timeout is caught and logged
352+
git.lfs_fetch("origin", "main")
353+
assert "LFS fetch failed" in caplog.text
354+
355+
def test_lfs_not_available_skips_silently(self, topo: Topology, caplog):
356+
git = topo.work_dir.git
357+
git._lfs_available = False
358+
with caplog.at_level(logging.WARNING, logger="pubgate"):
359+
git.lfs_fetch("origin", "main")
360+
assert "LFS fetch failed" not in caplog.text
361+
362+
363+
class TestLfsPushFailure:
364+
def test_timeout_logs_warning_and_continues(self, topo: Topology, caplog):
365+
git = topo.work_dir.git
366+
git._lfs_available = True
367+
368+
original_run = git._run
369+
370+
def _timeout_run(*args, **kwargs):
371+
if args and args[0] == "lfs":
372+
raise GitError(list(args), -1, "timed out after 300s")
373+
return original_run(*args, **kwargs)
374+
375+
with patch.object(git, "_run", side_effect=_timeout_run):
376+
with caplog.at_level(logging.WARNING, logger="pubgate"):
377+
git.lfs_push("origin", "main")
378+
assert "LFS push failed" in caplog.text
379+
380+
def test_nonzero_exit_logs_warning_and_continues(self, topo: Topology, caplog):
381+
git = topo.work_dir.git
382+
git._lfs_available = True
383+
with caplog.at_level(logging.WARNING, logger="pubgate"):
384+
# Push to a nonexistent remote — should warn, not raise
385+
git.lfs_push("nonexistent-remote", "main")
386+
assert "LFS push failed" in caplog.text
387+
388+
def test_lfs_not_available_skips_silently(self, topo: Topology, caplog):
389+
git = topo.work_dir.git
390+
git._lfs_available = False
391+
with caplog.at_level(logging.WARNING, logger="pubgate"):
392+
git.lfs_push("origin", "main")
393+
assert "LFS push failed" not in caplog.text

0 commit comments

Comments
 (0)