Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions python/langsmith/integrations/claude_agent_sdk/_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,13 +498,15 @@ async def subagent_stop_hook(
if transcript_path:
session.subagent_transcript_paths.append((transcript_path, subagent_run))

# Move to ended state so PostToolUse can set outputs.
# Record end_time now so the run reflects actual subagent completion
# rather than the cleanup time in ``clear_active_tool_runs``. ``.patch()``
# is deferred so ``PostToolUse`` can still attach outputs.
subagent_run.end()
agent_tool_id = session.agent_to_tool_mapping.pop(agent_id, None)
if agent_tool_id:
session.ended_subagent_runs[agent_tool_id] = subagent_run
else:
# No matching Agent tool — just end it now
subagent_run.end()
# No matching Agent tool — patch it now
try:
subagent_run.patch()
except Exception as e:
Expand Down Expand Up @@ -537,10 +539,10 @@ def clear_active_tool_runs(session: Optional[SessionState] = None) -> None:
except Exception as e:
logger.debug(f"Failed to clean up orphaned subagent run {agent_id}: {e}")

# 2. Finalise ended subagent runs (outputs already set by PostToolUse)
# 2. Patch ended subagent runs (``end_time`` already set by
# ``subagent_stop_hook``; outputs already set by ``PostToolUse``)
for tool_use_id, subagent_run in session.ended_subagent_runs.items():
try:
subagent_run.end()
subagent_run.patch()
except Exception as e:
logger.debug(f"Failed to finalise ended subagent run {tool_use_id}: {e}")
Expand Down
68 changes: 61 additions & 7 deletions python/tests/unit_tests/wrappers/test_claude_agent_sdk_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import asyncio
import sys
import time
from unittest.mock import MagicMock

import pytest
Expand Down Expand Up @@ -302,7 +303,8 @@ def test_subagent_stop_and_post_tool_use_set_outputs(self):
)
subagent_run = _hooks_module._default_session.subagent_runs["agent_123"]

# Subagent stop — run should be stashed, not ended yet
# Subagent stop — end_time recorded now (actual subagent completion);
# patch deferred so PostToolUse can attach outputs.
asyncio.run(
subagent_stop_hook(
{"agent_id": "agent_123", "agent_type": "foo"},
Expand All @@ -316,9 +318,10 @@ def test_subagent_stop_and_post_tool_use_set_outputs(self):
assert (
_hooks_module._default_session.ended_subagent_runs["tool_1"] is subagent_run
)
assert subagent_run.end_time is None
stop_end_time = subagent_run.end_time
assert stop_end_time is not None

# PostToolUse for Agent — sets outputs on subagent but doesn't end it
# PostToolUse for Agent — sets outputs on subagent; must not move end_time
asyncio.run(
post_tool_use_hook(
{
Expand All @@ -333,18 +336,69 @@ def test_subagent_stop_and_post_tool_use_set_outputs(self):
# Agent tool run should be ended
assert "tool_1" not in _hooks_module._default_session.active_tool_runs

# Subagent outputs should be set but run not yet ended
# Subagent outputs should be set; end_time unchanged
assert subagent_run.outputs == {"output": "bar"}
assert subagent_run.end_time is None
assert subagent_run.end_time == stop_end_time

# Subagent can still be found for LLM nesting
assert get_subagent_run_by_tool_id("tool_1") is subagent_run

# clear_active_tool_runs finalises everything
# clear_active_tool_runs patches but preserves end_time
clear_active_tool_runs()
assert subagent_run.end_time is not None
assert subagent_run.end_time == stop_end_time
assert len(_hooks_module._default_session.ended_subagent_runs) == 0

def test_subagent_end_time_recorded_at_stop_not_conversation_end(self):
    """A subagent's ``end_time`` is fixed when it stops and never moves.

    Regression test: with several subagents in one session, postponing
    ``RunTree.end()`` until ``clear_active_tool_runs`` stamped every
    subagent's ``end_time`` at conversation-termination time rather than
    at its real completion time, so child chain spans visually outlasted
    their parent ``Agent`` tool spans.
    """

    def _fire_agent_post_tool_use():
        # PostToolUse for the Agent tool that spawned the subagent.
        post_coro = post_tool_use_hook(
            {"tool_name": "Agent", "tool_response": {"output": "bar"}},
            "tool_x",
            MagicMock(),
        )
        asyncio.run(post_coro)

    # Open the Agent tool call and start the subagent that belongs to it.
    pre_coro = pre_tool_use_hook(
        {"tool_name": "Agent", "tool_input": {"agent": "foo"}},
        "tool_x",
        MagicMock(),
    )
    asyncio.run(pre_coro)

    start_coro = subagent_start_hook(
        {"agent_id": "agent_x", "agent_type": "foo"},
        "sdk_session_id",
        MagicMock(),
    )
    asyncio.run(start_coro)

    tracked_run = _hooks_module._default_session.subagent_runs["agent_x"]

    # Stop the subagent and remember the timestamp recorded at that moment.
    stop_coro = subagent_stop_hook(
        {"agent_id": "agent_x", "agent_type": "foo"},
        "sdk_session_id",
        MagicMock(),
    )
    asyncio.run(stop_coro)

    recorded_end = tracked_run.end_time
    assert recorded_end is not None

    # Each later phase is preceded by a short pause that simulates real
    # elapsed time: first the gap before PostToolUse fires for the Agent
    # tool, then the further delay before the conversation actually ends.
    # Neither phase may shift the recorded end time.
    for later_phase in (_fire_agent_post_tool_use, clear_active_tool_runs):
        time.sleep(0.05)
        later_phase()
        assert tracked_run.end_time == recorded_end


class TestTranscriptPathCapture:
"""Pre-tool-use hook captures transcript_path from BaseHookInput."""
Expand Down