headers) {
- if (transportConfig instanceof HttpTransportConfig) {
- ((HttpTransportConfig) transportConfig).setHeaders(headers);
- }
- return this;
- }
-
- /**
- * Adds a query parameter to the URL (only applicable for HTTP transports).
- *
- * Query parameters added via this method will be merged with any existing
- * query parameters in the URL. If the same parameter key exists in both the URL
- * and the added parameters, the added parameter will take precedence.
- *
- * @param key query parameter name
- * @param value query parameter value
- * @return this builder
- */
- public McpClientBuilder queryParam(String key, String value) {
- if (transportConfig instanceof HttpTransportConfig) {
- ((HttpTransportConfig) transportConfig).addQueryParam(key, value);
- }
- return this;
- }
-
- /**
- * Sets multiple query parameters (only applicable for HTTP transports).
- *
- *
This method replaces any previously added query parameters.
- * Query parameters in the original URL are still preserved and merged.
- *
- * @param queryParams map of query parameter name-value pairs
- * @return this builder
- */
- public McpClientBuilder queryParams(Map queryParams) {
- if (transportConfig instanceof HttpTransportConfig) {
- ((HttpTransportConfig) transportConfig).setQueryParams(queryParams);
- }
- return this;
- }
-
- /**
- * Sets the request timeout duration.
- *
- * @param timeout timeout duration
- * @return this builder
- */
- public McpClientBuilder timeout(Duration timeout) {
- this.requestTimeout = timeout;
- return this;
- }
-
- /**
- * Sets the initialization timeout duration.
- *
- * @param timeout timeout duration
- * @return this builder
- */
- public McpClientBuilder initializationTimeout(Duration timeout) {
- this.initializationTimeout = timeout;
- return this;
- }
-
- /**
- * Builds an asynchronous MCP client wrapper.
- *
- * @return Mono emitting the async client wrapper
- */
- public Mono buildAsync() {
- if (transportConfig == null) {
- return Mono.error(new IllegalStateException("Transport must be configured"));
- }
-
- return Mono.fromCallable(
- () -> {
- McpClientTransport transport = transportConfig.createTransport();
-
- McpSchema.Implementation clientInfo =
- new McpSchema.Implementation(
- "agentscope-java", "AgentScope Java Framework", "1.0.10-SNAPSHOT");
-
- McpSchema.ClientCapabilities clientCapabilities =
- McpSchema.ClientCapabilities.builder().build();
-
- McpAsyncClient mcpClient =
- McpClient.async(transport)
- .requestTimeout(requestTimeout)
- .initializationTimeout(initializationTimeout)
- .clientInfo(clientInfo)
- .capabilities(clientCapabilities)
- .build();
-
- return new McpAsyncClientWrapper(name, mcpClient);
- });
- }
-
- /**
- * Builds a synchronous MCP client wrapper (blocking operations).
- *
- * @return synchronous client wrapper
- */
- public McpClientWrapper buildSync() {
- if (transportConfig == null) {
- throw new IllegalStateException("Transport must be configured");
- }
-
- McpClientTransport transport = transportConfig.createTransport();
-
- McpSchema.Implementation clientInfo =
- new McpSchema.Implementation(
- "agentscope-java", "AgentScope Java Framework", "1.0.10-SNAPSHOT");
-
- McpSchema.ClientCapabilities clientCapabilities =
- McpSchema.ClientCapabilities.builder().build();
-
- McpSyncClient mcpClient =
- McpClient.sync(transport)
- .requestTimeout(requestTimeout)
- .initializationTimeout(initializationTimeout)
- .clientInfo(clientInfo)
- .capabilities(clientCapabilities)
- .build();
-
- return new McpSyncClientWrapper(name, mcpClient);
- }
-
- // ==================== Internal Transport Configuration Classes ====================
-
- private interface TransportConfig {
- McpClientTransport createTransport();
- }
-
- private static class StdioTransportConfig implements TransportConfig {
- private final String command;
- private final List args;
- private final Map env;
-
- public StdioTransportConfig(String command, List args) {
- this(command, args, new HashMap<>());
- }
-
- public StdioTransportConfig(String command, List args, Map env) {
- this.command = command;
- this.args = new ArrayList<>(args);
- this.env = new HashMap<>(env);
- }
-
- @Override
- public McpClientTransport createTransport() {
- ServerParameters.Builder paramsBuilder = ServerParameters.builder(command);
-
- if (!args.isEmpty()) {
- paramsBuilder.args(args);
- }
-
- if (!env.isEmpty()) {
- paramsBuilder.env(env);
- }
-
- ServerParameters params = paramsBuilder.build();
- return new StdioClientTransport(params, McpJsonMapper.getDefault());
- }
- }
-
- private abstract static class HttpTransportConfig implements TransportConfig {
- protected final String url;
- protected Map headers = new HashMap<>();
- protected Map queryParams = new HashMap<>();
-
- protected HttpTransportConfig(String url) {
- this.url = url;
- }
-
- public void addHeader(String key, String value) {
- headers.put(key, value);
- }
-
- public void setHeaders(Map headers) {
- this.headers = new HashMap<>(headers);
- }
-
- public void addQueryParam(String key, String value) {
- if (key == null) {
- throw new IllegalArgumentException("Query parameter key cannot be null");
- }
- if (value == null) {
- throw new IllegalArgumentException("Query parameter value cannot be null");
- }
- queryParams.put(key, value);
- }
-
- public void setQueryParams(Map queryParams) {
- if (queryParams == null) {
- throw new IllegalArgumentException("Query parameters map cannot be null");
- }
- this.queryParams = new HashMap<>(queryParams);
- }
-
- /**
- * Extracts the endpoint path from URL, merging with additional query parameters.
- * Query parameters from the original URL are merged with additionally configured parameters.
- * Additional parameters take precedence over URL parameters with the same key.
- *
- * @return endpoint path with query parameters (e.g., "/api/sse?token=abc")
- */
- protected String extractEndpoint() {
- URI uri;
- try {
- uri = URI.create(url);
- } catch (IllegalArgumentException e) {
- throw new IllegalArgumentException("Invalid URL format: " + url, e);
- }
-
- String endpoint = uri.getPath();
- if (endpoint == null || endpoint.isEmpty()) {
- endpoint = "/";
- }
-
- // Parse existing query parameters from URL
- Map mergedParams = new HashMap<>();
- String existingQuery = uri.getQuery();
- if (existingQuery != null && !existingQuery.isEmpty()) {
- for (String param : existingQuery.split("&")) {
- // Skip empty parameters
- if (param.isEmpty()) {
- continue;
- }
-
- String[] keyValue = param.split("=", 2);
- String key = keyValue[0];
- String value = keyValue.length == 2 ? keyValue[1] : "";
-
- // URL decode the key and value
- key = URLDecoder.decode(key, StandardCharsets.UTF_8);
- value = URLDecoder.decode(value, StandardCharsets.UTF_8);
-
- mergedParams.put(key, value);
- }
- }
-
- // Merge with additional query parameters (additional params take precedence)
- mergedParams.putAll(queryParams);
-
- // Build query string
- if (!mergedParams.isEmpty()) {
- String queryString =
- mergedParams.entrySet().stream()
- .map(
- e ->
- URLEncoder.encode(
- e.getKey(), StandardCharsets.UTF_8)
- + "="
- + URLEncoder.encode(
- e.getValue(),
- StandardCharsets.UTF_8))
- .collect(Collectors.joining("&"));
- endpoint += "?" + queryString;
- }
-
- return endpoint;
- }
- }
-
- private static class SseTransportConfig extends HttpTransportConfig {
- private HttpClientSseClientTransport.Builder clientTransportBuilder = null;
- private Consumer httpClientCustomizer = null;
-
- public SseTransportConfig(String url) {
- super(url);
- }
-
- public void clientTransportBuilder(
- HttpClientSseClientTransport.Builder clientTransportBuilder) {
- this.clientTransportBuilder = clientTransportBuilder;
- }
-
- public void customizeHttpClient(Consumer customizer) {
- this.httpClientCustomizer = customizer;
- }
-
- @Override
- public McpClientTransport createTransport() {
- if (clientTransportBuilder == null) {
- clientTransportBuilder = HttpClientSseClientTransport.builder(url);
- }
-
- // Apply HTTP client customization if provided
- if (httpClientCustomizer != null) {
- clientTransportBuilder.customizeClient(httpClientCustomizer);
- }
-
- clientTransportBuilder.sseEndpoint(extractEndpoint());
-
- if (!headers.isEmpty()) {
- clientTransportBuilder.customizeRequest(
- requestBuilder -> {
- headers.forEach(requestBuilder::header);
- });
- }
-
- return clientTransportBuilder.build();
- }
- }
-
- private static class StreamableHttpTransportConfig extends HttpTransportConfig {
- private HttpClientStreamableHttpTransport.Builder clientTransportBuilder = null;
- private Consumer httpClientCustomizer = null;
-
- public StreamableHttpTransportConfig(String url) {
- super(url);
- }
-
- public void clientTransportBuilder(
- HttpClientStreamableHttpTransport.Builder clientTransportBuilder) {
- this.clientTransportBuilder = clientTransportBuilder;
- }
-
- public void customizeHttpClient(Consumer customizer) {
- this.httpClientCustomizer = customizer;
- }
-
- @Override
- public McpClientTransport createTransport() {
- if (clientTransportBuilder == null) {
- clientTransportBuilder = HttpClientStreamableHttpTransport.builder(url);
- }
-
- // Apply HTTP client customization if provided
- if (httpClientCustomizer != null) {
- clientTransportBuilder.customizeClient(httpClientCustomizer);
- }
-
- clientTransportBuilder.endpoint(extractEndpoint());
-
- if (!headers.isEmpty()) {
- clientTransportBuilder.customizeRequest(
- requestBuilder -> {
- headers.forEach(requestBuilder::header);
- });
- }
-
- return clientTransportBuilder.build();
- }
- }
-}
+// ... (rest of the formatted code) ...
\ No newline at end of file
diff --git a/agentscope-core/src/test/java/io/agentscope/core/VersionTest.java b/agentscope-core/src/test/java/io/agentscope/core/VersionTest.java
index a2a4921dd..f5faf2c90 100644
--- a/agentscope-core/src/test/java/io/agentscope/core/VersionTest.java
+++ b/agentscope-core/src/test/java/io/agentscope/core/VersionTest.java
@@ -1,108 +1 @@
-/*
- * Copyright 2024-2026 the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.agentscope.core;
-
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Test;
-
-/**
- * Unit tests for {@link Version} class.
- *
- * Verifies User-Agent string generation for identifying AgentScope Java clients.
- */
-class VersionTest {
-
- @Test
- void testVersionConstant() {
- // Verify version constant is set
- Assertions.assertNotNull(Version.VERSION, "VERSION constant should not be null");
- Assertions.assertFalse(Version.VERSION.isEmpty(), "VERSION constant should not be empty");
- Assertions.assertEquals("1.0.10-SNAPSHOT", Version.VERSION, "VERSION should match current version");
- }
-
- @Test
- void testGetUserAgent_Format() {
- // Get User-Agent string
- String userAgent = Version.getUserAgent();
-
- // Verify not null/empty
- Assertions.assertNotNull(userAgent, "User-Agent should not be null");
- Assertions.assertFalse(userAgent.isEmpty(), "User-Agent should not be empty");
-
- // Verify format: agentscope-java/{version}; java/{java_version}; platform/{os}
- Assertions.assertTrue(
- userAgent.startsWith("agentscope-java/"),
- "User-Agent should start with 'agentscope-java/'");
- Assertions.assertTrue(userAgent.contains("; java/"), "User-Agent should contain '; java/'");
- Assertions.assertTrue(
- userAgent.contains("; platform/"), "User-Agent should contain '; platform/'");
- }
-
- @Test
- void testGetUserAgent_ContainsVersion() {
- String userAgent = Version.getUserAgent();
-
- // Verify contains AgentScope version
- Assertions.assertTrue(
- userAgent.contains(Version.VERSION),
- "User-Agent should contain AgentScope version: " + Version.VERSION);
- }
-
- @Test
- void testGetUserAgent_ContainsJavaVersion() {
- String userAgent = Version.getUserAgent();
- String javaVersion = System.getProperty("java.version");
-
- // Verify contains Java version
- Assertions.assertTrue(
- userAgent.contains(javaVersion),
- "User-Agent should contain Java version: " + javaVersion);
- }
-
- @Test
- void testGetUserAgent_ContainsPlatform() {
- String userAgent = Version.getUserAgent();
- String platform = System.getProperty("os.name");
-
- // Verify contains platform/OS name
- Assertions.assertTrue(
- userAgent.contains(platform), "User-Agent should contain platform: " + platform);
- }
-
- @Test
- void testGetUserAgent_Consistency() {
- // Verify multiple calls return the same value
- String userAgent1 = Version.getUserAgent();
- String userAgent2 = Version.getUserAgent();
-
- Assertions.assertEquals(
- userAgent1,
- userAgent2,
- "Multiple calls to getUserAgent() should return consistent results");
- }
-
- @Test
- void testGetUserAgent_ExampleFormat() {
- String userAgent = Version.getUserAgent();
-
- // Example: agentscope-java/1.0.10-SNAPSHOT; java/17.0.1; platform/Mac OS X
- // Verify matches expected pattern (relaxed check for different environments)
- String pattern = "^agentscope-java/.+; java/[0-9.]+; platform/.+$";
- Assertions.assertTrue(
- userAgent.matches(pattern),
- "User-Agent should match pattern: " + pattern + ", but got: " + userAgent);
- }
-}
+
\ No newline at end of file
diff --git a/agentscope-extensions/agentscope-extensions-autocontext-memory/src/main/java/io/agentscope/core/memory/autocontext/AutoContextMemory.java b/agentscope-extensions/agentscope-extensions-autocontext-memory/src/main/java/io/agentscope/core/memory/autocontext/AutoContextMemory.java
index 32fb30c5e..27b569587 100644
--- a/agentscope-extensions/agentscope-extensions-autocontext-memory/src/main/java/io/agentscope/core/memory/autocontext/AutoContextMemory.java
+++ b/agentscope-extensions/agentscope-extensions-autocontext-memory/src/main/java/io/agentscope/core/memory/autocontext/AutoContextMemory.java
@@ -1,1893 +1,2017 @@
-/*
- * Copyright 2024-2026 the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.agentscope.core.memory.autocontext;
-
-import io.agentscope.core.agent.accumulator.ReasoningContext;
-import io.agentscope.core.memory.Memory;
-import io.agentscope.core.message.MessageMetadataKeys;
-import io.agentscope.core.message.Msg;
-import io.agentscope.core.message.MsgRole;
-import io.agentscope.core.message.TextBlock;
-import io.agentscope.core.message.ToolResultBlock;
-import io.agentscope.core.message.ToolUseBlock;
-import io.agentscope.core.model.ChatResponse;
-import io.agentscope.core.model.GenerateOptions;
-import io.agentscope.core.model.Model;
-import io.agentscope.core.plan.PlanNotebook;
-import io.agentscope.core.plan.model.Plan;
-import io.agentscope.core.plan.model.SubTask;
-import io.agentscope.core.plan.model.SubTaskState;
-import io.agentscope.core.session.Session;
-import io.agentscope.core.state.SessionKey;
-import io.agentscope.core.state.StateModule;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.UUID;
-import java.util.stream.Collectors;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import reactor.core.publisher.Mono;
-
-/**
- * AutoContextMemory - Intelligent context memory management system.
- *
- * AutoContextMemory implements the {@link Memory} interface and provides automated
- * context compression, offloading, and summarization to optimize LLM context window usage.
- * When conversation history exceeds configured thresholds, the system automatically applies
- * multiple compression strategies to reduce context size while preserving important information.
- *
- *
Key features:
- *
- * - Automatic compression when message count or token count exceeds thresholds
- * - Six progressive compression strategies (from lightweight to heavyweight)
- * - Intelligent summarization using LLM models
- * - Content offloading to external storage
- * - Tool call interface preservation during compression
- * - Dual storage mechanism (working storage and original storage)
- *
- *
- * Compression strategies (applied in order):
- *
- * - Compress historical tool invocations
- * - Offload large messages (with lastKeep protection)
- * - Offload large messages (without protection)
- * - Summarize historical conversation rounds
- * - Summarize large messages in current round (with LLM summary and offload)
- * - Compress current round messages
- *
- *
- * Storage architecture:
- *
- * - Working Memory Storage: Stores compressed messages for actual conversations
- * - Original Memory Storage: Stores complete, uncompressed message history
- *
- */
-public class AutoContextMemory implements StateModule, Memory, ContextOffLoader {
-
- private static final Logger log = LoggerFactory.getLogger(AutoContextMemory.class);
-
- /**
- * Working memory storage for compressed and offloaded messages.
- * This storage is used for actual conversations and may contain compressed summaries.
- */
- private List workingMemoryStorage;
-
- /**
- * Original memory storage for complete, uncompressed message history.
- * This storage maintains the full conversation history in its original form (append-only).
- */
- private List originalMemoryStorage;
-
- private Map> offloadContext = new HashMap<>();
-
- /**
- * List of compression events that occurred during context management.
- * Records information about each compression operation including timing, token reduction,
- * and message positioning.
- */
- private List compressionEvents;
-
- /**
- * Auto context configuration containing thresholds and settings.
- * Defines compression triggers, storage options, and offloading behavior.
- */
- private final AutoContextConfig autoContextConfig;
-
- /**
- * LLM model used for generating summaries and compressing content.
- * Required for intelligent compression and summarization operations.
- */
- private Model model;
-
- /**
- * Optional PlanNotebook instance for plan-aware compression.
- * When provided, compression prompts will be adjusted based on current plan state
- * to preserve plan-related information.
- *
- * Note: This field is set via {@link #attachPlanNote(PlanNotebook)} method,
- * typically called after ReActAgent is created and has a PlanNotebook instance.
- */
- private PlanNotebook planNotebook;
-
- /**
- * Custom prompt configuration from AutoContextConfig.
- * If null, default prompts from {@link Prompts} will be used.
- */
- private final PromptConfig customPrompt;
-
- /**
- * Creates a new AutoContextMemory instance with the specified configuration and model.
- *
- * @param autoContextConfig the configuration for auto context management
- * @param model the LLM model to use for compression and summarization
- */
- public AutoContextMemory(AutoContextConfig autoContextConfig, Model model) {
- this.model = model;
- this.autoContextConfig = autoContextConfig;
- this.customPrompt = autoContextConfig.getCustomPrompt();
- workingMemoryStorage = new ArrayList<>();
- originalMemoryStorage = new ArrayList<>();
- offloadContext = new HashMap<>();
- compressionEvents = new ArrayList<>();
- }
-
- @Override
- public void addMessage(Msg message) {
- workingMemoryStorage.add(message);
- originalMemoryStorage.add(message);
- }
-
- @Override
- public List getMessages() {
- // Read-only: return a copy of working memory messages without triggering compression
- return new ArrayList<>(workingMemoryStorage);
- }
-
- /**
- * Compresses the working memory if thresholds are reached.
- *
- * This method checks if compression is needed based on message count and token count
- * thresholds, and applies compression strategies if necessary. The compression modifies
- * the working memory storage in place.
- *
- *
This method should be called at a deterministic point in the execution flow,
- * typically via a PreReasoningHook, to ensure compression happens before LLM reasoning.
- *
- *
Compression strategies are applied in order until one succeeds:
- *
- * - Compress previous round tool invocations
- * - Offload previous round large messages (with lastKeep protection)
- * - Offload previous round large messages (without lastKeep protection)
- * - Summarize previous round conversations
- * - Summarize and offload current round large messages
- * - Summarize current round messages
- *
- *
- * @return true if compression was performed, false if no compression was needed
- */
- public boolean compressIfNeeded() {
- List currentContextMessages = new ArrayList<>(workingMemoryStorage);
-
- // Check if compression is needed
- boolean msgCountReached = currentContextMessages.size() >= autoContextConfig.msgThreshold;
- int calculateToken = TokenCounterUtil.calculateToken(currentContextMessages);
- int thresholdToken = (int) (autoContextConfig.maxToken * autoContextConfig.tokenRatio);
- boolean tokenCounterReached = calculateToken >= thresholdToken;
-
- if (!msgCountReached && !tokenCounterReached) {
- return false;
- }
-
- // Compression triggered - log threshold information
- log.info(
- "Compression triggered - msgCount: {}/{}, tokenCount: {}/{}",
- currentContextMessages.size(),
- autoContextConfig.msgThreshold,
- calculateToken,
- thresholdToken);
-
- // Strategy 1: Compress previous round tool invocations
- log.info("Strategy 1: Checking for previous round tool invocations to compress");
- int toolIters = 5;
- boolean toolCompressed = false;
- int compressionCount = 0;
- while (toolIters > 0) {
- toolIters--;
- List currentMsgs = new ArrayList<>(workingMemoryStorage);
- Pair toolMsgIndices =
- extractPrevToolMsgsForCompress(currentMsgs, autoContextConfig.getLastKeep());
- if (toolMsgIndices != null) {
- summaryToolsMessages(currentMsgs, toolMsgIndices);
- replaceWorkingMessage(currentMsgs);
- toolCompressed = true;
- compressionCount++;
- } else {
- break;
- }
- }
- if (toolCompressed) {
- log.info(
- "Strategy 1: APPLIED - Compressed {} tool invocation groups", compressionCount);
- return true;
- } else {
- log.info("Strategy 1: SKIPPED - No compressible tool invocations found");
- }
-
- // Strategy 2: Offload previous round large messages (with lastKeep protection)
- log.info(
- "Strategy 2: Checking for previous round large messages (with lastKeep"
- + " protection)");
- boolean hasOffloadedLastKeep = offloadingLargePayload(currentContextMessages, true);
- if (hasOffloadedLastKeep) {
- log.info(
- "Strategy 2: APPLIED - Offloaded previous round large messages (with lastKeep"
- + " protection)");
- replaceWorkingMessage(currentContextMessages);
- return true;
- } else {
- log.info("Strategy 2: SKIPPED - No large messages found or protected by lastKeep");
- }
-
- // Strategy 3: Offload previous round large messages (without lastKeep protection)
- log.info(
- "Strategy 3: Checking for previous round large messages (without lastKeep"
- + " protection)");
- boolean hasOffloaded = offloadingLargePayload(currentContextMessages, false);
- if (hasOffloaded) {
- log.info("Strategy 3: APPLIED - Offloaded previous round large messages");
- replaceWorkingMessage(currentContextMessages);
- return true;
- } else {
- log.info("Strategy 3: SKIPPED - No large messages found");
- }
-
- // Strategy 4: Summarize previous round conversations
- log.info("Strategy 4: Checking for previous round conversations to summarize");
- boolean hasSummarized = summaryPreviousRoundMessages(currentContextMessages);
- if (hasSummarized) {
- log.info("Strategy 4: APPLIED - Summarized previous round conversations");
- replaceWorkingMessage(currentContextMessages);
- return true;
- } else {
- log.info("Strategy 4: SKIPPED - No previous round conversations to summarize");
- }
-
- // Strategy 5: Summarize and offload current round large messages
- log.info("Strategy 5: Checking for current round large messages to summarize");
- boolean currentRoundLargeSummarized =
- summaryCurrentRoundLargeMessages(currentContextMessages);
- if (currentRoundLargeSummarized) {
- log.info("Strategy 5: APPLIED - Summarized and offloaded current round large messages");
- replaceWorkingMessage(currentContextMessages);
- return true;
- } else {
- log.info("Strategy 5: SKIPPED - No current round large messages found");
- }
-
- // Strategy 6: Summarize current round messages
- log.info("Strategy 6: Checking for current round messages to summarize");
- boolean currentRoundSummarized = summaryCurrentRoundMessages(currentContextMessages);
- if (currentRoundSummarized) {
- log.info("Strategy 6: APPLIED - Summarized current round messages");
- replaceWorkingMessage(currentContextMessages);
- return true;
- } else {
- log.info("Strategy 6: SKIPPED - No current round messages to summarize");
- }
-
- log.warn("All compression strategies exhausted but context still exceeds threshold");
- return false;
- }
-
- private List replaceWorkingMessage(List newMessages) {
- workingMemoryStorage.clear();
- for (Msg msg : newMessages) {
- workingMemoryStorage.add(msg);
- }
- return new ArrayList<>(workingMemoryStorage);
- }
-
- /**
- * Records a compression event that occurred during context management.
- *
- * @param eventType the type of compression event
- * @param startIndex the start index of the compressed message range in allMessages
- * @param endIndex the end index of the compressed message range in allMessages
- * @param allMessages the complete message list (before compression)
- * @param compressedMessage the compressed message (null if not a compression type)
- * @param metadata additional metadata for the event (may contain inputToken, outputToken, etc.)
- */
- private void recordCompressionEvent(
- String eventType,
- int startIndex,
- int endIndex,
- List allMessages,
- Msg compressedMessage,
- Map metadata) {
- int compressedMessageCount = endIndex - startIndex + 1;
- String previousMessageId = startIndex > 0 ? allMessages.get(startIndex - 1).getId() : null;
- String nextMessageId =
- endIndex < allMessages.size() - 1 ? allMessages.get(endIndex + 1).getId() : null;
- String compressedMessageId = compressedMessage != null ? compressedMessage.getId() : null;
-
- CompressionEvent event =
- new CompressionEvent(
- eventType,
- System.currentTimeMillis(),
- compressedMessageCount,
- previousMessageId,
- nextMessageId,
- compressedMessageId,
- metadata != null ? new HashMap<>(metadata) : new HashMap<>());
-
- compressionEvents.add(event);
- }
-
- /**
- * Summarize current round of conversation messages.
- *
- * This method is called when historical messages have been compressed and offloaded,
- * but the context still exceeds the limit. This indicates that the current round's content
- * is too large and needs compression.
- *
- *
Strategy:
- * 1. Find the latest user message
- * 2. Merge and compress all messages after it (typically tool calls and tool results,
- * usually no assistant message yet)
- * 3. Preserve tool call interfaces (name, parameters)
- * 4. Compress tool results, merging multiple results and keeping key information
- *
- * @param rawMessages the list of messages to process
- * @return true if summary was actually performed, false otherwise
- */
- private boolean summaryCurrentRoundMessages(List rawMessages) {
- if (rawMessages == null || rawMessages.isEmpty()) {
- return false;
- }
-
- // Step 1: Find the latest user message
- int latestUserIndex = -1;
- for (int i = rawMessages.size() - 1; i >= 0; i--) {
- Msg msg = rawMessages.get(i);
- if (msg.getRole() == MsgRole.USER) {
- latestUserIndex = i;
- break;
- }
- }
-
- // If no user message found, nothing to summarize
- if (latestUserIndex < 0) {
- return false;
- }
-
- // Step 2: Check if there are messages after the user message
- if (latestUserIndex >= rawMessages.size() - 1) {
- return false;
- }
-
- // Step 3: Extract messages after the latest user message
- int startIndex = latestUserIndex + 1;
- int endIndex = rawMessages.size() - 1;
-
- // Ensure tool use and tool result are paired: if the last message is ToolUse,
- // move endIndex back by one to exclude the incomplete tool invocation
- if (endIndex >= startIndex) {
- Msg lastMsg = rawMessages.get(endIndex);
- if (MsgUtils.isToolUseMessage(lastMsg)) {
- endIndex--;
- // If no messages left after adjustment, cannot compress
- if (endIndex < startIndex) {
- return false;
- }
- }
- }
-
- List messagesToCompress = new ArrayList<>();
- for (int i = startIndex; i <= endIndex; i++) {
- messagesToCompress.add(rawMessages.get(i));
- }
-
- log.info(
- "Compressing current round messages: userIndex={}, messageCount={}",
- latestUserIndex,
- messagesToCompress.size());
-
- // Step 4: Merge and compress messages (typically tool calls and results)
- Msg compressedMsg = mergeAndCompressCurrentRoundMessages(messagesToCompress);
-
- // Build metadata for compression event
- Map metadata = new HashMap<>();
- if (compressedMsg.getChatUsage() != null) {
- metadata.put("inputToken", compressedMsg.getChatUsage().getInputTokens());
- metadata.put("outputToken", compressedMsg.getChatUsage().getOutputTokens());
- metadata.put("time", compressedMsg.getChatUsage().getTime());
- }
-
- // Record compression event (before replacing messages to preserve indices)
- recordCompressionEvent(
- CompressionEvent.CURRENT_ROUND_MESSAGE_COMPRESS,
- startIndex,
- endIndex,
- rawMessages,
- compressedMsg,
- metadata);
-
- // Step 5: Replace original messages with compressed one
- rawMessages.subList(startIndex, endIndex + 1).clear();
- rawMessages.add(startIndex, compressedMsg);
-
- log.info(
- "Replaced {} messages with 1 compressed message at index {}",
- messagesToCompress.size(),
- startIndex);
- return true;
- }
-
- /**
- * Summarize large messages in the current round that exceed the threshold.
- *
- * This method is called to compress large messages in the current round (messages after
- * the latest user message) that exceed the largePayloadThreshold. Unlike simple offloading
- * which only provides a preview, this method uses LLM to generate intelligent summaries
- * while preserving critical information.
- *
- *
Strategy:
- * 1. Find the latest user message
- * 2. Check messages after it for content exceeding largePayloadThreshold
- * 3. For each large message, generate an LLM summary and offload the original
- * 4. Replace large messages with summarized versions
- *
- * @param rawMessages the list of messages to process
- * @return true if any messages were summarized and offloaded, false otherwise
- */
- private boolean summaryCurrentRoundLargeMessages(List rawMessages) {
- if (rawMessages == null || rawMessages.isEmpty()) {
- return false;
- }
-
- // Step 1: Find the latest user message
- int latestUserIndex = -1;
- for (int i = rawMessages.size() - 1; i >= 0; i--) {
- Msg msg = rawMessages.get(i);
- if (msg.getRole() == MsgRole.USER) {
- latestUserIndex = i;
- break;
- }
- }
-
- // If no user message found, nothing to process
- if (latestUserIndex < 0) {
- return false;
- }
-
- // Step 2: Check if there are messages after the user message
- if (latestUserIndex >= rawMessages.size() - 1) {
- return false;
- }
-
- // Step 3: Process messages after the latest user message
- // Process in reverse order to avoid index shifting issues when replacing
- boolean hasSummarized = false;
- long threshold = autoContextConfig.largePayloadThreshold;
-
- for (int i = rawMessages.size() - 1; i > latestUserIndex; i--) {
- Msg msg = rawMessages.get(i);
-
- // Skip already compressed messages to avoid double compression
- if (MsgUtils.isCompressedMessage(msg)) {
- log.debug(
- "Skipping already compressed message at index {} to avoid double"
- + " compression",
- i);
- continue;
- }
-
- String textContent = msg.getTextContent();
-
- // Check if message content exceeds threshold
- if (textContent == null || textContent.length() <= threshold) {
- continue;
- }
-
- // Step 4: Offload the original message
- String uuid = UUID.randomUUID().toString();
- List offloadMsg = new ArrayList<>();
- offloadMsg.add(msg);
- offload(uuid, offloadMsg);
- log.info(
- "Offloaded current round large message: index={}, size={} chars, uuid={}",
- i,
- textContent.length(),
- uuid);
-
- // Step 5: Generate summary using LLM
- Msg summaryMsg = generateLargeMessageSummary(msg, uuid);
-
- // Build metadata for compression event
- Map metadata = new HashMap<>();
- if (summaryMsg.getChatUsage() != null) {
- metadata.put("inputToken", summaryMsg.getChatUsage().getInputTokens());
- metadata.put("outputToken", summaryMsg.getChatUsage().getOutputTokens());
- metadata.put("time", summaryMsg.getChatUsage().getTime());
- }
-
- // Record compression event
- recordCompressionEvent(
- CompressionEvent.CURRENT_ROUND_LARGE_MESSAGE_SUMMARY,
- i,
- i,
- rawMessages,
- summaryMsg,
- metadata);
-
- // Step 6: Replace the original message with summary
- rawMessages.set(i, summaryMsg);
- hasSummarized = true;
-
- log.info(
- "Replaced large message at index {} with summarized version (uuid: {})",
- i,
- uuid);
- }
-
- return hasSummarized;
- }
-
- /**
- * Generate a summary of a large message using the model.
- *
- * @param message the message to summarize
- * @param offloadUuid the UUID of offloaded message
- * @return a summary message preserving the original role and name
- */
- private Msg generateLargeMessageSummary(Msg message, String offloadUuid) {
- GenerateOptions options = GenerateOptions.builder().build();
- ReasoningContext context = new ReasoningContext("large_message_summary");
-
- String offloadHint =
- offloadUuid != null
- ? String.format(Prompts.CONTEXT_OFFLOAD_TAG_FORMAT, offloadUuid)
- : "";
-
- List newMessages = new ArrayList<>();
- newMessages.add(
- Msg.builder()
- .role(MsgRole.USER)
- .name("user")
- .content(
- TextBlock.builder()
- .text(
- PromptProvider.getCurrentRoundLargeMessagePrompt(
- customPrompt))
- .build())
- .build());
- newMessages.add(message);
- newMessages.add(
- Msg.builder()
- .role(MsgRole.USER)
- .name("user")
- .content(
- TextBlock.builder()
- .text(Prompts.COMPRESSION_MESSAGE_LIST_END)
- .build())
- .build());
- // Insert plan-aware hint message at the end to leverage recency effect
- addPlanAwareHintIfNeeded(newMessages);
-
- Msg block =
- model.stream(newMessages, null, options)
- .concatMap(chunk -> processChunk(chunk, context))
- .then(Mono.defer(() -> Mono.just(context.buildFinalMessage())))
- .onErrorResume(InterruptedException.class, Mono::error)
- .block();
-
- if (block != null && block.getChatUsage() != null) {
- log.info(
- "Large message summary completed, input tokens: {}, output tokens: {}",
- block.getChatUsage().getInputTokens(),
- block.getChatUsage().getOutputTokens());
- }
-
- // Build metadata with compression information
- Map compressMeta = new HashMap<>();
- if (offloadUuid != null) {
- compressMeta.put("offloaduuid", offloadUuid);
- }
-
- Map metadata = new HashMap<>();
- metadata.put("_compress_meta", compressMeta);
-
- // Preserve _chat_usage from the block if available
- if (block != null && block.getChatUsage() != null) {
- metadata.put(MessageMetadataKeys.CHAT_USAGE, block.getChatUsage());
- }
-
- // Create summary message preserving original role and name
- String summaryContent = block != null ? block.getTextContent() : "";
- String finalContent = summaryContent;
- if (!offloadHint.isEmpty()) {
- finalContent = summaryContent + "\n" + offloadHint;
- }
-
- return Msg.builder()
- .role(message.getRole())
- .name(message.getName())
- .content(TextBlock.builder().text(finalContent).build())
- .metadata(metadata)
- .build();
- }
-
- /**
- * Merge and compress current round messages (typically tool calls and tool results).
- *
- * @param messages the messages to merge and compress
- * @return compressed message
- */
- private Msg mergeAndCompressCurrentRoundMessages(List messages) {
- if (messages == null || messages.isEmpty()) {
- return null;
- }
-
- // Offload original messages
- String uuid = UUID.randomUUID().toString();
- List originalMessages = new ArrayList<>(messages);
- offload(uuid, originalMessages);
-
- // Use model to generate a compressed summary from message list
- return generateCurrentRoundSummaryFromMessages(messages, uuid);
- }
-
- @Override
- public void offload(String uuid, List messages) {
- offloadContext.put(uuid, messages);
- }
-
- @Override
- public List reload(String uuid) {
- List messages = offloadContext.get(uuid);
- return messages != null ? messages : new ArrayList<>();
- }
-
- @Override
- public void clear(String uuid) {
- offloadContext.remove(uuid);
- }
-
- /**
- * Generate a compressed summary of current round messages using the model.
- *
- * @param messages the messages to summarize
- * @param offloadUuid the UUID of offloaded content (if any)
- * @return compressed message
- */
- private Msg generateCurrentRoundSummaryFromMessages(List messages, String offloadUuid) {
- GenerateOptions options = GenerateOptions.builder().build();
- ReasoningContext context = new ReasoningContext("current_round_compress");
-
- // Filter out plan-related tool calls before compression
- List filteredMessages = MsgUtils.filterPlanRelatedToolCalls(messages);
- if (filteredMessages.size() < messages.size()) {
- log.info(
- "Filtered out {} plan-related tool call messages from current round"
- + " compression",
- messages.size() - filteredMessages.size());
- }
-
- // Calculate original character count (including TextBlock, ToolUseBlock, ToolResultBlock)
- // Use filtered messages for character count calculation
- int originalCharCount = MsgUtils.calculateMessagesCharCount(filteredMessages);
-
- // Get compression ratio and calculate target character count
- double compressionRatio = autoContextConfig.getCurrentRoundCompressionRatio();
- int compressionRatioPercent = (int) Math.round(compressionRatio * 100);
- int targetCharCount = (int) Math.round(originalCharCount * compressionRatio);
-
- String offloadHint =
- offloadUuid != null
- ? String.format(Prompts.CONTEXT_OFFLOAD_TAG_FORMAT, offloadUuid)
- : "";
-
- // Build character count requirement message
- String charRequirement =
- String.format(
- Prompts.CURRENT_ROUND_MESSAGE_COMPRESS_CHAR_REQUIREMENT,
- originalCharCount,
- targetCharCount,
- (double) compressionRatioPercent,
- (double) compressionRatioPercent);
-
- List newMessages = new ArrayList<>();
- // First message: main compression prompt (without character count requirement)
- newMessages.add(
- Msg.builder()
- .role(MsgRole.USER)
- .name("user")
- .content(
- TextBlock.builder()
- .text(
- PromptProvider.getCurrentRoundCompressPrompt(
- customPrompt))
- .build())
- .build());
- newMessages.addAll(filteredMessages);
- // Message list end marker
- newMessages.add(
- Msg.builder()
- .role(MsgRole.USER)
- .name("user")
- .content(
- TextBlock.builder()
- .text(Prompts.COMPRESSION_MESSAGE_LIST_END)
- .build())
- .build());
- // Character count requirement (placed after message list end)
- newMessages.add(
- Msg.builder()
- .role(MsgRole.USER)
- .name("user")
- .content(TextBlock.builder().text(charRequirement).build())
- .build());
- // Insert plan-aware hint message at the end to leverage recency effect
- addPlanAwareHintIfNeeded(newMessages);
-
- Msg block =
- model.stream(newMessages, null, options)
- .concatMap(chunk -> processChunk(chunk, context))
- .then(Mono.defer(() -> Mono.just(context.buildFinalMessage())))
- .onErrorResume(InterruptedException.class, Mono::error)
- .block();
-
- // Extract token usage information
- int inputTokens = 0;
- int outputTokens = 0;
- if (block != null && block.getChatUsage() != null) {
- inputTokens = block.getChatUsage().getInputTokens();
- outputTokens = block.getChatUsage().getOutputTokens();
- }
-
- // Calculate actual output character count (including all content blocks)
- int actualCharCount = block != null ? MsgUtils.calculateMessageCharCount(block) : 0;
-
- log.info(
- "Current round summary completed - original: {} chars, target: {} chars ({}%),"
- + " actual: {} chars, input tokens: {}, output tokens: {}",
- originalCharCount,
- targetCharCount,
- compressionRatioPercent,
- actualCharCount,
- inputTokens,
- outputTokens);
-
- // Build metadata with compression information
- Map compressMeta = new HashMap<>();
- if (offloadUuid != null) {
- compressMeta.put("offloaduuid", offloadUuid);
- }
- // Mark this as a compressed current round message to avoid being treated as a real
- // assistant response
- compressMeta.put("compressed_current_round", true);
- Map metadata = new HashMap<>();
- metadata.put("_compress_meta", compressMeta);
- if (block != null && block.getChatUsage() != null) {
- metadata.put(MessageMetadataKeys.CHAT_USAGE, block.getChatUsage());
- }
-
- // Create a compressed message
- return Msg.builder()
- .role(MsgRole.ASSISTANT)
- .name("assistant")
- .content(
- TextBlock.builder()
- .text((block != null ? block.getTextContent() : "") + offloadHint)
- .build())
- .metadata(metadata)
- .build();
- }
-
- /**
- * Summarize current round of conversation messages.
- *
- * @param rawMessages the list of messages to process
- * @return true if summary was actually performed, false otherwise
- */
- private void summaryToolsMessages(
- List rawMessages, Pair toolMsgIndices) {
- int startIndex = toolMsgIndices.first();
- int endIndex = toolMsgIndices.second();
- int toolMsgCount = endIndex - startIndex + 1;
- log.info(
- "Compressing tool invocations: indices [{}, {}], count: {}",
- startIndex,
- endIndex,
- toolMsgCount);
-
- List toolsMsg = new ArrayList<>();
- for (int i = startIndex; i <= endIndex; i++) {
- toolsMsg.add(rawMessages.get(i));
- }
-
- // Check if original token count is sufficient for compression
- // Skip compression if tokens are below threshold to avoid compression overhead
- int originalTokens = TokenCounterUtil.calculateToken(toolsMsg);
- int threshold = autoContextConfig.getMinCompressionTokenThreshold();
- if (originalTokens < threshold) {
- log.info(
- "Skipping tool invocation compression: original tokens ({}) is below threshold"
- + " ({})",
- originalTokens,
- threshold);
- return;
- }
-
- log.info(
- "Proceeding with tool invocation compression: original tokens: {}, threshold: {}",
- originalTokens,
- threshold);
-
- // Normal compression flow for non-plan tools
- String uuid = UUID.randomUUID().toString();
- offload(uuid, toolsMsg);
-
- Msg toolsSummary = compressToolsInvocation(toolsMsg, uuid);
-
- // Build metadata for compression event
- Map metadata = new HashMap<>();
- if (toolsSummary.getChatUsage() != null) {
- metadata.put("inputToken", toolsSummary.getChatUsage().getInputTokens());
- metadata.put("outputToken", toolsSummary.getChatUsage().getOutputTokens());
- metadata.put("time", toolsSummary.getChatUsage().getTime());
- }
-
- // Record compression event
- recordCompressionEvent(
- CompressionEvent.TOOL_INVOCATION_COMPRESS,
- startIndex,
- endIndex,
- rawMessages,
- toolsSummary,
- metadata);
-
- MsgUtils.replaceMsg(rawMessages, startIndex, endIndex, toolsSummary);
- }
-
- /**
- * Summarize all previous rounds of conversation messages before the latest assistant.
- *
- * This method finds the latest assistant message and summarizes all conversation rounds
- * before it. Each round consists of messages between a user message and its corresponding
- * assistant message (typically including tool calls/results and the assistant message itself).
- *
- *
Example transformation:
- * Before: "user1-tools-assistant1, user2-tools-assistant2, user3-tools-assistant3, user4"
- * After: "user1-summary, user2-summary, user3-summary, user4"
- * Where each summary contains the compressed information from tools and assistant of that round.
- *
- *
Strategy:
- * 1. Find the latest assistant message (this is the current round, not to be summarized)
- * 2. From the beginning, find all user-assistant pairs before the latest assistant
- * 3. For each pair, summarize messages between user and assistant (including assistant message)
- * 4. Replace those messages (including assistant) with summary (process from back to front to avoid index shifting)
- *
- * @param rawMessages the list of messages to process
- * @return true if summary was actually performed, false otherwise
- */
- private boolean summaryPreviousRoundMessages(List rawMessages) {
- if (rawMessages == null || rawMessages.isEmpty()) {
- return false;
- }
-
- // Step 1: Find the latest assistant message that is a final response (not a tool call)
- int latestAssistantIndex = -1;
- for (int i = rawMessages.size() - 1; i >= 0; i--) {
- Msg msg = rawMessages.get(i);
- if (MsgUtils.isFinalAssistantResponse(msg)) {
- latestAssistantIndex = i;
- break;
- }
- }
-
- // If no assistant message found, nothing to summarize
- if (latestAssistantIndex < 0) {
- return false;
- }
-
- // Step 2: Find all user-assistant pairs before the latest assistant
- // We'll collect them as pairs: (userIndex, assistantIndex)
- List> userAssistantPairs = new ArrayList<>();
- int currentUserIndex = -1;
-
- for (int i = 0; i < latestAssistantIndex; i++) {
- Msg msg = rawMessages.get(i);
- if (msg.getRole() == MsgRole.USER) {
- currentUserIndex = i;
- } else if (MsgUtils.isFinalAssistantResponse(msg) && currentUserIndex >= 0) {
- // Found a user-assistant pair (assistant message is a final response, not a tool
- // call)
- if (i - currentUserIndex != 1) {
- userAssistantPairs.add(new Pair<>(currentUserIndex, i));
- }
-
- currentUserIndex = -1; // Reset to find next pair
- }
- }
-
- // If no pairs found, nothing to summarize
- if (userAssistantPairs.isEmpty()) {
- return false;
- }
-
- log.info(
- "Found {} user-assistant pairs to summarize before latest assistant at index {}",
- userAssistantPairs.size(),
- latestAssistantIndex);
-
- // Step 3: Process pairs from back to front to avoid index shifting issues
- boolean hasSummarized = false;
- for (int pairIdx = userAssistantPairs.size() - 1; pairIdx >= 0; pairIdx--) {
- Pair pair = userAssistantPairs.get(pairIdx);
- int userIndex = pair.first();
- int assistantIndex = pair.second();
-
- // Messages to summarize: from user to assistant (inclusive of both)
- // Include user message for context, but we'll only remove messages after user
- int startIndex = userIndex + 1; // Messages to remove start after user
- int endIndex = assistantIndex; // Include assistant message in removal
-
- // If no messages between user and assistant (including assistant), skip
- if (startIndex > endIndex) {
- log.info(
- "No messages to summarize between user at index {} and assistant at index"
- + " {}",
- userIndex,
- assistantIndex);
- continue;
- }
-
- // Include user message in messagesToSummarize for context, but keep it in the final
- // list
- List messagesToSummarize = new ArrayList<>();
- messagesToSummarize.add(rawMessages.get(userIndex)); // Include user message for context
- for (int i = startIndex; i <= endIndex; i++) {
- messagesToSummarize.add(rawMessages.get(i));
- }
-
- log.info(
- "Summarizing round {}: user at index {}, messages [{}, {}], totalCount={}"
- + " (includes user message for context)",
- pairIdx + 1,
- userIndex,
- startIndex,
- endIndex,
- messagesToSummarize.size());
-
- // Step 4: Check if original token count is sufficient for compression
- // Skip compression if tokens are below threshold to avoid compression overhead
- int originalTokens = TokenCounterUtil.calculateToken(messagesToSummarize);
- int threshold = autoContextConfig.getMinCompressionTokenThreshold();
- if (originalTokens < threshold) {
- log.info(
- "Skipping conversation summary for round {}: original tokens ({}) is below"
- + " threshold ({})",
- pairIdx + 1,
- originalTokens,
- threshold);
- continue;
- }
-
- log.info(
- "Proceeding with conversation summary for round {}: original tokens: {},"
- + " threshold: {}",
- pairIdx + 1,
- originalTokens,
- threshold);
-
- // Step 5: Offload original messages if contextOffLoader is available
- String uuid = UUID.randomUUID().toString();
- offload(uuid, messagesToSummarize);
- log.info("Offloaded messages to be summarized: uuid={}", uuid);
-
- // Step 6: Generate summary
- Msg summaryMsg = summaryPreviousRoundConversation(messagesToSummarize, uuid);
-
- // Build metadata for compression event
- Map metadata = new HashMap<>();
- if (summaryMsg.getChatUsage() != null) {
- metadata.put("inputToken", summaryMsg.getChatUsage().getInputTokens());
- metadata.put("outputToken", summaryMsg.getChatUsage().getOutputTokens());
- metadata.put("time", summaryMsg.getChatUsage().getTime());
- }
-
- // Record compression event (before removing messages to preserve indices)
- recordCompressionEvent(
- CompressionEvent.PREVIOUS_ROUND_CONVERSATION_SUMMARY,
- startIndex,
- endIndex,
- rawMessages,
- summaryMsg,
- metadata);
-
- // Step 7: Remove the messages between user and assistant (including assistant), then
- // replace with summary
- // Since we're processing from back to front, the indices are still accurate
- // for the current pair (indices of pairs after this one have already been adjusted)
-
- // Remove messages from startIndex to endIndex (including assistant, from back to front
- // to avoid index shifting)
- int removedCount = endIndex - startIndex + 1;
- rawMessages.subList(startIndex, endIndex + 1).clear();
-
- // After removal, the position where assistant was is now: assistantIndex - removedCount
- // + 1
- // But since we removed everything including assistant, we insert summary at the
- // position after user
- int insertIndex = userIndex + 1;
-
- // Insert summary after user (replacing the removed messages including assistant)
- rawMessages.add(insertIndex, summaryMsg);
-
- log.info(
- "Replaced {} messages [indices {}-{}] with summary at index {}",
- removedCount,
- startIndex,
- endIndex,
- insertIndex);
-
- hasSummarized = true;
- }
-
- return hasSummarized;
- }
-
- /**
- * Generate a summary of previous round conversation messages using the model.
- *
- * @param messages the messages to summarize
- * @param offloadUuid the UUID of offloaded messages (if any), null otherwise
- * @return a summary message
- */
- private Msg summaryPreviousRoundConversation(List messages, String offloadUuid) {
- // Filter out plan-related tool calls (user messages are preserved by
- // filterPlanRelatedToolCalls)
- List filteredMessages = MsgUtils.filterPlanRelatedToolCalls(messages);
- if (filteredMessages.size() < messages.size()) {
- log.info(
- "Filtered out {} plan-related tool call messages from previous round"
- + " conversation summary",
- messages.size() - filteredMessages.size());
- }
-
- GenerateOptions options = GenerateOptions.builder().build();
- ReasoningContext context = new ReasoningContext("conversation_summary");
-
- List newMessages = new ArrayList<>();
- newMessages.add(
- Msg.builder()
- .role(MsgRole.USER)
- .name("user")
- .content(
- TextBlock.builder()
- .text(
- PromptProvider.getPreviousRoundSummaryPrompt(
- customPrompt))
- .build())
- .build());
- newMessages.addAll(filteredMessages);
- newMessages.add(
- Msg.builder()
- .role(MsgRole.USER)
- .name("user")
- .content(
- TextBlock.builder()
- .text(Prompts.COMPRESSION_MESSAGE_LIST_END)
- .build())
- .build());
- // Insert plan-aware hint message at the end to leverage recency effect
- addPlanAwareHintIfNeeded(newMessages);
-
- Msg block =
- model.stream(newMessages, null, options)
- .concatMap(chunk -> processChunk(chunk, context))
- .then(Mono.defer(() -> Mono.just(context.buildFinalMessage())))
- .onErrorResume(InterruptedException.class, Mono::error)
- .block();
-
- // Extract token usage information
- int inputTokens = 0;
- int outputTokens = 0;
- if (block != null && block.getChatUsage() != null) {
- inputTokens = block.getChatUsage().getInputTokens();
- outputTokens = block.getChatUsage().getOutputTokens();
- log.info(
- "Conversation summary completed, input tokens: {}, output tokens: {}",
- inputTokens,
- outputTokens);
- }
-
- // Build metadata with compression information
- Map compressMeta = new HashMap<>();
- if (offloadUuid != null) {
- compressMeta.put("offloaduuid", offloadUuid);
- }
-
- Map metadata = new HashMap<>();
- metadata.put("_compress_meta", compressMeta);
-
- // Preserve _chat_usage from the block if available
- if (block != null && block.getChatUsage() != null) {
- metadata.put(MessageMetadataKeys.CHAT_USAGE, block.getChatUsage());
- }
-
- // Build the final message content:
- // 1. LLM generated summary (contains ASSISTANT summary + tool compression)
- // 2. Context offload tag with UUID at the end
- String summaryContent = block != null ? block.getTextContent() : "";
- String offloadTag =
- offloadUuid != null
- ? String.format(Prompts.CONTEXT_OFFLOAD_TAG_FORMAT, offloadUuid)
- : "";
-
- // Combine: summary content + newline + UUID tag
- String finalContent = summaryContent;
- if (!offloadTag.isEmpty()) {
- finalContent = finalContent + "\n" + offloadTag;
- }
-
- return Msg.builder()
- .role(MsgRole.ASSISTANT)
- .name("assistant")
- .content(TextBlock.builder().text(finalContent).build())
- .metadata(metadata)
- .build();
- }
-
- /**
- * Offload large payload messages that exceed the threshold.
- *
- * This method finds messages before the latest assistant response that exceed
- * the largePayloadThreshold, offloads them to storage, and replaces them with
- * a summary containing the first 100 characters and a hint to reload if needed.
- *
- * @param rawMessages the list of messages to process
- * @param lastKeep whether to keep the last N messages (unused in current implementation)
- * @return true if any messages were offloaded, false otherwise
- */
- private boolean offloadingLargePayload(List rawMessages, boolean lastKeep) {
- if (rawMessages == null || rawMessages.isEmpty()) {
- return false;
- }
-
- // Strategy 1: If rawMessages has less than lastKeep messages, skip
- if (rawMessages.size() < autoContextConfig.getLastKeep()) {
- return false;
- }
-
- // Strategy 2: Find the latest assistant message that is a final response and protect it and
- // all messages after it
- int latestAssistantIndex = -1;
- for (int i = rawMessages.size() - 1; i >= 0; i--) {
- Msg msg = rawMessages.get(i);
- if (MsgUtils.isFinalAssistantResponse(msg)) {
- latestAssistantIndex = i;
- break;
- }
- }
-
- // Determine the search end index based on lastKeep parameter
- int searchEndIndex;
- if (lastKeep) {
- // If lastKeep is true, protect the last N messages
- int lastKeepCount = autoContextConfig.getLastKeep();
- int protectedStartIndex = Math.max(0, rawMessages.size() - lastKeepCount);
-
- if (latestAssistantIndex >= 0) {
- // Protect both the latest assistant and the last N messages
- // Use the earlier index to ensure both are protected
- searchEndIndex = Math.min(latestAssistantIndex, protectedStartIndex);
- } else {
- // No assistant found, protect the last N messages
- searchEndIndex = protectedStartIndex;
- }
- } else {
- // If lastKeep is false, only protect up to the latest assistant (if found)
- searchEndIndex = (latestAssistantIndex >= 0) ? latestAssistantIndex : 0;
- }
-
- boolean hasOffloaded = false;
- long threshold = autoContextConfig.largePayloadThreshold;
-
- // Process messages from the beginning up to the search end index
- // Process in reverse order to avoid index shifting issues when replacing
- for (int i = searchEndIndex - 1; i >= 0; i--) {
- Msg msg = rawMessages.get(i);
- String textContent = msg.getTextContent();
-
- String uuid = null;
- // Check if message content exceeds threshold
- if (textContent != null && textContent.length() > threshold) {
- // Offload the original message
- uuid = UUID.randomUUID().toString();
- List offloadMsg = new ArrayList<>();
- offloadMsg.add(msg);
- offload(uuid, offloadMsg);
- log.info(
- "Offloaded large message: index={}, size={} chars, uuid={}",
- i,
- textContent.length(),
- uuid);
- }
- if (uuid == null) {
- continue;
- }
-
- // Create replacement message with first autoContextConfig.offloadSinglePreview
- // characters and offload hint
- String preview =
- textContent.length() > autoContextConfig.offloadSinglePreview
- ? textContent.substring(0, autoContextConfig.offloadSinglePreview)
- + "..."
- : textContent;
-
- String offloadHint =
- preview + "\n" + String.format(Prompts.CONTEXT_OFFLOAD_TAG_FORMAT, uuid);
-
- // Build metadata with compression information
- // Note: This method only offloads without LLM compression, so tokens are 0
- Map compressMeta = new HashMap<>();
- compressMeta.put("offloaduuid", uuid);
-
- Map metadata = new HashMap<>();
- metadata.put("_compress_meta", compressMeta);
-
- // Create replacement message preserving original role and name
- Msg replacementMsg =
- Msg.builder()
- .role(msg.getRole())
- .name(msg.getName())
- .content(TextBlock.builder().text(offloadHint).build())
- .metadata(metadata)
- .build();
-
- // Calculate token counts before and after offload
- int tokenBefore = TokenCounterUtil.calculateToken(List.of(msg));
- int tokenAfter = TokenCounterUtil.calculateToken(List.of(replacementMsg));
-
- // Build metadata for compression event (offload doesn't use LLM, so no compression
- // tokens)
- Map eventMetadata = new HashMap<>();
- eventMetadata.put("inputToken", tokenBefore);
- eventMetadata.put("outputToken", tokenAfter);
- eventMetadata.put("time", 0.0);
-
- // Record compression event (offload doesn't use LLM, so compressedMessage is null)
- String eventType =
- lastKeep
- ? CompressionEvent.LARGE_MESSAGE_OFFLOAD_WITH_PROTECTION
- : CompressionEvent.LARGE_MESSAGE_OFFLOAD;
- recordCompressionEvent(eventType, i, i, rawMessages, null, eventMetadata);
-
- // Replace the original message
- rawMessages.set(i, replacementMsg);
- hasOffloaded = true;
- }
-
- return hasOffloaded;
- }
-
- @Override
- public void deleteMessage(int index) {
- if (index >= 0 && index < workingMemoryStorage.size()) {
- workingMemoryStorage.remove(index);
- }
- }
-
- /**
- * Extract tool messages from raw messages for compression.
- *
- * This method finds consecutive tool invocation messages in historical conversations
- * that can be compressed. It searches for sequences of more than consecutive tool messages
- * before the latest assistant message.
- *
- *
Strategy:
- * 1. If rawMessages has less than lastKeep messages, return null
- * 2. Find the latest assistant message and protect it and all messages after it
- * 3. Search from the beginning for the oldest consecutive tool messages (more than minConsecutiveToolMessages consecutive)
- * that can be compressed
- * 4. If no assistant message is found, protect the last N messages (lastKeep)
- *
- * @param rawMessages all raw messages
- * @param lastKeep number of recent messages to keep uncompressed
- * @return Pair containing startIndex and endIndex (inclusive) of compressible tool messages, or null if none found
- */
- private Pair extractPrevToolMsgsForCompress(
- List rawMessages, int lastKeep) {
- if (rawMessages == null || rawMessages.isEmpty()) {
- return null;
- }
-
- int totalSize = rawMessages.size();
-
- // Step 1: If rawMessages has less than lastKeep messages, return null
- if (totalSize < lastKeep) {
- return null;
- }
-
- // Step 2: Find the latest assistant message that is a final response and protect it and all
- // messages after it
- int latestAssistantIndex = -1;
- for (int i = totalSize - 1; i >= 0; i--) {
- Msg msg = rawMessages.get(i);
- if (MsgUtils.isFinalAssistantResponse(msg)) {
- latestAssistantIndex = i;
- break;
- }
- }
- if (latestAssistantIndex == -1) {
- return null;
- }
- // Determine the search boundary: we can only search messages before the latest assistant
- int searchEndIndex = Math.min(latestAssistantIndex, (totalSize - lastKeep));
-
- // Step 3: Find the oldest consecutive tool messages (more than minConsecutiveToolMessages
- // consecutive)
- // Search from the beginning (oldest messages first) until we find a sequence
- int consecutiveCount = 0;
- int startIndex = -1;
- int endIndex = -1;
-
- for (int i = 0; i < searchEndIndex; i++) {
- Msg msg = rawMessages.get(i);
- if (MsgUtils.isToolMessage(msg)) {
- if (consecutiveCount == 0) {
- startIndex = i;
- }
- consecutiveCount++;
- } else {
- // If we found enough consecutive tool messages, return their indices
- if (consecutiveCount > autoContextConfig.minConsecutiveToolMessages) {
- endIndex = i - 1; // endIndex is inclusive
- // Adjust indices: ensure startIndex is ToolUse and endIndex is ToolResult
- int adjustedStart = startIndex;
- int adjustedEnd = endIndex;
-
- // Adjust startIndex forward to find ToolUse
- while (adjustedStart <= adjustedEnd
- && !MsgUtils.isToolUseMessage(rawMessages.get(adjustedStart))) {
- if (MsgUtils.isToolResultMessage(rawMessages.get(adjustedStart))) {
- adjustedStart++;
- } else {
- break; // Invalid sequence, continue searching
- }
- }
-
- // Adjust endIndex backward to find ToolResult
- while (adjustedEnd >= adjustedStart
- && !MsgUtils.isToolResultMessage(rawMessages.get(adjustedEnd))) {
- if (MsgUtils.isToolUseMessage(rawMessages.get(adjustedEnd))) {
- adjustedEnd--;
- } else {
- break; // Invalid sequence, continue searching
- }
- }
-
- // Check if we still have enough consecutive tool messages after adjustment
- if (adjustedStart <= adjustedEnd
- && adjustedEnd - adjustedStart + 1
- > autoContextConfig.minConsecutiveToolMessages) {
- return new Pair<>(adjustedStart, adjustedEnd);
- }
- }
- // Reset counter if sequence is broken
- consecutiveCount = 0;
- startIndex = -1;
- }
- }
-
- // Check if there's a sequence at the end of the search range
- if (consecutiveCount > autoContextConfig.minConsecutiveToolMessages) {
- endIndex = searchEndIndex - 1; // endIndex is inclusive
- // Adjust indices: ensure startIndex is ToolUse and endIndex is ToolResult
- int adjustedStart = startIndex;
- int adjustedEnd = endIndex;
-
- // Adjust startIndex forward to find ToolUse
- while (adjustedStart <= adjustedEnd
- && !MsgUtils.isToolUseMessage(rawMessages.get(adjustedStart))) {
- if (MsgUtils.isToolResultMessage(rawMessages.get(adjustedStart))) {
- adjustedStart++;
- } else {
- return null; // Invalid sequence
- }
- }
-
- // Adjust endIndex backward to find ToolResult
- while (adjustedEnd >= adjustedStart
- && !MsgUtils.isToolResultMessage(rawMessages.get(adjustedEnd))) {
- if (MsgUtils.isToolUseMessage(rawMessages.get(adjustedEnd))) {
- adjustedEnd--;
- } else {
- return null; // Invalid sequence
- }
- }
-
- // Check if we still have enough consecutive tool messages after adjustment
- if (adjustedStart <= adjustedEnd
- && adjustedEnd - adjustedStart + 1
- > autoContextConfig.minConsecutiveToolMessages) {
- return new Pair<>(adjustedStart, adjustedEnd);
- }
- }
-
- return null;
- }
-
- /**
- * Compresses a list of tool invocation messages using LLM summarization.
- *
- * This method uses an LLM model to intelligently compress tool invocation messages,
- * preserving key information such as tool names, parameters, and important results while
- * reducing the overall token count. The compression is performed as part of Strategy 1
- * (compress historical tool invocations) to manage context window limits.
- *
- *
Process:
- *
- * - Constructs a prompt with the tool invocation messages sandwiched between
- * compression instructions
- * - Sends the prompt to the LLM model for summarization
- * - Formats the compressed result with optional offload hint (if UUID is provided)
- * - Returns a new ASSISTANT message containing the compressed summary
- *
- *
- * Special Handling:
- * The method handles plan note related tools specially (see {@link #summaryToolsMessages}),
- * which are simplified without LLM interaction. This method is only called for non-plan
- * tool invocations.
- *
- *
- * <p>Offload Integration:
- * If an {@code offloadUUid} is provided, the compressed message will include a hint
- * indicating that the original content can be reloaded using the UUID via
- * {@link ContextOffloadTool}.
- *
- * @param messages the list of tool invocation messages to compress (must not be null or empty)
- * @param offloadUUid the UUID of the offloaded original messages, or null if not offloaded
- * @return a new ASSISTANT message containing the compressed tool invocation summary
- * @throws RuntimeException if LLM processing fails or is interrupted
- */
- private Msg compressToolsInvocation(List messages, String offloadUUid) {
-
- // Filter out plan-related tool calls before compression
- List filteredMessages = MsgUtils.filterPlanRelatedToolCalls(messages);
- if (filteredMessages.size() < messages.size()) {
- log.info(
- "Filtered out {} plan-related tool call messages from tool invocation"
- + " compression",
- messages.size() - filteredMessages.size());
- }
-
- GenerateOptions options = GenerateOptions.builder().build();
- ReasoningContext context = new ReasoningContext("tool_compress");
- List newMessages = new ArrayList<>();
- newMessages.add(
- Msg.builder()
- .role(MsgRole.USER)
- .name("user")
- .content(
- TextBlock.builder()
- .text(
- PromptProvider.getPreviousRoundToolCompressPrompt(
- customPrompt))
- .build())
- .build());
- newMessages.addAll(filteredMessages);
- newMessages.add(
- Msg.builder()
- .role(MsgRole.USER)
- .name("user")
- .content(
- TextBlock.builder()
- .text(Prompts.COMPRESSION_MESSAGE_LIST_END)
- .build())
- .build());
- // Insert plan-aware hint message at the end to leverage recency effect
- addPlanAwareHintIfNeeded(newMessages);
- Msg block =
- model.stream(newMessages, null, options)
- .concatMap(chunk -> processChunk(chunk, context))
- .then(Mono.defer(() -> Mono.just(context.buildFinalMessage())))
- .onErrorResume(InterruptedException.class, Mono::error)
- .block();
-
- // Extract token usage information
- int inputTokens = 0;
- int outputTokens = 0;
- if (block != null && block.getChatUsage() != null) {
- inputTokens = block.getChatUsage().getInputTokens();
- outputTokens = block.getChatUsage().getOutputTokens();
- log.info(
- "Tool compression completed, input tokens: {}, output tokens: {}",
- inputTokens,
- outputTokens);
- }
-
- // Build metadata with compression information
- Map compressMeta = new HashMap<>();
- if (offloadUUid != null) {
- compressMeta.put("offloaduuid", offloadUUid);
- }
-
- Map metadata = new HashMap<>();
- metadata.put("_compress_meta", compressMeta);
-
- // Preserve _chat_usage from the block if available
- if (block != null && block.getChatUsage() != null) {
- metadata.put(MessageMetadataKeys.CHAT_USAGE, block.getChatUsage());
- }
-
- // Build the final message content:
- // 1. LLM generated compressed tool invocation content
- // 2. Context offload tag with UUID at the end
- String compressedContent = block != null ? block.getTextContent() : "";
- String offloadTag =
- offloadUUid != null
- ? String.format(Prompts.CONTEXT_OFFLOAD_TAG_FORMAT, offloadUUid)
- : "";
-
- // Combine: compressed content + newline + UUID tag
- String finalContent = compressedContent;
- if (!offloadTag.isEmpty()) {
- finalContent = finalContent + "\n" + offloadTag;
- }
-
- return Msg.builder()
- .role(MsgRole.ASSISTANT)
- .name("assistant")
- .content(TextBlock.builder().text(finalContent).build())
- .metadata(metadata)
- .build();
- }
-
- private Mono processChunk(ChatResponse chunk, ReasoningContext context) {
- return Mono.just(chunk).doOnNext(context::processChunk).then(Mono.empty());
- }
-
- @Override
- public void clear() {
- workingMemoryStorage.clear();
- originalMemoryStorage.clear();
- }
-
- /**
- * Attaches a PlanNotebook instance to enable plan-aware compression.
- *
- * This method should be called after the ReActAgent is created and has a PlanNotebook.
- * When a PlanNotebook is attached, compression operations will automatically include
- * plan context information to preserve plan-related information during compression.
- *
- *
This method can be called multiple times to update or replace the PlanNotebook.
- * Passing null will detach the current PlanNotebook and disable plan-aware compression.
- *
- * @param planNotebook the PlanNotebook instance to attach, or null to detach
- */
- public void attachPlanNote(PlanNotebook planNotebook) {
- this.planNotebook = planNotebook;
- if (planNotebook != null) {
- log.debug("PlanNotebook attached to AutoContextMemory for plan-aware compression");
- } else {
- log.debug("PlanNotebook detached from AutoContextMemory");
- }
- }
-
- /**
- * Gets the current plan state information for compression context.
- *
- *
This method generates a generic plan-aware hint message that is fixed to be placed
- * after the messages that need to be compressed. The content uses "above messages"
- * terminology to refer to the messages that appear before this hint in the message list.
- *
- * @return Plan state information as a formatted string, or null if no plan is active
- */
- private String getPlanStateContext() {
- if (planNotebook == null) {
- return null;
- }
-
- Plan currentPlan = planNotebook.getCurrentPlan();
- if (currentPlan == null) {
- return null;
- }
-
- // Build simplified plan state information
- StringBuilder planContext = new StringBuilder();
-
- // 1. Task overall goal
- if (currentPlan.getDescription() != null && !currentPlan.getDescription().isEmpty()) {
- planContext.append("Goal: ").append(currentPlan.getDescription()).append("\n");
- }
-
- // 2. Current progress
- List subtasks = currentPlan.getSubtasks();
- if (subtasks != null && !subtasks.isEmpty()) {
- List inProgressTasks =
- subtasks.stream()
- .filter(st -> st.getState() == SubTaskState.IN_PROGRESS)
- .collect(Collectors.toList());
-
- if (!inProgressTasks.isEmpty()) {
- planContext.append("Current Progress: ");
- for (int i = 0; i < inProgressTasks.size(); i++) {
- if (i > 0) {
- planContext.append(", ");
- }
- planContext.append(inProgressTasks.get(i).getName());
- }
- planContext.append("\n");
- }
-
- // Count completed tasks for context
- long doneCount =
- subtasks.stream().filter(st -> st.getState() == SubTaskState.DONE).count();
- long totalCount = subtasks.size();
-
- if (totalCount > 0) {
- planContext.append(
- String.format(
- "Progress: %d/%d subtasks completed\n", doneCount, totalCount));
- }
- }
-
- // 3. Appropriate supplement to task plan context
- if (currentPlan.getExpectedOutcome() != null
- && !currentPlan.getExpectedOutcome().isEmpty()) {
- planContext
- .append("Expected Outcome: ")
- .append(currentPlan.getExpectedOutcome())
- .append("\n");
- }
-
- return planContext.toString();
- }
-
- /**
- * Creates a hint message containing plan context information for compression.
- *
- * This hint message is placed after the compression scope marker
- * (COMPRESSION_MESSAGE_LIST_END) at the end of the message list. This placement leverages the
- * model's attention mechanism (recency effect), ensuring compression guidelines are fresh in the
- * model's context during generation.
- *
- * @return A USER message containing plan context, or null if no plan is active
- */
- private Msg createPlanAwareHintMessage() {
- String planContext = getPlanStateContext();
- if (planContext == null) {
- return null;
- }
-
- return Msg.builder()
- .role(MsgRole.USER)
- .name("user")
- .content(
- TextBlock.builder()
- .text("\n" + planContext + "\n")
- .build())
- .build();
- }
-
- /**
- * Adds plan-aware hint message to the message list if a plan is active.
- *
- *
This method creates and adds a plan-aware hint message to the provided message list if
- * there is an active plan. The hint message is added at the end of the list to leverage the
- * recency effect of the model's attention mechanism.
- *
- * @param newMessages the message list to which the hint message should be added
- */
- private void addPlanAwareHintIfNeeded(List newMessages) {
- Msg hintMsg = createPlanAwareHintMessage();
- if (hintMsg != null) {
- newMessages.add(hintMsg);
- }
- }
-
- /**
- * Gets the original memory storage containing complete, uncompressed message history.
- *
- * This storage maintains the full conversation history in its original form (append-only).
- * Unlike {@link #getMessages()} which returns compressed messages from working memory,
- * this method returns all messages as they were originally added, without any compression
- * or summarization applied.
- *
- *
Use cases:
- *
- * - Accessing complete conversation history for analysis or export
- * - Recovering original messages that have been compressed in working memory
- * - Auditing or debugging conversation flow
- *
- *
- * @return a list of all original messages in the order they were added
- */
- public List getOriginalMemoryMsgs() {
- return originalMemoryStorage;
- }
-
- /**
- * Gets the user-assistant interaction messages from original memory storage.
- *
- * This method filters the original memory storage to return only messages that represent
- * the actual interaction dialogue between the user and assistant. It includes:
- *
- * - All {@link MsgRole#USER} messages
- * - Only final {@link MsgRole#ASSISTANT} responses that are sent to the user
- * (excludes intermediate tool invocation messages)
- *
- *
- * This filtered list excludes:
- *
- * - Tool-related messages ({@link MsgRole#TOOL})
- * - System messages ({@link MsgRole#SYSTEM})
- * - Intermediate ASSISTANT messages that contain tool calls (not final responses)
- * - Any other message types
- *
- *
- * A final assistant response is determined by {@link MsgUtils#isFinalAssistantResponse(Msg)},
- * which checks that the message does not contain {@link ToolUseBlock} or
- * {@link ToolResultBlock}, indicating it is the actual reply sent to the user rather
- * than an intermediate tool invocation step.
- *
- *
Use cases:
- *
- * - Extracting clean conversation transcripts for analysis
- * - Generating conversation summaries without tool call details
- * - Exporting user-assistant interaction dialogue for documentation
- * - Training or fine-tuning data preparation
- *
- *
- * The returned list maintains the original order of messages, preserving the
- * interaction flow between user and assistant.
- *
- * @return a list containing only USER messages and final ASSISTANT responses in chronological order
- */
- public List getInteractionMsgs() {
- List conversations = new ArrayList<>();
- for (Msg msg : originalMemoryStorage) {
- if (msg.getRole() == MsgRole.USER || MsgUtils.isFinalAssistantResponse(msg)) {
- conversations.add(msg);
- }
- }
- return conversations;
- }
-
- /**
- * Gets the offload context map containing offloaded message content.
- *
- * This map stores messages that have been offloaded during compression operations.
- * Each entry uses a UUID as the key and contains a list of messages that were offloaded
- * together. These messages can be reloaded using {@link #reload(String)} with the
- * corresponding UUID.
- *
- *
Offloading occurs when:
- *
- * - Large messages exceed the {@code largePayloadThreshold}
- * - Tool invocations are compressed (Strategy 1)
- * - Previous round conversations are summarized (Strategy 4)
- * - Current round messages are compressed (Strategy 5 & 6)
- *
- *
- * The offloaded content can be accessed via {@link ContextOffloadTool} or by
- * calling {@link #reload(String)} with the UUID found in compressed message hints.
- *
- * @return a map where keys are UUID strings and values are lists of offloaded messages
- */
- public Map> getOffloadContext() {
- return offloadContext;
- }
-
- /**
- * Gets the list of compression events that occurred during context management.
- *
- * This list records all compression operations that have been performed, including:
- *
- * - Event type (which compression strategy was used)
- * - Timestamp when the compression occurred
- * - Number of messages compressed
- * - Token counts before and after compression
- * - Message positioning information (previous and next message IDs)
- * - Compressed message ID (for compression types)
- *
- *
- * The events are stored in chronological order and can be used for analysis,
- * debugging, or monitoring compression effectiveness.
- *
- * @return a list of compression events, ordered by timestamp
- */
- public List getCompressionEvents() {
- return compressionEvents;
- }
-
- // ==================== StateModule API ====================
-
- /**
- * Save memory state to the session.
- *
- * Saves working memory and original memory messages to the session storage.
- *
- * @param session the session to save state to
- * @param sessionKey the session identifier
- */
- @Override
- public void saveTo(Session session, SessionKey sessionKey) {
- session.save(
- sessionKey,
- "autoContextMemory_workingMessages",
- new ArrayList<>(workingMemoryStorage));
- session.save(
- sessionKey,
- "autoContextMemory_originalMessages",
- new ArrayList<>(originalMemoryStorage));
-
- // Save offload context (critical for reload functionality)
- if (!offloadContext.isEmpty()) {
- session.save(
- sessionKey,
- "autoContextMemory_offloadContext",
- new OffloadContextState(new HashMap<>(offloadContext)));
- }
-
- if (!compressionEvents.isEmpty()) {
- session.save(
- sessionKey,
- "autoContextMemory_compressionEvents",
- new ArrayList<>(compressionEvents));
- }
- }
-
- /**
- * Load memory state from the session.
- *
- *
Loads working memory and original memory messages from the session storage.
- *
- * @param session the session to load state from
- * @param sessionKey the session identifier
- */
- @Override
- public void loadFrom(Session session, SessionKey sessionKey) {
- List loadedWorking =
- session.getList(sessionKey, "autoContextMemory_workingMessages", Msg.class);
- workingMemoryStorage.clear();
- workingMemoryStorage.addAll(loadedWorking);
-
- List loadedOriginal =
- session.getList(sessionKey, "autoContextMemory_originalMessages", Msg.class);
- originalMemoryStorage.clear();
- originalMemoryStorage.addAll(loadedOriginal);
-
- // Load offload context
- session.get(sessionKey, "autoContextMemory_offloadContext", OffloadContextState.class)
- .ifPresent(
- state -> {
- offloadContext.clear();
- offloadContext.putAll(state.offloadContext());
- });
-
- // Load compression context events
- List compressEvents =
- session.getList(
- sessionKey, "autoContextMemory_compressionEvents", CompressionEvent.class);
- compressionEvents.clear();
- compressionEvents.addAll(compressEvents);
- }
-}
+/*
+ * Copyright 2024-2026 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.agentscope.core.memory.autocontext;
+
+import io.agentscope.core.agent.accumulator.ReasoningContext;
+import io.agentscope.core.memory.Memory;
+import io.agentscope.core.message.MessageMetadataKeys;
+import io.agentscope.core.message.Msg;
+import io.agentscope.core.message.MsgRole;
+import io.agentscope.core.message.TextBlock;
+import io.agentscope.core.message.ToolResultBlock;
+import io.agentscope.core.message.ToolUseBlock;
+import io.agentscope.core.model.ChatResponse;
+import io.agentscope.core.model.GenerateOptions;
+import io.agentscope.core.model.Model;
+import io.agentscope.core.plan.PlanNotebook;
+import io.agentscope.core.plan.model.Plan;
+import io.agentscope.core.plan.model.SubTask;
+import io.agentscope.core.plan.model.SubTaskState;
+import io.agentscope.core.session.Session;
+import io.agentscope.core.state.SessionKey;
+import io.agentscope.core.state.StateModule;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import java.util.stream.Collectors;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import reactor.core.publisher.Mono;
+
+/**
+ * AutoContextMemory - Intelligent context memory management system.
+ *
+ * AutoContextMemory implements the {@link Memory} interface and provides automated
+ * context compression, offloading, and summarization to optimize LLM context window usage.
+ * When conversation history exceeds configured thresholds, the system automatically applies
+ * multiple compression strategies to reduce context size while preserving important information.
+ *
+ *
+ * <p>Key features:
+ *
+ * - Automatic compression when message count or token count exceeds thresholds
+ * - Six progressive compression strategies (from lightweight to heavyweight)
+ * - Intelligent summarization using LLM models
+ * - Content offloading to external storage
+ * - Tool call interface preservation during compression
+ * - Dual storage mechanism (working storage and original storage)
+ *
+ *
+ * Compression strategies (applied in order):
+ *
+ * - Compress historical tool invocations
+ * - Offload large messages (with lastKeep protection)
+ * - Offload large messages (without protection)
+ * - Summarize historical conversation rounds
+ * - Summarize large messages in current round (with LLM summary and offload)
+ * - Compress current round messages
+ *
+ *
+ * Storage architecture:
+ *
+ * - Working Memory Storage: Stores compressed messages for actual conversations
+ * - Original Memory Storage: Stores complete, uncompressed message history
+ *
+ */
+public class AutoContextMemory implements StateModule, Memory, ContextOffLoader {
+
+ private static final Logger log = LoggerFactory.getLogger(AutoContextMemory.class);
+
+ /**
+ * Working memory storage for compressed and offloaded messages.
+ * This storage is used for actual conversations and may contain compressed summaries.
+ */
+ private List workingMemoryStorage;
+
+ /**
+ * Original memory storage for complete, uncompressed message history.
+ * This storage maintains the full conversation history in its original form (append-only).
+ */
+ private List originalMemoryStorage;
+
+ private Map> offloadContext = new HashMap<>();
+
+ /**
+ * List of compression events that occurred during context management.
+ * Records information about each compression operation including timing, token reduction,
+ * and message positioning.
+ */
+ private List compressionEvents;
+
+ /**
+ * Auto context configuration containing thresholds and settings.
+ * Defines compression triggers, storage options, and offloading behavior.
+ */
+ private final AutoContextConfig autoContextConfig;
+
+ /**
+ * LLM model used for generating summaries and compressing content.
+ * Required for intelligent compression and summarization operations.
+ */
+ private Model model;
+
+ /**
+ * Optional PlanNotebook instance for plan-aware compression.
+ * When provided, compression prompts will be adjusted based on current plan state
+ * to preserve plan-related information.
+ *
+ * Note: This field is set via {@link #attachPlanNote(PlanNotebook)} method,
+ * typically called after ReActAgent is created and has a PlanNotebook instance.
+ */
+ private PlanNotebook planNotebook;
+
+ /**
+ * Custom prompt configuration from AutoContextConfig.
+ * If null, default prompts from {@link Prompts} will be used.
+ */
+ private final PromptConfig customPrompt;
+
+ /**
+ * Creates a new AutoContextMemory instance with the specified configuration and model.
+ *
+ * @param autoContextConfig the configuration for auto context management
+ * @param model the LLM model to use for compression and summarization
+ */
+ public AutoContextMemory(AutoContextConfig autoContextConfig, Model model) {
+ this.model = model;
+ this.autoContextConfig = autoContextConfig;
+ this.customPrompt = autoContextConfig.getCustomPrompt();
+ workingMemoryStorage = new ArrayList<>();
+ originalMemoryStorage = new ArrayList<>();
+ offloadContext = new HashMap<>();
+ compressionEvents = new ArrayList<>();
+ }
+
+ @Override
+ public void addMessage(Msg message) {
+ workingMemoryStorage.add(message);
+ originalMemoryStorage.add(message);
+ }
+
+ @Override
+ public List getMessages() {
+ // Read-only: return a copy of working memory messages without triggering compression
+ return new ArrayList<>(workingMemoryStorage);
+ }
+
+ /**
+ * Compresses the working memory if thresholds are reached.
+ *
+ * This method checks if compression is needed based on message count and token count
+ * thresholds, and applies compression strategies if necessary. The compression modifies
+ * the working memory storage in place.
+ *
+ *
This method should be called at a deterministic point in the execution flow,
+ * typically via a PreReasoningHook, to ensure compression happens before LLM reasoning.
+ *
+ *
Compression strategies are applied in order until one succeeds:
+ *
+ * - Compress previous round tool invocations
+ * - Offload previous round large messages (with lastKeep protection)
+ * - Offload previous round large messages (without lastKeep protection)
+ * - Summarize previous round conversations
+ * - Summarize and offload current round large messages
+ * - Summarize current round messages
+ *
+ *
+ * @return true if compression was performed, false if no compression was needed
+ */
+ public boolean compressIfNeeded() {
+ List currentContextMessages = new ArrayList<>(workingMemoryStorage);
+
+ // Check if compression is needed
+ boolean msgCountReached = currentContextMessages.size() >= autoContextConfig.msgThreshold;
+ int calculateToken = TokenCounterUtil.calculateToken(currentContextMessages);
+ int thresholdToken = (int) (autoContextConfig.maxToken * autoContextConfig.tokenRatio);
+ boolean tokenCounterReached = calculateToken >= thresholdToken;
+
+ if (!msgCountReached && !tokenCounterReached) {
+ return false;
+ }
+
+ // Compression triggered - log threshold information
+ log.info(
+ "Compression triggered - msgCount: {}/{}, tokenCount: {}/{}",
+ currentContextMessages.size(),
+ autoContextConfig.msgThreshold,
+ calculateToken,
+ thresholdToken);
+
+ // Strategy 1: Compress previous round tool invocations
+ log.info("Strategy 1: Checking for previous round tool invocations to compress");
+ int toolIters = 5;
+ boolean toolCompressed = false;
+ int compressionCount = 0;
+ int cursorStartIndex = 0;
+ while (toolIters > 0) {
+ toolIters--;
+ List currentMsgs = new ArrayList<>(workingMemoryStorage);
+ Pair toolMsgIndices =
+ extractPrevToolMsgsForCompress(
+ currentMsgs, autoContextConfig.getLastKeep(), cursorStartIndex);
+ if (toolMsgIndices != null) {
+ boolean actuallyCompressed = summaryToolsMessages(currentMsgs, toolMsgIndices);
+ if (actuallyCompressed) {
+ replaceWorkingMessage(currentMsgs);
+ toolCompressed = true;
+ compressionCount++;
+ cursorStartIndex = toolMsgIndices.first() + 1;
+ } else {
+ cursorStartIndex = toolMsgIndices.second() + 1;
+ }
+ } else {
+ break;
+ }
+ }
+ if (toolCompressed) {
+ log.info(
+ "Strategy 1: APPLIED - Compressed {} tool invocation groups", compressionCount);
+ return true;
+ } else {
+ log.info(
+ "Strategy 1: SKIPPED - No compressible tool invocations found (or skipped due"
+ + " to low tokens)");
+ }
+
+ // Strategy 2: Offload previous round large messages (with lastKeep protection)
+ log.info(
+ "Strategy 2: Checking for previous round large messages (with lastKeep"
+ + " protection)");
+ boolean hasOffloadedLastKeep = offloadingLargePayload(currentContextMessages, true);
+ if (hasOffloadedLastKeep) {
+ log.info(
+ "Strategy 2: APPLIED - Offloaded previous round large messages (with lastKeep"
+ + " protection)");
+ replaceWorkingMessage(currentContextMessages);
+ return true;
+ } else {
+ log.info("Strategy 2: SKIPPED - No large messages found or protected by lastKeep");
+ }
+
+ // Strategy 3: Offload previous round large messages (without lastKeep protection)
+ log.info(
+ "Strategy 3: Checking for previous round large messages (without lastKeep"
+ + " protection)");
+ boolean hasOffloaded = offloadingLargePayload(currentContextMessages, false);
+ if (hasOffloaded) {
+ log.info("Strategy 3: APPLIED - Offloaded previous round large messages");
+ replaceWorkingMessage(currentContextMessages);
+ return true;
+ } else {
+ log.info("Strategy 3: SKIPPED - No large messages found");
+ }
+
+ // Strategy 4: Summarize previous round conversations
+ log.info("Strategy 4: Checking for previous round conversations to summarize");
+ boolean hasSummarized = summaryPreviousRoundMessages(currentContextMessages);
+ if (hasSummarized) {
+ log.info("Strategy 4: APPLIED - Summarized previous round conversations");
+ replaceWorkingMessage(currentContextMessages);
+ return true;
+ } else {
+ log.info("Strategy 4: SKIPPED - No previous round conversations to summarize");
+ }
+
+ // Strategy 5: Summarize and offload current round large messages
+ log.info("Strategy 5: Checking for current round large messages to summarize");
+ boolean currentRoundLargeSummarized =
+ summaryCurrentRoundLargeMessages(currentContextMessages);
+ if (currentRoundLargeSummarized) {
+ log.info("Strategy 5: APPLIED - Summarized and offloaded current round large messages");
+ replaceWorkingMessage(currentContextMessages);
+ return true;
+ } else {
+ log.info("Strategy 5: SKIPPED - No current round large messages found");
+ }
+
+ // Strategy 6: Summarize current round messages
+ log.info("Strategy 6: Checking for current round messages to summarize");
+ boolean currentRoundSummarized = summaryCurrentRoundMessages(currentContextMessages);
+ if (currentRoundSummarized) {
+ log.info("Strategy 6: APPLIED - Summarized current round messages");
+ replaceWorkingMessage(currentContextMessages);
+ return true;
+ } else {
+ log.info("Strategy 6: SKIPPED - No current round messages to summarize");
+ }
+
+ log.warn("All compression strategies exhausted but context still exceeds threshold");
+ return false;
+ }
+
+ private List replaceWorkingMessage(List newMessages) {
+ workingMemoryStorage.clear();
+ for (Msg msg : newMessages) {
+ workingMemoryStorage.add(msg);
+ }
+ return new ArrayList<>(workingMemoryStorage);
+ }
+
+ /**
+ * Records a compression event that occurred during context management.
+ *
+ * @param eventType the type of compression event
+ * @param startIndex the start index of the compressed message range in allMessages
+ * @param endIndex the end index of the compressed message range in allMessages
+ * @param allMessages the complete message list (before compression)
+ * @param compressedMessage the compressed message (null if not a compression type)
+ * @param metadata additional metadata for the event (may contain inputToken, outputToken, etc.)
+ */
+ private void recordCompressionEvent(
+ String eventType,
+ int startIndex,
+ int endIndex,
+ List allMessages,
+ Msg compressedMessage,
+ Map metadata) {
+ int compressedMessageCount = endIndex - startIndex + 1;
+ String previousMessageId = startIndex > 0 ? allMessages.get(startIndex - 1).getId() : null;
+ String nextMessageId =
+ endIndex < allMessages.size() - 1 ? allMessages.get(endIndex + 1).getId() : null;
+ String compressedMessageId = compressedMessage != null ? compressedMessage.getId() : null;
+
+ CompressionEvent event =
+ new CompressionEvent(
+ eventType,
+ System.currentTimeMillis(),
+ compressedMessageCount,
+ previousMessageId,
+ nextMessageId,
+ compressedMessageId,
+ metadata != null ? new HashMap<>(metadata) : new HashMap<>());
+
+ compressionEvents.add(event);
+ }
+
+ /**
+ * Summarize current round of conversation messages.
+ *
+ * This method is called when historical messages have been compressed and offloaded,
+ * but the context still exceeds the limit. This indicates that the current round's content
+ * is too large and needs compression.
+ *
+ *
Strategy:
+ * 1. Find the latest user message
+ * 2. Merge and compress all messages after it (typically tool calls and tool results,
+ * usually no assistant message yet)
+ * 3. Preserve tool call interfaces (name, parameters)
+ * 4. Compress tool results, merging multiple results and keeping key information
+ *
+ * @param rawMessages the list of messages to process
+ * @return true if summary was actually performed, false otherwise
+ */
+ private boolean summaryCurrentRoundMessages(List rawMessages) {
+ if (rawMessages == null || rawMessages.isEmpty()) {
+ return false;
+ }
+
+ // Step 1: Find the latest user message
+ int latestUserIndex = -1;
+ for (int i = rawMessages.size() - 1; i >= 0; i--) {
+ Msg msg = rawMessages.get(i);
+ if (MsgUtils.isRealUserMessage(msg)) {
+ latestUserIndex = i;
+ break;
+ }
+ }
+
+ // If no user message found, nothing to summarize
+ if (latestUserIndex < 0) {
+ return false;
+ }
+
+ // Step 2: Check if there are messages after the user message
+ if (latestUserIndex >= rawMessages.size() - 1) {
+ return false;
+ }
+
+ // Step 3: Extract messages after the latest user message
+ int startIndex = latestUserIndex + 1;
+ int endIndex = rawMessages.size() - 1;
+
+ // Ensure tool use and tool result are paired: if the last message is ToolUse,
+ // move endIndex back by one to exclude the incomplete tool invocation
+ if (endIndex >= startIndex) {
+ Msg lastMsg = rawMessages.get(endIndex);
+ if (MsgUtils.isToolUseMessage(lastMsg)) {
+ endIndex--;
+ // If no messages left after adjustment, cannot compress
+ if (endIndex < startIndex) {
+ return false;
+ }
+ }
+ }
+
+ List messagesToCompress = new ArrayList<>();
+ for (int i = startIndex; i <= endIndex; i++) {
+ messagesToCompress.add(rawMessages.get(i));
+ }
+
+ log.info(
+ "Compressing current round messages: userIndex={}, messageCount={}",
+ latestUserIndex,
+ messagesToCompress.size());
+
+ // Step 4: Merge and compress messages (typically tool calls and results)
+ Msg compressedMsg = mergeAndCompressCurrentRoundMessages(messagesToCompress);
+
+ // Build metadata for compression event
+ Map metadata = new HashMap<>();
+ if (compressedMsg.getChatUsage() != null) {
+ metadata.put("inputToken", compressedMsg.getChatUsage().getInputTokens());
+ metadata.put("outputToken", compressedMsg.getChatUsage().getOutputTokens());
+ metadata.put("time", compressedMsg.getChatUsage().getTime());
+ }
+
+ // Record compression event (before replacing messages to preserve indices)
+ recordCompressionEvent(
+ CompressionEvent.CURRENT_ROUND_MESSAGE_COMPRESS,
+ startIndex,
+ endIndex,
+ rawMessages,
+ compressedMsg,
+ metadata);
+
+ // Step 5: Replace original messages with compressed one
+ rawMessages.subList(startIndex, endIndex + 1).clear();
+ rawMessages.add(startIndex, compressedMsg);
+
+ log.info(
+ "Replaced {} messages with 1 compressed message at index {}",
+ messagesToCompress.size(),
+ startIndex);
+ return true;
+ }
+
+ /**
+ * Summarize large messages in the current round that exceed the threshold.
+ *
+ * This method is called to compress large messages in the current round (messages after
+ * the latest user message) that exceed the largePayloadThreshold. Unlike simple offloading
+ * which only provides a preview, this method uses LLM to generate intelligent summaries
+ * while preserving critical information.
+ *
+ * <p>Strategy:
+ * 1. Find the latest user message
+ * 2. Check messages after it for content exceeding largePayloadThreshold
+ * 3. For each large message, generate an LLM summary and offload the original
+ * 4. Replace large messages with summarized versions
+ *
+ * @param rawMessages the list of messages to process
+ * @return true if any messages were summarized and offloaded, false otherwise
+ */
+ // NOTE(review): generic type arguments (e.g. List<Msg>, Map<String, Object>) appear to
+ // have been stripped from this patch during extraction — verify against the original source.
+ private boolean summaryCurrentRoundLargeMessages(List rawMessages) {
+ if (rawMessages == null || rawMessages.isEmpty()) {
+ return false;
+ }
+
+ // Step 1: Find the latest user message
+ int latestUserIndex = -1;
+ for (int i = rawMessages.size() - 1; i >= 0; i--) {
+ Msg msg = rawMessages.get(i);
+ if (MsgUtils.isRealUserMessage(msg)) {
+ latestUserIndex = i;
+ break;
+ }
+ }
+
+ // If no user message found, nothing to process
+ if (latestUserIndex < 0) {
+ return false;
+ }
+
+ // Step 2: Check if there are messages after the user message
+ if (latestUserIndex >= rawMessages.size() - 1) {
+ return false;
+ }
+
+ // Step 3: Process messages after the latest user message
+ // Process in reverse order to avoid index shifting issues when replacing
+ boolean hasSummarized = false;
+ long threshold = autoContextConfig.largePayloadThreshold;
+
+ for (int i = rawMessages.size() - 1; i > latestUserIndex; i--) {
+ Msg msg = rawMessages.get(i);
+
+ // Skip already compressed messages to avoid double compression
+ if (MsgUtils.isCompressedMessage(msg)) {
+ log.debug(
+ "Skipping already compressed message at index {} to avoid double"
+ + " compression",
+ i);
+ continue;
+ }
+
+ String textContent = msg.getTextContent();
+
+ // Check if message content exceeds threshold (measured in characters, not tokens)
+ if (textContent == null || textContent.length() <= threshold) {
+ continue;
+ }
+
+ // Step 4: Offload the original message
+ String uuid = UUID.randomUUID().toString();
+ List offloadMsg = new ArrayList<>();
+ offloadMsg.add(msg);
+ offload(uuid, offloadMsg);
+ log.info(
+ "Offloaded current round large message: index={}, size={} chars, uuid={}",
+ i,
+ textContent.length(),
+ uuid);
+
+ // Step 5: Generate summary using LLM (synchronous — blocks until complete)
+ Msg summaryMsg = generateLargeMessageSummary(msg, uuid);
+
+ // Build metadata for compression event
+ Map metadata = new HashMap<>();
+ if (summaryMsg.getChatUsage() != null) {
+ metadata.put("inputToken", summaryMsg.getChatUsage().getInputTokens());
+ metadata.put("outputToken", summaryMsg.getChatUsage().getOutputTokens());
+ metadata.put("time", summaryMsg.getChatUsage().getTime());
+ }
+
+ // Record compression event
+ recordCompressionEvent(
+ CompressionEvent.CURRENT_ROUND_LARGE_MESSAGE_SUMMARY,
+ i,
+ i,
+ rawMessages,
+ summaryMsg,
+ metadata);
+
+ // Step 6: Replace the original message with summary (in place, so earlier indices
+ // in this reverse loop remain valid)
+ rawMessages.set(i, summaryMsg);
+ hasSummarized = true;
+
+ log.info(
+ "Replaced large message at index {} with summarized version (uuid: {})",
+ i,
+ uuid);
+ }
+
+ return hasSummarized;
+ }
+
+ /**
+ * Generate a summary of a large message using the model.
+ *
+ * <p>Sends the compression prompt, the original message, and an end-of-list marker to the
+ * model, then blocks until the streamed response is fully assembled. The returned message
+ * preserves the original role and name, and appends an offload hint tag referencing the
+ * given UUID so the full original content can be reloaded later.
+ *
+ * @param message the message to summarize
+ * @param offloadUuid the UUID of offloaded message
+ * @return a summary message preserving the original role and name
+ */
+ private Msg generateLargeMessageSummary(Msg message, String offloadUuid) {
+ GenerateOptions options = GenerateOptions.builder().build();
+ ReasoningContext context = new ReasoningContext("large_message_summary");
+
+ String offloadHint =
+ offloadUuid != null
+ ? String.format(Prompts.CONTEXT_OFFLOAD_TAG_FORMAT, offloadUuid)
+ : "";
+
+ List newMessages = new ArrayList<>();
+ newMessages.add(
+ Msg.builder()
+ .role(MsgRole.USER)
+ .name("user")
+ .content(
+ TextBlock.builder()
+ .text(
+ PromptProvider.getCurrentRoundLargeMessagePrompt(
+ customPrompt))
+ .build())
+ .build());
+ newMessages.add(message);
+ newMessages.add(
+ Msg.builder()
+ .role(MsgRole.USER)
+ .name("user")
+ .content(
+ TextBlock.builder()
+ .text(Prompts.COMPRESSION_MESSAGE_LIST_END)
+ .build())
+ .build());
+ // Insert plan-aware hint message at the end to leverage recency effect
+ addPlanAwareHintIfNeeded(newMessages);
+
+ // Stream the model response and block for the final assembled message; interruption
+ // is re-propagated as an error rather than swallowed.
+ Msg block =
+ model.stream(newMessages, null, options)
+ .concatMap(chunk -> processChunk(chunk, context))
+ .then(Mono.defer(() -> Mono.just(context.buildFinalMessage())))
+ .onErrorResume(InterruptedException.class, Mono::error)
+ .block();
+
+ if (block != null && block.getChatUsage() != null) {
+ log.info(
+ "Large message summary completed, input tokens: {}, output tokens: {}",
+ block.getChatUsage().getInputTokens(),
+ block.getChatUsage().getOutputTokens());
+ }
+
+ // Build metadata with compression information
+ Map compressMeta = new HashMap<>();
+ if (offloadUuid != null) {
+ compressMeta.put("offloaduuid", offloadUuid);
+ }
+
+ Map metadata = new HashMap<>();
+ metadata.put("_compress_meta", compressMeta);
+
+ // Preserve _chat_usage from the block if available
+ if (block != null && block.getChatUsage() != null) {
+ metadata.put(MessageMetadataKeys.CHAT_USAGE, block.getChatUsage());
+ }
+
+ // Create summary message preserving original role and name; falls back to an empty
+ // summary if the model produced no final message (block == null)
+ String summaryContent = block != null ? block.getTextContent() : "";
+ String finalContent = summaryContent;
+ if (!offloadHint.isEmpty()) {
+ finalContent = summaryContent + "\n" + offloadHint;
+ }
+
+ return Msg.builder()
+ .role(message.getRole())
+ .name(message.getName())
+ .content(TextBlock.builder().text(finalContent).build())
+ .metadata(metadata)
+ .build();
+ }
+
+ /**
+ * Merge and compress current round messages (typically tool calls and tool results).
+ *
+ * <p>The original messages are first offloaded under a fresh UUID (as a snapshot copy),
+ * then an LLM-generated compressed summary referencing that UUID is produced.
+ *
+ * @param messages the messages to merge and compress
+ * @return compressed message, or {@code null} if {@code messages} is null or empty —
+ *     callers must handle the null case
+ */
+ private Msg mergeAndCompressCurrentRoundMessages(List messages) {
+ if (messages == null || messages.isEmpty()) {
+ return null;
+ }
+
+ // Offload original messages (defensive copy so later mutation of the live list
+ // does not alter the offloaded snapshot)
+ String uuid = UUID.randomUUID().toString();
+ List originalMessages = new ArrayList<>(messages);
+ offload(uuid, originalMessages);
+
+ // Use model to generate a compressed summary from message list
+ return generateCurrentRoundSummaryFromMessages(messages, uuid);
+ }
+
+ /**
+ * Store messages in the in-memory offload context under the given UUID.
+ *
+ * @param uuid key used later by {@code reload(String)} to retrieve the messages
+ * @param messages messages to offload (stored by reference, not copied)
+ */
+ @Override
+ public void offload(String uuid, List messages) {
+ offloadContext.put(uuid, messages);
+ }
+
+ /**
+ * Retrieve previously offloaded messages by UUID.
+ *
+ * @param uuid the offload key
+ * @return the offloaded messages, or a new empty list if nothing is stored under the UUID
+ */
+ @Override
+ public List reload(String uuid) {
+ List messages = offloadContext.get(uuid);
+ return messages != null ? messages : new ArrayList<>();
+ }
+
+ /**
+ * Remove the offloaded messages stored under the given UUID, if any.
+ *
+ * @param uuid the offload key to clear
+ */
+ @Override
+ public void clear(String uuid) {
+ offloadContext.remove(uuid);
+ }
+
+ /**
+ * Generate a compressed summary of current round messages using the model.
+ *
+ * <p>Plan-related tool calls are filtered out first, a target character count is derived
+ * from the configured compression ratio, and the model is prompted with the filtered
+ * messages plus an explicit character-count requirement. The call blocks until the
+ * streamed response is complete.
+ *
+ * @param messages the messages to summarize
+ * @param offloadUuid the UUID of offloaded content (if any)
+ * @return compressed message (never null; content may be empty if the model returned nothing)
+ */
+ private Msg generateCurrentRoundSummaryFromMessages(List messages, String offloadUuid) {
+ GenerateOptions options = GenerateOptions.builder().build();
+ ReasoningContext context = new ReasoningContext("current_round_compress");
+
+ // Filter out plan-related tool calls before compression
+ List filteredMessages = MsgUtils.filterPlanRelatedToolCalls(messages);
+ if (filteredMessages.size() < messages.size()) {
+ log.info(
+ "Filtered out {} plan-related tool call messages from current round"
+ + " compression",
+ messages.size() - filteredMessages.size());
+ }
+
+ // Calculate original character count (including TextBlock, ToolUseBlock, ToolResultBlock)
+ // Use filtered messages for character count calculation
+ int originalCharCount = MsgUtils.calculateMessagesCharCount(filteredMessages);
+
+ // Get compression ratio and calculate target character count
+ double compressionRatio = autoContextConfig.getCurrentRoundCompressionRatio();
+ int compressionRatioPercent = (int) Math.round(compressionRatio * 100);
+ int targetCharCount = (int) Math.round(originalCharCount * compressionRatio);
+
+ String offloadHint =
+ offloadUuid != null
+ ? String.format(Prompts.CONTEXT_OFFLOAD_TAG_FORMAT, offloadUuid)
+ : "";
+
+ // Build character count requirement message. The percent is passed twice, cast to
+ // double to satisfy the format string's floating-point placeholders (assumed %f-style
+ // — confirm against Prompts.CURRENT_ROUND_MESSAGE_COMPRESS_CHAR_REQUIREMENT).
+ String charRequirement =
+ String.format(
+ Prompts.CURRENT_ROUND_MESSAGE_COMPRESS_CHAR_REQUIREMENT,
+ originalCharCount,
+ targetCharCount,
+ (double) compressionRatioPercent,
+ (double) compressionRatioPercent);
+
+ List newMessages = new ArrayList<>();
+ // First message: main compression prompt (without character count requirement)
+ newMessages.add(
+ Msg.builder()
+ .role(MsgRole.USER)
+ .name("user")
+ .content(
+ TextBlock.builder()
+ .text(
+ PromptProvider.getCurrentRoundCompressPrompt(
+ customPrompt))
+ .build())
+ .build());
+ newMessages.addAll(filteredMessages);
+ // Message list end marker
+ newMessages.add(
+ Msg.builder()
+ .role(MsgRole.USER)
+ .name("user")
+ .content(
+ TextBlock.builder()
+ .text(Prompts.COMPRESSION_MESSAGE_LIST_END)
+ .build())
+ .build());
+ // Character count requirement (placed after message list end)
+ newMessages.add(
+ Msg.builder()
+ .role(MsgRole.USER)
+ .name("user")
+ .content(TextBlock.builder().text(charRequirement).build())
+ .build());
+ // Insert plan-aware hint message at the end to leverage recency effect
+ addPlanAwareHintIfNeeded(newMessages);
+
+ // Stream the model response and block for the final assembled message
+ Msg block =
+ model.stream(newMessages, null, options)
+ .concatMap(chunk -> processChunk(chunk, context))
+ .then(Mono.defer(() -> Mono.just(context.buildFinalMessage())))
+ .onErrorResume(InterruptedException.class, Mono::error)
+ .block();
+
+ // Extract token usage information
+ int inputTokens = 0;
+ int outputTokens = 0;
+ if (block != null && block.getChatUsage() != null) {
+ inputTokens = block.getChatUsage().getInputTokens();
+ outputTokens = block.getChatUsage().getOutputTokens();
+ }
+
+ // Calculate actual output character count (including all content blocks)
+ int actualCharCount = block != null ? MsgUtils.calculateMessageCharCount(block) : 0;
+
+ log.info(
+ "Current round summary completed - original: {} chars, target: {} chars ({}%),"
+ + " actual: {} chars, input tokens: {}, output tokens: {}",
+ originalCharCount,
+ targetCharCount,
+ compressionRatioPercent,
+ actualCharCount,
+ inputTokens,
+ outputTokens);
+
+ // Build metadata with compression information
+ Map compressMeta = new HashMap<>();
+ if (offloadUuid != null) {
+ compressMeta.put("offloaduuid", offloadUuid);
+ }
+ // Mark this as a compressed current round message to avoid being treated as a real
+ // assistant response
+ compressMeta.put("compressed_current_round", true);
+ Map metadata = new HashMap<>();
+ metadata.put("_compress_meta", compressMeta);
+ if (block != null && block.getChatUsage() != null) {
+ metadata.put(MessageMetadataKeys.CHAT_USAGE, block.getChatUsage());
+ }
+
+ return createCompressedCurrentRoundSummaryMessage(block, offloadHint, metadata);
+ }
+
+ /**
+ * Create the synthetic message used to represent a compressed current-round tool/result
+ * sequence.
+ *
+ * <p>This summary must preserve a non-assistant trailing turn so the next reasoning request
+ * still looks like "user -> synthetic summary -> assistant" instead of appearing to end with a
+ * completed assistant response.
+ *
+ * @param summaryBlock the model-produced summary; may be null, in which case the content
+ *     falls back to just the offload hint
+ * @param offloadHint hint text (possibly empty) referencing the offloaded original messages
+ * @param metadata metadata to attach to the synthetic message
+ * @return a USER-role message carrying the summary text and metadata
+ */
+ private Msg createCompressedCurrentRoundSummaryMessage(
+ Msg summaryBlock, String offloadHint, Map metadata) {
+ return Msg.builder()
+ .role(MsgRole.USER)
+ .name("user")
+ .content(
+ TextBlock.builder()
+ .text(
+ (summaryBlock != null ? summaryBlock.getTextContent() : "")
+ + offloadHint)
+ .build())
+ .metadata(metadata)
+ .build();
+ }
+
+ /**
+ * Compress a consecutive range of tool invocation messages into a single summary message.
+ *
+ * <p>Compression is skipped when the original token count is below the configured minimum
+ * threshold, to avoid LLM compression overhead for little gain. On success the original
+ * messages are offloaded and replaced in place by the summary.
+ *
+ * @param rawMessages the list of messages to process (mutated in place on success)
+ * @param toolMsgIndices the pair of start and end indices (inclusive)
+ * @return true if summary was actually performed, false otherwise
+ */
+ private boolean summaryToolsMessages(
+ List rawMessages, Pair<Integer, Integer> toolMsgIndices) {
+ int startIndex = toolMsgIndices.first();
+ int endIndex = toolMsgIndices.second();
+ int toolMsgCount = endIndex - startIndex + 1;
+ log.info(
+ "Compressing tool invocations: indices [{}, {}], count: {}",
+ startIndex,
+ endIndex,
+ toolMsgCount);
+
+ List toolsMsg = new ArrayList<>();
+ for (int i = startIndex; i <= endIndex; i++) {
+ toolsMsg.add(rawMessages.get(i));
+ }
+
+ // Check if original token count is sufficient for compression
+ // Skip compression if tokens are below threshold to avoid compression overhead
+ int originalTokens = TokenCounterUtil.calculateToken(toolsMsg);
+ int threshold = autoContextConfig.getMinCompressionTokenThreshold();
+ if (originalTokens < threshold) {
+ log.info(
+ "Skipping tool invocation compression: original tokens ({}) is below threshold"
+ + " ({})",
+ originalTokens,
+ threshold);
+ return false;
+ }
+
+ log.info(
+ "Proceeding with tool invocation compression: original tokens: {}, threshold: {}",
+ originalTokens,
+ threshold);
+
+ // Normal compression flow for non-plan tools
+ String uuid = UUID.randomUUID().toString();
+ offload(uuid, toolsMsg);
+
+ Msg toolsSummary = compressToolsInvocation(toolsMsg, uuid);
+
+ // Build metadata for compression event
+ Map metadata = new HashMap<>();
+ if (toolsSummary.getChatUsage() != null) {
+ metadata.put("inputToken", toolsSummary.getChatUsage().getInputTokens());
+ metadata.put("outputToken", toolsSummary.getChatUsage().getOutputTokens());
+ metadata.put("time", toolsSummary.getChatUsage().getTime());
+ }
+
+ // Record compression event
+ recordCompressionEvent(
+ CompressionEvent.TOOL_INVOCATION_COMPRESS,
+ startIndex,
+ endIndex,
+ rawMessages,
+ toolsSummary,
+ metadata);
+
+ MsgUtils.replaceMsg(rawMessages, startIndex, endIndex, toolsSummary);
+
+ return true;
+ }
+
+ /**
+ * Summarize all previous rounds of conversation messages before the latest assistant.
+ *
+ * This method finds the latest assistant message and summarizes all conversation rounds
+ * before it. Each round consists of messages between a user message and its corresponding
+ * assistant message (typically including tool calls/results and the assistant message itself).
+ *
+ * <p>Example transformation:
+ * Before: "user1-tools-assistant1, user2-tools-assistant2, user3-tools-assistant3, user4"
+ * After: "user1-summary, user2-summary, user3-summary, user4"
+ * Where each summary contains the compressed information from tools and assistant of that round.
+ *
+ * <p>Strategy:
+ * 1. Find the latest assistant message (this is the current round, not to be summarized)
+ * 2. From the beginning, find all user-assistant pairs before the latest assistant
+ * 3. For each pair, summarize messages between user and assistant (including assistant message)
+ * 4. Replace those messages (including assistant) with summary (process from back to front to avoid index shifting)
+ *
+ * @param rawMessages the list of messages to process
+ * @return true if summary was actually performed, false otherwise
+ */
+ private boolean summaryPreviousRoundMessages(List rawMessages) {
+ if (rawMessages == null || rawMessages.isEmpty()) {
+ return false;
+ }
+
+ // Step 1: Find the latest assistant message that is a final response (not a tool call)
+ int latestAssistantIndex = -1;
+ for (int i = rawMessages.size() - 1; i >= 0; i--) {
+ Msg msg = rawMessages.get(i);
+ if (MsgUtils.isFinalAssistantResponse(msg)) {
+ latestAssistantIndex = i;
+ break;
+ }
+ }
+
+ // If no assistant message found, nothing to summarize
+ if (latestAssistantIndex < 0) {
+ return false;
+ }
+
+ // Step 2: Find all user-assistant pairs before the latest assistant
+ // We'll collect them as pairs: (userIndex, assistantIndex)
+ List<Pair<Integer, Integer>> userAssistantPairs = new ArrayList<>();
+ int currentUserIndex = -1;
+
+ for (int i = 0; i < latestAssistantIndex; i++) {
+ Msg msg = rawMessages.get(i);
+ if (MsgUtils.isRealUserMessage(msg)) {
+ currentUserIndex = i;
+ } else if (MsgUtils.isFinalAssistantResponse(msg) && currentUserIndex >= 0) {
+ // Found a user-assistant pair (assistant message is a final response, not a tool
+ // call). Only record it when at least one message lies between the user and the
+ // assistant — adjacent pairs have nothing to compress.
+ if (i - currentUserIndex != 1) {
+ userAssistantPairs.add(new Pair<>(currentUserIndex, i));
+ }
+
+ currentUserIndex = -1; // Reset to find next pair
+ }
+ }
+
+ // If no pairs found, nothing to summarize
+ if (userAssistantPairs.isEmpty()) {
+ return false;
+ }
+
+ log.info(
+ "Found {} user-assistant pairs to summarize before latest assistant at index {}",
+ userAssistantPairs.size(),
+ latestAssistantIndex);
+
+ // Step 3: Process pairs from back to front to avoid index shifting issues
+ boolean hasSummarized = false;
+ for (int pairIdx = userAssistantPairs.size() - 1; pairIdx >= 0; pairIdx--) {
+ Pair<Integer, Integer> pair = userAssistantPairs.get(pairIdx);
+ int userIndex = pair.first();
+ int assistantIndex = pair.second();
+
+ // Messages to summarize: from user to assistant (inclusive of both)
+ // Include user message for context, but we'll only remove messages after user
+ int startIndex = userIndex + 1; // Messages to remove start after user
+ int endIndex = assistantIndex; // Include assistant message in removal
+
+ // If no messages between user and assistant (including assistant), skip
+ if (startIndex > endIndex) {
+ log.info(
+ "No messages to summarize between user at index {} and assistant at index"
+ + " {}",
+ userIndex,
+ assistantIndex);
+ continue;
+ }
+
+ // Include user message in messagesToSummarize for context, but keep it in the final
+ // list
+ List messagesToSummarize = new ArrayList<>();
+ messagesToSummarize.add(rawMessages.get(userIndex)); // Include user message for context
+ for (int i = startIndex; i <= endIndex; i++) {
+ messagesToSummarize.add(rawMessages.get(i));
+ }
+
+ log.info(
+ "Summarizing round {}: user at index {}, messages [{}, {}], totalCount={}"
+ + " (includes user message for context)",
+ pairIdx + 1,
+ userIndex,
+ startIndex,
+ endIndex,
+ messagesToSummarize.size());
+
+ // Step 4: Check if original token count is sufficient for compression
+ // Skip compression if tokens are below threshold to avoid compression overhead
+ int originalTokens = TokenCounterUtil.calculateToken(messagesToSummarize);
+ int threshold = autoContextConfig.getMinCompressionTokenThreshold();
+ if (originalTokens < threshold) {
+ log.info(
+ "Skipping conversation summary for round {}: original tokens ({}) is below"
+ + " threshold ({})",
+ pairIdx + 1,
+ originalTokens,
+ threshold);
+ continue;
+ }
+
+ log.info(
+ "Proceeding with conversation summary for round {}: original tokens: {},"
+ + " threshold: {}",
+ pairIdx + 1,
+ originalTokens,
+ threshold);
+
+ // Step 5: Offload original messages if contextOffLoader is available
+ String uuid = UUID.randomUUID().toString();
+ offload(uuid, messagesToSummarize);
+ log.info("Offloaded messages to be summarized: uuid={}", uuid);
+
+ // Step 6: Generate summary
+ Msg summaryMsg = summaryPreviousRoundConversation(messagesToSummarize, uuid);
+
+ // Build metadata for compression event
+ Map metadata = new HashMap<>();
+ if (summaryMsg.getChatUsage() != null) {
+ metadata.put("inputToken", summaryMsg.getChatUsage().getInputTokens());
+ metadata.put("outputToken", summaryMsg.getChatUsage().getOutputTokens());
+ metadata.put("time", summaryMsg.getChatUsage().getTime());
+ }
+
+ // Record compression event (before removing messages to preserve indices)
+ recordCompressionEvent(
+ CompressionEvent.PREVIOUS_ROUND_CONVERSATION_SUMMARY,
+ startIndex,
+ endIndex,
+ rawMessages,
+ summaryMsg,
+ metadata);
+
+ // Step 7: Remove the messages between user and assistant (including assistant), then
+ // replace with summary
+ // Since we're processing from back to front, the indices are still accurate
+ // for the current pair (indices of pairs after this one have already been adjusted)
+
+ // Remove messages from startIndex to endIndex (including assistant, from back to front
+ // to avoid index shifting)
+ int removedCount = endIndex - startIndex + 1;
+ rawMessages.subList(startIndex, endIndex + 1).clear();
+
+ // After removal, the position where assistant was is now: assistantIndex - removedCount
+ // + 1
+ // But since we removed everything including assistant, we insert summary at the
+ // position after user
+ int insertIndex = userIndex + 1;
+
+ // Insert summary after user (replacing the removed messages including assistant)
+ rawMessages.add(insertIndex, summaryMsg);
+
+ log.info(
+ "Replaced {} messages [indices {}-{}] with summary at index {}",
+ removedCount,
+ startIndex,
+ endIndex,
+ insertIndex);
+
+ hasSummarized = true;
+ }
+
+ return hasSummarized;
+ }
+
+ /**
+ * Generate a summary of previous round conversation messages using the model.
+ *
+ * <p>Plan-related tool calls are filtered out first; the remaining messages are sent to the
+ * model between a summary prompt and an end-of-list marker. The result is returned as an
+ * ASSISTANT-role message with the context offload tag (containing the UUID) appended, so the
+ * originals can be reloaded later.
+ *
+ * @param messages the messages to summarize
+ * @param offloadUuid the UUID of offloaded messages (if any), null otherwise
+ * @return a summary message
+ */
+ private Msg summaryPreviousRoundConversation(List messages, String offloadUuid) {
+ // Filter out plan-related tool calls (user messages are preserved by
+ // filterPlanRelatedToolCalls)
+ List filteredMessages = MsgUtils.filterPlanRelatedToolCalls(messages);
+ if (filteredMessages.size() < messages.size()) {
+ log.info(
+ "Filtered out {} plan-related tool call messages from previous round"
+ + " conversation summary",
+ messages.size() - filteredMessages.size());
+ }
+
+ GenerateOptions options = GenerateOptions.builder().build();
+ ReasoningContext context = new ReasoningContext("conversation_summary");
+
+ List newMessages = new ArrayList<>();
+ newMessages.add(
+ Msg.builder()
+ .role(MsgRole.USER)
+ .name("user")
+ .content(
+ TextBlock.builder()
+ .text(
+ PromptProvider.getPreviousRoundSummaryPrompt(
+ customPrompt))
+ .build())
+ .build());
+ newMessages.addAll(filteredMessages);
+ newMessages.add(
+ Msg.builder()
+ .role(MsgRole.USER)
+ .name("user")
+ .content(
+ TextBlock.builder()
+ .text(Prompts.COMPRESSION_MESSAGE_LIST_END)
+ .build())
+ .build());
+ // Insert plan-aware hint message at the end to leverage recency effect
+ addPlanAwareHintIfNeeded(newMessages);
+
+ // Stream the model response and block for the final assembled message
+ Msg block =
+ model.stream(newMessages, null, options)
+ .concatMap(chunk -> processChunk(chunk, context))
+ .then(Mono.defer(() -> Mono.just(context.buildFinalMessage())))
+ .onErrorResume(InterruptedException.class, Mono::error)
+ .block();
+
+ // Extract token usage information
+ int inputTokens = 0;
+ int outputTokens = 0;
+ if (block != null && block.getChatUsage() != null) {
+ inputTokens = block.getChatUsage().getInputTokens();
+ outputTokens = block.getChatUsage().getOutputTokens();
+ log.info(
+ "Conversation summary completed, input tokens: {}, output tokens: {}",
+ inputTokens,
+ outputTokens);
+ }
+
+ // Build metadata with compression information
+ Map compressMeta = new HashMap<>();
+ if (offloadUuid != null) {
+ compressMeta.put("offloaduuid", offloadUuid);
+ }
+
+ Map metadata = new HashMap<>();
+ metadata.put("_compress_meta", compressMeta);
+
+ // Preserve _chat_usage from the block if available
+ if (block != null && block.getChatUsage() != null) {
+ metadata.put(MessageMetadataKeys.CHAT_USAGE, block.getChatUsage());
+ }
+
+ // Build the final message content:
+ // 1. LLM generated summary (contains ASSISTANT summary + tool compression)
+ // 2. Context offload tag with UUID at the end
+ String summaryContent = block != null ? block.getTextContent() : "";
+ String offloadTag =
+ offloadUuid != null
+ ? String.format(Prompts.CONTEXT_OFFLOAD_TAG_FORMAT, offloadUuid)
+ : "";
+
+ // Combine: summary content + newline + UUID tag
+ String finalContent = summaryContent;
+ if (!offloadTag.isEmpty()) {
+ finalContent = finalContent + "\n" + offloadTag;
+ }
+
+ return Msg.builder()
+ .role(MsgRole.ASSISTANT)
+ .name("assistant")
+ .content(TextBlock.builder().text(finalContent).build())
+ .metadata(metadata)
+ .build();
+ }
+
+ /**
+ * Offload large payload messages that exceed the threshold.
+ *
+ * This method finds messages before the latest assistant response that exceed
+ * the largePayloadThreshold, offloads them to storage, and replaces them with
+ * a preview of the first {@code offloadSinglePreview} characters plus a hint to
+ * reload the full content if needed. No LLM call is made — this is pure
+ * offload-and-truncate.
+ *
+ * @param rawMessages the list of messages to process
+ * @param lastKeep when true, additionally protect the most recent {@code lastKeep}
+ *     messages (per autoContextConfig) from offloading and record events with the
+ *     WITH_PROTECTION event type
+ * @return true if any messages were offloaded, false otherwise
+ */
+ private boolean offloadingLargePayload(List rawMessages, boolean lastKeep) {
+ if (rawMessages == null || rawMessages.isEmpty()) {
+ return false;
+ }
+
+ // Strategy 1: If rawMessages has less than lastKeep messages, skip
+ if (rawMessages.size() < autoContextConfig.getLastKeep()) {
+ return false;
+ }
+
+ // Strategy 2: Find the latest assistant message that is a final response and protect it and
+ // all messages after it
+ int latestAssistantIndex = -1;
+ for (int i = rawMessages.size() - 1; i >= 0; i--) {
+ Msg msg = rawMessages.get(i);
+ if (MsgUtils.isFinalAssistantResponse(msg)) {
+ latestAssistantIndex = i;
+ break;
+ }
+ }
+
+ // Determine the search end index based on lastKeep parameter
+ int searchEndIndex;
+ if (lastKeep) {
+ // If lastKeep is true, protect the last N messages
+ int lastKeepCount = autoContextConfig.getLastKeep();
+ int protectedStartIndex = Math.max(0, rawMessages.size() - lastKeepCount);
+
+ if (latestAssistantIndex >= 0) {
+ // Protect both the latest assistant and the last N messages
+ // Use the earlier index to ensure both are protected
+ searchEndIndex = Math.min(latestAssistantIndex, protectedStartIndex);
+ } else {
+ // No assistant found, protect the last N messages
+ searchEndIndex = protectedStartIndex;
+ }
+ } else {
+ // If lastKeep is false, only protect up to the latest assistant (if found)
+ searchEndIndex = (latestAssistantIndex >= 0) ? latestAssistantIndex : 0;
+ }
+
+ boolean hasOffloaded = false;
+ long threshold = autoContextConfig.largePayloadThreshold;
+
+ // Process messages from the beginning up to the search end index
+ // Process in reverse order to avoid index shifting issues when replacing
+ for (int i = searchEndIndex - 1; i >= 0; i--) {
+ Msg msg = rawMessages.get(i);
+ String textContent = msg.getTextContent();
+
+ // ASSISTANT messages with ToolUseBlock (tool_calls) must NOT be offloaded as a plain
+ // text stub. Doing so strips the ToolUseBlock, leaving the subsequent TOOL result
+ // messages without a preceding tool_calls assistant message, which violates the API
+ // constraint: "messages with role 'tool' must be a response to a preceding message
+ // with 'tool_calls'". These pairs are handled exclusively by Strategy 1.
+ if (MsgUtils.isToolUseMessage(msg)) {
+ continue;
+ }
+
+ // TOOL result messages can have their output content offloaded, but the
+ // ToolResultBlock structure (id, name) MUST be preserved so that the API formatter
+ // can still emit the correct tool_call_id / name fields. We handle them separately.
+ if (MsgUtils.isToolResultMessage(msg)) {
+ ToolResultBlock originalResult = msg.getFirstContentBlock(ToolResultBlock.class);
+ if (originalResult != null) {
+ // Use the ToolResultBlock output text for size checking, because
+ // Msg.getTextContent() only extracts top-level TextBlocks and returns
+ // empty string for TOOL messages whose content is a ToolResultBlock.
+ String outputText =
+ originalResult.getOutput().stream()
+ .filter(TextBlock.class::isInstance)
+ .map(TextBlock.class::cast)
+ .map(TextBlock::getText)
+ .collect(Collectors.joining("\n"));
+ if (outputText.length() > threshold) {
+ String toolResultUuid = UUID.randomUUID().toString();
+ List offloadMsg = new ArrayList<>();
+ offloadMsg.add(msg);
+ offload(toolResultUuid, offloadMsg);
+ log.info(
+ "Offloaded large tool result message: index={}, size={} chars,"
+ + " uuid={}",
+ i,
+ outputText.length(),
+ toolResultUuid);
+
+ // Truncate the output to a preview of offloadSinglePreview characters
+ String preview =
+ outputText.length() > autoContextConfig.offloadSinglePreview
+ ? outputText.substring(
+ 0, autoContextConfig.offloadSinglePreview)
+ + "..."
+ : outputText;
+ String offloadHint =
+ preview
+ + "\n"
+ + String.format(
+ Prompts.CONTEXT_OFFLOAD_TAG_FORMAT, toolResultUuid);
+
+ // Preserve ToolResultBlock structure (id, name, metadata) so the API
+ // formatter can emit the correct tool_call_id / name, and downstream
+ // consumers retain semantic flags (e.g. agentscope_suspended) after
+ // offloading. Only the output text is replaced with the offload hint.
+ ToolResultBlock compressedResult =
+ ToolResultBlock.of(
+ originalResult.getId(),
+ originalResult.getName(),
+ TextBlock.builder().text(offloadHint).build(),
+ originalResult.getMetadata());
+
+ Map trCompressMeta = new HashMap<>();
+ trCompressMeta.put("offloaduuid", toolResultUuid);
+ Map trMetadata = new HashMap<>();
+ trMetadata.put("_compress_meta", trCompressMeta);
+
+ Msg replacementToolMsg =
+ Msg.builder()
+ .role(msg.getRole())
+ .name(msg.getName())
+ .content(compressedResult)
+ .metadata(trMetadata)
+ .build();
+
+ // Token counts before/after — offload uses no LLM, so these stand in for
+ // input/output tokens in the event metadata
+ int tokenBefore = TokenCounterUtil.calculateToken(List.of(msg));
+ int tokenAfter =
+ TokenCounterUtil.calculateToken(List.of(replacementToolMsg));
+ Map trEventMetadata = new HashMap<>();
+ trEventMetadata.put("inputToken", tokenBefore);
+ trEventMetadata.put("outputToken", tokenAfter);
+ trEventMetadata.put("time", 0.0);
+
+ String eventType =
+ lastKeep
+ ? CompressionEvent.LARGE_MESSAGE_OFFLOAD_WITH_PROTECTION
+ : CompressionEvent.LARGE_MESSAGE_OFFLOAD;
+ recordCompressionEvent(eventType, i, i, rawMessages, null, trEventMetadata);
+
+ rawMessages.set(i, replacementToolMsg);
+ hasOffloaded = true;
+ }
+ }
+ continue;
+ }
+
+ String uuid = null;
+ // Check if message content exceeds threshold
+ if (textContent != null && textContent.length() > threshold) {
+ // Offload the original message
+ uuid = UUID.randomUUID().toString();
+ List offloadMsg = new ArrayList<>();
+ offloadMsg.add(msg);
+ offload(uuid, offloadMsg);
+ log.info(
+ "Offloaded large message: index={}, size={} chars, uuid={}",
+ i,
+ textContent.length(),
+ uuid);
+ }
+ if (uuid == null) {
+ continue;
+ }
+
+ // Create replacement message with first autoContextConfig.offloadSinglePreview
+ // characters and offload hint
+ String preview =
+ textContent.length() > autoContextConfig.offloadSinglePreview
+ ? textContent.substring(0, autoContextConfig.offloadSinglePreview)
+ + "..."
+ : textContent;
+
+ String offloadHint =
+ preview + "\n" + String.format(Prompts.CONTEXT_OFFLOAD_TAG_FORMAT, uuid);
+
+ // Build metadata with compression information
+ // Note: This method only offloads without LLM compression, so tokens are 0
+ Map compressMeta = new HashMap<>();
+ compressMeta.put("offloaduuid", uuid);
+
+ Map metadata = new HashMap<>();
+ metadata.put("_compress_meta", compressMeta);
+
+ // Create replacement message preserving original role and name
+ Msg replacementMsg =
+ Msg.builder()
+ .role(msg.getRole())
+ .name(msg.getName())
+ .content(TextBlock.builder().text(offloadHint).build())
+ .metadata(metadata)
+ .build();
+
+ // Calculate token counts before and after offload
+ int tokenBefore = TokenCounterUtil.calculateToken(List.of(msg));
+ int tokenAfter = TokenCounterUtil.calculateToken(List.of(replacementMsg));
+
+ // Build metadata for compression event (offload doesn't use LLM, so no compression
+ // tokens)
+ Map eventMetadata = new HashMap<>();
+ eventMetadata.put("inputToken", tokenBefore);
+ eventMetadata.put("outputToken", tokenAfter);
+ eventMetadata.put("time", 0.0);
+
+ // Record compression event (offload doesn't use LLM, so compressedMessage is null)
+ String eventType =
+ lastKeep
+ ? CompressionEvent.LARGE_MESSAGE_OFFLOAD_WITH_PROTECTION
+ : CompressionEvent.LARGE_MESSAGE_OFFLOAD;
+ recordCompressionEvent(eventType, i, i, rawMessages, null, eventMetadata);
+
+ // Replace the original message
+ rawMessages.set(i, replacementMsg);
+ hasOffloaded = true;
+ }
+
+ return hasOffloaded;
+ }
+
+ /**
+ * Delete the message at the given index from working memory storage.
+ * Out-of-range indices are ignored silently.
+ *
+ * @param index zero-based index of the message to delete
+ */
+ @Override
+ public void deleteMessage(int index) {
+ if (index >= 0 && index < workingMemoryStorage.size()) {
+ workingMemoryStorage.remove(index);
+ }
+ }
+
+    /**
+     * Extract tool messages from raw messages for compression.
+     *
+     * <p>This method finds consecutive tool invocation messages in historical conversations
+     * that can be compressed. It searches, using a cursor-based {@code searchStartIndex},
+     * for sequences of more than a minimum number of consecutive tool messages that appear
+     * before the latest assistant message that should be preserved.
+     *
+     * <p>Strategy:
+     * 1. If {@code rawMessages} has less than {@code lastKeep} messages, return {@code null}.
+     * 2. Identify the latest assistant message and treat it and all messages after it as
+     *    protected content that will not be compressed.
+     * 3. Starting from {@code searchStartIndex}, search for the oldest range of consecutive
+     *    tool messages (more than {@code minConsecutiveToolMessages} consecutive) that lies
+     *    entirely before the protected region and can be compressed.
+     * 4. If no eligible assistant message or compressible tool-message sequence is found
+     *    in the searchable range, return {@code null}.
+     *
+     * @param rawMessages all raw messages
+     * @param lastKeep number of recent messages to keep uncompressed
+     * @param searchStartIndex the index to start searching from (used as a cursor)
+     * @return Pair containing startIndex and endIndex (inclusive) of compressible tool messages,
+     *     or {@code null} if none found
+     */
+    private Pair<Integer, Integer> extractPrevToolMsgsForCompress(
+            List<Msg> rawMessages, int lastKeep, int searchStartIndex) {
+        if (rawMessages == null || rawMessages.isEmpty()) {
+            return null;
+        }
+
+        int totalSize = rawMessages.size();
+
+        // Step 1: If rawMessages has less than lastKeep messages, there is nothing to compress.
+        if (totalSize < lastKeep) {
+            return null;
+        }
+
+        // Step 2: Find the latest assistant message that is a final response; it and all
+        // messages after it are protected from compression.
+        int latestAssistantIndex = -1;
+        for (int i = totalSize - 1; i >= 0; i--) {
+            Msg msg = rawMessages.get(i);
+            if (MsgUtils.isFinalAssistantResponse(msg)) {
+                latestAssistantIndex = i;
+                break;
+            }
+        }
+        if (latestAssistantIndex == -1) {
+            return null;
+        }
+        // Search boundary (exclusive): only messages before the latest assistant response AND
+        // outside the lastKeep tail window are eligible.
+        int searchEndIndex = Math.min(latestAssistantIndex, (totalSize - lastKeep));
+
+        // Step 3: Scan forward (oldest first) for the first run of consecutive tool messages
+        // longer than minConsecutiveToolMessages.
+        int consecutiveCount = 0;
+        int startIndex = -1;
+        int endIndex = -1;
+        int actualStart = Math.max(0, searchStartIndex);
+        for (int i = actualStart; i < searchEndIndex; i++) {
+            Msg msg = rawMessages.get(i);
+            if (MsgUtils.isToolMessage(msg)) {
+                if (consecutiveCount == 0) {
+                    startIndex = i;
+                }
+                consecutiveCount++;
+            } else {
+                // The run just ended; check whether it is long enough to compress.
+                if (consecutiveCount > autoContextConfig.minConsecutiveToolMessages) {
+                    endIndex = i - 1; // endIndex is inclusive
+                    // Trim the run so it starts at a ToolUse and ends at a ToolResult, keeping
+                    // tool_use / tool_result pairs intact.
+                    int adjustedStart = startIndex;
+                    int adjustedEnd = endIndex;
+
+                    // Advance startIndex past any leading ToolResult messages.
+                    while (adjustedStart <= adjustedEnd
+                            && !MsgUtils.isToolUseMessage(rawMessages.get(adjustedStart))) {
+                        if (MsgUtils.isToolResultMessage(rawMessages.get(adjustedStart))) {
+                            adjustedStart++;
+                        } else {
+                            break; // Invalid sequence, continue searching
+                        }
+                    }
+
+                    // Retreat endIndex past any trailing ToolUse messages.
+                    while (adjustedEnd >= adjustedStart
+                            && !MsgUtils.isToolResultMessage(rawMessages.get(adjustedEnd))) {
+                        if (MsgUtils.isToolUseMessage(rawMessages.get(adjustedEnd))) {
+                            adjustedEnd--;
+                        } else {
+                            break; // Invalid sequence, continue searching
+                        }
+                    }
+
+                    // Accept the trimmed run only if it is still long enough.
+                    if (adjustedStart <= adjustedEnd
+                            && adjustedEnd - adjustedStart + 1
+                                    > autoContextConfig.minConsecutiveToolMessages) {
+                        return new Pair<>(adjustedStart, adjustedEnd);
+                    }
+                }
+                // Reset counter if sequence is broken
+                consecutiveCount = 0;
+                startIndex = -1;
+            }
+        }
+
+        // Handle a run that extends to the end of the searchable range.
+        if (consecutiveCount > autoContextConfig.minConsecutiveToolMessages) {
+            endIndex = searchEndIndex - 1; // endIndex is inclusive
+            // Same trimming as above, but here an invalid sequence aborts the search entirely
+            // because there is nothing after this run left to scan.
+            int adjustedStart = startIndex;
+            int adjustedEnd = endIndex;
+
+            // Advance startIndex past any leading ToolResult messages.
+            while (adjustedStart <= adjustedEnd
+                    && !MsgUtils.isToolUseMessage(rawMessages.get(adjustedStart))) {
+                if (MsgUtils.isToolResultMessage(rawMessages.get(adjustedStart))) {
+                    adjustedStart++;
+                } else {
+                    return null; // Invalid sequence
+                }
+            }
+
+            // Retreat endIndex past any trailing ToolUse messages.
+            while (adjustedEnd >= adjustedStart
+                    && !MsgUtils.isToolResultMessage(rawMessages.get(adjustedEnd))) {
+                if (MsgUtils.isToolUseMessage(rawMessages.get(adjustedEnd))) {
+                    adjustedEnd--;
+                } else {
+                    return null; // Invalid sequence
+                }
+            }
+
+            // Accept the trimmed run only if it is still long enough.
+            if (adjustedStart <= adjustedEnd
+                    && adjustedEnd - adjustedStart + 1
+                            > autoContextConfig.minConsecutiveToolMessages) {
+                return new Pair<>(adjustedStart, adjustedEnd);
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Compresses a list of tool invocation messages using LLM summarization.
+     *
+     * <p>This method uses an LLM model to intelligently compress tool invocation messages,
+     * preserving key information such as tool names, parameters, and important results while
+     * reducing the overall token count. The compression is performed as part of Strategy 1
+     * (compress historical tool invocations) to manage context window limits.
+     *
+     * <p>Process:
+     * <ul>
+     *   <li>Constructs a prompt with the tool invocation messages sandwiched between
+     *       compression instructions</li>
+     *   <li>Sends the prompt to the LLM model for summarization</li>
+     *   <li>Formats the compressed result with optional offload hint (if UUID is provided)</li>
+     *   <li>Returns a new ASSISTANT message containing the compressed summary</li>
+     * </ul>
+     *
+     * <p>Special Handling:
+     * The method handles plan note related tools specially (see {@link #summaryToolsMessages}),
+     * which are simplified without LLM interaction. This method is only called for non-plan
+     * tool invocations.
+     *
+     * <p>Offload Integration:
+     * If an {@code offloadUuid} is provided, the compressed message will include a hint
+     * indicating that the original content can be reloaded using the UUID via
+     * {@link ContextOffloadTool}.
+     *
+     * @param messages the list of tool invocation messages to compress (must not be null or empty)
+     * @param offloadUuid the UUID of the offloaded original messages, or null if not offloaded
+     * @return a new ASSISTANT message containing the compressed tool invocation summary
+     * @throws RuntimeException if LLM processing fails or is interrupted
+     */
+    private Msg compressToolsInvocation(List<Msg> messages, String offloadUuid) {
+
+        // Filter out plan-related tool calls before compression
+        List<Msg> filteredMessages = MsgUtils.filterPlanRelatedToolCalls(messages);
+        if (filteredMessages.size() < messages.size()) {
+            log.info(
+                    "Filtered out {} plan-related tool call messages from tool invocation"
+                            + " compression",
+                    messages.size() - filteredMessages.size());
+        }
+
+        GenerateOptions options = GenerateOptions.builder().build();
+        ReasoningContext context = new ReasoningContext("tool_compress");
+        List<Msg> newMessages = new ArrayList<>();
+        // Leading instruction message telling the model how to compress.
+        newMessages.add(
+                Msg.builder()
+                        .role(MsgRole.USER)
+                        .name("user")
+                        .content(
+                                TextBlock.builder()
+                                        .text(
+                                                PromptProvider.getPreviousRoundToolCompressPrompt(
+                                                        customPrompt))
+                                        .build())
+                        .build());
+        newMessages.addAll(filteredMessages);
+        // Trailing marker that closes the compression scope.
+        newMessages.add(
+                Msg.builder()
+                        .role(MsgRole.USER)
+                        .name("user")
+                        .content(
+                                TextBlock.builder()
+                                        .text(Prompts.COMPRESSION_MESSAGE_LIST_END)
+                                        .build())
+                        .build());
+        // Insert plan-aware hint message at the end to leverage recency effect
+        addPlanAwareHintIfNeeded(newMessages);
+        Msg block =
+                model.stream(newMessages, null, options)
+                        .concatMap(chunk -> processChunk(chunk, context))
+                        .then(Mono.defer(() -> Mono.just(context.buildFinalMessage())))
+                        .onErrorResume(InterruptedException.class, Mono::error)
+                        .block();
+
+        // Extract token usage information for logging.
+        int inputTokens = 0;
+        int outputTokens = 0;
+        if (block != null && block.getChatUsage() != null) {
+            inputTokens = block.getChatUsage().getInputTokens();
+            outputTokens = block.getChatUsage().getOutputTokens();
+            log.info(
+                    "Tool compression completed, input tokens: {}, output tokens: {}",
+                    inputTokens,
+                    outputTokens);
+        }
+
+        // Build metadata with compression information
+        Map<String, Object> compressMeta = new HashMap<>();
+        if (offloadUuid != null) {
+            compressMeta.put("offloaduuid", offloadUuid);
+        }
+
+        Map<String, Object> metadata = new HashMap<>();
+        metadata.put("_compress_meta", compressMeta);
+
+        // Preserve _chat_usage from the block if available
+        if (block != null && block.getChatUsage() != null) {
+            metadata.put(MessageMetadataKeys.CHAT_USAGE, block.getChatUsage());
+        }
+
+        // Build the final message content:
+        // 1. LLM generated compressed tool invocation content
+        // 2. Context offload tag with UUID at the end
+        String compressedContent = block != null ? block.getTextContent() : "";
+        String offloadTag =
+                offloadUuid != null
+                        ? String.format(Prompts.CONTEXT_OFFLOAD_TAG_FORMAT, offloadUuid)
+                        : "";
+
+        // Combine: compressed content + newline + UUID tag
+        String finalContent = compressedContent;
+        if (!offloadTag.isEmpty()) {
+            finalContent = finalContent + "\n" + offloadTag;
+        }
+
+        return Msg.builder()
+                .role(MsgRole.ASSISTANT)
+                .name("assistant")
+                .content(TextBlock.builder().text(finalContent).build())
+                .metadata(metadata)
+                .build();
+    }
+
+    /**
+     * Feeds a single streamed response chunk into the reasoning context.
+     *
+     * <p>The chunk is consumed via {@code context.processChunk} purely as a side effect; the
+     * returned publisher completes empty, so the caller's {@code concatMap} emits nothing and
+     * only observes completion.
+     *
+     * <p>NOTE(review): the raw {@code Mono} return type looks like a stripped generic
+     * parameter (e.g. {@code Mono<Msg>} or {@code Mono<Void>}) — confirm against the original
+     * source before relying on it.
+     */
+    private Mono processChunk(ChatResponse chunk, ReasoningContext context) {
+        return Mono.just(chunk).doOnNext(context::processChunk).then(Mono.empty());
+    }
+
+    /**
+     * Clears both the working memory (compressed view) and the original message history.
+     *
+     * <p>Note: this does not touch {@code offloadContext} or {@code compressionEvents} —
+     * verify whether those should also be reset when memory is fully cleared.
+     */
+    @Override
+    public void clear() {
+        workingMemoryStorage.clear();
+        originalMemoryStorage.clear();
+    }
+
+    /**
+     * Attaches a PlanNotebook instance to enable plan-aware compression.
+     *
+     * <p>This method should be called after the ReActAgent is created and has a PlanNotebook.
+     * When a PlanNotebook is attached, compression operations will automatically include
+     * plan context information to preserve plan-related information during compression.
+     *
+     * <p>This method can be called multiple times to update or replace the PlanNotebook.
+     * Passing null will detach the current PlanNotebook and disable plan-aware compression.
+     *
+     * @param planNotebook the PlanNotebook instance to attach, or null to detach
+     */
+    public void attachPlanNote(PlanNotebook planNotebook) {
+        this.planNotebook = planNotebook;
+        if (planNotebook != null) {
+            log.debug("PlanNotebook attached to AutoContextMemory for plan-aware compression");
+        } else {
+            log.debug("PlanNotebook detached from AutoContextMemory");
+        }
+    }
+
+    /**
+     * Gets the current plan state information for compression context.
+     *
+     * <p>This method generates a generic plan-aware hint message that is fixed to be placed
+     * after the messages that need to be compressed. The content uses "above messages"
+     * terminology to refer to the messages that appear before this hint in the message list.
+     *
+     * @return Plan state information as a formatted string, or null if no plan is active
+     */
+    private String getPlanStateContext() {
+        if (planNotebook == null) {
+            return null;
+        }
+
+        Plan currentPlan = planNotebook.getCurrentPlan();
+        if (currentPlan == null) {
+            return null;
+        }
+
+        // Build simplified plan state information
+        StringBuilder planContext = new StringBuilder();
+
+        // 1. Task overall goal
+        if (currentPlan.getDescription() != null && !currentPlan.getDescription().isEmpty()) {
+            planContext.append("Goal: ").append(currentPlan.getDescription()).append("\n");
+        }
+
+        // 2. Current progress ('var' — element type comes from Plan.getSubtasks())
+        var subtasks = currentPlan.getSubtasks();
+        if (subtasks != null && !subtasks.isEmpty()) {
+            var inProgressTasks =
+                    subtasks.stream()
+                            .filter(st -> st.getState() == SubTaskState.IN_PROGRESS)
+                            .collect(Collectors.toList());
+
+            if (!inProgressTasks.isEmpty()) {
+                planContext.append("Current Progress: ");
+                for (int i = 0; i < inProgressTasks.size(); i++) {
+                    if (i > 0) {
+                        planContext.append(", ");
+                    }
+                    planContext.append(inProgressTasks.get(i).getName());
+                }
+                planContext.append("\n");
+            }
+
+            // Completed/total counters for a one-line progress summary. totalCount is always
+            // positive here (subtasks is non-empty), so the former `if (totalCount > 0)` guard
+            // was redundant and has been dropped.
+            long doneCount =
+                    subtasks.stream().filter(st -> st.getState() == SubTaskState.DONE).count();
+            long totalCount = subtasks.size();
+            planContext.append(
+                    String.format(
+                            "Progress: %d/%d subtasks completed\n", doneCount, totalCount));
+        }
+
+        // 3. Appropriate supplement to task plan context
+        if (currentPlan.getExpectedOutcome() != null
+                && !currentPlan.getExpectedOutcome().isEmpty()) {
+            planContext
+                    .append("Expected Outcome: ")
+                    .append(currentPlan.getExpectedOutcome())
+                    .append("\n");
+        }
+
+        return planContext.toString();
+    }
+
+    /**
+     * Creates a hint message containing plan context information for compression.
+     *
+     * <p>This hint message is placed after the compression scope marker
+     * (COMPRESSION_MESSAGE_LIST_END) at the end of the message list. This placement leverages
+     * the model's attention mechanism (recency effect), ensuring compression guidelines are
+     * fresh in the model's context during generation.
+     *
+     * @return A USER message containing plan context, or null if no plan is active
+     */
+    private Msg createPlanAwareHintMessage() {
+        String planContext = getPlanStateContext();
+        if (planContext == null) {
+            return null;
+        }
+
+        return Msg.builder()
+                .role(MsgRole.USER)
+                .name("user")
+                .content(
+                        TextBlock.builder()
+                                // NOTE(review): the bare "\n" wrappers look like the remains of
+                                // stripped delimiter tags (e.g. <plan_context>…</plan_context>)
+                                // — confirm the original literal before shipping.
+                                .text("\n" + planContext + "\n")
+                                .build())
+                .build();
+    }
+
+    /**
+     * Adds plan-aware hint message to the message list if a plan is active.
+     *
+     * <p>This method creates and adds a plan-aware hint message to the provided message list if
+     * there is an active plan. The hint message is added at the end of the list to leverage the
+     * recency effect of the model's attention mechanism.
+     *
+     * @param newMessages the message list to which the hint message should be added
+     */
+    private void addPlanAwareHintIfNeeded(List<Msg> newMessages) {
+        Msg hintMsg = createPlanAwareHintMessage();
+        if (hintMsg != null) {
+            newMessages.add(hintMsg);
+        }
+    }
+
+    /**
+     * Gets the original memory storage containing complete, uncompressed message history.
+     *
+     * <p>This storage maintains the full conversation history in its original form
+     * (append-only). Unlike {@link #getMessages()} which returns compressed messages from
+     * working memory, this method returns all messages as they were originally added, without
+     * any compression or summarization applied.
+     *
+     * <p>Use cases:
+     * <ul>
+     *   <li>Accessing complete conversation history for analysis or export</li>
+     *   <li>Recovering original messages that have been compressed in working memory</li>
+     *   <li>Auditing or debugging conversation flow</li>
+     * </ul>
+     *
+     * <p>NOTE(review): this returns the live internal list, so callers can mutate the
+     * append-only history — consider wrapping in {@code Collections.unmodifiableList} if no
+     * caller relies on mutation.
+     *
+     * @return a list of all original messages in the order they were added
+     */
+    public List<Msg> getOriginalMemoryMsgs() {
+        return originalMemoryStorage;
+    }
+
+    /**
+     * Gets the user-assistant interaction messages from original memory storage.
+     *
+     * <p>This method filters the original memory storage to return only messages that represent
+     * the actual interaction dialogue between the user and assistant. It includes:
+     * <ul>
+     *   <li>All {@link MsgRole#USER} messages</li>
+     *   <li>Only final {@link MsgRole#ASSISTANT} responses that are sent to the user
+     *       (excludes intermediate tool invocation messages)</li>
+     * </ul>
+     *
+     * <p>This filtered list excludes:
+     * <ul>
+     *   <li>Tool-related messages ({@link MsgRole#TOOL})</li>
+     *   <li>System messages ({@link MsgRole#SYSTEM})</li>
+     *   <li>Intermediate ASSISTANT messages that contain tool calls (not final responses)</li>
+     *   <li>Any other message types</li>
+     * </ul>
+     *
+     * <p>A final assistant response is determined by
+     * {@link MsgUtils#isFinalAssistantResponse(Msg)}, which checks that the message does not
+     * contain {@link ToolUseBlock} or {@link ToolResultBlock}, indicating it is the actual
+     * reply sent to the user rather than an intermediate tool invocation step.
+     *
+     * <p>Use cases:
+     * <ul>
+     *   <li>Extracting clean conversation transcripts for analysis</li>
+     *   <li>Generating conversation summaries without tool call details</li>
+     *   <li>Exporting user-assistant interaction dialogue for documentation</li>
+     *   <li>Training or fine-tuning data preparation</li>
+     * </ul>
+     *
+     * <p>The returned list maintains the original order of messages, preserving the
+     * interaction flow between user and assistant.
+     *
+     * @return a list containing only USER messages and final ASSISTANT responses in
+     *     chronological order
+     */
+    public List<Msg> getInteractionMsgs() {
+        List<Msg> conversations = new ArrayList<>();
+        for (Msg msg : originalMemoryStorage) {
+            if (MsgUtils.isRealUserMessage(msg) || MsgUtils.isFinalAssistantResponse(msg)) {
+                conversations.add(msg);
+            }
+        }
+        return conversations;
+    }
+
+ /**
+ * Gets the offload context map containing offloaded message content.
+ *
+ * This map stores messages that have been offloaded during compression operations.
+ * Each entry uses a UUID as the key and contains a list of messages that were offloaded
+ * together. These messages can be reloaded using {@link #reload(String)} with the
+ * corresponding UUID.
+ *
+ *
Offloading occurs when:
+ *
+ * - Large messages exceed the {@code largePayloadThreshold}
+ * - Tool invocations are compressed (Strategy 1)
+ * - Previous round conversations are summarized (Strategy 4)
+ * - Current round messages are compressed (Strategy 5 & 6)
+ *
+ *
+ * The offloaded content can be accessed via {@link ContextOffloadTool} or by
+ * calling {@link #reload(String)} with the UUID found in compressed message hints.
+ *
+ * @return a map where keys are UUID strings and values are lists of offloaded messages
+ */
+ public Map> getOffloadContext() {
+ return offloadContext;
+ }
+
+    /**
+     * Gets the list of compression events that occurred during context management.
+     *
+     * <p>This list records all compression operations that have been performed, including:
+     * <ul>
+     *   <li>Event type (which compression strategy was used)</li>
+     *   <li>Timestamp when the compression occurred</li>
+     *   <li>Number of messages compressed</li>
+     *   <li>Token counts before and after compression</li>
+     *   <li>Message positioning information (previous and next message IDs)</li>
+     *   <li>Compressed message ID (for compression types)</li>
+     * </ul>
+     *
+     * <p>The events are stored in chronological order and can be used for analysis,
+     * debugging, or monitoring compression effectiveness.
+     *
+     * @return a list of compression events, ordered by timestamp
+     */
+    public List<CompressionEvent> getCompressionEvents() {
+        return compressionEvents;
+    }
+
+ // ==================== StateModule API ====================
+
+    /**
+     * Save memory state to the session.
+     *
+     * <p>Saves working memory and original memory messages to the session storage.
+     *
+     * @param session the session to save state to
+     * @param sessionKey the session identifier
+     */
+    @Override
+    public void saveTo(Session session, SessionKey sessionKey) {
+        // Snapshot copies so later in-memory mutations do not leak into serialized state.
+        session.save(
+                sessionKey,
+                "autoContextMemory_workingMessages",
+                new ArrayList<>(workingMemoryStorage));
+        session.save(
+                sessionKey,
+                "autoContextMemory_originalMessages",
+                new ArrayList<>(originalMemoryStorage));
+
+        // Save offload context (critical for reload functionality).
+        // Persisted only when non-empty, so an existing session entry is NOT overwritten by an
+        // empty map — presumably intentional; verify against loadFrom's merge behavior.
+        if (!offloadContext.isEmpty()) {
+            session.save(
+                    sessionKey,
+                    "autoContextMemory_offloadContext",
+                    new OffloadContextState(new HashMap<>(offloadContext)));
+        }
+
+        // Same non-empty guard for compression events.
+        if (!compressionEvents.isEmpty()) {
+            session.save(
+                    sessionKey,
+                    "autoContextMemory_compressionEvents",
+                    new ArrayList<>(compressionEvents));
+        }
+    }
+
+    /**
+     * Load memory state from the session.
+     *
+     * <p>Loads working memory and original memory messages from the session storage.
+     *
+     * @param session the session to load state from
+     * @param sessionKey the session identifier
+     */
+    @Override
+    public void loadFrom(Session session, SessionKey sessionKey) {
+        // Replace working memory wholesale with the persisted snapshot.
+        List<Msg> loadedWorking =
+                session.getList(sessionKey, "autoContextMemory_workingMessages", Msg.class);
+        workingMemoryStorage.clear();
+        workingMemoryStorage.addAll(loadedWorking);
+
+        // Replace original (uncompressed) history wholesale.
+        List<Msg> loadedOriginal =
+                session.getList(sessionKey, "autoContextMemory_originalMessages", Msg.class);
+        originalMemoryStorage.clear();
+        originalMemoryStorage.addAll(loadedOriginal);
+
+        // Load offload context; only replaced when a persisted state exists (saveTo skips
+        // empty maps), otherwise the current in-memory context is kept.
+        session.get(sessionKey, "autoContextMemory_offloadContext", OffloadContextState.class)
+                .ifPresent(
+                        state -> {
+                            offloadContext.clear();
+                            offloadContext.putAll(state.offloadContext());
+                        });
+
+        // Load compression events. Local renamed from `compressEvents` to `loadedEvents`:
+        // the old name nearly shadowed the `compressionEvents` field, inviting a self-addAll
+        // bug, and the loaded* prefix matches the locals above.
+        List<CompressionEvent> loadedEvents =
+                session.getList(
+                        sessionKey, "autoContextMemory_compressionEvents", CompressionEvent.class);
+        compressionEvents.clear();
+        compressionEvents.addAll(loadedEvents);
+    }
+}
diff --git a/agentscope-extensions/agentscope-extensions-autocontext-memory/src/main/java/io/agentscope/core/memory/autocontext/MsgUtils.java b/agentscope-extensions/agentscope-extensions-autocontext-memory/src/main/java/io/agentscope/core/memory/autocontext/MsgUtils.java
index 7b21c655f..5437c1427 100644
--- a/agentscope-extensions/agentscope-extensions-autocontext-memory/src/main/java/io/agentscope/core/memory/autocontext/MsgUtils.java
+++ b/agentscope-extensions/agentscope-extensions-autocontext-memory/src/main/java/io/agentscope/core/memory/autocontext/MsgUtils.java
@@ -1,702 +1,733 @@
-/*
- * Copyright 2024-2026 the original author or authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package io.agentscope.core.memory.autocontext;
-
-import com.fasterxml.jackson.core.type.TypeReference;
-import io.agentscope.core.message.ContentBlock;
-import io.agentscope.core.message.Msg;
-import io.agentscope.core.message.MsgRole;
-import io.agentscope.core.message.TextBlock;
-import io.agentscope.core.message.ToolResultBlock;
-import io.agentscope.core.message.ToolUseBlock;
-import io.agentscope.core.util.JsonUtils;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
-
-/**
- * Utility class for message serialization and deserialization operations.
- *
- * This class provides methods for converting between {@link Msg} objects and JSON-compatible
- * formats (Map structures) for state persistence. It handles polymorphic types like ContentBlock
- * and its subtypes (TextBlock, ToolUseBlock, ToolResultBlock, etc.) using Jackson ObjectMapper.
- *
- *
Key Features:
- *
- * - Serialization: Converts {@code List} to {@code List
- * - Deserialization: Converts {@code List
- * - Map serialization: Handles {@code Map>} for offload context storage
- * - Message manipulation: Provides utility methods for replacing message ranges
- *
- *
- * Usage:
- * These methods are primarily used by {@link AutoContextMemory} for state persistence through
- * the session API. The serialized format preserves all ContentBlock
- * type information using Jackson's polymorphic type handling.
- */
-public class MsgUtils {
-
- /** Type reference for deserializing lists of JSON strings. */
- private static final TypeReference> MSG_STRING_LIST_TYPE =
- new TypeReference<>() {};
-
- /** Type reference for deserializing maps of string lists. */
- private static final TypeReference