-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathrlm_memory.py
More file actions
66 lines (56 loc) · 2.44 KB
/
rlm_memory.py
File metadata and controls
66 lines (56 loc) · 2.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""
soul.py v2.0 — RLM Memory
Recursive synthesis for exhaustive queries.
Works with AnthropicREST client (messages_create method).
"""
import re, time
from pathlib import Path
class RLMMemory:
    """Recursive memory retrieval over a markdown memory file.

    Answers a query in three passes: (1) split the file's ``## ``-headed
    entries into fixed-size chunks, (2) ask a sub-model to extract the
    query-relevant facts from each chunk (chunks answering ``SKIP`` are
    discarded), (3) ask a synthesis model to combine the surviving
    summaries into one answer.

    Works with any client exposing a ``messages_create(model, max_tokens,
    messages)`` method that returns the reply as a plain string.
    """

    def __init__(self, memory_path="MEMORY.md", chunk_size=10,
                 sub_model="claude-haiku-4-5", synth_model="claude-haiku-4-5"):
        # memory_path: markdown file whose entries begin with "## " headers.
        # chunk_size: number of entries handed to the sub-model per call.
        self.memory_path = Path(memory_path)
        self.chunk_size = chunk_size
        self.sub_model = sub_model
        self.synth_model = synth_model

    def _parse_entries(self):
        """Return the non-empty ``## ``-delimited entries of the memory file.

        Returns [] when the file does not exist yet, so a fresh install
        degrades to "no memories" instead of raising FileNotFoundError.
        """
        if not self.memory_path.exists():
            return []
        text = self.memory_path.read_text(encoding="utf-8")
        # (?m)^##  also matches a header on the very first line; the old
        # pattern '\n## ' silently dropped an entry that opened the file.
        # [1:] still discards any preamble before the first header.
        return [b.strip() for b in re.split(r'(?m)^## ', text)[1:] if b.strip()]

    def retrieve(self, query: str, client) -> dict:
        """Answer *query* from memory via map (sub-model) + reduce (synth).

        Args:
            query: natural-language question to answer from memory.
            client: object with a ``messages_create`` method (see class doc).

        Returns:
            dict with keys ``answer`` (str), ``chunks_processed`` (int),
            ``relevant_chunks`` (int), ``latency_ms`` (int) and
            ``sub_summaries`` (list[str]).
        """
        t0 = time.time()
        entries = self._parse_entries()
        if not entries:
            return {"answer": "No memories found yet.",
                    "chunks_processed": 0, "relevant_chunks": 0,
                    "latency_ms": 0, "sub_summaries": []}
        chunks = [entries[i:i+self.chunk_size]
                  for i in range(0, len(entries), self.chunk_size)]
        sub_summaries = []
        for chunk in chunks:
            chunk_text = "\n\n---\n".join(chunk)
            summary = client.messages_create(
                model=self.sub_model, max_tokens=400,
                messages=[{"role":"user","content":
                    f"From these memory entries, extract ONLY what's relevant to:\n'{query}'\n\n"
                    f"Entries:\n{chunk_text}\n\nBe concise. If nothing relevant, reply: SKIP"
                }],
            )
            # strip() first: models often reply "SKIP\n" or " SKIP", which
            # the old exact comparison failed to filter out.
            if summary.strip().upper() != "SKIP":
                sub_summaries.append(summary)
        if not sub_summaries:
            # Every chunk was irrelevant — skip the synthesis call entirely.
            return {"answer": f"No memories relevant to: '{query}'",
                    "chunks_processed": len(chunks), "relevant_chunks": 0,
                    "latency_ms": int((time.time()-t0)*1000), "sub_summaries": []}
        combined = "\n\n===\n".join(sub_summaries)
        answer = client.messages_create(
            model=self.synth_model, max_tokens=600,
            messages=[{"role":"user","content":
                f"Synthesize into a complete answer to: '{query}'\n\nFindings:\n{combined}\n\nBe direct."
            }],
        )
        return {
            "answer": answer,
            "chunks_processed": len(chunks),
            "relevant_chunks": len(sub_summaries),
            "latency_ms": int((time.time()-t0)*1000),
            "sub_summaries": sub_summaries,
        }