-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathkb_manager.py
More file actions
235 lines (179 loc) · 6.9 KB
/
kb_manager.py
File metadata and controls
235 lines (179 loc) · 6.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
"""
Knowledge Base Manager
Utilities for managing and updating the knowledge base
"""
import os
import logging
from typing import List, Optional
import pickle
logger = logging.getLogger(__name__)


class KnowledgeBaseManager:
    """Manage the knowledge base text file and the pickled list of chunks.

    Every file operation is best-effort: failures are logged and reported
    through the return value ("" / None / False) instead of being raised.
    """

    def __init__(self, kb_path: str = "info.txt", chunks_path: str = "doc_chunks.pkl"):
        """
        Initialize knowledge base manager.

        Args:
            kb_path: Path to knowledge base text file
            chunks_path: Path to chunks pickle file
        """
        self.kb_path = kb_path
        self.chunks_path = chunks_path

    def load_knowledge_base(self) -> str:
        """
        Load the knowledge base text.

        Returns:
            Knowledge base content as string, or "" when the file is missing
            or cannot be read (the reason is logged).
        """
        # Open directly instead of checking os.path.exists first: the file
        # could disappear between the check and the open.
        try:
            with open(self.kb_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except FileNotFoundError:
            logger.warning(f"Knowledge base not found at {self.kb_path}")
            return ""
        except Exception as e:
            logger.error(f"❌ Error loading knowledge base: {e}")
            return ""
        logger.info(f"✅ Loaded {len(content)} characters from knowledge base")
        return content

    def save_knowledge_base(self, content: str) -> bool:
        """
        Save content to knowledge base file, replacing what was there.

        Args:
            content: Text content to save

        Returns:
            True if successful, False otherwise
        """
        try:
            with open(self.kb_path, 'w', encoding='utf-8') as f:
                f.write(content)
            logger.info(f"✅ Saved {len(content)} characters to knowledge base")
            return True
        except Exception as e:
            logger.error(f"❌ Error saving knowledge base: {e}")
            return False

    def append_to_knowledge_base(self, content: str) -> bool:
        """
        Append content to existing knowledge base.

        The text is separated from existing content by a blank line. The file
        is created when it does not exist yet.

        Args:
            content: Text to append

        Returns:
            True if successful, False otherwise
        """
        try:
            try:
                existing_size = os.path.getsize(self.kb_path)
            except OSError:
                # Missing (or un-stat-able) file: append mode creates it.
                existing_size = 0
            # Build the payload before opening so a bad `content` type fails
            # without writing a dangling separator.
            payload = "\n\n" + content if existing_size else content
            # Append in place rather than load + rewrite: a failed load
            # returns "", and rewriting from that would silently replace the
            # whole file with just the new text.
            with open(self.kb_path, 'a', encoding='utf-8') as f:
                f.write(payload)
            logger.info(f"✅ Appended {len(content)} characters to knowledge base")
            return True
        except Exception as e:
            logger.error(f"❌ Error appending to knowledge base: {e}")
            return False

    def get_knowledge_base_stats(self) -> dict:
        """
        Get statistics about the knowledge base.

        Returns:
            Dictionary with the keys "total_characters", "total_words",
            "total_lines", "exists" and "file_size_kb". A missing or empty
            file yields zero for every count.
        """
        content = self.load_knowledge_base()

        # A trailing newline terminates the last line rather than starting a
        # new one, and empty content has no lines at all.
        total_lines = content.count('\n')
        if content and not content.endswith('\n'):
            total_lines += 1

        try:
            size_bytes = os.path.getsize(self.kb_path)
        except OSError:
            size_bytes = 0

        return {
            "total_characters": len(content),
            "total_words": len(content.split()),
            "total_lines": total_lines,
            "exists": os.path.exists(self.kb_path),
            "file_size_kb": size_bytes / 1024,
        }

    def load_chunks(self) -> Optional[List[str]]:
        """
        Load pre-chunked documents.

        Returns:
            List of chunks or None if not found or unreadable
        """
        try:
            with open(self.chunks_path, 'rb') as f:
                # SECURITY: unpickling can execute arbitrary code. Only point
                # chunks_path at files this application wrote itself.
                chunks = pickle.load(f)
            logger.info(f"✅ Loaded {len(chunks)} chunks")
            return chunks
        except FileNotFoundError:
            logger.warning(f"Chunks file not found at {self.chunks_path}")
            return None
        except Exception as e:
            logger.error(f"❌ Error loading chunks: {e}")
            return None

    def save_chunks(self, chunks: List[str]) -> bool:
        """
        Save chunks to file.

        Args:
            chunks: List of text chunks

        Returns:
            True if successful, False otherwise
        """
        try:
            with open(self.chunks_path, 'wb') as f:
                pickle.dump(chunks, f)
            logger.info(f"✅ Saved {len(chunks)} chunks")
            return True
        except Exception as e:
            logger.error(f"❌ Error saving chunks: {e}")
            return False

    def display_stats(self):
        """Print the knowledge base statistics as a small report."""
        stats = self.get_knowledge_base_stats()
        print("\n" + "="*50)
        print("📊 Knowledge Base Statistics")
        print("="*50)
        print(f"Exists: {stats['exists']}")
        print(f"Size: {stats['file_size_kb']:.2f} KB")
        print(f"Characters: {stats['total_characters']:,}")
        print(f"Words: {stats['total_words']:,}")
        print(f"Lines: {stats['total_lines']:,}")
        print("="*50 + "\n")
class ContentOrganizer:
    """Organize knowledge base content into named sections."""

    def __init__(self, kb_manager: "KnowledgeBaseManager"):
        """
        Initialize the organizer.

        Args:
            kb_manager: Object whose load_knowledge_base() returns the text
                to organize
        """
        self.kb_manager = kb_manager

    @staticmethod
    def _section_name(header: str, separator: str) -> str:
        """Return the title of a stripped header line, without its separators."""
        # Remove only the leading separator and an optional trailing one, so
        # a separator occurring inside the title is preserved.
        name = header[len(separator):].strip()
        if name.endswith(separator):
            name = name[:-len(separator)].strip()
        return name

    @staticmethod
    def _store_section(sections: dict, name: str, lines: List[str]) -> None:
        """Record the collected lines under name, merging with a same-named section."""
        if not lines:
            return
        text = '\n'.join(lines).strip()
        # A repeated section name extends the earlier section instead of
        # overwriting it, so no content is lost.
        earlier = sections.get(name, "")
        sections[name] = "\n\n".join(part for part in (earlier, text) if part)

    def organize_by_sections(self, separator: str = "---") -> dict:
        """
        Organize content into sections.

        A line whose stripped text starts with the separator begins a new
        section; the rest of that line is the section name. Text before the
        first separator goes to "default". A header without a name gets
        "section_<number of sections stored so far>".

        Args:
            separator: Section separator string (must not be empty)

        Returns:
            Dictionary mapping section name to its stripped text; empty when
            the knowledge base has no content

        Raises:
            ValueError: If separator is empty (it would match every line)
        """
        if not separator:
            raise ValueError("separator must be a non-empty string")

        content = self.kb_manager.load_knowledge_base()
        if not content:
            return {}

        sections = {}
        current_section = "default"
        current_content = []
        for line in content.split('\n'):
            header = line.strip()
            if header.startswith(separator):
                self._store_section(sections, current_section, current_content)
                current_section = (
                    self._section_name(header, separator)
                    or f"section_{len(sections)}"
                )
                current_content = []
            else:
                current_content.append(line)
        self._store_section(sections, current_section, current_content)
        return sections

    def display_sections(self):
        """Print every section name with its word count."""
        sections = self.organize_by_sections()
        print("\n" + "="*50)
        print("📚 Knowledge Base Sections")
        print("="*50)
        for name, content in sections.items():
            words = len(content.split())
            print(f"- {name}: {words} words")
        print("="*50 + "\n")
def demonstrate_knowledge_base_management():
    """Run a small demo: print the knowledge base stats, then its sections."""
    # Default paths are used for both the text file and the chunk file.
    kb = KnowledgeBaseManager()
    kb.display_stats()

    # The organizer reads its content through the same manager instance.
    ContentOrganizer(kb).display_sections()
if __name__ == "__main__":
    # Script entry point: show INFO-level log records, then run the demo.
    logging.basicConfig(level=logging.INFO)
    demonstrate_knowledge_base_management()