Skip to content

Commit df17589

Browse files
Merge pull request #20 from Waltham-Data-Science/claude/fix-python-3.12-coverage-VDIft
Refactor Dataset.session_list() to match MATLAB API and improve error handling
2 parents ded2cb1 + 16edf45 commit df17589

6 files changed

Lines changed: 224 additions & 197 deletions

File tree

src/ndi/database_fun.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ def copy_session_to_dataset(
390390

391391
# Check for already-copied sessions
392392
try:
393-
refs, session_ids = ndi_dataset_obj.session_list()
393+
refs, session_ids, *_ = ndi_dataset_obj.session_list()
394394
session_id = ndi_session_obj.id()
395395
if session_id in session_ids:
396396
return (

src/ndi/dataset/_dataset.py

Lines changed: 112 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -108,16 +108,23 @@ def add_linked_session(self, session: Any) -> Dataset:
108108
109109
Returns:
110110
self for chaining
111+
112+
Raises:
113+
ValueError: If the session is already part of this dataset
111114
"""
112-
# Check if already linked
113115
existing = self._find_session_doc(session.id())
114116
if existing is not None:
115-
return self
117+
raise ValueError(
118+
f"Session with id {session.id()} is already part of " f"dataset {self.id()}."
119+
)
116120

117121
# Create session_in_a_dataset document
118122
doc = self._create_session_doc(session, is_linked=True)
119123
self._session.database_add(doc)
120124

125+
# Cache the session object
126+
self._session_cache[session.id()] = session
127+
121128
return self
122129

123130
def add_ingested_session(self, session: Any) -> Dataset:
@@ -132,11 +139,25 @@ def add_ingested_session(self, session: Any) -> Dataset:
132139
133140
Returns:
134141
self for chaining
142+
143+
Raises:
144+
ValueError: If the session is already part of this dataset
145+
ValueError: If the session is not fully ingested
135146
"""
136-
# Check if already present
137147
existing = self._find_session_doc(session.id())
138148
if existing is not None:
139-
return self
149+
raise ValueError(
150+
f"Session with id {session.id()} is already part of " f"dataset {self.id()}."
151+
)
152+
153+
# Check if session is fully ingested
154+
if hasattr(session, "is_fully_ingested") and not session.is_fully_ingested():
155+
raise ValueError(
156+
f"Session with id {session.id()} and reference "
157+
f"{session.reference} is not yet fully ingested. "
158+
f"It must be fully ingested before it can be added "
159+
f"in ingested form to a dataset."
160+
)
140161

141162
# Copy all documents from source session into the dataset's database.
142163
# We bypass session.database_add() because it enforces session_id ==
@@ -160,25 +181,38 @@ def add_ingested_session(self, session: Any) -> Dataset:
160181
def unlink_session(
161182
self,
162183
session_id: str,
163-
remove_documents: bool = False,
184+
are_you_sure: bool = False,
164185
) -> Dataset:
165186
"""
166-
Remove a session from this dataset.
187+
Unlink a linked session from this dataset.
188+
189+
The session must be a linked session (not ingested). Use
190+
delete_ingested_session() for ingested sessions.
167191
168192
Args:
169193
session_id: ID of the session to unlink
170-
remove_documents: If True, also remove ingested documents
194+
are_you_sure: Must be True to proceed
171195
172196
Returns:
173197
self for chaining
198+
199+
Raises:
200+
ValueError: If not confirmed, session not found, or session is ingested
174201
"""
202+
if not are_you_sure:
203+
raise ValueError("Must set are_you_sure=True to unlink a session.")
204+
175205
doc = self._find_session_doc(session_id)
176206
if doc is None:
177-
return self
207+
raise ValueError(f"Session with ID {session_id} not found in " f"dataset {self.id()}.")
178208

179-
# Optionally remove ingested documents
180-
if remove_documents:
181-
self._remove_session_documents(session_id)
209+
props = doc.document_properties.get("session_in_a_dataset", {})
210+
if not props.get("is_linked", False):
211+
raise ValueError(
212+
f"Session with ID {session_id} is an INGESTED session, "
213+
f"not a linked session. Cannot unlink. Use "
214+
f"delete_ingested_session() instead."
215+
)
182216

183217
# Remove the session_in_a_dataset document
184218
self._session.database_rm(doc)
@@ -224,38 +258,43 @@ def open_session(self, session_id: str) -> Any | None:
224258

225259
return session
226260

227-
def session_list(self) -> tuple[list[str], list[dict[str, Any]]]:
261+
def session_list(
262+
self,
263+
) -> tuple[list[str], list[str], list[str], str]:
228264
"""
229265
List all sessions in this dataset.
230266
231267
Returns:
232-
A tuple of (session_references, session_list) where:
233-
- session_references: List of session reference strings
234-
- session_list: List of dicts with keys:
235-
- session_id: Session identifier
236-
- session_reference: Session reference name
237-
- is_linked: True if linked, False if ingested
238-
- document_id: ID of the session_in_a_dataset document
268+
A tuple of (ref_list, id_list, session_doc_ids, dataset_session_doc_id):
269+
- ref_list: List of session reference strings
270+
- id_list: List of session ID strings
271+
- session_doc_ids: List of document IDs for the
272+
session_in_a_dataset documents
273+
- dataset_session_doc_id: Document ID of the dataset's
274+
own session document (empty string if not found)
239275
"""
240276
q = Query("").isa("session_in_a_dataset")
241277
docs = self._session.database_search(q)
242278

243-
session_references = []
244-
session_list = []
279+
ref_list: list[str] = []
280+
id_list: list[str] = []
281+
session_doc_ids: list[str] = []
245282
for doc in docs:
246283
props = doc.document_properties.get("session_in_a_dataset", {})
247-
ref = props.get("session_reference", "")
248-
session_references.append(ref)
249-
session_list.append(
250-
{
251-
"session_id": props.get("session_id", ""),
252-
"session_reference": ref,
253-
"is_linked": bool(props.get("is_linked", False)),
254-
"document_id": doc.id,
255-
}
256-
)
284+
ref_list.append(props.get("session_reference", ""))
285+
id_list.append(props.get("session_id", ""))
286+
session_doc_ids.append(doc.id)
287+
288+
# Find the dataset's own session document
289+
dataset_session_doc_id = ""
290+
q_ds = Query("").isa("session") & (Query("base.session_id") == self.id())
291+
ds_docs = self._session.database_search(q_ds)
292+
if len(ds_docs) == 1:
293+
dataset_session_doc_id = ds_docs[0].id
294+
elif len(ds_docs) > 1:
295+
raise ValueError("More than 1 session document for the dataset session found.")
257296

258-
return session_references, session_list
297+
return ref_list, id_list, session_doc_ids, dataset_session_doc_id
259298

260299
# =========================================================================
261300
# Database Operations (delegated to internal session)
@@ -276,14 +315,34 @@ def database_rm(
276315
return self
277316

278317
def database_search(self, query: Query) -> list[Document]:
279-
"""Search the dataset database.
318+
"""Search the dataset database and all linked sessions.
280319
281320
Unlike Session.database_search(), this does NOT filter by session_id
282321
because a dataset stores documents from multiple ingested sessions.
322+
Results from linked sessions are also included.
283323
"""
284324
if self._session._database is None:
285-
return []
286-
return self._session._database.search(query)
325+
results: list[Document] = []
326+
else:
327+
results = list(self._session._database.search(query))
328+
329+
# Also search linked sessions
330+
self._open_linked_sessions()
331+
q = Query("").isa("session_in_a_dataset")
332+
info_docs = self._session.database_search(q)
333+
for info_doc in info_docs:
334+
props = info_doc.document_properties.get("session_in_a_dataset", {})
335+
if props.get("is_linked", False):
336+
sid = props.get("session_id", "")
337+
session = self._session_cache.get(sid)
338+
if session is not None:
339+
try:
340+
linked_results = session.database_search(query)
341+
results.extend(linked_results)
342+
except Exception:
343+
pass
344+
345+
return results
287346

288347
def database_openbinarydoc(
289348
self,
@@ -324,11 +383,14 @@ def delete_ingested_session(
324383

325384
doc = self._find_session_doc(session_id)
326385
if doc is None:
327-
return self
386+
raise ValueError(f"Session {session_id} not found in dataset.")
328387

329388
props = doc.document_properties.get("session_in_a_dataset", {})
330389
if props.get("is_linked", False):
331-
raise ValueError("Cannot delete a linked session. Use unlink_session() instead.")
390+
raise ValueError(
391+
f"Session {session_id} is a linked session, not an "
392+
f"ingested one. Use unlink_session() instead."
393+
)
332394

333395
# Remove all documents from this session
334396
self._remove_session_documents(session_id)
@@ -432,6 +494,17 @@ def _remove_session_documents(self, session_id: str) -> None:
432494
except Exception as exc:
433495
logger.warning("Failed to remove document %s: %s", doc.id, exc)
434496

497+
def _open_linked_sessions(self) -> None:
498+
"""Ensure all linked sessions are open and cached."""
499+
q = Query("").isa("session_in_a_dataset")
500+
docs = self._session.database_search(q)
501+
for doc in docs:
502+
props = doc.document_properties.get("session_in_a_dataset", {})
503+
if props.get("is_linked", False):
504+
sid = props.get("session_id", "")
505+
if sid and sid not in self._session_cache:
506+
self.open_session(sid)
507+
435508
def _recreate_linked_session(self, props: dict[str, Any]) -> Any | None:
436509
"""Recreate a linked session from stored creator args."""
437510
creator = props.get("session_creator", "")
@@ -465,5 +538,5 @@ def _recreate_linked_session(self, props: dict[str, Any]) -> Any | None:
465538

466539
def __repr__(self) -> str:
467540
"""String representation."""
468-
sessions = self.session_list()
469-
return f"Dataset('{self._reference}', sessions={len(sessions)})"
541+
refs, _ids, _doc_ids, _ds_doc_id = self.session_list()
542+
return f"Dataset('{self._reference}', sessions={len(refs)})"

0 commit comments

Comments
 (0)