@@ -108,16 +108,23 @@ def add_linked_session(self, session: Any) -> Dataset:
108108
109109 Returns:
110110 self for chaining
111+
112+ Raises:
113+ ValueError: If the session is already part of this dataset
111114 """
112- # Check if already linked
113115 existing = self ._find_session_doc (session .id ())
114116 if existing is not None :
115- return self
117+ raise ValueError (
118+ f"Session with id { session .id ()} is already part of " f"dataset { self .id ()} ."
119+ )
116120
117121 # Create session_in_a_dataset document
118122 doc = self ._create_session_doc (session , is_linked = True )
119123 self ._session .database_add (doc )
120124
125+ # Cache the session object
126+ self ._session_cache [session .id ()] = session
127+
121128 return self
122129
123130 def add_ingested_session (self , session : Any ) -> Dataset :
@@ -132,11 +139,25 @@ def add_ingested_session(self, session: Any) -> Dataset:
132139
133140 Returns:
134141 self for chaining
142+
143+ Raises:
144+ ValueError: If the session is already part of this dataset
145+ ValueError: If the session is not fully ingested
135146 """
136- # Check if already present
137147 existing = self ._find_session_doc (session .id ())
138148 if existing is not None :
139- return self
149+ raise ValueError (
150+ f"Session with id { session .id ()} is already part of " f"dataset { self .id ()} ."
151+ )
152+
153+ # Check if session is fully ingested
154+ if hasattr (session , "is_fully_ingested" ) and not session .is_fully_ingested ():
155+ raise ValueError (
156+ f"Session with id { session .id ()} and reference "
157+ f"{ session .reference } is not yet fully ingested. "
158+ f"It must be fully ingested before it can be added "
159+ f"in ingested form to a dataset."
160+ )
140161
141162 # Copy all documents from source session into the dataset's database.
142163 # We bypass session.database_add() because it enforces session_id ==
@@ -160,25 +181,38 @@ def add_ingested_session(self, session: Any) -> Dataset:
160181 def unlink_session (
161182 self ,
162183 session_id : str ,
163- remove_documents : bool = False ,
184+ are_you_sure : bool = False ,
164185 ) -> Dataset :
165186 """
166- Remove a session from this dataset.
187+ Unlink a linked session from this dataset.
188+
189+ The session must be a linked session (not ingested). Use
190+ delete_ingested_session() for ingested sessions.
167191
168192 Args:
169193 session_id: ID of the session to unlink
170- remove_documents: If True, also remove ingested documents
194+ are_you_sure: Must be True to proceed
171195
172196 Returns:
173197 self for chaining
198+
199+ Raises:
200+ ValueError: If not confirmed, session not found, or session is ingested
174201 """
202+ if not are_you_sure :
203+ raise ValueError ("Must set are_you_sure=True to unlink a session." )
204+
175205 doc = self ._find_session_doc (session_id )
176206 if doc is None :
177- return self
207+ raise ValueError ( f"Session with ID { session_id } not found in " f"dataset { self . id () } ." )
178208
179- # Optionally remove ingested documents
180- if remove_documents :
181- self ._remove_session_documents (session_id )
209+ props = doc .document_properties .get ("session_in_a_dataset" , {})
210+ if not props .get ("is_linked" , False ):
211+ raise ValueError (
212+ f"Session with ID { session_id } is an INGESTED session, "
213+ f"not a linked session. Cannot unlink. Use "
214+ f"delete_ingested_session() instead."
215+ )
182216
183217 # Remove the session_in_a_dataset document
184218 self ._session .database_rm (doc )
@@ -224,38 +258,43 @@ def open_session(self, session_id: str) -> Any | None:
224258
225259 return session
226260
227- def session_list (self ) -> tuple [list [str ], list [dict [str , Any ]]]:
261+ def session_list (
262+ self ,
263+ ) -> tuple [list [str ], list [str ], list [str ], str ]:
228264 """
229265 List all sessions in this dataset.
230266
231267 Returns:
232- A tuple of (session_references, session_list) where :
233- - session_references : List of session reference strings
234- - session_list : List of dicts with keys:
235- - session_id: Session identifier
236- - session_reference: Session reference name
237- - is_linked: True if linked, False if ingested
238- - document_id: ID of the session_in_a_dataset document
268+ A tuple of (ref_list, id_list, session_doc_ids, dataset_session_doc_id) :
269+ - ref_list : List of session reference strings
270+ - id_list : List of session ID strings
271+ - session_doc_ids: List of document IDs for the
272+ session_in_a_dataset documents
273+ - dataset_session_doc_id: Document ID of the dataset's
274+ own session document (empty string if not found)
239275 """
240276 q = Query ("" ).isa ("session_in_a_dataset" )
241277 docs = self ._session .database_search (q )
242278
243- session_references = []
244- session_list = []
279+ ref_list : list [str ] = []
280+ id_list : list [str ] = []
281+ session_doc_ids : list [str ] = []
245282 for doc in docs :
246283 props = doc .document_properties .get ("session_in_a_dataset" , {})
247- ref = props .get ("session_reference" , "" )
248- session_references .append (ref )
249- session_list .append (
250- {
251- "session_id" : props .get ("session_id" , "" ),
252- "session_reference" : ref ,
253- "is_linked" : bool (props .get ("is_linked" , False )),
254- "document_id" : doc .id ,
255- }
256- )
284+ ref_list .append (props .get ("session_reference" , "" ))
285+ id_list .append (props .get ("session_id" , "" ))
286+ session_doc_ids .append (doc .id )
287+
288+ # Find the dataset's own session document
289+ dataset_session_doc_id = ""
290+ q_ds = Query ("" ).isa ("session" ) & (Query ("base.session_id" ) == self .id ())
291+ ds_docs = self ._session .database_search (q_ds )
292+ if len (ds_docs ) == 1 :
293+ dataset_session_doc_id = ds_docs [0 ].id
294+ elif len (ds_docs ) > 1 :
295+ raise ValueError ("More than 1 session document for the dataset session found." )
257296
258- return session_references , session_list
297+ return ref_list , id_list , session_doc_ids , dataset_session_doc_id
259298
260299 # =========================================================================
261300 # Database Operations (delegated to internal session)
@@ -276,14 +315,34 @@ def database_rm(
276315 return self
277316
278317 def database_search (self , query : Query ) -> list [Document ]:
279- """Search the dataset database.
318+ """Search the dataset database and all linked sessions .
280319
281320 Unlike Session.database_search(), this does NOT filter by session_id
282321 because a dataset stores documents from multiple ingested sessions.
322+ Results from linked sessions are also included.
283323 """
284324 if self ._session ._database is None :
285- return []
286- return self ._session ._database .search (query )
325+ results : list [Document ] = []
326+ else :
327+ results = list (self ._session ._database .search (query ))
328+
329+ # Also search linked sessions
330+ self ._open_linked_sessions ()
331+ q = Query ("" ).isa ("session_in_a_dataset" )
332+ info_docs = self ._session .database_search (q )
333+ for info_doc in info_docs :
334+ props = info_doc .document_properties .get ("session_in_a_dataset" , {})
335+ if props .get ("is_linked" , False ):
336+ sid = props .get ("session_id" , "" )
337+ session = self ._session_cache .get (sid )
338+ if session is not None :
339+ try :
340+ linked_results = session .database_search (query )
341+ results .extend (linked_results )
342+ except Exception :
343+ pass
344+
345+ return results
287346
288347 def database_openbinarydoc (
289348 self ,
@@ -324,11 +383,14 @@ def delete_ingested_session(
324383
325384 doc = self ._find_session_doc (session_id )
326385 if doc is None :
327- return self
386+ raise ValueError ( f"Session { session_id } not found in dataset." )
328387
329388 props = doc .document_properties .get ("session_in_a_dataset" , {})
330389 if props .get ("is_linked" , False ):
331- raise ValueError ("Cannot delete a linked session. Use unlink_session() instead." )
390+ raise ValueError (
391+ f"Session { session_id } is a linked session, not an "
392+ f"ingested one. Use unlink_session() instead."
393+ )
332394
333395 # Remove all documents from this session
334396 self ._remove_session_documents (session_id )
@@ -432,6 +494,17 @@ def _remove_session_documents(self, session_id: str) -> None:
432494 except Exception as exc :
433495 logger .warning ("Failed to remove document %s: %s" , doc .id , exc )
434496
497+ def _open_linked_sessions (self ) -> None :
498+ """Ensure all linked sessions are open and cached."""
499+ q = Query ("" ).isa ("session_in_a_dataset" )
500+ docs = self ._session .database_search (q )
501+ for doc in docs :
502+ props = doc .document_properties .get ("session_in_a_dataset" , {})
503+ if props .get ("is_linked" , False ):
504+ sid = props .get ("session_id" , "" )
505+ if sid and sid not in self ._session_cache :
506+ self .open_session (sid )
507+
435508 def _recreate_linked_session (self , props : dict [str , Any ]) -> Any | None :
436509 """Recreate a linked session from stored creator args."""
437510 creator = props .get ("session_creator" , "" )
@@ -465,5 +538,5 @@ def _recreate_linked_session(self, props: dict[str, Any]) -> Any | None:
465538
466539 def __repr__ (self ) -> str :
467540 """String representation."""
468- sessions = self .session_list ()
469- return f"Dataset('{ self ._reference } ', sessions={ len (sessions )} )"
541+ refs , _ids , _doc_ids , _ds_doc_id = self .session_list ()
542+ return f"Dataset('{ self ._reference } ', sessions={ len (refs )} )"
0 commit comments