Skip to content

Commit 3abcedc

Browse files
committed
Mirror MATLAB ndi.dataset/ndi.dataset.dir architecture in Python
Major restructuring to match the MATLAB codebase:

**ndi.dataset (base class):**
- reference/id() delegate to session (matching MATLAB)
- build_session_info() discovers sessions from session_in_a_dataset docs filtered by base.session_id == dataset.id() (matching MATLAB)
- Static methods: repairDatasetSessionInfo, addSessionInfoToDataset, removeSessionInfoFromDataset (matching MATLAB)
- database_search() searches session._database directly + linked sessions
- database_add() routes docs by session_id (matching MATLAB)

**ndi.dataset.dir (DatasetDir subclass):**
- Constructor mirrors MATLAB ndi.dataset.dir with 1/2/3-arg forms
- Hidden documents parameter for downloadDataset (MATLAB 3rd arg)
- Session discovery: dataset_session_info → session_in_a_dataset → session docs, then re-creates session with correct ref/id
- _ensure_session_tracking() creates session_in_a_dataset docs for untracked sessions found in the database
- Uses dataset path directly for session (.ndi/ at root, not .ndi_dataset/)

**ndi.query module-level functions:**
- Added all(), none(), from_search() at module level so ndi.query.all() works like MATLAB (where ndi.query is a class, not a module)

**downloadDataset:**
- Now passes documents to DatasetDir constructor (matching MATLAB pattern)
- Reference discovered from session documents, not cloud API name

https://claude.ai/code/session_01ByofAviQrhGPLQ9BLquGjV
1 parent d49054e commit 3abcedc

4 files changed

Lines changed: 676 additions & 290 deletions

File tree

src/ndi/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
from .calculator import Calculator
4444
from .common import PathConstants, get_logger, getLogger, timestamp
4545
from .database import Database, open_database
46-
from .dataset import Dataset
46+
from .dataset import Dataset, DatasetDir
4747
from .document import Document
4848
from .documentservice import DocumentService
4949
from .element import Element
@@ -117,6 +117,7 @@ def version() -> tuple:
117117
"ElementTimeseries",
118118
"Neuron",
119119
"Dataset",
120+
"DatasetDir",
120121
"App",
121122
"AppDoc",
122123
"DocExistsAction",

src/ndi/cloud/orchestration.py

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -78,25 +78,24 @@ def downloadDataset(
7878
for dj in doc_jsons:
7979
updateFileInfoForRemoteFiles(dj, cloud_dataset_id)
8080

81-
# Convert to Document objects and add to a local Dataset
82-
from ndi.dataset import Dataset
81+
# Convert to Document objects and create Dataset with them.
82+
# Mirrors MATLAB: ndi.dataset.dir([], datasetFolder, ndiDocuments)
83+
from ndi.dataset import DatasetDir
8384

84-
dataset_reference = ds_info.get("name", cloud_dataset_id)
85-
dataset = Dataset(target, reference=dataset_reference)
8685
documents = jsons2documents(doc_jsons)
87-
for doc in documents:
86+
dataset = DatasetDir("", target, documents=documents)
87+
88+
# Create remote link document if not already present
89+
from ndi.query import Query
90+
91+
existing = dataset.database_search(Query("").isa("dataset_remote"))
92+
if not existing:
93+
remote_doc = createRemoteDatasetDoc(cloud_dataset_id, dataset)
8894
try:
89-
dataset._session._database.add(doc)
95+
dataset._session._database.add(remote_doc)
9096
except Exception:
9197
pass
9298

93-
# Create remote link document
94-
remote_doc = createRemoteDatasetDoc(cloud_dataset_id, dataset)
95-
try:
96-
dataset._session._database.add(remote_doc)
97-
except Exception:
98-
pass
99-
10099
# Store cloud client for on-demand file fetching
101100
dataset.cloud_client = client
102101

@@ -175,20 +174,21 @@ def load_dataset_from_json_dir(
175174
updateFileInfoForRemoteFiles(dj, cloud_dataset_id)
176175

177176
# Create Dataset
178-
from ndi.dataset import Dataset
177+
from ndi.dataset import DatasetDir
179178

180179
if target_folder is None:
181180
target = json_path.parent / f"{json_path.name}_dataset"
182181
else:
183182
target = Path(target_folder)
184183
target.mkdir(parents=True, exist_ok=True)
185184

186-
dataset = Dataset(target)
185+
# Convert JSON dicts to Document objects and create dataset with them
186+
from .download import jsons2documents as _j2d
187187

188-
# Use bulk_add for fast insertion (single transaction, no per-doc
189-
# duplicate checks). This bypasses session_id enforcement since
190-
# documents come from multiple remote sessions.
191-
added, skipped = dataset._session._database._driver.bulk_add(doc_jsons)
188+
all_documents = _j2d(doc_jsons)
189+
dataset = DatasetDir("", target, documents=all_documents)
190+
added = len(all_documents)
191+
skipped = 0
192192

193193
# Wire cloud client for on-demand file fetching
194194
if client is not None:

src/ndi/dataset/__init__.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,19 @@
1010
ndi.dataset.dir -> ndi.dataset.dir (constructor for directory-based datasets)
1111
"""
1212

13-
from ._dataset import Dataset
13+
from ._dataset import Dataset as _DatasetBase # noqa: F401
14+
from ._dataset import DatasetDir
15+
16+
# For backward compatibility, ``ndi.dataset.Dataset`` is ``DatasetDir``.
17+
# The base class is available as ``ndi.dataset._DatasetBase`` if needed.
18+
Dataset = DatasetDir
1419

1520
# MATLAB compatibility: ``ndi.dataset.dir(path)`` creates a directory-based
1621
# dataset, mirroring the MATLAB constructor ``ndi.dataset.dir``.
17-
dir = Dataset
22+
dir = DatasetDir
1823

1924
__all__ = [
2025
"Dataset",
26+
"DatasetDir",
2127
"dir",
2228
]

0 commit comments

Comments
 (0)