diff --git a/README.md b/README.md index b1d63ff..8f65467 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# LanceDB Mintlify Documentation +# LanceDB Documentation Home of the [LanceDB](https://lancedb.com/) documentation. Built using [Mintlify](https://www.mintlify.com/). diff --git a/docs/api-reference/index.mdx b/docs/api-reference/index.mdx index e9cd6ff..38359d8 100644 --- a/docs/api-reference/index.mdx +++ b/docs/api-reference/index.mdx @@ -11,7 +11,7 @@ refer to the API documentation linked below. If you're looking for a REST API reference, visit the [REST API](/api-reference/rest) page. If you're looking for conceptual and practical namespace guidance before diving into method signatures, see -[Namespaces and Catalog Model](/namespaces) and [Using Namespaces in SDKs](/tables/namespaces). +[Namespaces and Catalog Model](/namespaces) and [Using Namespaces in SDKs](/namespaces/usage). ## Supported SDKs @@ -46,4 +46,3 @@ for users working in languages other than those listed above. 
| [Swift](https://github.com/RyanLisse/LanceDbSwiftKit) | Community-contributed Swift SDK for LanceDB | | [R](https://github.com/CathalByrneGit/lancedb) | Community-contributed R package for LanceDB | | [Flutter](https://github.com/Alexcn/flutter_lancedb) | Community-contributed Flutter bindings for LanceDB | - diff --git a/docs/cloud/get-started.mdx b/docs/cloud/get-started.mdx index e8294a1..89cd326 100644 --- a/docs/cloud/get-started.mdx +++ b/docs/cloud/get-started.mdx @@ -39,7 +39,7 @@ import numpy as np import pyarrow as pa import os -# Connect to LanceDB Cloud/Enterprise +# Connect to LanceDB Enterprise uri = "db://your-database-uri" api_key = "your-api-key" region = "us-east-1" @@ -59,7 +59,7 @@ db = lancedb.connect( import { connect, Index, Table } from '@lancedb/lancedb'; import { FixedSizeList, Field, Float32, Schema, Utf8 } from 'apache-arrow'; -// Connect to LanceDB Cloud/Enterprise +// Connect to LanceDB Enterprise const dbUri = process.env.LANCEDB_URI || 'db://your-database-uri'; const apiKey = process.env.LANCEDB_API_KEY; const region = process.env.LANCEDB_REGION; @@ -273,7 +273,7 @@ console.log('Successfully created table'); After creating a table with vector data, you'll want to create an index to enable fast similarity searches. The index creation process optimizes the data structure for efficient vector similarity lookups, significantly improving query performance for large datasets. -Unlike in LanceDB OSS, the `create_index`/`createIndex` operation executes **asynchronously** in LanceDB Cloud/Enterprise. To ensure the index is fully built, you can use the `wait_timeout` parameter or call `wait_for_index` on the table. +Unlike in LanceDB OSS, the `create_index`/`createIndex` operation executes **asynchronously** in LanceDB Enterprise. To ensure the index is fully built, you can use the `wait_timeout` parameter or call `wait_for_index` on the table. @@ -373,6 +373,6 @@ console.log(filteredResults); ## What's Next? 
-It's time to use LanceDB Cloud/Enterprise in your own projects! +It's time to use LanceDB Enterprise in your own projects! We've prepared more [tutorials](/tutorials/) for you to continue learning. If you have any questions, reach out via [Discord](https://discord.gg/AUEWnJ7Txb). diff --git a/docs/docs.json b/docs/docs.json index 919fac4..f4e0cce 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -40,17 +40,17 @@ "pages": [ "index", "lance", - "namespaces" + "tables-and-namespaces" ] }, { "group": "LanceDB Enterprise", "pages": [ "enterprise/index", - "enterprise/features", + "enterprise/quickstart", "enterprise/architecture", - "enterprise/benchmarks", "enterprise/security", + "enterprise/benchmarks", { "group": "Deployment", "pages": [ @@ -70,21 +70,34 @@ ] }, { - "group": "User Guide", + "group": "Guides", "pages": [ { - "group": "Working with tables", + "group": "Table operations", "pages": [ "tables/index", "tables/create", "tables/multimodal", "tables/schema", "tables/update", - "tables/namespaces", "tables/versioning", "tables/consistency" ] }, + { + "group": "Namespaces", + "pages": [ + "namespaces/index", + "namespaces/usage" + ] + }, + { + "group": "Embeddings", + "pages": [ + "embedding/index", + "embedding/quickstart" + ] + }, { "group": "Indexing", "pages": [ @@ -124,13 +137,6 @@ "reranking/eval" ] }, - { - "group": "Embeddings", - "pages": [ - "embedding/index", - "embedding/quickstart" - ] - }, { "group": "Storage", "pages": [ diff --git a/docs/embedding/index.mdx b/docs/embedding/index.mdx index ac89562..5fe0e0d 100644 --- a/docs/embedding/index.mdx +++ b/docs/embedding/index.mdx @@ -50,8 +50,6 @@ while Rust examples typically compute query embeddings explicitly before vector ### Using an embedding function -Python SDK - In the Python SDK, the `.create()` method accepts several arguments to configure embedding function behavior. `max_retries` is a special argument that applies to all providers. 
| Argument | Type | Description | diff --git a/docs/enterprise/features.mdx b/docs/enterprise/features.mdx deleted file mode 100644 index e7bda93..0000000 --- a/docs/enterprise/features.mdx +++ /dev/null @@ -1,79 +0,0 @@ ---- -title: "LanceDB Enterprise vs OSS" -sidebarTitle: "Differentiating features" -description: "Key benefits and differentiating features of LanceDB Enterprise over LanceDB OSS." -icon: "grip-vertical" ---- - -Modern AI workloads produce petabytes of multimodal data that must be queried in real time. On top of that, enterprise AI systems must stay completely private and air-gapped. - -LanceDB offers two self-hosted options to meet such requirements: LanceDB OSS, a single-process library, and LanceDB Enterprise, a distributed cluster with automated scaling and low-latency caching. - -This document compares their architectures and operational models so you can select the deployment that meets your performance targets and resource constraints. - -## Differentiating features - -LanceDB Enterprise is a distributed cluster that spans many machines (unlike LanceDB OSS, which is an embedded database that runs inside your process). Both are built on top of the same Lance columnar file format, so moving data from one edition to the other requires no conversion. - -| Dimension | LanceDB OSS | LanceDB Enterprise | What the difference means | -|:----------|:------------|:-------------------|:-------------------------| -| **Mode** | Single process | Distributed fleet | OSS lives on one host. Enterprise spreads work across nodes and keeps serving even if one node fails. | -| **Latency from object storage** | 500–1000 ms | 50–200 ms | Enterprise mitigates network delay with an SSD cache and parallel reads. | -| **Throughput** | 10–50 QPS | Up to 10,000 QPS | A cluster can serve thousands of concurrent users; a single process cannot. | -| **Cache** | None | Distributed NVMe cache | Enterprise keeps hot data near compute and avoids repeated S3 calls. 
| -| **Indexing & compaction** | Manual | Automatic | Enterprise runs background jobs that rebuild and compact data without downtime. | -| **Data format** | Supports multiple available standards | Supports multiple available standards | No vendor lock-in; data moves freely between editions. | -| **Deployment** | Embedded in your code | Self-managed or Managed Service | Enterprise meets uptime, compliance, and support goals that OSS cannot. | - -### Architecture and scale - -LanceDB OSS is directly embedded into your service. The process owns all CPU, memory, and storage, so scale is limited to what the host can provide. -LanceDB Enterprise separates work into routers, execution nodes, and background workers. New nodes join the cluster through a discovery service; they register, replicate metadata, and begin answering traffic without a restart. A distributed control plane watches node health, shifts load away from unhealthy nodes, and enforces consensus rules that prevent split-brain events. - -Read More: [LanceDB Enterprise Architecture](/enterprise/architecture/) - -### Latency of data retrieval - -With Lance OSS every query fetches data from S3, GCS, or Azure Blob. Each round trip to an object store adds several hundred milliseconds, especially when data is cold. - -LanceDB Enterprise uses NVMe SSDs as a hybrid cache, before the data store is even accessed. The first read fills the cache, and subsequent reads come from the local disk and return in tens of milliseconds. Parallel chunked reads further reduce tail latency. This gap matters when the application serves interactive dashboards or real-time recommendations. - -Read More: [LanceDB Enterprise Performance](/enterprise/benchmarks/) - -### Throughput of search queries - -A single LanceDB OSS process shares one CPU pool with the rest of the application. When concurrent queries hit that CPU, retrieval and similarity processes compete for cores. 
The server cannot process more work in parallel and any extra traffic waits in the queue, raising latency without increasing queries per second. - -LanceDB Enterprise distributes queries across many execution nodes. Each node runs a dedicated vector search engine that exploits all cores and uses SIMD instructions. A load balancer assigns queries to the least-loaded node, so throughput grows roughly linearly as more nodes join the cluster. - -### Caching of commonly retrieved data - -LanceDB OSS has no built-in cache. Every read repeats the same object-store round trip and pays the same latency penalty. - -LanceDB Enterprise shards a cache across the fleet with consistent hashing. Popular vectors remain on local NVMe drives until they age out under a least-recently-used policy. Cache misses fall back to the object store, fill the local shard, and serve future reads faster. This design slashes both latency and egress cost for workloads with temporal locality. - -### Maintenance of vector indexes - -Vector indexes fragment when data is inserted, updated, or deleted. Fragmentation slows queries because the engine must scan more blocks. LanceDB OSS offers a CLI call to compact or rebuild the index, but you must schedule it and stop queries while it runs. - -LanceDB Enterprise runs compaction jobs in the background. It copies data to a scratch space, rebuilds the index, swaps the old files atomically, and frees disk space. Production traffic continues uninterrupted. - -Read More: [Indexing in LanceDB](/indexing/) - -### Deployment and governance - -When you work with LanceDB OSS, it is included as part of your binary, Docker, or serverless function. The footprint is small, and no extra services run beside it. - -LanceDB Enterprise comes in two flavors. The self-managed template installs the deployment inside your VPC, so data never leaves your account. The managed SaaS option hands day-to-day operations to the vendor, including patching, scaling, and 24×7 monitoring. 
Both enterprise modes support private networking, role-based access control, audit logs, and single sign-on. - -Read More: [LanceDB Enterprise Performance](/enterprise/deployment/) - -## Which option is best? - -LanceDB OSS makes sense when the entire dataset fits on one machine, daily traffic remains under fifty queries per second, and your team can run manual maintenance without affecting users. - -[It's very simple to get started with OSS](/quickstart/): Get started with `pip install lancedb` and begin ingesting your data and vectors into LanceDB. - -Move to LanceDB Enterprise when you have petabyte-scale data, or you need latency to be below 200 ms, or you need higher query throughput towards thousands of QPS, or your business requires high availability, compliance controls, and vendor support. - -If these sound like your use cases, [reach out via this form](https://lancedb.com/contact/) and we can help you scope your workload and arrange an Enterprise proof of concept. diff --git a/docs/enterprise/index.mdx b/docs/enterprise/index.mdx index 94ba50c..b86689f 100644 --- a/docs/enterprise/index.mdx +++ b/docs/enterprise/index.mdx @@ -5,16 +5,16 @@ description: "Features and benefits of LanceDB Enterprise." icon: "server" --- -**LanceDB Enterprise** is both a **private cloud or a BYOC solution** that transforms your data lake into -a high-performance vector database or lakehouse that can operate at extreme scale. +**LanceDB Enterprise** is a private cloud or a bring-your-own-cloud (BYOC) solution that transforms your data lake into +a high-performance **multimodal lakehouse** that can operate at extreme scale. 
-With a vector database built for [lakehouse architecture](/enterprise/architecture), you can serve millions of tables and tens +With its [lakehouse architecture](/enterprise/architecture), you can serve millions of tables and tens of billions of rows in a single index, improve retrieval quality using hybrid search with blazing-fast metadata filters, and reduce costs by up to 200x with object storage. -For private deployments, high performance at extreme scale, or if you have strict security requirements, -[reach out to us](mailto:contact@lancedb.com) regarding LanceDB Enterprise. +If you need private deployments, high performance at extreme scale, or if you have strict security requirements, +[reach out to our team](mailto:contact@lancedb.com) to set up a LanceDB Enterprise cluster in your environment. ## Key benefits of LanceDB Enterprise @@ -53,4 +53,69 @@ changes or data migration required! | **Effortless Migration** | Migrate from Open Source LanceDB to LanceDB Enterprise by simply using a connection URL. | | **Observability** | First-class integration with existing observability systems for logging, monitoring, and distributed traces using OpenTelemetry. | -Take a look at a more thorough [list of benefits of LanceDB Enterprise](/enterprise/features). \ No newline at end of file +## How is LanceDB Enterprise different from OSS? + +LanceDB Enterprise is a distributed cluster that spans many machines (unlike LanceDB OSS, which is an embedded database that runs inside your process). Both are built on top of the same Lance columnar file format, so moving data from one edition to the other requires no conversion. + +| Dimension | LanceDB OSS | LanceDB Enterprise | What the difference means | +|:----------|:------------|:-------------------|:-------------------------| +| **Mode** | Single process | Distributed fleet | OSS lives on one host. Enterprise spreads work across nodes and keeps serving even if one node fails. 
| +| **Latency from object storage** | 500–1000 ms | 50–200 ms | Enterprise mitigates network delay with an SSD cache and parallel reads. | +| **Throughput** | 10–50 QPS | Up to 10,000 QPS | A cluster can serve thousands of concurrent users; a single process cannot. | +| **Cache** | None | Distributed NVMe cache | Enterprise keeps hot data near compute and avoids repeated S3 calls. | +| **Indexing & compaction** | Manual | Automatic | Enterprise runs background jobs that rebuild and compact data without downtime. | +| **Data format** | Supports multiple available standards | Supports multiple available standards | No vendor lock-in; data moves freely between editions. | +| **Deployment** | Embedded in your code | Self-managed or Managed Service | Enterprise meets uptime, compliance, and support goals that OSS cannot. | + +### Architecture and scale + +LanceDB OSS is directly embedded into your service. The process owns all CPU, memory, and storage, so scale is limited to what the host can provide. +LanceDB Enterprise separates work into routers, execution nodes, and background workers. New nodes join the cluster through a discovery service; they register, replicate metadata, and begin answering traffic without a restart. A distributed control plane watches node health, shifts load away from unhealthy nodes, and enforces consensus rules that prevent split-brain events. + +Read More: [LanceDB Enterprise Architecture](/enterprise/architecture/) + +### Latency of data retrieval + +With Lance OSS every query fetches data from S3, GCS, or Azure Blob. Each round trip to an object store adds several hundred milliseconds, especially when data is cold. + +LanceDB Enterprise uses NVMe SSDs as a hybrid cache, before the data store is even accessed. The first read fills the cache, and subsequent reads come from the local disk and return in tens of milliseconds. Parallel chunked reads further reduce tail latency. 
This gap matters when the application serves interactive dashboards or real-time recommendations. + +Read More: [LanceDB Enterprise Performance](/enterprise/benchmarks/) + +### Throughput of search queries + +A single LanceDB OSS process shares one CPU pool with the rest of the application. When concurrent queries hit that CPU, retrieval and similarity processes compete for cores. The server cannot process more work in parallel and any extra traffic waits in the queue, raising latency without increasing queries per second. + +LanceDB Enterprise distributes queries across many execution nodes. Each node runs a dedicated vector search engine that exploits all cores and uses SIMD instructions. A load balancer assigns queries to the least-loaded node, so throughput grows roughly linearly as more nodes join the cluster. + +### Caching of commonly retrieved data + +LanceDB OSS has no built-in cache. Every read repeats the same object-store round trip and pays the same latency penalty. + +LanceDB Enterprise shards a cache across the fleet with consistent hashing. Popular vectors remain on local NVMe drives until they age out under a least-recently-used policy. Cache misses fall back to the object store, fill the local shard, and serve future reads faster. This design slashes both latency and egress cost for workloads with temporal locality. + +### Maintenance of vector indexes + +Vector indexes fragment when data is inserted, updated, or deleted. Fragmentation slows queries because the engine must scan more blocks. LanceDB OSS offers a CLI call to compact or rebuild the index, but you must schedule it and stop queries while it runs. + +LanceDB Enterprise runs compaction jobs in the background. It copies data to a scratch space, rebuilds the index, swaps the old files atomically, and frees disk space. Production traffic continues uninterrupted. 
+ +Read More: [Indexing in LanceDB](/indexing/) + +### Deployment and governance + +When you work with LanceDB OSS, it is included as part of your binary, Docker, or serverless function. The footprint is small, and no extra services run beside it. + +LanceDB Enterprise comes in two flavors. The self-managed template installs the deployment inside your VPC, so data never leaves your account. The managed SaaS option hands day-to-day operations to the vendor, including patching, scaling, and 24×7 monitoring. Both enterprise modes support private networking, role-based access control, audit logs, and single sign-on. + +Read More: [LanceDB Enterprise Deployment](/enterprise/deployment/) + +## Which option is best? + +LanceDB OSS makes sense when the entire dataset fits on one machine, daily traffic remains under fifty queries per second, and your team can run manual maintenance without affecting users. + +[It's very simple to get started with OSS](/quickstart/): Get started with `pip install lancedb` and begin ingesting your data and vectors into LanceDB. + +Move to LanceDB Enterprise when you have petabyte-scale data, or you need latency to be below 200 ms, or you need higher query throughput towards thousands of QPS, or your business requires high availability, compliance controls, and vendor support. + +If these sound like your use cases, [reach out via this form](https://lancedb.com/contact/) and we can help you scope your workload and arrange an Enterprise proof of concept. diff --git a/docs/enterprise/quickstart.mdx b/docs/enterprise/quickstart.mdx new file mode 100644 index 0000000..7647062 --- /dev/null +++ b/docs/enterprise/quickstart.mdx @@ -0,0 +1,224 @@ +--- +title: "Enterprise Quickstart" +sidebarTitle: "Quickstart" +description: "Run the LanceDB quickstart workflow on a RemoteTable in LanceDB Enterprise." 
+icon: "server" +--- + +import { + PyConnectEnterpriseQuickstart, + TsConnectEnterpriseQuickstart, + RsConnectEnterpriseQuickstart, +} from '/snippets/connection.mdx'; +import { + PyQuickstartCreateTable, + PyQuickstartVectorSearch1, + PyQuickstartOpenTable, + PyQuickstartAddData, + PyQuickstartVectorSearch2, + TsQuickstartCreateTable, + TsQuickstartVectorSearch1, + TsQuickstartOpenTable, + TsQuickstartAddData, + TsQuickstartVectorSearch2, + RsQuickstartDefineStruct, + RsQuickstartCreateTable, + RsQuickstartVectorSearch1, + RsQuickstartOpenTable, + RsQuickstartAddData, + RsQuickstartVectorSearch2, +} from '/snippets/quickstart.mdx'; + +This quickstart follows a similar workflow as the [OSS quickstart](/quickstart), but uses a **`RemoteTable`** through a `db://...` connection. + + +To get a LanceDB Enterprise cluster setup and to obtain credentials and endpoint details, [contact our team](mailto:contact@lancedb.com) to get started. +This guide assumes your Enterprise cluster is already running. + + +## 1. Install LanceDB + + +```bash Python icon=Python +pip install lancedb +``` + +```bash TypeScript icon=js +npm install @lancedb/lancedb +``` + +```bash Rust icon=Rust +cargo add lancedb +``` + + +## 2. Connect to Enterprise (`db://...`) + + + + { "import lancedb\n\n" } + {PyConnectEnterpriseQuickstart} + + + + { "import * as lancedb from \"@lancedb/lancedb\";\n\n" } + {TsConnectEnterpriseQuickstart} + + + + { "use lancedb::connect;\n\n" } + {RsConnectEnterpriseQuickstart} + + + +## 3. Create a table (same sample data as the OSS quickstart) + + + + {PyQuickstartCreateTable} + + + + {TsQuickstartCreateTable} + + + + {RsQuickstartDefineStruct} + {RsQuickstartCreateTable} + + + +## 4. Run vector search + + + + {PyQuickstartVectorSearch1} + + + + {TsQuickstartVectorSearch1} + + + + {RsQuickstartVectorSearch1} + + + +## 5. 
Open table, add data, and query again + + + + {PyQuickstartOpenTable} + {PyQuickstartAddData} + {PyQuickstartVectorSearch2} + + + + {TsQuickstartOpenTable} + {TsQuickstartAddData} + {TsQuickstartVectorSearch2} + + + + { "use lancedb::table::Table;\n\n" } + {RsQuickstartOpenTable} + {RsQuickstartAddData} + {RsQuickstartVectorSearch2} + + + +## Differences between Enterprise and OSS usage + +As can be seen, the flow for working with a `RemoteTable` in Enterprise looks more or less +similar to the [OSS quickstart](/quickstart). However, there are some semantic differences: + +### 1. Connection model + +In LanceDB Enterprise, your app connects via a `db://...` URI and sends requests to the cluster API. The cluster executes table operations on your behalf. +Your code is coupled to a **managed service endpoint** (whereas in OSS, your code is directly coupled to storage paths). + +### 2. Returned table type + +Connecting to an Enterprise table via `open_table(...)` returns a `RemoteTable`, unlike in OSS, which returns a `LanceTable`. + +### 3. Materialization APIs + +For Python users working with LanceDB Enterprise, `RemoteTable` does not support table-level +materialization methods like `table.to_arrow()` or `table.to_pandas()`. This is to protect +users from accidentally materializing tables that are too large to fit in memory. + +Instead, you materialize results through query/search builders, for example `table.search(...).limit(...).to_pandas()` or `table.query(...).to_arrow()`. For quick previews, you can use `table.head()`. + +### 4. Maintenance lifecycle + +In Enterprise, maintenance operations like `optimize`, `compact_files` are handled by the cluster as background work. You can trigger them manually, but they are not required for performance or correctness in the same way they are in OSS. + +That means maintenance is managed by platform behavior and cluster configuration, not by explicit per-table maintenance calls in your application code. + +### 5. 
Guardrails and limits + +Enterprise can enforce platform-level guardrails, such as index/table limits and safety checks around operations like `merge_insert` when too many rows are unindexed. OSS mostly exposes storage/format-level behavior, and you tune many lifecycle tasks yourself. + +This means an operation in LanceDB Enterprise can fail due to service-level policy, not just because of local table shape or schema mismatch. + +### 6. Cluster-managed background work + +In Enterprise, async writes and reindexing workflows are handled by cluster background systems. In OSS, if you want ongoing upkeep, you usually schedule and run it yourself in your application or jobs. + +In practice, your app issues table operations, and the platform handles distributed orchestration for maintenance and indexing in the background. + + +As a rule of thumb, all you need to remember with regard to LanceDB Enterprise is this: treat `db://...` as a remote service boundary, use query builders to fetch results, and otherwise interact with your tables as you would in OSS. + + +## Advanced usage via namespace-backed connections + +LanceDB Enterprise also supports namespace-backed catalog connections. This allows you to resolve tables by namespace, rather than by direct URI, and is accessed via the REST connection mode of `connect_namespace(...)`. + +```py Python icon=Python +import os +import lancedb + +ns_db = lancedb.connect_namespace( + "rest", + { + "uri": "https://your-enterprise-endpoint.com", + "headers.Authorization": f"Bearer {os.environ['CATALOG_TOKEN']}", + }, +) + +# Namespace-scoped table resolution +table = ns_db.open_table("adventurers", namespace=["prod", "search"]) +``` + +This mode is useful when table location resolution and credential vending are handled by an external catalog/namespace service. 
+ +If you want to stick to a common table flow, start with the `db://` RemoteTable flow shown above. + +## Further reading + +You can learn more about table operations, namespaces, and the architecture of LanceDB Enterprise in the following guides. + + + + Build on this quickstart with table creation, updates, and schema tips. + + + Learn how to use namespaces in LanceDB, and connect to an Enterprise namespace via REST. + + + Learn about the architecture of LanceDB Enterprise and how it achieves high performance at scale. + + \ No newline at end of file diff --git a/docs/index.mdx b/docs/index.mdx index c9eea00..840bcee 100644 --- a/docs/index.mdx +++ b/docs/index.mdx @@ -44,7 +44,7 @@ video and point cloud data, to gain insights and inform model development. Depending on your needs, you can choose one of three ways to run LanceDB. -### LanceDB OSS +### 1. LanceDB OSS The fastest way to get started is the open-source embedded library, with client SDKs in Python, TypeScript and Rust. Run it locally during development, then use the same data model and APIs as you scale up and need a managed solution. Start here: @@ -66,13 +66,13 @@ and need a managed solution. Start here: -### LanceDB Enterprise +### 2. LanceDB Enterprise [LanceDB Enterprise](/enterprise) is a distributed and managed **multimodal lakehouse** built for search, exploratory data analysis, feature engineering, and training-oriented data access workflows on top of the same core table abstraction. This eliminates the need for teams to build bespoke infrastructure to manage petabyte-scale multimodal datasets. -To get started, reach out at [contact@lancedb.com](mailto:contact@lancedb.com). +To get started, reach out to us at [contact@lancedb.com](mailto:contact@lancedb.com). **Built with scale, performance, and security in mind.** @@ -81,11 +81,9 @@ LanceDB Enterprise is designed for very large-scale, high-performance, distribut private deployments, and can operate under strict security requirements. 
-### LanceDB Cloud +### 3. LanceDB Cloud -[LanceDB Cloud](/cloud) is a serverless, managed service for users who are more -focused on search use cases. You can easily create and manage projects in the Cloud UI, and -integrate via REST API or client SDKs (Python, TypeScript, Rust). +[LanceDB Cloud](/cloud) is a serverless, managed service for search use cases. It is much more limited in scope and functionality than the Enterprise version, so it's strongly recommended to check out LanceDB Enterprise if your usage needs revolve around large-scale AI data management for search, training, feature engineering or exploratory data analysis. Cloud Enterprise -LanceDB Cloud/Enterprise support incremental reindexing through an automated background process. When new data is added to a table, the system automatically triggers a new index build. As the dataset grows, indexes are asynchronously updated in the background. +LanceDB Enterprise supports incremental reindexing through an automated background process. When new data is added to a table, the system automatically triggers a new index build. As the dataset grows, indexes are asynchronously updated in the background. - While indexes are being rebuilt, queries use brute force methods on unindexed rows, which may temporarily increase latency. To avoid this, set `fast_search=True` to search only indexed data. - Use `index_stats()` to view the number of unindexed rows. This will be zero when indexes are fully up-to-date. diff --git a/docs/indexing/vector-index.mdx b/docs/indexing/vector-index.mdx index 2dae75c..2514656 100644 --- a/docs/indexing/vector-index.mdx +++ b/docs/indexing/vector-index.mdx @@ -33,7 +33,7 @@ If using LanceDB OSS, you will have to create the vector index manually, by call ### Automatic Indexing Enterprise-only -Vector indexing is managed **automatically** in LanceDB Cloud/Enterprise. As soon as data is updated, the system updates the index and optimizates it. 
*This is done asynchronously as a background process*. +Vector indexing is managed **automatically** in LanceDB Enterprise. As soon as data is updated, the system updates the index and optimizes it. *This is done asynchronously as a background process*. When you create a table in LanceDB Enterprise, LanceDB automatically: diff --git a/docs/lance.mdx b/docs/lance.mdx index 0f5faa0..88c2672 100644 --- a/docs/lance.mdx +++ b/docs/lance.mdx @@ -6,28 +6,23 @@ icon: "/static/assets/logo/lance-logo-gray.svg" --- [Lance](https://lance.org/) is an open-source lakehouse format, which provides the -foundation for LanceDB's capabilities. Lance combines the performance of Apache Arrow with advanced -features designed specifically for AI workloads. +foundation for LanceDB's capabilities. It provides a file format, +table format, and catalog spec with multimodal data at the center of its design, allowing developers +to build a complete open lakehouse on top of object storage. + +Building on top of open foundations and optimizing the format for AI workloads brings +high-performance vector search, full-text search, random access, and feature engineering capabilities +to a single unified system ([LanceDB](/enterprise)), eliminating the need for bespoke ETL and data pipelines that move data +to multiple other specialized data systems. - Learn more about the Lance format by reading the docs. + Visit the Lance format documentation to learn more about its design, features, and how it enables the multimodal lakehouse. -## How Lance Enables the Multimodal Lakehouse - -Lance is a file format, table format, and catalog spec for multimodal AI, allowing developers to build a -complete open lakehouse on top of object storage to power AI workflows. The format brings -high-performance vector search, full-text search, random access, and feature engineering capabilities -to a single unified system, eliminating the need for multiple specialized databases. 
- -Unlike traditional vector databases that only store embeddings alongside the metadata, LanceDB's -multimodal lakehouse stores both the original data (including image, video or audio bytes) -and its vector representations alongside traditional tabular data in the same efficient format. - ## Advantages of the Lance format Advantage | Description @@ -44,10 +39,10 @@ The following concepts are core to the Lance format: - Data storage is **columnar** and is **interoperable** with other columnar formats (such as Parquet) via Arrow + **Arrow-native, columnar storage** and **interoperability** with the open lakehouse ecosystem (including other file formats and compute engines). - Data is divided into **fragments** that represent a subset of the data. Fragments are chunks of data in a Lance dataset. Each fragment includes multiple files that contain several columns in the chunk of data that it represents. + **Zero-copy** data evolution, meaning you can easily add derived columns (like features or embeddings) at a later time, **without full table rewrites**. Only new data is written; expensive existing data (like images/videos) remain untouched. Data is **versioned**, with each insert operation creating a new version of the dataset and an update to the manifest that tracks versions via metadata diff --git a/docs/namespaces.mdx b/docs/namespaces/index.mdx similarity index 97% rename from docs/namespaces.mdx rename to docs/namespaces/index.mdx index 2a99c95..b5a3244 100644 --- a/docs/namespaces.mdx +++ b/docs/namespaces/index.mdx @@ -1,6 +1,6 @@ --- title: "Namespaces and the Catalog Model" -sidebarTitle: "Namespaces" +sidebarTitle: "Overview" description: "Understand LanceDB as a catalog-level abstraction over Lance's table format, and learn how namespaces help organize Lance tables." 
icon: "sitemap" keywords: ["namespace", "catalog", "lance format", "table format", "lancedb"] @@ -101,7 +101,7 @@ db = lancedb.connect_namespace( ``` [LanceDB Enterprise](/enterprise) operates a REST namespace server on top of the Lance format, so any REST client that can speak the REST namespace API contract can be used to interact with it. For authentication examples in LanceDB Enterprise, visit -the [Namespaces in SDKs](/tables/namespaces#namespaces-in-lancedb-enterprise) page. +the [Namespaces in SDKs](/namespaces/usage#namespaces-in-lancedb-enterprise) page. ## Best practices @@ -109,4 +109,4 @@ Below, we list some best practices for working with namespaces: - For simple use cases and single, stand-alone applications, the directory-based root namespace is sufficient and requires no special configuration. - For remote storage locations, introduce explicit namespaces when multiple teams, environments, or domains share the same catalog. - Treat namespace paths as stable identifiers (for example `"prod/search"`, `"staging/recs"`). -- For maintainability reasons, avoid hard-coding object-store table paths in application code -- instead, prefer catalog identifiers + namespaces. \ No newline at end of file +- For maintainability reasons, avoid hard-coding object-store table paths in application code -- instead, prefer catalog identifiers + namespaces. diff --git a/docs/tables/namespaces.mdx b/docs/namespaces/usage.mdx similarity index 99% rename from docs/tables/namespaces.mdx rename to docs/namespaces/usage.mdx index 386e930..cfcb674 100644 --- a/docs/tables/namespaces.mdx +++ b/docs/namespaces/usage.mdx @@ -1,6 +1,6 @@ --- title: "Using Namespaces" -sidebarTitle: "Namespaces" +sidebarTitle: "Using namespaces" description: "Use LanceDB's namespace-aware table and catalog APIs in Python, TypeScript, and Rust." 
icon: "folder-tree" keywords: ["namespace", "create_table", "open_table", "list_tables", "catalog"] diff --git a/docs/snippets/connection.mdx b/docs/snippets/connection.mdx index 781b80a..72fe9c2 100644 --- a/docs/snippets/connection.mdx +++ b/docs/snippets/connection.mdx @@ -8,6 +8,8 @@ export const PyConnectCloud = "uri = \"db://your-database-uri\"\napi_key = \"you export const PyConnectCloudAsync = "uri = \"db://your-database-uri\"\napi_key = \"your-api-key\"\nregion = \"us-east-1\"\n"; +export const PyConnectEnterpriseQuickstart = "uri = \"db://your-database-uri\"\napi_key = \"your-api-key\"\nregion = \"us-east-1\"\nhost_override = \"https://your-enterprise-endpoint.com\"\n\ndb = lancedb.connect(\n uri=uri,\n api_key=api_key,\n region=region,\n host_override=host_override,\n)\n"; + export const PyConnectObjectStorage = "import lancedb\n\nuri = \"s3://your-bucket/path\"\n# You can also use \"gs://your-bucket/path\" or \"az://your-container/path\".\ndb = lancedb.connect(uri)\n"; export const PyConnectObjectStorageAsync = "import lancedb\n\nuri = \"s3://your-bucket/path\"\n# You can also use \"gs://your-bucket/path\" or \"az://your-container/path\".\nasync_db = await lancedb.connect_async(uri)\n"; @@ -20,12 +22,16 @@ export const TsConnect = "import * as lancedb from \"@lancedb/lancedb\";\n\nasyn export const TsConnectCloud = "const uri = \"db://your-database-uri\";\nconst apiKey = \"your-api-key\";\nconst region = \"us-east-1\";\n"; +export const TsConnectEnterpriseQuickstart = "const uri = \"db://your-database-uri\";\nconst apiKey = \"your-api-key\";\nconst region = \"us-east-1\";\nconst hostOverride = \"https://your-enterprise-endpoint.com\";\n\nconst db = await lancedb.connect(uri, {\n apiKey,\n region,\n hostOverride,\n});\n"; + export const TsConnectObjectStorage = "async function connectObjectStorageExample() {\n const uri = \"s3://your-bucket/path\";\n // You can also use \"gs://your-bucket/path\" or \"az://your-container/path\".\n const db = await 
lancedb.connect(uri);\n return db;\n}\n"; export const RsConnect = "async fn connect_example(uri: &str) {\n let db = connect(uri).execute().await.unwrap();\n let _ = db;\n}\n"; export const RsConnectCloud = "let uri = \"db://your-database-uri\";\nlet api_key = \"your-api-key\";\nlet region = \"us-east-1\";\n"; +export const RsConnectEnterpriseQuickstart = "let uri = \"db://your-database-uri\";\nlet api_key = \"your-api-key\";\nlet region = \"us-east-1\";\nlet host_override = \"https://your-enterprise-endpoint.com\";\n"; + export const RsConnectObjectStorage = "let uri = \"s3://your-bucket/path\";\n// You can also use \"gs://your-bucket/path\" or \"az://your-container/path\".\n"; export const RsNamespaceAdminOps = "let mut properties = std::collections::HashMap::new();\nproperties.insert(\"root\".to_string(), \"./local_lancedb\".to_string());\nlet db = lancedb::connect_namespace(\"dir\", properties).execute().await?;\nlet namespace = vec![\"prod\".to_string(), \"search\".to_string()];\n\ndb.create_namespace(lancedb::database::CreateNamespaceRequest {\n namespace: vec![\"prod\".to_string()],\n})\n.await?;\ndb.create_namespace(lancedb::database::CreateNamespaceRequest {\n namespace: namespace.clone(),\n})\n.await?;\n\nlet child_namespaces = db\n .list_namespaces(lancedb::database::ListNamespacesRequest {\n namespace: vec![\"prod\".to_string()],\n ..Default::default()\n })\n .await?;\nprintln!(\n \"Child namespaces under {:?}: {:?}\",\n namespace, child_namespaces\n);\n// Child namespaces under [\"prod\", \"search\"]: [\"search\"]\n\ndb.drop_namespace(lancedb::database::DropNamespaceRequest {\n namespace: namespace.clone(),\n})\n.await?;\ndb.drop_namespace(lancedb::database::DropNamespaceRequest {\n namespace: vec![\"prod\".to_string()],\n})\n.await?;\n"; diff --git a/docs/snippets/embedding.mdx b/docs/snippets/embedding.mdx index 3ce10c3..05ce787 100644 --- a/docs/snippets/embedding.mdx +++ b/docs/snippets/embedding.mdx @@ -30,7 +30,7 @@ export const TsRegisterSecret 
= "const registry = getRegistry();\nregistry.setVa export const RsEmbeddingFunction = "use std::{borrow::Cow, sync::Arc};\n\nuse arrow_array::{Array, FixedSizeListArray, Float32Array};\nuse arrow_schema::{DataType, Field, Schema};\nuse lancedb::{\n connect,\n embeddings::{EmbeddingDefinition, EmbeddingFunction},\n Result,\n};\n\n#[derive(Debug, Clone)]\nstruct MyTextEmbedder {\n dim: usize,\n}\n\nimpl EmbeddingFunction for MyTextEmbedder {\n fn name(&self) -> &str {\n \"my-embedder\"\n }\n\n fn source_type(&self) -> Result> {\n Ok(Cow::Owned(DataType::Utf8))\n }\n\n fn dest_type(&self) -> Result> {\n Ok(Cow::Owned(DataType::new_fixed_size_list(\n DataType::Float32,\n self.dim as i32,\n true,\n )))\n }\n\n fn compute_source_embeddings(&self, source: Arc) -> Result> {\n let values = Arc::new(Float32Array::from(vec![1.0f32; source.len() * self.dim]));\n let field = Arc::new(Field::new(\"item\", DataType::Float32, true));\n Ok(Arc::new(FixedSizeListArray::new(\n field,\n self.dim as i32,\n values,\n None,\n )))\n }\n\n fn compute_query_embeddings(&self, _input: Arc) -> Result> {\n unimplemented!()\n }\n}\n\n#[tokio::main]\nasync fn main() -> Result<()> {\n let db = connect(\"./mydb\").execute().await?;\n db.embedding_registry()\n .register(\"my-embedder\", Arc::new(MyTextEmbedder { dim: 3 }))?;\n\n let schema = Arc::new(Schema::new(vec![Field::new(\"text\", DataType::Utf8, false)]));\n db.create_empty_table(\"mytable\", schema)\n .add_embedding(EmbeddingDefinition::new(\n \"text\",\n \"my-embedder\",\n Some(\"vector\"),\n ))?\n .execute()\n .await?;\n\n Ok(())\n}\n"; -export const RsManualQueryEmbeddings = "use std::{iter::once, sync::Arc};\n\nuse arrow_array::{record_batch, StringArray};\nuse arrow_schema::{DataType, Field, Schema};\nuse futures::StreamExt;\nuse lancedb::{\n connect,\n embeddings::{openai::OpenAIEmbeddingFunction, EmbeddingDefinition, EmbeddingFunction},\n query::{ExecutableQuery, QueryBase},\n Result,\n};\n\n#[tokio::main]\nasync fn main() -> 
Result<()> {\n let db = connect(\"./mydb\").execute().await?;\n let api_key = std::env::var(\"OPENAI_API_KEY\").expect(\"OPENAI_API_KEY is not set\");\n let embedding = Arc::new(OpenAIEmbeddingFunction::new_with_model(\n api_key,\n \"text-embedding-3-large\",\n )?);\n db.embedding_registry().register(\"openai\", embedding.clone())?;\n\n let schema = Arc::new(Schema::new(vec![Field::new(\"text\", DataType::Utf8, false)]));\n let table = db\n .create_empty_table(\"mytable\", schema)\n .add_embedding(EmbeddingDefinition::new(\"text\", \"openai\", Some(\"vector\")))?\n .execute()\n .await?;\n\n table\n .add(record_batch!((\"text\", Utf8, [\"This is a test.\", \"Another example.\"]))?)\n .execute()\n .await?;\n\n // Manually generate embeddings for the query (Cloud/Enterprise path)\n let query = Arc::new(StringArray::from_iter_values(once(\"test example\")));\n let query_vector = embedding.compute_query_embeddings(query)?;\n // --8<-- [start:manual_query_search]\n // query_vector is assumed to already be generated by your embedding function\n let mut results = table.vector_search(query_vector)?.limit(5).execute().await?;\n\n while let Some(batch) = results.next().await {\n println!(\"{:?}\", batch?);\n }\n // --8<-- [end:manual_query_search]\n\n Ok(())\n}\n"; +export const RsManualQueryEmbeddings = "use std::{iter::once, sync::Arc};\n\nuse arrow_array::{record_batch, StringArray};\nuse arrow_schema::{DataType, Field, Schema};\nuse futures::StreamExt;\nuse lancedb::{\n connect,\n embeddings::{openai::OpenAIEmbeddingFunction, EmbeddingDefinition, EmbeddingFunction},\n query::{ExecutableQuery, QueryBase},\n Result,\n};\n\n#[tokio::main]\nasync fn main() -> Result<()> {\n let db = connect(\"./mydb\").execute().await?;\n let api_key = std::env::var(\"OPENAI_API_KEY\").expect(\"OPENAI_API_KEY is not set\");\n let embedding = Arc::new(OpenAIEmbeddingFunction::new_with_model(\n api_key,\n \"text-embedding-3-large\",\n )?);\n db.embedding_registry().register(\"openai\", 
embedding.clone())?;\n\n let schema = Arc::new(Schema::new(vec![Field::new(\"text\", DataType::Utf8, false)]));\n let table = db\n .create_empty_table(\"mytable\", schema)\n .add_embedding(EmbeddingDefinition::new(\"text\", \"openai\", Some(\"vector\")))?\n .execute()\n .await?;\n\n table\n .add(record_batch!((\"text\", Utf8, [\"This is a test.\", \"Another example.\"]))?)\n .execute()\n .await?;\n\n // Manually generate embeddings for the query (Enterprise path)\n let query = Arc::new(StringArray::from_iter_values(once(\"test example\")));\n let query_vector = embedding.compute_query_embeddings(query)?;\n // --8<-- [start:manual_query_search]\n // query_vector is assumed to already be generated by your embedding function\n let mut results = table.vector_search(query_vector)?.limit(5).execute().await?;\n\n while let Some(batch) = results.next().await {\n println!(\"{:?}\", batch?);\n }\n // --8<-- [end:manual_query_search]\n\n Ok(())\n}\n"; export const RsManualQuerySearch = "// query_vector is assumed to already be generated by your embedding function\nlet mut results = table.vector_search(query_vector)?.limit(5).execute().await?;\n\nwhile let Some(batch) = results.next().await {\n println!(\"{:?}\", batch?);\n}\n"; diff --git a/docs/static/assets/images/overview/understanding-tables.png b/docs/static/assets/images/overview/understanding-tables.png new file mode 100644 index 0000000..f61d904 Binary files /dev/null and b/docs/static/assets/images/overview/understanding-tables.png differ diff --git a/docs/tables-and-namespaces.mdx b/docs/tables-and-namespaces.mdx new file mode 100644 index 0000000..3d66f61 --- /dev/null +++ b/docs/tables-and-namespaces.mdx @@ -0,0 +1,43 @@ +--- +title: "Tables and Namespaces" +sidebarTitle: "Tables and Namespaces" +description: "Learn more about the table abstraction and namespaces in LanceDB." +icon: "table" +--- + +Despite its name, LanceDB is not a "database" in the traditional sense. 
It is a **Multimodal Lakehouse** built on Lance tables plus a catalog abstraction. +As you dive deeper into LanceDB, it helps to separate two ideas: +- A **table** is where your data lives and is queried. +- A **namespace** is how groups of tables are organized and resolved at the catalog level. + +## Understanding tables + +A table is the core data abstraction in LanceDB: a structured dataset with schema, indexes, and versioned updates. +What changes between deployments is how that table is addressed and accessed. + +The mental model below clarifies table types by connection mode: + +- **`LanceTable`**: direct table access (local path, `file://`, `s3://`, and similar object-store paths). This is the common mode in LanceDB OSS. +- **`RemoteTable`**: catalog-backed table access through a server/cluster (`db://...`). This is the mode you will use in LanceDB Enterprise/Cloud. + +![](/static/assets/images/overview/understanding-tables.png) + +From an application perspective, both expose a familiar table API: create/open tables, mutate rows, and query data. +The main difference is where resolution and execution happen (directly against storage vs through a remote catalog service). + +## Semantic difference between tables and namespaces + +The easiest way to think about this is: +- A **table** answers: "What data do I store and query?" +- A **namespace** answers: "Where does this table name live in my catalog hierarchy?" + +In other words, tables are data objects; namespaces are catalog objects. 
+ +| Concept | Scope | Owns | Typical operations | +| --- | --- | --- | --- | +| Table | Data layer | Schema, rows, indexes, versions | `create_table`, `open_table`, inserts/updates/deletes, search/query | +| Namespace | Catalog layer | Hierarchy of names, table grouping, table name resolution | `create_namespace`, `list_namespaces`, `drop_namespace`, table ops with `namespace` | + +For simple use cases where you have a relatively flat set of tables, you can ignore namespaces and just use table paths directly. +As your application needs evolve and your tables grow in number and complexity, you may move from table-centric thinking +to catalog-centric thinking. Check out the [Namespaces and the Catalog Model](/namespaces) guide to learn more. diff --git a/docs/tables/index.mdx b/docs/tables/index.mdx index 8f7e45c..241fbd6 100644 --- a/docs/tables/index.mdx +++ b/docs/tables/index.mdx @@ -154,7 +154,7 @@ applications, you'd generate these vectors from the raw text fields using a suit ## Connect to a database -### Option 1: Local database +### Option 1: Direct table access We start by connecting to a LanceDB database path. The example below uses a local path in LanceDB OSS. @@ -172,12 +172,12 @@ We start by connecting to a LanceDB database path. The example below uses a loca -### Option 2: Remote database - You can also connect LanceDB OSS directly to object storage. For credentials, endpoints, and provider-specific options, see [Configuring storage](/storage/configuration). -If you're using a managed LanceDB service on either LanceDB Cloud or Enterprise, you can connect using a `db://` URI, +### Option 2: Remote tables + +If you're using a managed LanceDB service on LanceDB [Enterprise](/enterprise) or LanceDB Cloud, you can connect using a `db://` URI, along with any necessary credentials. Simply replace the local path with a remote `uri` that points to where your data is stored, and you're ready to go.
@@ -195,10 +195,8 @@ that points to where your data is stored, and you're ready to go. -To learn more about LanceDB Enterprise, see the [Enterprise documentation](/enterprise). - -- When you connect to a remote URI (Cloud/Enterprise), `open_table(...)` returns a *remote* table. +- When you connect to a remote URI (Enterprise), `open_table(...)` returns a *remote* table. Remote tables support core operations (ingest, search, update, delete), but some convenience methods for bulk data export are not available. - In the Python SDK, `table.to_arrow()` and `table.to_pandas()` are not implemented for remote tables. diff --git a/lance-namespace b/lance-namespace deleted file mode 160000 index 505b547..0000000 --- a/lance-namespace +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 505b54764095b59e80c2cf0d542430e7f8ddff31 diff --git a/tests/py/test_connection.py b/tests/py/test_connection.py index 9221145..67803c1 100644 --- a/tests/py/test_connection.py +++ b/tests/py/test_connection.py @@ -42,6 +42,46 @@ async def connect_async_example(): # --8<-- [end:connect_cloud_async] +def connect_enterprise_quickstart_config(): + import lancedb + + # --8<-- [start:connect_enterprise_quickstart] + uri = "db://your-database-uri" + api_key = "your-api-key" + region = "us-east-1" + host_override = "https://your-enterprise-endpoint.com" + + db = lancedb.connect( + uri=uri, + api_key=api_key, + region=region, + host_override=host_override, + ) + # --8<-- [end:connect_enterprise_quickstart] + return db + + +def test_connect_enterprise_quickstart(monkeypatch): + import lancedb + + captured = {} + + def fake_connect(**kwargs): + captured.update(kwargs) + return object() + + monkeypatch.setattr(lancedb, "connect", fake_connect) + + db = connect_enterprise_quickstart_config() + assert db is not None + assert captured == { + "uri": "db://your-database-uri", + "api_key": "your-api-key", + "region": "us-east-1", + "host_override": "https://your-enterprise-endpoint.com", + } + + def 
connect_object_storage_config(): # --8<-- [start:connect_object_storage] import lancedb diff --git a/tests/rs/connection.rs b/tests/rs/connection.rs index f9e64de..735efce 100644 --- a/tests/rs/connection.rs +++ b/tests/rs/connection.rs @@ -20,6 +20,7 @@ async fn main() { // Keep the cloud snippet in this file, but don't run it in CI. let _ = connect_cloud_config(); + let _ = connect_enterprise_quickstart_config(); let _ = connect_object_storage_config(); } @@ -33,6 +34,21 @@ fn connect_cloud_config() -> (String, String, String) { (uri.to_string(), api_key.to_string(), region.to_string()) } +fn connect_enterprise_quickstart_config() -> (String, String, String, String) { + // --8<-- [start:connect_enterprise_quickstart] + let uri = "db://your-database-uri"; + let api_key = "your-api-key"; + let region = "us-east-1"; + let host_override = "https://your-enterprise-endpoint.com"; + // --8<-- [end:connect_enterprise_quickstart] + ( + uri.to_string(), + api_key.to_string(), + region.to_string(), + host_override.to_string(), + ) +} + fn connect_object_storage_config() -> &'static str { // --8<-- [start:connect_object_storage] let uri = "s3://your-bucket/path"; diff --git a/tests/rs/embedding.rs b/tests/rs/embedding.rs index c732f6b..c11069d 100644 --- a/tests/rs/embedding.rs +++ b/tests/rs/embedding.rs @@ -84,7 +84,7 @@ async fn main() -> Result<()> { .execute() .await?; - // Manually generate embeddings for the query (Cloud/Enterprise path) + // Manually generate embeddings for the query (Enterprise path) let query = Arc::new(StringArray::from_iter_values(once("test example"))); let query_vector = embedding.compute_query_embeddings(query)?; // --8<-- [start:manual_query_search] diff --git a/tests/ts/connection.test.ts b/tests/ts/connection.test.ts index 8055a94..50815c3 100644 --- a/tests/ts/connection.test.ts +++ b/tests/ts/connection.test.ts @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The LanceDB Authors -import { expect, 
test } from "@jest/globals"; +import { expect, jest, test } from "@jest/globals"; import * as path from "node:path"; import { withTempDirectory } from "./util.ts"; // --8<-- [start:connect] @@ -26,6 +26,38 @@ const apiKey = "your-api-key"; const region = "us-east-1"; // --8<-- [end:connect_cloud] +async function connectEnterpriseQuickstart() { + // --8<-- [start:connect_enterprise_quickstart] + const uri = "db://your-database-uri"; + const apiKey = "your-api-key"; + const region = "us-east-1"; + const hostOverride = "https://your-enterprise-endpoint.com"; + + const db = await lancedb.connect(uri, { + apiKey, + region, + hostOverride, + }); + // --8<-- [end:connect_enterprise_quickstart] + return db; +} + +test("enterprise quickstart connect uses placeholder config", async () => { + const mockDb = { __mock: true } as unknown as Awaited< + ReturnType + >; + const spy = jest.spyOn(lancedb, "connect").mockResolvedValue(mockDb); + + const db = await connectEnterpriseQuickstart(); + expect(db).toBe(mockDb); + expect(spy).toHaveBeenCalledWith("db://your-database-uri", { + apiKey: "your-api-key", + region: "us-east-1", + hostOverride: "https://your-enterprise-endpoint.com", + }); + spy.mockRestore(); +}); + // --8<-- [start:connect_object_storage] async function connectObjectStorageExample() { const uri = "s3://your-bucket/path"; @@ -35,4 +67,4 @@ async function connectObjectStorageExample() { } // --8<-- [end:connect_object_storage] -void [uri, apiKey, region, connectObjectStorageExample]; +void [uri, apiKey, region, connectObjectStorageExample, connectEnterpriseQuickstart];