lotus-data · liana313 · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -42,8 +42,11 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        test-suite: [lm-openai, lm-ollama, rm, multimodality, utility_operators, cache]
+        test-suite: [settings, lm-openai, lm-ollama, rm, multimodality, utility_operators, cache]
         include:
+          - test-suite: settings
+            file: tests/test_settings.py
+            timeout: 5
           - test-suite: lm-openai
             file: .github/tests/lm_tests.py
             timeout: 10

diff --git a/docs/configurations.rst b/docs/configurations.rst
@@ -59,3 +59,107 @@ Configurable Parameters
 
     lotus.settings.configure(lm=gpt_4o, helper_lm=gpt_4o_mini)
 
+Scoped Settings with ``context()``
+------------------------------------
+
+``lotus.settings.context(**kwargs)`` is a context manager that temporarily overrides
+settings for the duration of a ``with`` block. The previous values are always restored
+on exit — even if an exception is raised.
+
+This is useful for:
+
+* Switching to a cheaper model for one step in a pipeline without affecting the rest
+* Running an evaluation judge with a fresh model and ``enable_cache=False``
+* Isolating settings in tests so one test cannot pollute another
+* Running concurrent threads or asyncio tasks with independent settings
+
+Basic usage
+~~~~~~~~~~~
+
+.. code-block:: python
+
+    import lotus
+    from lotus.models import LM
+
+    lm = LM(model="gpt-4o")
+    cheap_lm = LM(model="gpt-4o-mini")
+    lotus.settings.configure(lm=lm)
+
+    # Use the cheap model only for this step; gpt-4o is restored afterward
+    with lotus.settings.context(lm=cheap_lm, enable_cache=False):
+        df = df.sem_filter("Is {Review} positive?")
+
+    # Back to gpt-4o here
+    df = df.sem_map("Summarise {Review} in one sentence.")
+
+Nested contexts
+~~~~~~~~~~~~~~~
+
+Contexts can be nested. Each level saves and restores independently.
+
+.. code-block:: python
+
+    with lotus.settings.context(lm=cheap_lm):
+        # inner context adds another override on top
+        with lotus.settings.context(enable_cache=True):
+            df = df.sem_map(...)   # cheap_lm + enable_cache=True
+        df = df.sem_filter(...)    # cheap_lm only, enable_cache restored
+
+Concurrent threads
+~~~~~~~~~~~~~~~~~~
+
+Because ``context()`` uses ``contextvars.ContextVar`` internally, each thread sees
+only its own overrides. Threads cannot overwrite each other's settings even though
+they share the same ``lotus.settings`` object.
+
+.. code-block:: python
+
+    import threading
+    import lotus
+    from lotus.models import LM
+
+    lotus.settings.configure(lm=LM(model="gpt-4o-mini"))
+
+    def analyse(df, model, results, key):
+        with lotus.settings.context(lm=model):
+            results[key] = df.sem_map("Summarise {Text}.")
+
+    results = {}
+    t1 = threading.Thread(target=analyse, args=(df1, LM("gpt-4o-mini"), results, "fast"))
+    t2 = threading.Thread(target=analyse, args=(df2, LM("gpt-4o"), results, "quality"))
+    t1.start(); t2.start()
+    t1.join(); t2.join()
+
+Concurrent asyncio tasks
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Each ``asyncio`` task — whether created explicitly with ``asyncio.create_task()`` or
+implicitly when a coroutine is passed to ``asyncio.gather()`` — runs in a copy of the
+caller's context, so ``ContextVar`` changes made inside one task are invisible to the
+others.
+
+.. code-block:: python
+
+    import asyncio
+    import lotus
+    from lotus.models import LM
+
+    lotus.settings.configure(lm=LM(model="gpt-4o-mini"))
+
+    async def run(df, model):
+        with lotus.settings.context(lm=model):
+            await asyncio.sleep(0)   # yield; other tasks see their own model
+            return df.sem_map("Classify {Text}.")
+
+    async def main():
+        results = await asyncio.gather(
+            run(df1, LM("gpt-4o-mini")),
+            run(df2, LM("gpt-4o")),
+        )
+
+    asyncio.run(main())
+
+.. note::
+
+    ``configure()`` mutates the global settings object and is **not** thread-safe.
+    Use ``context()`` whenever settings need to differ across concurrent execution paths.
+
diff --git a/examples/settings_examples/concurrent_asyncio.py b/examples/settings_examples/concurrent_asyncio.py
@@ -0,0 +1,60 @@
+"""Run concurrent asyncio tasks, each with its own model and cache settings.
+
+Every asyncio task, whether created with asyncio.create_task() or implicitly
+by passing a coroutine to asyncio.gather(), runs in a copy of the current
+contextvars context, so ContextVar changes made inside one task are invisible
+to the others. This makes context() safe to use in async pipelines without
+any extra locking.
+"""
+
+import asyncio
+
+import pandas as pd
+
+import lotus
+from lotus.models import LM
+
+# Global baseline configuration; main() asserts that these values are still in
+# place after both tasks have finished.
+baseline_lm = LM(model="gpt-4o-mini")
+lotus.settings.configure(lm=baseline_lm, enable_cache=True)
+
+# Per-task models: each is passed to summarise() and applied there through
+# lotus.settings.context().
+fast_lm = LM(model="gpt-4o-mini")
+quality_lm = LM(model="gpt-4o")
+
+# Sample batch handled by the "Tech" task.
+tech_articles = [
+    "Researchers demonstrate a new battery chemistry that doubles energy density.",
+    "New compiler optimisation cuts inference latency by 40% on edge devices.",
+    "Open-source robotics framework gains traction in warehouse automation.",
+]
+
+# Sample batch handled by the "Science" task.
+science_articles = [
+    "Study links gut microbiome diversity to improved cognitive function.",
+    "James Webb telescope captures earliest known galaxy formation.",
+    "CRISPR variant achieves record efficiency in correcting sickle-cell mutations.",
+]
+
+
+async def summarise(articles: list[str], lm: LM, label: str) -> pd.DataFrame:
+    """Summarise a batch of articles using the provided model.
+
+    The ``lm`` override (with caching disabled) is active only inside the
+    ``with`` block; the global settings are not modified.
+
+    Args:
+        articles: Article texts, one per DataFrame row.
+        lm: Model used for this batch only.
+        label: Heading printed above this batch's summaries.
+
+    Returns:
+        The DataFrame produced by ``sem_map``, with the generated text in the
+        ``summary`` column.
+    """
+    df = pd.DataFrame({"Article": articles})
+    with lotus.settings.context(lm=lm, enable_cache=False):
+        # Yield to the event loop once so the tasks interleave while each
+        # task's override is active.
+        await asyncio.sleep(0)
+        # sem_map is a blocking call: run it in a worker thread so it does not
+        # stall the event loop. asyncio.to_thread propagates the current
+        # contextvars context, so the worker sees this task's overrides.
+        df = await asyncio.to_thread(
+            df.sem_map,
+            "Summarise {Article} in one sentence.",
+            suffix="summary",
+        )
+    print(f"\n{label} summaries (model: {lm.model}):")
+    # Show the generated summaries rather than the original article text.
+    print(df["summary"].to_string(index=False))
+    return df
+
+
+async def main() -> None:
+    """Run both summarisation batches as tasks, then check the global settings."""
+    # One task per batch; each call applies its own lm override internally.
+    batches = (
+        (tech_articles, fast_lm, "Tech"),
+        (science_articles, quality_lm, "Science"),
+    )
+    pending = [asyncio.create_task(summarise(*batch)) for batch in batches]
+    await asyncio.gather(*pending)
+
+    # Neither task's override may leak into the global settings.
+    settings = lotus.settings
+    assert settings.lm is baseline_lm
+    assert settings.enable_cache is True
+    print("\nGlobal settings unchanged after tasks completed.")
+
+
+asyncio.run(main())
diff --git a/examples/settings_examples/concurrent_threads.py b/examples/settings_examples/concurrent_threads.py
@@ -0,0 +1,66 @@
+"""Run parallel analyses on different data segments, each with its own model.
+
+Because context() uses contextvars.ContextVar, each thread sees only its own
+settings overlay. The threads cannot overwrite each other's lm or enable_cache
+even though they all share the same global lotus.settings object.
+"""
+
+import threading
+
+import pandas as pd
+
+import lotus
+from lotus.models import LM
+
+# Global baseline — used by any code that runs outside a context
+baseline_lm = LM(model="gpt-4o-mini")
+lotus.settings.configure(lm=baseline_lm)
+
+# One LM instance per analysis task. Both use gpt-4o-mini here; they are
+# separate objects so each thread applies its own override.
+sentiment_lm = LM(model="gpt-4o-mini")
+topic_lm = LM(model="gpt-4o-mini")
+
+# Sample laptop reviews; the same list is handed to both worker threads.
+reviews = [
+    "The battery life on this laptop is incredible — lasts all day easily.",
+    "Screen is gorgeous but the keyboard feels cheap.",
+    "Runs hot under load and the fan is very loud.",
+    "Best value for money I've found in this price range.",
+    "Customer support was unhelpful when I had a setup issue.",
+    "Surprisingly lightweight for a 15-inch machine.",
+]
+
+# Filled in by the worker threads under the keys "sentiment" and "topic".
+results: dict[str, pd.DataFrame] = {}
+
+
+def run_sentiment(data: list[str]) -> None:
+    """Classify the sentiment of each review and store it in ``results["sentiment"]``.
+
+    The ``sentiment_lm`` override is active only while ``sem_map`` runs.
+    """
+    instruction = "Classify the sentiment of {Review} as Positive, Negative, or Neutral."
+    reviews_df = pd.DataFrame({"Review": data})
+    with lotus.settings.context(lm=sentiment_lm):
+        classified = reviews_df.sem_map(instruction)
+    results["sentiment"] = classified
+
+
+def run_topic(data: list[str]) -> None:
+    """Extract the main topic of each review and store it in ``results["topic"]``.
+
+    The ``topic_lm`` override is active only while ``sem_map`` runs.
+    """
+    instruction = "Identify the main topic of {Review} in two words or fewer."
+    reviews_df = pd.DataFrame({"Review": data})
+    with lotus.settings.context(lm=topic_lm):
+        topics = reviews_df.sem_map(instruction)
+    results["topic"] = topics
+
+
+# One worker thread per analysis; both receive the same review list.
+t1 = threading.Thread(target=run_sentiment, args=(reviews,))
+t2 = threading.Thread(target=run_topic, args=(reviews,))
+
+# Start both workers before waiting on either one.
+for worker in (t1, t2):
+    worker.start()
+for worker in (t1, t2):
+    worker.join()
+
+# Report each analysis in a fixed order: sentiment first, then topic.
+for heading, key in (("Sentiment analysis:", "sentiment"), ("\nTopic analysis:", "topic")):
+    print(heading)
+    print(results[key].to_string(index=False))
+
+# Neither thread's override may leak into the global settings.
+assert lotus.settings.lm is baseline_lm
+print("\nGlobal lm unchanged after threads exited.")
diff --git a/examples/settings_examples/eval_cache_isolation.py b/examples/settings_examples/eval_cache_isolation.py
@@ -0,0 +1,53 @@
+"""Run an evaluation with a different model and cache setting than the main pipeline.
+
+Without context(), toggling enable_cache or swapping lm for an eval step
+would leak into subsequent pipeline operations. The context manager ensures
+the eval runs in isolation and the original settings are always restored.
+"""
+
+import pandas as pd
+
+import lotus
+from lotus.models import LM
+from lotus.evals import llm_as_judge
+
+# Production model with caching enabled
+prod_lm = LM(model="gpt-4o-mini")
+# Dedicated eval judge — should not be cached so results are always fresh
+judge_lm = LM(model="gpt-4o")
+
+lotus.settings.configure(lm=prod_lm, enable_cache=True)
+
+data = {
+    "question": [
+        "What is the capital of France?",
+        "Who wrote Romeo and Juliet?",
+        "What is the speed of light?",
+    ],
+    "answer": [
+        "Paris is the capital of France.",
+        "Romeo and Juliet was written by William Shakespeare.",
+        "The speed of light is approximately 300,000 km/s.",
+    ],
+}
+df = pd.DataFrame(data)
+
+# Step 1: Generate additional context with the cached production model.
+# sem_map keeps the input columns and writes the model output to a new column,
+# so name that column explicitly and print it instead of the original answers.
+df = df.sem_map(
+    "Expand {answer} with one additional relevant fact.",
+    suffix="expanded_answer",
+)
+print("Expanded answers:")
+print(df[["question", "expanded_answer"]].to_string(index=False))
+
+# Step 2: Evaluate quality using the judge model, with caching disabled
+# so every eval call goes to the model rather than returning a stale result.
+# Both overrides end with the `with` block.
+with lotus.settings.context(lm=judge_lm, enable_cache=False):
+    scores = df.llm_as_judge(
+        judge_instruction="Rate the accuracy of this {answer} to the {question} from 1-10. Output only the number.",
+        n_trials=1,
+    )
+    print("\nEval scores (judge: gpt-4o, cache disabled):")
+    print(scores)
+
+# Verify settings are restored: the production model and the cache flag
+# configured above must be active again.
+assert lotus.settings.lm is prod_lm
+assert lotus.settings.enable_cache is True
+print("\nSettings restored: lm=gpt-4o-mini, enable_cache=True")
diff --git a/examples/settings_examples/scoped_model_switching.py b/examples/settings_examples/scoped_model_switching.py
@@ -0,0 +1,39 @@
+"""Temporarily switch to a different model for one step in a pipeline.
+
+The global lm is restored automatically after the context exits, so later
+steps continue using the original model without any manual save/restore.
+"""
+
+import pandas as pd
+
+import lotus
+from lotus.models import LM
+
+# High-quality model: the global default for every step outside a context.
+lm = LM(model="gpt-4o")
+# Cheaper, faster model reserved for the high-volume filtering step.
+cheap_lm = LM(model="gpt-4o-mini")
+
+lotus.settings.configure(lm=lm)
+
+data = {
+    "Paper Title": [
+        "Attention Is All You Need",
+        "BERT: Pre-training of Deep Bidirectional Transformers",
+        "Deep Residual Learning for Image Recognition",
+        "Generative Adversarial Networks",
+        "Neural Machine Translation by Jointly Learning to Align and Translate",
+    ]
+}
+df = pd.DataFrame(data)
+
+# Step 1: coarse filter (high volume, low stakes) under the cheap model.
+# The override ends with the `with` block.
+with lotus.settings.context(lm=cheap_lm):
+    df = df.sem_filter("Is {Paper Title} related to natural language processing?")
+    remaining = len(df)
+    print(f"After NLP filter ({remaining} papers remaining):")
+    kept_titles = df["Paper Title"].tolist()
+    print(kept_titles)
+
+# Step 2: final summarization, back on the global (high-quality) model.
+df = df.sem_map("Write a one-sentence summary of the contributions of {Paper Title}.")
+print("\nSummaries (generated with gpt-4o):")
+print(df)