release: 1.5.0 (#370)

stainless-app[bot] · Vivek Nair · web-flow · commit b28532ff6fe1 · 2025-08-11T22:02:45.000-04:00
* feat(progress): show experiment URL in reporters (#369)
* release: 1.5.0

---------

Co-authored-by: Vivek Nair <vivek@gentrace.ai>
Co-authored-by: stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "1.4.1"
+  ".": "1.5.0"
 }
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## 1.5.0 (2025-08-12)
+
+Full Changelog: [v1.4.1...v1.5.0](https://github.com/gentrace/gentrace-python/compare/v1.4.1...v1.5.0)
+
+### Features
+
+* **progress:** show experiment URL in reporters ([#369](https://github.com/gentrace/gentrace-python/issues/369)) ([5a48f44](https://github.com/gentrace/gentrace-python/commit/5a48f445af2131ceb2d758efff95826b24c8ac57))
+
 ## 1.4.1 (2025-08-11)
 
 Full Changelog: [v1.4.0...v1.4.1](https://github.com/gentrace/gentrace-python/compare/v1.4.0...v1.4.1)
diff --git a/examples/eval_dataset_local_cases.py b/examples/eval_dataset_local_cases.py
@@ -44,9 +44,15 @@ async def dataset_evaluation() -> None:
 
     # Using TestInput with TypedDict for type safety
     test_cases = [
-        TestInput[PromptInputs](name="greeting", inputs={"prompt": "Hello! How are you doing today?"}),
-        TestInput[PromptInputs](name="factual_question", inputs={"prompt": "What is the capital of France?"}),
-        TestInput[PromptInputs](name="math_problem", inputs={"prompt": "What is 25 * 4?"}),
+        TestInput[PromptInputs](
+            name="greeting", inputs={"prompt": "Hello! How are you doing today?"}
+        ),
+        TestInput[PromptInputs](
+            name="factual_question", inputs={"prompt": "What is the capital of France?"}
+        ),
+        TestInput[PromptInputs](
+            name="math_problem", inputs={"prompt": "What is 25 * 4?"}
+        ),
         TestInput[PromptInputs](
             name="creative_writing",
             inputs={"prompt": "Write a haiku about artificial intelligence"},
@@ -64,7 +70,4 @@ async def dataset_evaluation() -> None:
 
 
 if __name__ == "__main__":
-    result = asyncio.run(dataset_evaluation())
-
-    if result:
-        print(f"Experiment URL: {result.url}")
+    asyncio.run(dataset_evaluation())
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "gentrace-py"
-version = "1.4.1"
+version = "1.5.0"
 description = "The official Python library for the gentrace API"
 dynamic = ["readme"]
 license = "MIT"
diff --git a/src/gentrace/_version.py b/src/gentrace/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "gentrace"
-__version__ = "1.4.1"  # x-release-please-version
+__version__ = "1.5.0"  # x-release-please-version
diff --git a/src/gentrace/lib/eval_dataset.py b/src/gentrace/lib/eval_dataset.py
@@ -458,8 +458,9 @@ async def eval_dataset(
             logger.setLevel(logging.INFO)
         progress_reporter = SimpleProgressReporter(logger)
 
-    # Start progress reporting
-    progress_reporter.start(experiment_context["pipeline_id"], len(converted_test_cases))
+    # Start progress reporting with experiment URL if available
+    experiment_url = experiment_context.get("experiment_url")
+    progress_reporter.start(experiment_context["pipeline_id"], len(converted_test_cases), experiment_url)
 
     evaluation_tasks: List[Tuple[str, Awaitable[Optional[TResult]]]] = []
     for i, test_case in enumerate(converted_test_cases):
diff --git a/src/gentrace/lib/experiment.py b/src/gentrace/lib/experiment.py
@@ -20,12 +20,13 @@
 class ExperimentContext(TypedDict):
     """
     Represents the context for an experiment run. This context is stored in
-    a ContextVar to make the experiment ID and pipeline ID available throughout
+    a ContextVar to make the experiment ID, pipeline ID, and URL available throughout
     the asynchronous execution flow.
     """
 
     experiment_id: str
     pipeline_id: str
+    experiment_url: Optional[str]  # URL to view the experiment in the Gentrace UI
 
 
 experiment_context_var: contextvars.ContextVar[Optional[ExperimentContext]] = contextvars.ContextVar(
@@ -182,9 +183,24 @@ async def wrapper(*args: P.args, **kwargs: P.kwargs) -> ExperimentResult:
             if not experiment_obj:
                 raise RuntimeError("Failed to obtain experiment from API.")
 
+            # Construct the experiment URL early so it can be displayed immediately
+            # Get the client to access base_url
+            client = _get_async_client_instance()
+            base_url = str(client.base_url).rstrip('/')
+            
+            # Extract hostname from base URL (remove /api suffix if present)
+            if base_url.endswith('/api'):
+                hostname = base_url[:-4]
+            else:
+                hostname = base_url
+            
+            # Construct the URL using resource_path
+            experiment_url = f"{hostname}{experiment_obj.resource_path}"
+
             context_data: ExperimentContext = {
                 "experiment_id": experiment_obj.id,
                 "pipeline_id": effective_pipeline_id,
+                "experiment_url": experiment_url,
             }
 
             token = experiment_context_var.set(context_data)
@@ -200,26 +216,13 @@ async def wrapper(*args: P.args, **kwargs: P.kwargs) -> ExperimentResult:
                 if experiment_obj:
                     await finish_experiment_api(id=experiment_obj.id)
 
-            # Get the client to access base_url
-            client = _get_async_client_instance()
-            base_url = str(client.base_url).rstrip('/')
-            
-            # Extract hostname from base URL (remove /api suffix if present)
-            if base_url.endswith('/api'):
-                hostname = base_url[:-4]
-            else:
-                hostname = base_url
-            
-            # Construct the URL using resource_path
-            url = f"{hostname}{experiment_obj.resource_path}"
-            
             # Create ExperimentResult instance with all fields from experiment plus URL
             # Use model_dump with by_alias=True to get camelCase field names
             experiment_data = experiment_obj.model_dump(by_alias=True)
             
             result = ExperimentResult(
                 **experiment_data,
-                url=url
+                url=experiment_url  # Use the URL we constructed earlier
             )
             
             return result
diff --git a/src/gentrace/lib/progress.py b/src/gentrace/lib/progress.py
@@ -34,13 +34,14 @@ class ProgressReporter(ABC):
     """
 
     @abstractmethod
-    def start(self, pipeline_id: str, total: int) -> None:
+    def start(self, pipeline_id: str, total: int, experiment_url: Optional[str] = None) -> None:
         """
         Initialize the progress reporter for a new evaluation run.
 
         Args:
             pipeline_id: The ID of the pipeline being evaluated.
             total: The total number of test cases to be executed.
+            experiment_url: Optional URL to view the experiment in the Gentrace UI.
         """
         pass
 
@@ -93,14 +94,18 @@ def __init__(self, logger: Optional[logging.Logger] = None) -> None:
         self.logger = logger if logger is not None else logging.getLogger("gentrace")
 
     @override
-    def start(self, pipeline_id: str, total: int) -> None:
+    def start(self, pipeline_id: str, total: int, experiment_url: Optional[str] = None) -> None:
         """Initialize a new evaluation run with line-by-line output."""
         self.pipeline_id = pipeline_id
         self.total = total
         self.current = 0
 
         message = f"\nRunning experiment with {total} test {'case' if total == 1 else 'cases'}..."
         self.logger.info(message)
+        
+        # Display the experiment URL if available
+        if experiment_url:
+            self.logger.info(f"Experiment URL: {experiment_url}")
 
     def update_current_test(self, test_name: str) -> None:
         """
@@ -154,6 +159,7 @@ def __init__(self) -> None:
         self.completed_count = 0
         self.total_count = 0
         self.last_completed_test = ""
+        self.experiment_url: Optional[str] = None
 
     def _create_display(self) -> Table:
         """Create the display table with current test info and progress bar."""
@@ -178,12 +184,20 @@ def _create_display(self) -> Table:
         return table
 
     @override
-    def start(self, pipeline_id: str, total: int) -> None:
+    def start(self, pipeline_id: str, total: int, experiment_url: Optional[str] = None) -> None:
         """Initialize a new progress bar for the evaluation run."""
         self.total_count = total
         self.completed_count = 0
         self.current_test_name = ""
         self.last_completed_test = ""
+        self.experiment_url = experiment_url
+        
+        # Print the experiment URL separately before starting the Live display
+        # Using Rich's hyperlink markup for clickable links in supported terminals
+        if experiment_url:
+            # Use Rich's link markup to make the URL clickable in supported terminals
+            self.console.print(f"[bold cyan]Experiment:[/bold cyan] [link={experiment_url}]{experiment_url}[/link]", crop=False, overflow="ignore")
+            self.console.print()  # Add spacing
 
         # Create progress bar without description in the bar itself
         self.progress = Progress(
diff --git a/tests/lib/test_progress.py b/tests/lib/test_progress.py
@@ -226,6 +226,20 @@ def test_stop(self) -> None:
 
         mock_logger.info.assert_called_once_with("Evaluation complete.")
 
+    def test_start_with_url(self) -> None:
+        """Test starting with an experiment URL."""
+        mock_logger = Mock(spec=logging.Logger)
+        reporter = SimpleProgressReporter(logger=mock_logger)
+        
+        test_url = "https://gentrace.ai/t/org/pipeline/123/experiments/abc"
+        reporter.start("pipeline-123", 5, test_url)
+        
+        # Verify both the start message and URL were logged
+        assert mock_logger.info.call_count == 2
+        calls = mock_logger.info.call_args_list
+        assert "Running experiment with 5 test cases..." in calls[0][0][0]
+        assert f"Experiment URL: {test_url}" in calls[1][0][0]
+
     def test_full_lifecycle(self) -> None:
         """Test complete lifecycle of progress reporting."""
         mock_logger = Mock(spec=logging.Logger)
@@ -387,6 +401,30 @@ def test_stop(self, mock_progress_class: Any, mock_console_class: Any, mock_live
                 break
         assert found_complete_msg
 
+    @patch("gentrace.lib.progress.Live")
+    @patch("gentrace.lib.progress.Console")
+    @patch("gentrace.lib.progress.Progress")
+    def test_start_with_url(self, mock_progress_class: Any, _mock_console_class: Any, mock_live_class: Any) -> None:
+        """Test starting with an experiment URL."""
+        mock_progress = Mock()
+        mock_task_id = 999
+        mock_progress.add_task.return_value = mock_task_id
+        mock_progress_class.return_value = mock_progress
+        
+        mock_live = Mock()
+        mock_live_class.return_value = mock_live
+
+        reporter = RichProgressReporter()
+        test_url = "https://gentrace.ai/t/org/pipeline/456/experiments/def"
+        
+        # Mock _create_display to avoid rendering issues with mocked objects
+        with patch.object(reporter, '_create_display', return_value=Mock()):
+            reporter.start("pipeline-456", 10, test_url)
+
+        assert reporter.experiment_url == test_url
+        assert reporter.total_count == 10
+        mock_live.start.assert_called_once()
+
     @patch("gentrace.lib.progress.Live")
     @patch("gentrace.lib.progress.Console")
     @patch("gentrace.lib.progress.Progress")

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1,3 +1,3 @@` |
| `1` | `1` | `{` |
| `2` |  | `- ".": "1.4.1"` |
|  | `2` | `+ ".": "1.5.0"` |
| `3` | `3` | `}` |