sambanova · snova-varunkrishna · Apr 10, 2026 · Apr 10, 2026
diff --git a/langgraph/agentic_graph_rag/README.md b/langgraph/agentic_graph_rag/README.md
@@ -38,13 +38,18 @@ Get up and running in 5 minutes!
 
 - [ ] Python 3.11 or higher installed
 - [ ] Neo4j running with synthea-sample database
+  - Download the synthea sample `.dump` file from [this Google Drive link](https://drive.google.com/file/d/15WQWmEkHTB71H3OZ_3Kbb_OKBvM9tbVz/view?usp=drive_link)
+  - Create a database called "synthea-sample" and load the data from the above `.dump` file
+  - Install the **Graph Data Science (GDS) plugin** in Neo4j (required for patient similarity search)
+    - In Neo4j Desktop: open your project → click the database → go to the **Plugins** tab → install **Graph Data Science Library**
+    - For Neo4j AuraDB: enable GDS from the instance settings in the Aura console
 - [ ] LLM API key (Anthropic or SambaNova)
 
 ### Installation Steps
 
 1. **Navigate to Project Directory**
 ```bash
-cd agentic_graph_rag
+cd agentic_graph_rag/backend
 ```
 
 2. **Create Virtual Environment**
@@ -105,7 +110,7 @@ APP_DEBUG_LOGGING=false
 
 5. **Test Connection**
 ```bash
-python test_connection.py
+python tests/test_connection.py
 ```
 
 You should see:
@@ -117,15 +122,10 @@ You should see:
 
 6. **Start Server**
 
-**Option A: Using the startup script (Linux/Mac)**
-```bash
-./start_server.sh
-```
+The following script checks the required prerequisites (including the Neo4j GDS plugin and the LLM provider) and then starts the server.
 
-**Option B: Manual start**
 ```bash
-cd backend
-python server.py
+./start_server.sh
 ```
 
 7. **Access the Application**

diff --git a/langgraph/agentic_graph_rag/backend/.env.example b/langgraph/agentic_graph_rag/backend/.env.example
@@ -13,10 +13,6 @@ SAMBANOVA_API_KEY="your-sambanova-api-key-here"
 # Get your key from: https://console.anthropic.com/
 ANTHROPIC_API_DEV_KEY="sk-ant-api03-your-anthropic-api-key-here"
 
-# Fireworks API Key (optional, if using Fireworks provider)
-# Get your key from: https://fireworks.ai/
-FIREWORKS_API_KEY="your-fireworks-api-key-here"
-
 # ============================================================================
 # Provider Configuration
 # ============================================================================

diff --git a/langgraph/agentic_graph_rag/backend/README_EMBEDDINGS.md b/langgraph/agentic_graph_rag/backend/README_EMBEDDINGS.md
@@ -43,7 +43,6 @@ If successful, it will return the GDS version number.
 Once GDS is installed, run the script:
 
 ```bash
-cd /Users/varunbk/repo/ai-starter-kit/agentic_graph_rag/backend
 source ../.venv/bin/activate
 python patient_similarity_embeddings.py
 ```

diff --git a/langgraph/agentic_graph_rag/backend/agent.py b/langgraph/agentic_graph_rag/backend/agent.py
@@ -954,12 +954,32 @@ def call_model(state: AgentState) -> dict:
 
         # Filter messages for LLM: keep only HumanMessage and AIMessage from previous turns
         # Keep ALL message types from the current turn
+        # IMPORTANT: strip tool_calls from previous-turn AIMessages to satisfy Anthropic's
+        # requirement that every tool_use be immediately followed by a tool_result.
         filtered_messages = []
         for i, msg in enumerate(messages):
             if i <= (last_human_idx or 0):
                 # For previous turns and the current HumanMessage, keep only Human and AI messages
-                if isinstance(msg, (HumanMessage, AIMessage)):
+                if isinstance(msg, HumanMessage):
                     filtered_messages.append(msg)
+                elif isinstance(msg, AIMessage):
+                    if msg.tool_calls:
+                        # Strip tool_use blocks from content so no tool_use is left without its (filtered-out) tool_result.
+                        # langchain-anthropic stores tool_use as dicts inside the content list.
+                        clean_content = msg.content
+                        if isinstance(clean_content, list):
+                            clean_content = [
+                                block for block in clean_content
+                                if not (isinstance(block, dict) and block.get("type") == "tool_use")
+                            ]
+                            if not clean_content:
+                                clean_content = ""
+                        # Only include the message if there is remaining text content
+                        if clean_content:
+                            filtered_messages.append(AIMessage(content=clean_content))
+                        # Otherwise skip — the synthesis AIMessage for this turn carries the answer
+                    else:
+                        filtered_messages.append(msg)
             else:
                 # For current turn after HumanMessage, keep all messages (including ToolMessage)
                 filtered_messages.append(msg)

diff --git a/langgraph/agentic_graph_rag/backend/patient_similarity_embeddings.py b/langgraph/agentic_graph_rag/backend/patient_similarity_embeddings.py
@@ -525,14 +525,16 @@ def find_similar_patients(
         has_space = ' ' in patient_name.strip()
 
         if has_space:
-            # Full name search
-            # Fetch 2x k results to account for bidirectional duplicates
+            # Full name search — undirected match to include both outgoing and incoming edges,
+            # deduplicated in Cypher via max() so bidirectional pairs count only once.
             query = f"""
                 MATCH (p:Patient)
                 WHERE toLower(p.firstName + ' ' + p.lastName) CONTAINS toLower($patient_name)
                 WITH p LIMIT 1
                 MATCH (p)-[sim:{similarity_type}]-(similar:Patient)
-                WHERE id(p) <> id(similar)
+                WITH p,
+                     similar,
+                     max(sim.similarityScore) AS similarity_score
                 RETURN
                     p.firstName + ' ' + p.lastName AS source_patient,
                     similar.firstName + ' ' + similar.lastName AS similar_patient,
@@ -541,20 +543,21 @@ def find_similar_patients(
                     similar.procedureCount AS procedure_count,
                     similar.drugCount AS drug_count,
                     similar.expenses AS expenses,
-                    sim.similarityScore AS similarity_score
-                ORDER BY sim.similarityScore DESC
+                    similarity_score
+                ORDER BY similarity_score DESC
                 LIMIT $fetch_limit
             """
         else:
-            # Single name search
-            # Fetch 2x k results to account for bidirectional duplicates
+            # Single name search — undirected match, deduplicated in Cypher via max().
             query = f"""
                 MATCH (p:Patient)
                 WHERE toLower(p.firstName) CONTAINS toLower($patient_name)
                    OR toLower(p.lastName) CONTAINS toLower($patient_name)
                 WITH p LIMIT 1
                 MATCH (p)-[sim:{similarity_type}]-(similar:Patient)
-                WHERE id(p) <> id(similar)
+                WITH p,
+                     similar,
+                     max(sim.similarityScore) AS similarity_score
                 RETURN
                     p.firstName + ' ' + p.lastName AS source_patient,
                     similar.firstName + ' ' + similar.lastName AS similar_patient,
@@ -563,27 +566,16 @@ def find_similar_patients(
                     similar.procedureCount AS procedure_count,
                     similar.drugCount AS drug_count,
                     similar.expenses AS expenses,
-                    sim.similarityScore AS similarity_score
-                ORDER BY sim.similarityScore DESC
+                    similarity_score
+                ORDER BY similarity_score DESC
                 LIMIT $fetch_limit
             """
 
         with self.driver.session(database=self.database) as session:
-            # Fetch 2x k results to account for bidirectional duplicates
-            fetch_limit = k * 2
-            result = session.run(query, {"patient_name": patient_name, "k": k, "fetch_limit": fetch_limit})
+            result = session.run(query, {"patient_name": patient_name, "k": k, "fetch_limit": k})
             records = [dict(record) for record in result]
 
-        # Deduplicate by similar_patient name (in case of bidirectional relationships)
-        seen_patients = set()
-        unique_records = []
-        for record in records:
-            patient_name_key = record['similar_patient']
-            if patient_name_key not in seen_patients:
-                seen_patients.add(patient_name_key)
-                unique_records.append(record)
-                if len(unique_records) >= k:
-                    break
+        unique_records = records
 
         if unique_records:
             logger.info(f"\nFound {len(unique_records)} similar patients:")

diff --git a/langgraph/agentic_graph_rag/backend/server.py b/langgraph/agentic_graph_rag/backend/server.py
@@ -2,6 +2,7 @@
 FastAPI server for Synthea chatbot application.
 """
 import os
+import traceback
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
@@ -167,6 +168,7 @@ async def chat(request: ChatRequest):
         )
 
     except Exception as e:
+        traceback.print_exc()
         raise HTTPException(
             status_code=500,
             detail=f"Error processing chat request: {str(e)}"

diff --git a/langgraph/agentic_graph_rag/backend/start_server.sh b/langgraph/agentic_graph_rag/backend/start_server.sh
@@ -39,6 +39,33 @@ if [ $? -ne 0 ]; then
     exit 1
 fi
 
+# Test GDS plugin
+echo ""
+echo "🔄 Testing Neo4j GDS plugin..."
+python tests/test_gds_plugin.py
+
+if [ $? -ne 0 ]; then
+    echo ""
+    echo "⚠️  Neo4j Graph Data Science plugin is not installed."
+    echo "Patient similarity search (find_similar_patients) will not work."
+    echo "See README.md for installation instructions."
+    echo ""
+    echo "Press Enter to continue without GDS, or Ctrl+C to abort..."
+    read
+fi
+
+# Test LLM provider
+echo ""
+echo "🔄 Testing LLM provider..."
+python tests/test_provider.py
+
+if [ $? -ne 0 ]; then
+    echo ""
+    echo "❌ LLM provider test failed!"
+    echo "Please check your API key in .env."
+    exit 1
+fi
+
 echo ""
 echo "✅ All checks passed!"
 echo ""

diff --git a/langgraph/agentic_graph_rag/backend/tests/test_connection.py b/langgraph/agentic_graph_rag/backend/tests/test_connection.py
@@ -2,7 +2,8 @@
 Test script to verify Neo4j connection and basic queries.
 """
 import sys
-sys.path.append('..')
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
 
 from neo4j_utils import Neo4jConnection
 
@@ -16,15 +17,15 @@ def test_connection():
 
         # Test a simple query
         print("\nTesting basic query...")
-        result = conn.execute_query("MATCH (p:Patient) RETURN count(p) as patient_count")
-        if result:
-            print(f"✓ Query successful: Found {result[0]['patient_count']} patients")
+        records, _, _ = conn.execute_query("MATCH (p:Patient) RETURN count(p) as patient_count")
+        if records:
+            print(f"✓ Query successful: Found {records[0]['patient_count']} patients")
         else:
             print("✗ Query returned no results")
 
         # Test patient search
         print("\nTesting patient search...")
-        patients = conn.search_patients("Ethan")
+        patients, _, _ = conn.search_patients("Ethan")
         if patients:
             print(f"✓ Found {len(patients)} patients matching 'Ethan'")
             for p in patients[:3]:
@@ -34,7 +35,7 @@ def test_connection():
 
         # Test procedures query
         print("\nTesting procedures query for 'Ethan766'...")
-        procedures = conn.get_patient_procedures("Ethan766")
+        procedures, _, _ = conn.get_patient_procedures("Ethan766")
         if procedures:
             print(f"✓ Found {len(procedures)} procedures")
             print(f"  Sample: {procedures[0]['procedure_description']}")

diff --git a/langgraph/agentic_graph_rag/backend/tests/test_gds_plugin.py b/langgraph/agentic_graph_rag/backend/tests/test_gds_plugin.py
@@ -0,0 +1,46 @@
+"""
+Test script to verify the Neo4j Graph Data Science (GDS) plugin is installed.
+Required for patient similarity search (find_similar_patients tool).
+"""
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+from neo4j_utils import Neo4jConnection
+
+
+def test_gds_plugin():
+    """Check that the GDS plugin is installed and accessible."""
+    print("Testing Neo4j Graph Data Science (GDS) plugin...")
+
+    try:
+        conn = Neo4jConnection()
+        conn.connect()
+
+        records, _, _ = conn.execute_query("RETURN gds.version() AS version")
+        if records:
+            version = records[0]["version"]
+            print(f"✓ GDS plugin is installed (version {version})")
+            conn.close()
+            return True
+        else:
+            print("✗ GDS plugin query returned no results")
+            conn.close()
+            return False
+
+    except Exception as e:
+        error = str(e)
+        if "gds" in error.lower() or "not correctly installed" in error.lower() or "unknown function" in error.lower():
+            print("✗ GDS plugin is NOT installed")
+            print("\nTo install the Graph Data Science plugin:")
+            print("  Neo4j Desktop : open your project → select the database → Plugins tab → install 'Graph Data Science Library'")
+            print("  Neo4j AuraDB  : enable GDS from the instance settings in the Aura console")
+            print("\nThe GDS plugin is required for patient similarity search.")
+        else:
+            print(f"✗ Error checking GDS plugin: {error}")
+        return False
+
+
+if __name__ == "__main__":
+    success = test_gds_plugin()
+    sys.exit(0 if success else 1)
diff --git a/langgraph/agentic_graph_rag/backend/tests/test_provider.py b/langgraph/agentic_graph_rag/backend/tests/test_provider.py
@@ -0,0 +1,43 @@
+"""
+Test script to verify the configured LLM provider and API key are working.
+"""
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+from dotenv import load_dotenv
+load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env'))
+
+from llm_factory import create_llm, get_provider, get_main_agent_model
+
+
+def test_provider():
+    """Test the configured LLM provider with a simple prompt."""
+    provider = get_provider()
+    model = get_main_agent_model()
+
+    print(f"Testing LLM provider...")
+    print(f"  Provider : {provider}")
+    print(f"  Model    : {model}")
+
+    try:
+        llm = create_llm(model=model)
+        response = llm.invoke("1+1=")
+        answer = response.content.strip()
+        print(f"✓ API key is valid. Response to '1+1=': {answer}")
+        return True
+
+    except Exception as e:
+        print(f"\n✗ Error: {e}")
+        print("\nPlease check:")
+        print(f"1. PROVIDER is set correctly in .env (current: '{provider}')")
+        if provider == "anthropic":
+            print("2. ANTHROPIC_API_DEV_KEY is set to a valid key in .env")
+        else:
+            print("2. SAMBANOVA_API_KEY is set to a valid key in .env")
+        return False
+
+
+if __name__ == "__main__":
+    success = test_provider()
+    sys.exit(0 if success else 1)
diff --git a/..._rag/backend/tests/test_query_tracking.py → ...h_rag/backend/tests/test_query_tracing.py b/..._rag/backend/tests/test_query_tracking.py → ...h_rag/backend/tests/test_query_tracing.py
diff --git a/langgraph/agentic_graph_rag/frontend/index.html b/langgraph/agentic_graph_rag/frontend/index.html
@@ -682,7 +682,7 @@ <h1>🏥 Synthea Medical Database Assistant</h1>
             <button onclick="fillExample('What medications is Ethan766 taking?')">Patient medications</button>
             <button onclick="fillExample('What diagnoses does Ethan766 have?')">Patient diagnoses</button>
             <button onclick="fillExample('Show me all encounters for Ethan766')">Patient encounters</button>
-            <button onclick="fillExample('Find patients who are similar to Dudley365 Spencer878')">Similar patients</button>
+            <button onclick="fillExample('Find the top 5 most similar patients to Dudley365 Spencer878')">Similar patients</button>
             <button onclick="fillExample('What is the database schema?')">Database schema</button>
             <button onclick="fillExample('How many patients are in the database?')">Patient count</button>
             <button onclick="fillExample('Which procedures were performed most frequently? Show top 10.')">Top procedures</button>