diff --git a/langgraph/agentic_graph_rag/README.md b/langgraph/agentic_graph_rag/README.md index 5896b3a..4dd77a0 100644 --- a/langgraph/agentic_graph_rag/README.md +++ b/langgraph/agentic_graph_rag/README.md @@ -38,13 +38,18 @@ Get up and running in 5 minutes! - [ ] Python 3.11 or higher installed - [ ] Neo4j running with synthea-sample database + - Download the synthea sample from [here](https://drive.google.com/file/d/15WQWmEkHTB71H3OZ_3Kbb_OKBvM9tbVz/view?usp=drive_link) + - Create a database called "synthea-sample" and load the data from the above `.dump` file + - Install the **Graph Data Science (GDS) plugin** in Neo4j (required for patient similarity search) + - In Neo4j Desktop: open your project → click the database → go to the **Plugins** tab → install **Graph Data Science Library** + - For Neo4j AuraDB: enable GDS from the instance settings in the Aura console - [ ] LLM API key (Anthropic or SambaNova) ### Installation Steps 1. **Navigate to Project Directory** ```bash -cd agentic_graph_rag +cd agentic_graph_rag/backend ``` 2. **Create Virtual Environment** @@ -105,7 +110,7 @@ APP_DEBUG_LOGGING=false 5. **Test Connection** ```bash -python test_connection.py +python tests/test_connection.py ``` You should see: @@ -117,15 +122,10 @@ You should see: 6. **Start Server** -**Option A: Using the startup script (Linux/Mac)** -```bash -./start_server.sh -``` +The following script runs tests for required prerequisites and then starts the server. -**Option B: Manual start** ```bash -cd backend -python server.py +./start_server.sh ``` 7. 
**Access the Application** diff --git a/langgraph/agentic_graph_rag/backend/.env.example b/langgraph/agentic_graph_rag/backend/.env.example index 2a270bb..6a6a99f 100644 --- a/langgraph/agentic_graph_rag/backend/.env.example +++ b/langgraph/agentic_graph_rag/backend/.env.example @@ -13,10 +13,6 @@ SAMBANOVA_API_KEY="your-sambanova-api-key-here" # Get your key from: https://console.anthropic.com/ ANTHROPIC_API_DEV_KEY="sk-ant-api03-your-anthropic-api-key-here" -# Fireworks API Key (optional, if using Fireworks provider) -# Get your key from: https://fireworks.ai/ -FIREWORKS_API_KEY="your-fireworks-api-key-here" - # ============================================================================ # Provider Configuration # ============================================================================ diff --git a/langgraph/agentic_graph_rag/backend/README_EMBEDDINGS.md b/langgraph/agentic_graph_rag/backend/README_EMBEDDINGS.md index 4df2ba3..1fefc39 100644 --- a/langgraph/agentic_graph_rag/backend/README_EMBEDDINGS.md +++ b/langgraph/agentic_graph_rag/backend/README_EMBEDDINGS.md @@ -43,7 +43,6 @@ If successful, it will return the GDS version number. Once GDS is installed, run the script: ```bash -cd /Users/varunbk/repo/ai-starter-kit/agentic_graph_rag/backend source ../.venv/bin/activate python patient_similarity_embeddings.py ``` diff --git a/langgraph/agentic_graph_rag/backend/agent.py b/langgraph/agentic_graph_rag/backend/agent.py index 1ae1758..9ca50a2 100644 --- a/langgraph/agentic_graph_rag/backend/agent.py +++ b/langgraph/agentic_graph_rag/backend/agent.py @@ -954,12 +954,32 @@ def call_model(state: AgentState) -> dict: # Filter messages for LLM: keep only HumanMessage and AIMessage from previous turns # Keep ALL message types from the current turn + # IMPORTANT: strip tool_calls from previous-turn AIMessages to avoid Anthropic's + # requirement that every tool_use be immediately followed by a tool_result. 
filtered_messages = [] for i, msg in enumerate(messages): if i <= (last_human_idx or 0): # For previous turns and the current HumanMessage, keep only Human and AI messages - if isinstance(msg, (HumanMessage, AIMessage)): + if isinstance(msg, HumanMessage): filtered_messages.append(msg) + elif isinstance(msg, AIMessage): + if msg.tool_calls: + # Strip tool_use blocks from content so Anthropic doesn't expect orphaned tool_results. + # langchain-anthropic stores tool_use as dicts inside the content list. + clean_content = msg.content + if isinstance(clean_content, list): + clean_content = [ + block for block in clean_content + if not (isinstance(block, dict) and block.get("type") == "tool_use") + ] + if not clean_content: + clean_content = "" + # Only include the message if there is remaining text content + if clean_content: + filtered_messages.append(AIMessage(content=clean_content)) + # Otherwise skip — the synthesis AIMessage for this turn carries the answer + else: + filtered_messages.append(msg) else: # For current turn after HumanMessage, keep all messages (including ToolMessage) filtered_messages.append(msg) diff --git a/langgraph/agentic_graph_rag/backend/patient_similarity_embeddings.py b/langgraph/agentic_graph_rag/backend/patient_similarity_embeddings.py index 360159a..2a73a93 100644 --- a/langgraph/agentic_graph_rag/backend/patient_similarity_embeddings.py +++ b/langgraph/agentic_graph_rag/backend/patient_similarity_embeddings.py @@ -525,14 +525,16 @@ def find_similar_patients( has_space = ' ' in patient_name.strip() if has_space: - # Full name search - # Fetch 2x k results to account for bidirectional duplicates + # Full name search — undirected match to include both outgoing and incoming edges, + # deduplicated in Cypher via max() so bidirectional pairs count only once. 
query = f""" MATCH (p:Patient) WHERE toLower(p.firstName + ' ' + p.lastName) CONTAINS toLower($patient_name) WITH p LIMIT 1 MATCH (p)-[sim:{similarity_type}]-(similar:Patient) - WHERE id(p) <> id(similar) + WITH p, + similar, + max(sim.similarityScore) AS similarity_score RETURN p.firstName + ' ' + p.lastName AS source_patient, similar.firstName + ' ' + similar.lastName AS similar_patient, @@ -541,20 +543,21 @@ def find_similar_patients( similar.procedureCount AS procedure_count, similar.drugCount AS drug_count, similar.expenses AS expenses, - sim.similarityScore AS similarity_score - ORDER BY sim.similarityScore DESC + similarity_score + ORDER BY similarity_score DESC LIMIT $fetch_limit """ else: - # Single name search - # Fetch 2x k results to account for bidirectional duplicates + # Single name search — undirected match, deduplicated in Cypher via max(). query = f""" MATCH (p:Patient) WHERE toLower(p.firstName) CONTAINS toLower($patient_name) OR toLower(p.lastName) CONTAINS toLower($patient_name) WITH p LIMIT 1 MATCH (p)-[sim:{similarity_type}]-(similar:Patient) - WHERE id(p) <> id(similar) + WITH p, + similar, + max(sim.similarityScore) AS similarity_score RETURN p.firstName + ' ' + p.lastName AS source_patient, similar.firstName + ' ' + similar.lastName AS similar_patient, @@ -563,27 +566,16 @@ def find_similar_patients( similar.procedureCount AS procedure_count, similar.drugCount AS drug_count, similar.expenses AS expenses, - sim.similarityScore AS similarity_score - ORDER BY sim.similarityScore DESC + similarity_score + ORDER BY similarity_score DESC LIMIT $fetch_limit """ with self.driver.session(database=self.database) as session: - # Fetch 2x k results to account for bidirectional duplicates - fetch_limit = k * 2 - result = session.run(query, {"patient_name": patient_name, "k": k, "fetch_limit": fetch_limit}) + result = session.run(query, {"patient_name": patient_name, "k": k, "fetch_limit": k}) records = [dict(record) for record in result] - # 
Deduplicate by similar_patient name (in case of bidirectional relationships) - seen_patients = set() - unique_records = [] - for record in records: - patient_name_key = record['similar_patient'] - if patient_name_key not in seen_patients: - seen_patients.add(patient_name_key) - unique_records.append(record) - if len(unique_records) >= k: - break + unique_records = records if unique_records: logger.info(f"\nFound {len(unique_records)} similar patients:") diff --git a/langgraph/agentic_graph_rag/backend/server.py b/langgraph/agentic_graph_rag/backend/server.py index a391cb7..6dbb0ce 100644 --- a/langgraph/agentic_graph_rag/backend/server.py +++ b/langgraph/agentic_graph_rag/backend/server.py @@ -2,6 +2,7 @@ FastAPI server for Synthea chatbot application. """ import os +import traceback from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles @@ -167,6 +168,7 @@ async def chat(request: ChatRequest): ) except Exception as e: + traceback.print_exc() raise HTTPException( status_code=500, detail=f"Error processing chat request: {str(e)}" diff --git a/langgraph/agentic_graph_rag/backend/start_server.sh b/langgraph/agentic_graph_rag/backend/start_server.sh index e863665..feab139 100755 --- a/langgraph/agentic_graph_rag/backend/start_server.sh +++ b/langgraph/agentic_graph_rag/backend/start_server.sh @@ -39,6 +39,33 @@ if [ $? -ne 0 ]; then exit 1 fi +# Test GDS plugin +echo "" +echo "🔄 Testing Neo4j GDS plugin..." +python tests/test_gds_plugin.py + +if [ $? -ne 0 ]; then + echo "" + echo "⚠️ Neo4j Graph Data Science plugin is not installed." + echo "Patient similarity search (find_similar_patients) will not work." + echo "See README.md for installation instructions." + echo "" + echo "Press Enter to continue without GDS, or Ctrl+C to abort..." + read +fi + +# Test LLM provider +echo "" +echo "🔄 Testing LLM provider..." +python tests/test_provider.py + +if [ $? 
-ne 0 ]; then + echo "" + echo "❌ LLM provider test failed!" + echo "Please check your API key in .env." + exit 1 +fi + echo "" echo "✅ All checks passed!" echo "" diff --git a/langgraph/agentic_graph_rag/backend/tests/test_connection.py b/langgraph/agentic_graph_rag/backend/tests/test_connection.py index aea2771..6882986 100644 --- a/langgraph/agentic_graph_rag/backend/tests/test_connection.py +++ b/langgraph/agentic_graph_rag/backend/tests/test_connection.py @@ -2,7 +2,8 @@ Test script to verify Neo4j connection and basic queries. """ import sys -sys.path.append('..') +import os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) from neo4j_utils import Neo4jConnection @@ -16,15 +17,15 @@ def test_connection(): # Test a simple query print("\nTesting basic query...") - result = conn.execute_query("MATCH (p:Patient) RETURN count(p) as patient_count") - if result: - print(f"✓ Query successful: Found {result[0]['patient_count']} patients") + records, _, _ = conn.execute_query("MATCH (p:Patient) RETURN count(p) as patient_count") + if records: + print(f"✓ Query successful: Found {records[0]['patient_count']} patients") else: print("✗ Query returned no results") # Test patient search print("\nTesting patient search...") - patients = conn.search_patients("Ethan") + patients, _, _ = conn.search_patients("Ethan") if patients: print(f"✓ Found {len(patients)} patients matching 'Ethan'") for p in patients[:3]: @@ -34,7 +35,7 @@ def test_connection(): # Test procedures query print("\nTesting procedures query for 'Ethan766'...") - procedures = conn.get_patient_procedures("Ethan766") + procedures, _, _ = conn.get_patient_procedures("Ethan766") if procedures: print(f"✓ Found {len(procedures)} procedures") print(f" Sample: {procedures[0]['procedure_description']}") diff --git a/langgraph/agentic_graph_rag/backend/tests/test_gds_plugin.py b/langgraph/agentic_graph_rag/backend/tests/test_gds_plugin.py new file mode 100644 index 0000000..fd3534f --- /dev/null +++ 
b/langgraph/agentic_graph_rag/backend/tests/test_gds_plugin.py
@@ -0,0 +1,61 @@
+"""
+Test script to verify the Neo4j Graph Data Science (GDS) plugin is installed.
+Required for patient similarity search (find_similar_patients tool).
+"""
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+from neo4j_utils import Neo4jConnection
+
+
+def test_gds_plugin():
+    """Check that the GDS plugin is installed and accessible.
+
+    Connects to Neo4j, runs ``RETURN gds.version() AS version`` and prints the
+    outcome. Once connect() has succeeded the connection is closed on every
+    path, including when the query raises.
+
+    Returns:
+        bool: True when the query returns a version row, False otherwise.
+    """
+    print("Testing Neo4j Graph Data Science (GDS) plugin...")
+
+    connected = False
+    try:
+        conn = Neo4jConnection()
+        conn.connect()
+        connected = True
+
+        records, _, _ = conn.execute_query("RETURN gds.version() AS version")
+        if not records:
+            print("✗ GDS plugin query returned no results")
+            return False
+
+        version = records[0]["version"]
+        print(f"✓ GDS plugin is installed (version {version})")
+        return True
+
+    except Exception as e:
+        error = str(e)
+        lowered = error.lower()
+        if "gds" in lowered or "not correctly installed" in lowered or "unknown function" in lowered:
+            print("✗ GDS plugin is NOT installed")
+            print("\nTo install the Graph Data Science plugin:")
+            print(" Neo4j Desktop : open your project → select the database → Plugins tab → install 'Graph Data Science Library'")
+            print(" Neo4j AuraDB : enable GDS from the instance settings in the Aura console")
+            print("\nThe GDS plugin is required for patient similarity search.")
+        else:
+            print(f"✗ Error checking GDS plugin: {error}")
+        return False
+
+    finally:
+        # A failing gds.version() call is the expected outcome when the plugin
+        # is missing, so the connection must be released on that path as well.
+        if connected:
+            conn.close()
+
+
+if __name__ == "__main__":
+    success = test_gds_plugin()
+    sys.exit(0 if success else 1)
diff --git a/langgraph/agentic_graph_rag/backend/tests/test_provider.py b/langgraph/agentic_graph_rag/backend/tests/test_provider.py
new file mode 100644
index 0000000..4308dc0
--- /dev/null
+++ b/langgraph/agentic_graph_rag/backend/tests/test_provider.py
@@ -0,0 +1,43 @@
+"""
+Test script to verify the configured LLM provider and API key are working.
+"""
+import sys
+import os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+from dotenv import load_dotenv
+load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env'))
+
+from llm_factory import create_llm, get_provider, get_main_agent_model
+
+
+def test_provider():
+    """Send a one-line prompt to the configured LLM provider; return True on success, False on any error."""
+    provider = get_provider()
+    model = get_main_agent_model()
+
+    print("Testing LLM provider...")
+    print(f" Provider : {provider}")
+    print(f" Model : {model}")
+
+    try:
+        llm = create_llm(model=model)
+        response = llm.invoke("1+1=")
+        # Message content may be a list of content blocks rather than a str; calling
+        # .strip() on a list would raise and misreport a working key as a failure.
+        answer = str(response.content).strip()
+        print(f"✓ API key is valid. Response to '1+1=': {answer}")
+        return True
+
+    except Exception as e:
+        print(f"\n✗ Error: {e}")
+        print("\nPlease check:")
+        print(f"1. PROVIDER is set correctly in .env (current: '{provider}')")
+        key_name = "ANTHROPIC_API_DEV_KEY" if provider == "anthropic" else "SAMBANOVA_API_KEY"
+        print(f"2. {key_name} is set to a valid key in .env")
+        return False
+
+
+if __name__ == "__main__":
+    success = test_provider()
+    sys.exit(0 if success else 1)
diff --git a/langgraph/agentic_graph_rag/backend/tests/test_query_tracking.py b/langgraph/agentic_graph_rag/backend/tests/test_query_tracing.py
similarity index 100%
rename from langgraph/agentic_graph_rag/backend/tests/test_query_tracking.py
rename to langgraph/agentic_graph_rag/backend/tests/test_query_tracing.py
diff --git a/langgraph/agentic_graph_rag/frontend/index.html b/langgraph/agentic_graph_rag/frontend/index.html
index 7793b6e..201087a 100644
--- a/langgraph/agentic_graph_rag/frontend/index.html
+++ b/langgraph/agentic_graph_rag/frontend/index.html
@@ -682,7 +682,7 @@