From 0fd1f7f3d278b296ece116b096c9f44e97c685a7 Mon Sep 17 00:00:00 2001
From: abhiram304 <abhiram.304@gmail.com>
Date: Sat, 25 Oct 2025 22:50:28 -0700
Subject: [PATCH 1/5] With the analytics Agent

---
 docs/ANALYTICS_AGENT.md                       | 61 ++++++++++++++++
 multi_tool_agent_bquery_tools/.env            |  8 +-
 multi_tool_agent_bquery_tools/agent.py        | 14 +++-
 .../agents/analytics_agent.py                 | 41 +++++++++++
 .../agents/analytics_prompts.py               | 73 +++++++++++++++++++
 .../tools/air_quality_tool.py                 | 35 +++++++--
 .../tools/disease_tools.py                    | 15 +++-
 7 files changed, 232 insertions(+), 15 deletions(-)
 create mode 100644 docs/ANALYTICS_AGENT.md
 create mode 100644 multi_tool_agent_bquery_tools/agents/analytics_agent.py
 create mode 100644 multi_tool_agent_bquery_tools/agents/analytics_prompts.py

diff --git a/docs/ANALYTICS_AGENT.md b/docs/ANALYTICS_AGENT.md
new file mode 100644
index 00000000..e4411a75
--- /dev/null
+++ b/docs/ANALYTICS_AGENT.md
@@ -0,0 +1,61 @@
+# Analytics Agent Documentation
+
+## Overview
+
+The **Analytics Agent** is a specialized sub-agent that performs cross-dataset analysis on air quality and infectious disease data. Python code execution is attached only when `GOOGLE_GENAI_USE_VERTEXAI` is `true`; otherwise the agent works from tool responses alone. It can handle complex analytical queries that span multiple data sources.
+
+## Capabilities
+
+### Data Sources
+1. **Historical Air Quality** - EPA Historical Air Quality dataset (PM2.5, AQI)
+2. **Live Air Quality** - Real-time AirNow API data
+3. **Infectious Disease** - CDC BEAM disease surveillance data
+
+### Analysis Types
+- Cross-dataset correlations
+- Temporal pattern analysis
+- Geographic comparisons
+- Trend analysis and forecasting
+- Statistical modeling
+- Data visualization
+
+## Architecture
+
+### Tools
+The agent has access to three data retrieval tools:
+- `get_air_quality()` - Historical EPA data
+- `get_live_air_quality()` - Current air quality readings
+- `get_infectious_disease_data()` - CDC disease data
+
+### Code Execution
+When `GOOGLE_GENAI_USE_VERTEXAI` is `true`, the agent uses `VertexAiCodeExecutor` to:
+- Execute Python code in a stateful environment
+- Run data analysis and visualizations
+- Maintain state across multiple code executions
+
+## Example Queries
+
+### Cross-Dataset Analysis
+```
+"Analyze the correlation between air quality and respiratory diseases in California"
+```
+
+### Temporal Analysis
+```
+"Show me trends in air quality vs disease rates over the past year"
+```
+
+### Geographic Comparison
+```
+"Compare air quality and disease patterns across different states"
+```
+
+## Files Created or Updated
+
+1. `multi_tool_agent_bquery_tools/agents/analytics_agent.py` - Main agent definition
+2. `multi_tool_agent_bquery_tools/agents/analytics_prompts.py` - Instruction prompts
+3. `multi_tool_agent_bquery_tools/agent.py` - Updated to include analytics agent
+
+## Integration
+
+The analytics agent is added to the root agent's sub-agents list whenever its module imports successfully (if the import fails, a warning is printed and the agent is skipped). It will be routed to when users ask analytical questions spanning multiple datasets.
diff --git a/multi_tool_agent_bquery_tools/.env b/multi_tool_agent_bquery_tools/.env
index 737b1adf..90743284 100644
--- a/multi_tool_agent_bquery_tools/.env
+++ b/multi_tool_agent_bquery_tools/.env
@@ -2,17 +2,17 @@
 # Choose one of the following options:
 
 # Option 1: Google AI Studio (Free tier) - RECOMMENDED FOR QUICKSTART
-GOOGLE_GENAI_USE_VERTEXAI=FALSE
+GOOGLE_GENAI_USE_VERTEXAI=TRUE
 GOOGLE_API_KEY=AIzaSyAcq1AUFa-n4l_vmwtb3-DP1YpXzOj-zGM
 
 # Option 2: Google Cloud Vertex AI
 # GOOGLE_GENAI_USE_VERTEXAI=TRUE
-# GOOGLE_CLOUD_PROJECT=YOUR_PROJECT_ID
-# GOOGLE_CLOUD_LOCATION=LOCATION
+GOOGLE_CLOUD_PROJECT=qwiklabs-gcp-00-4a7d408c735c
+GOOGLE_CLOUD_LOCATION=us-central1
 
 # Option 3: Vertex AI Express Mode (Free tier)
 # GOOGLE_GENAI_USE_VERTEXAI=TRUE
-# GOOGLE_API_KEY=PASTE_YOUR_ACTUAL_EXPRESS_MODE_API_KEY_HERE
+GOOGLE_API_KEY=PASTE_YOUR_ACTUAL_EXPRESS_MOGOOGLE_API_KEY=AIzaSyALQGawG7iVNjJhG8v5w3Z_eyt5oRdMCvk
 
 # BigQuery Configuration (for air quality data)
 # If using Google AI Studio, you can still access BigQuery public datasets
diff --git a/multi_tool_agent_bquery_tools/agent.py b/multi_tool_agent_bquery_tools/agent.py
index b8a6ceb2..1dedf15e 100644
--- a/multi_tool_agent_bquery_tools/agent.py
+++ b/multi_tool_agent_bquery_tools/agent.py
@@ -1,4 +1,5 @@
 # ./agent.py
+# -*- coding: utf-8 -*-
 import os
 import asyncio
 from google.adk.agents import Agent
@@ -16,6 +17,13 @@
 from .agents.psa_video import create_psa_video_agents
 from .tools.health_tools import get_health_faq
 
+# Try to import analytics agent, use None if it fails
+try:
+    from .agents.analytics_agent import analytics_agent
+except Exception as e:
+    print(f"[WARNING] Analytics agent not available: {e}")
+    analytics_agent = None
+
 # === Model configuration ===
 GEMINI_MODEL = "gemini-2.0-flash"
 
@@ -37,7 +45,8 @@
         "3. [DISEASES] Infectious Disease Tracking - County-level CDC data\n"
         "4. [CLINICS] Find nearby clinics or doctors using Google Search\n"
         "5. [HEALTH] General wellness, hygiene, and preventive care advice\n"
-        "6. [PSA VIDEOS] Generate and share public health announcement videos\n\n"
+        "6. [ANALYTICS] Cross-dataset analysis across air quality and disease data\n"
+        "7. [PSA VIDEOS] Generate and share public health announcement videos\n\n"
         "What would you like to know about today?\"\n\n"
         "Routing Rules:\n"
         "- Mentions of 'live', 'today', 'current', or 'now' → live_air_quality_agent.\n"
@@ -47,6 +56,7 @@
         "(e.g., 'I have a rash', 'I feel dizzy', 'my tooth hurts', 'I cut my hand', "
         "'my child is sick'), route to clinic_finder_agent."
         "- General health, hygiene, prevention, wellness, or safety advice → health_faq_agent.\n"
+        "- Analytical questions spanning multiple datasets, correlations, trends, or complex analysis → analytics_agent.\n"
         "- Requests to create PSA videos, announcements, or post to social media → PSA video agents.\n\n"
         "Process:\n"
         "1. If clinic_finder_agent provides a search phrase (e.g., 'dermatologist near San Jose'), "
@@ -61,7 +71,7 @@
         infectious_diseases_agent,
         clinic_finder_agent,
         health_faq_agent,
-    ] + psa_agents,  # Add PSA video agents (ActionLine, VeoPrompt, Twitter)
+    ] + ([analytics_agent] if analytics_agent else []) + psa_agents  # Add PSA video agents (ActionLine, VeoPrompt, Twitter)
 )
 
 # === Runner & Session Setup ===
diff --git a/multi_tool_agent_bquery_tools/agents/analytics_agent.py b/multi_tool_agent_bquery_tools/agents/analytics_agent.py
new file mode 100644
index 00000000..0e813868
--- /dev/null
+++ b/multi_tool_agent_bquery_tools/agents/analytics_agent.py
@@ -0,0 +1,41 @@
+import os
+from google.adk.agents import Agent
+from .analytics_prompts import return_instructions_analytics
+
+# Import tools from other agents to get data
+from ..tools.air_quality_tool import get_air_quality
+from ..tools.live_air_quality_tool import get_live_air_quality
+from ..tools.disease_tools import get_infectious_disease_data
+
+GEMINI_MODEL = "gemini-2.0-flash"
+
+# Build a VertexAiCodeExecutor only when GOOGLE_GENAI_USE_VERTEXAI is "true"; any failure falls back to None
+# (Agent will still work without code executor for basic data retrieval)
+code_executor = None
+try:
+    # Read the env flag case-insensitively; unset or any other value leaves the executor disabled
+    use_vertex_ai = os.getenv('GOOGLE_GENAI_USE_VERTEXAI', '').lower() == 'true'
+    
+    if use_vertex_ai:
+        from google.adk.code_executors import VertexAiCodeExecutor  # imported inside the try so an import error is handled below
+        code_executor = VertexAiCodeExecutor(
+            optimize_data_file=True,
+            stateful=True,
+        )
+        print("[OK] Analytics agent using VertexAI code executor")
+    else:
+        print("[INFO] Running with Google AI Studio - code executor disabled")
+        print("[INFO] Analytics agent will work without code execution (data retrieval only)")
+except Exception as e:
+    print(f"[WARNING] VertexAI code executor not available: {e}")
+    print("[INFO] Analytics agent will work without code execution (data retrieval only)")
+    code_executor = None
+
+analytics_agent = Agent(
+    name="analytics_agent",
+    model=GEMINI_MODEL,
+    description="Analytics agent that performs cross-dataset analysis across air quality and disease data. Provides statistical analysis, correlations, and insights.",
+    instruction=return_instructions_analytics(),
+    code_executor=code_executor,  # None unless the Vertex AI executor was created above
+    tools=[get_air_quality, get_live_air_quality, get_infectious_disease_data],
+)
diff --git a/multi_tool_agent_bquery_tools/agents/analytics_prompts.py b/multi_tool_agent_bquery_tools/agents/analytics_prompts.py
new file mode 100644
index 00000000..10ad506b
--- /dev/null
+++ b/multi_tool_agent_bquery_tools/agents/analytics_prompts.py
@@ -0,0 +1,73 @@
+"""Module for storing analytics agent instructions."""
+
+def return_instructions_analytics() -> str:  # Takes no arguments; returns the analytics agent's instruction prompt verbatim
+    instruction_prompt_analytics = """
+# Guidelines
+
+**Objective:** Assist the user in achieving their data analysis goals by performing 
+analytics across multiple datasets (air quality and infectious disease data), with 
+emphasis on avoiding assumptions and ensuring accuracy.
+
+**Available Data Sources:**
+1. **Historical Air Quality Data** - EPA Historical Air Quality dataset via `get_air_quality()`
+2. **Live Air Quality Data** - Real-time data via AirNow API using `get_live_air_quality()`
+3. **Infectious Disease Data** - CDC BEAM data via `get_infectious_disease_data()`
+
+**CRITICAL: DO NOT GENERATE PYTHON CODE**
+You do NOT have Python code execution available. You can ONLY:
+1. Use TOOL CALLS to fetch data (get_air_quality, get_infectious_disease_data, etc.)
+2. Analyze the data returned from tools using text-based analysis
+3. Provide insights based on the tool responses
+
+**IMPORTANT:** You CANNOT call functions like `default_api.get_air_quality()` in code.
+You MUST use the provided tools (not Python functions) to fetch data.
+
+**Data Acquisition - USE TOOLS ONLY:**
+1. Use tool `get_air_quality()` to fetch historical air quality data
+2. Use tool `get_live_air_quality()` for current air quality readings  
+3. Use tool `get_infectious_disease_data()` for CDC disease data
+
+**How to work with data:**
+- Call tools to get data (they return structured responses)
+- Read the returned data from tool responses
+- Perform manual analysis (summarize, compare, identify trends)
+- Look for patterns: correlations, seasonal trends, geographic differences
+- Present findings in clear, organized text
+
+**No Assumptions:** **Crucially, avoid making assumptions about the nature of
+the data or column names.** Base findings solely on the data itself. Always
+explore the data structure first before analysis.
+
+**Answerability:** Some queries may not be answerable with the available data.
+In those cases, inform the user why you cannot process their query and
+suggest what type of data would be needed to fulfill their request.
+
+TASK:
+You need to assist the user with their queries by:
+1. Fetching data from available sources using TOOL CALLS (not Python code)
+2. Analyzing the returned data using text-based analysis
+3. Looking for patterns, correlations, trends in the data
+4. Presenting clear, actionable insights and recommendations
+
+**IMPORTANT:** After gathering sufficient data, you MUST provide analysis and insights. 
+Do NOT ask the user for more data or years repeatedly. Instead:
+- If you have data, analyze it immediately
+- Look for patterns, trends, and correlations
+- Present your findings clearly
+- Make recommendations based on the data
+
+**Tool Response Format:**
+Tool responses are in JSON format. Access data like:
+- `response['data']['total_cases']` - total cases
+- `response['data']['diseases'][0]['cases']` - first disease cases
+- Look for 'report' field for formatted text summaries
+
+**Cross-Dataset Analysis:**
+When analyzing relationships between air quality and disease data:
+- Correlate air quality metrics (AQI, PM2.5) with disease rates
+- Identify temporal patterns across both datasets
+- Compare geographic trends
+- Look for causal relationships (be cautious about claiming causation)
+
+"""
+    return instruction_prompt_analytics  # NOTE(review): prompt says no code execution, yet analytics_agent.py may attach a VertexAiCodeExecutor - confirm intent
diff --git a/multi_tool_agent_bquery_tools/tools/air_quality_tool.py b/multi_tool_agent_bquery_tools/tools/air_quality_tool.py
index 1cb04f52..ee37813b 100644
--- a/multi_tool_agent_bquery_tools/tools/air_quality_tool.py
+++ b/multi_tool_agent_bquery_tools/tools/air_quality_tool.py
@@ -39,9 +39,14 @@ def infer_state_from_county(county):
 
 
 def get_air_quality(county: Optional[str] = None, state: Optional[str] = None, city: Optional[str] = None, 
-                   year: Optional[int] = None, month: Optional[int] = None, day: Optional[int] = None,
+                   year: Optional[int] = None, start_year: Optional[int] = None, end_year: Optional[int] = None,
+                   month: Optional[int] = None, day: Optional[int] = None,
                    days_back: Optional[int] = None) -> dict:
-    """Retrieves air quality data from EPA Historical Air Quality BigQuery dataset."""
+    """Retrieves air quality data from EPA Historical Air Quality BigQuery dataset.
+    
+    Supports both single year (year parameter) and year ranges (start_year/end_year).
+    Use year for a single year, or start_year + end_year for a range (e.g., 2019-2021).
+    """
     try:
         # Handle state inference from county
         if county and not state:
@@ -66,8 +71,19 @@ def get_air_quality(county: Optional[str] = None, state: Optional[str] = None, c
         if days_back is not None:
             year, month, day = handle_relative_dates(days_back)
         
-        # Set default year if not provided
-        if year is None:
+        # Handle year range vs single year
+        if start_year and end_year:
+            # Year range provided
+            year_range = list(range(start_year, end_year + 1))
+            use_year_range = True
+        elif year:
+            # Single year provided
+            year_range = [year]
+            use_year_range = False
+        else:
+            # No year provided, default to recent
+            year_range = [2020]
+            use_year_range = False
             year = 2020
         
         # Query real EPA data from public BigQuery dataset
@@ -78,13 +94,20 @@ def get_air_quality(county: Optional[str] = None, state: Optional[str] = None, c
             where_conditions.append(f"county_name = '{county}'")
         if city:
             where_conditions.append(f"city_name = '{city}'")
+        
+        # Date conditions
         if year and month and day:
             where_conditions.append(f"date_local = DATE({year}, {month}, {day})")
         elif year and month:
             where_conditions.append(f"EXTRACT(YEAR FROM date_local) = {year}")
             where_conditions.append(f"EXTRACT(MONTH FROM date_local) = {month}")
-        elif year:
-            where_conditions.append(f"EXTRACT(YEAR FROM date_local) = {year}")
+        elif use_year_range and len(year_range) > 1:
+            # Multiple years - use IN clause
+            year_list = ','.join(map(str, year_range))
+            where_conditions.append(f"EXTRACT(YEAR FROM date_local) IN ({year_list})")
+        elif use_year_range or year:
+            # Single year or first year in range
+            where_conditions.append(f"EXTRACT(YEAR FROM date_local) = {year_range[0]}")
         
         where_clause = " AND ".join(where_conditions) if where_conditions else f"EXTRACT(YEAR FROM date_local) = {year}"
         
diff --git a/multi_tool_agent_bquery_tools/tools/disease_tools.py b/multi_tool_agent_bquery_tools/tools/disease_tools.py
index 84021ebe..ec698613 100644
--- a/multi_tool_agent_bquery_tools/tools/disease_tools.py
+++ b/multi_tool_agent_bquery_tools/tools/disease_tools.py
@@ -11,8 +11,13 @@
 
 
 def get_infectious_disease_data(county: Optional[str] = None, state: Optional[str] = None, 
-                                disease: Optional[str] = None, year: Optional[int] = None) -> dict:
-    """Retrieves infectious disease data from CDC BEAM BigQuery dataset."""
+                                disease: Optional[str] = None, year: Optional[int] = None,
+                                start_year: Optional[int] = None, end_year: Optional[int] = None) -> dict:
+    """Retrieves infectious disease data from CDC BEAM BigQuery dataset.
+    
+    Supports both single year (year parameter) and year ranges (start_year/end_year).
+    Use year for a single year, or start_year + end_year for a range (e.g., 2019-2021).
+    """
     try:
         # Handle state inference from county
         if county and not state:
@@ -60,7 +65,11 @@ def get_infectious_disease_data(county: Optional[str] = None, state: Optional[st
             where_conditions.append(f"State = '{state_abbrev}'")
         if disease:
             where_conditions.append(f"LOWER(Pathogen) LIKE LOWER('%{disease}%')")
-        if year:
+        
+        # Handle year range vs single year
+        if start_year and end_year:
+            where_conditions.append(f"Year >= {start_year} AND Year <= {end_year}")
+        elif year:
             where_conditions.append(f"Year = {year}")
         else:
             where_conditions.append("Year = 2025")  # Default to recent data

From 2e57a651adfd97d049affaf7310df5cf223bc1c9 Mon Sep 17 00:00:00 2001
From: abhiram304 <abhiram.304@gmail.com>
Date: Sat, 25 Oct 2025 23:04:50 -0700
Subject: [PATCH 2/5] Remove .env from tracking and add to .gitignore

---
 .gitignore                         |  3 ++-
 multi_tool_agent_bquery_tools/.env | 25 -------------------------
 2 files changed, 2 insertions(+), 26 deletions(-)
 delete mode 100644 multi_tool_agent_bquery_tools/.env

diff --git a/.gitignore b/.gitignore
index 58126d92..dd57e48f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@ env/
 venv/
 ENV/
 .env
+/.env
 .venv
 *.log
 .DS_Store
@@ -22,4 +23,4 @@ TWITTER_INTEGRATION_COMPLETE.md
 TWITTER_QUICK_START.md
 INTEGRATION_PLAN.md
 INTEGRATION_SUMMARY.md
-test_integration.py
\ No newline at end of file
+test_integration.py
diff --git a/multi_tool_agent_bquery_tools/.env b/multi_tool_agent_bquery_tools/.env
deleted file mode 100644
index 90743284..00000000
--- a/multi_tool_agent_bquery_tools/.env
+++ /dev/null
@@ -1,25 +0,0 @@
-# Model configuration
-# Choose one of the following options:
-
-# Option 1: Google AI Studio (Free tier) - RECOMMENDED FOR QUICKSTART
-GOOGLE_GENAI_USE_VERTEXAI=TRUE
-GOOGLE_API_KEY=AIzaSyAcq1AUFa-n4l_vmwtb3-DP1YpXzOj-zGM
-
-# Option 2: Google Cloud Vertex AI
-# GOOGLE_GENAI_USE_VERTEXAI=TRUE
-GOOGLE_CLOUD_PROJECT=qwiklabs-gcp-00-4a7d408c735c
-GOOGLE_CLOUD_LOCATION=us-central1
-
-# Option 3: Vertex AI Express Mode (Free tier)
-# GOOGLE_GENAI_USE_VERTEXAI=TRUE
-GOOGLE_API_KEY=PASTE_YOUR_ACTUAL_EXPRESS_MOGOOGLE_API_KEY=AIzaSyALQGawG7iVNjJhG8v5w3Z_eyt5oRdMCvk
-
-# BigQuery Configuration (for air quality data)
-# If using Google AI Studio, you can still access BigQuery public datasets
-# If using Vertex AI, set your project ID:
-# GOOGLE_CLOUD_PROJECT=YOUR_PROJECT_ID
-
-# For BigQuery authentication, you can either:
-# 1. Use the same API key (if using Google AI Studio)
-# 2. Use Application Default Credentials: gcloud auth application-default login
-# 3. Set service account key: GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json

From 25e94c9f9d1b713deada4056279025d23750e0fc Mon Sep 17 00:00:00 2001
From: abhiram304 <abhiram.304@gmail.com>
Date: Sat, 25 Oct 2025 23:08:21 -0700
Subject: [PATCH 3/5] Add .env to .gitignore and ensure it's untracked

---
 .gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index dd57e48f..5fb8d959 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,7 +7,7 @@ env/
 venv/
 ENV/
 .env
-/.env
+*.env
 .venv
 *.log
 .DS_Store

From a1af7f1f1d0becdd92610cee1e76bcbf79d9c76a Mon Sep 17 00:00:00 2001
From: abhiram304 <abhiram.304@gmail.com>
Date: Sun, 26 Oct 2025 11:32:38 -0700
Subject: [PATCH 4/5] Fix disease_tools BigQuery query (backticked column names, bigquery.Client, disease synonyms)

---
 .../tools/disease_tools.py                    | 186 ++++++++++++------
 1 file changed, 124 insertions(+), 62 deletions(-)

diff --git a/multi_tool_agent_bquery_tools/tools/disease_tools.py b/multi_tool_agent_bquery_tools/tools/disease_tools.py
index ec698613..df2ed0e8 100644
--- a/multi_tool_agent_bquery_tools/tools/disease_tools.py
+++ b/multi_tool_agent_bquery_tools/tools/disease_tools.py
@@ -1,14 +1,25 @@
 import os
 import random
 import google.auth
-from google.adk.tools.bigquery import BigQueryCredentialsConfig, BigQueryToolset
-from google.adk.tools.bigquery.config import BigQueryToolConfig, WriteMode
+from google.cloud import bigquery
 from ..tools.common_utils import COUNTY_STATE_MAPPING, infer_state_from_county
 from typing import Optional, Tuple, Dict, List
 
 
 INFECTIOUS_DISEASES = ["Salmonella", "E. coli", "Norovirus", "Hepatitis A", "Giardia", "Cryptosporidium"]
 
+# Disease synonym mapping - maps user-friendly names to actual CDC BEAM database names
+DISEASE_SYNONYMS = {
+    "e. coli": "STEC",
+    "e coli": "STEC",
+    "escherichia coli": "STEC",
+    "e.coli": "STEC",
+    "stec": "STEC",
+    "stec o157": "STEC",
+    "shiga toxin-producing e. coli": "STEC",
+    "shiga toxin producing e coli": "STEC",
+}
+
 
 def get_infectious_disease_data(county: Optional[str] = None, state: Optional[str] = None, 
                                 disease: Optional[str] = None, year: Optional[int] = None,
@@ -21,6 +32,8 @@ def get_infectious_disease_data(county: Optional[str] = None, state: Optional[st
     try:
         # Handle state inference from county
         if county and not state:
+            print(f"[DISEASE] Received county query: {county}")
+            print(f"[DISEASE] Note: CDC BEAM data is at STATE level, not county level. Will query state data for county's state.")
             inferred_state, is_ambiguous = infer_state_from_county(county)
             if is_ambiguous:
                 county_lower = county.lower().strip()
@@ -33,6 +46,12 @@ def get_infectious_disease_data(county: Optional[str] = None, state: Optional[st
                         }
             elif inferred_state:
                 state = inferred_state
+                print(f"[DISEASE] Mapped county '{county}' to state '{state}'")
+            else:
+                print(f"[DISEASE] Warning: Could not infer state from county '{county}'")
+        
+        if county:
+            print(f"[DISEASE] Warning: County '{county}' specified but CDC data is state-level only. Querying state-level data.")
         
         # Get state abbreviation for query
         state_abbrev = None
@@ -54,7 +73,7 @@ def get_infectious_disease_data(county: Optional[str] = None, state: Optional[st
                 'Wisconsin': 'WI', 'Wyoming': 'WY'
             }
             state_abbrev = state_map.get(state, state[:2].upper() if len(state) > 2 else state.upper())
-            print(f"[DISEASE] Querying for state: {state} -> {state_abbrev}")
+            print(f"[DISEASE] Query parameters: state={state} ({state_abbrev}), county={county}, disease={disease}, year={year}, start={start_year}, end={end_year}")
         
         # Query CDC BEAM dataset
         project_id = os.getenv("GOOGLE_CLOUD_PROJECT", "qwiklabs-gcp-00-4a7d408c735c")
@@ -64,6 +83,13 @@ def get_infectious_disease_data(county: Optional[str] = None, state: Optional[st
         if state_abbrev:
             where_conditions.append(f"State = '{state_abbrev}'")
         if disease:
+            # Check for disease synonym
+            disease_lower = disease.lower().strip()
+            mapped_disease = DISEASE_SYNONYMS.get(disease_lower, disease)
+            if mapped_disease != disease:
+                print(f"[DISEASE] Mapped disease '{disease}' to '{mapped_disease}'")
+                disease = mapped_disease
+            
             where_conditions.append(f"LOWER(Pathogen) LIKE LOWER('%{disease}%')")
         
         # Handle year range vs single year
@@ -81,66 +107,69 @@ def get_infectious_disease_data(county: Optional[str] = None, state: Optional[st
             Year,
             Month,
             State,
-            Source_Type,
+            `Source Type`,
             Pathogen,
-            Serotype_or_Species,
-            SUM(Number_of_isolates) as total_cases
+            `Serotype or Species`,
+            SUM(`Number of isolates`) as total_cases
         FROM `{project_id}.beam_report_data_folder.beam_report_data`
         WHERE {where_clause}
-        GROUP BY Year, Month, State, Source_Type, Pathogen, Serotype_or_Species
+        GROUP BY Year, Month, State, `Source Type`, Pathogen, `Serotype or Species`
         ORDER BY total_cases DESC
         LIMIT 50
         """
         
-        # Execute query
+        # Execute query using standard BigQuery client
         try:
-            application_default_credentials, _ = google.auth.default()
-            credentials_config = BigQueryCredentialsConfig(
-                credentials=application_default_credentials
-            )
-            tool_config = BigQueryToolConfig(write_mode=WriteMode.BLOCKED)
+            # Debug: Print the full query
+            print(f"[DISEASE] Executing query on project: {project_id}")
+            print(f"[DISEASE] Full query:")
+            print(query)
             
-            bigquery_toolset = BigQueryToolset(
-                credentials_config=credentials_config, 
-                bigquery_tool_config=tool_config
-            )
+            # Initialize BigQuery client
+            bq_client = bigquery.Client(project=project_id)
             
-            result = bigquery_toolset.execute_sql(
-                project_id=project_id,
-                query=query
-            )
+            # Execute query
+            query_job = bq_client.query(query)
+            results = query_job.result()
+            data = list(results)
             
-            if result.status == "success" and result.data:
+            print(f"[DISEASE] Query returned {len(data)} rows")
+            
+            if data:
                 # Process real data
                 report_data = []
                 total_cases = 0
                 
-                for row in result.data[:10]:  # Top 10 pathogens
-                    cases = int(row.get('total_cases', 0))
-                    pathogen = row.get('Pathogen', 'Unknown')
-                    source = row.get('Source_Type', 'Unknown')
+                for row in data[:10]:  # Top 10 pathogens
+                    row_dict = dict(row)
+                    cases = int(row_dict.get('total_cases', 0))
+                    pathogen = row_dict.get('Pathogen', 'Unknown')
+                    source = row_dict.get('Source Type', 'Unknown')
                     
                     report_data.append({
                         "disease": pathogen,
                         "cases": cases,
                         "source": source,
-                        "serotype": row.get('Serotype_or_Species', 'N/A')
+                        "serotype": row_dict.get('Serotype or Species', 'N/A')
                     })
                     total_cases += cases
                 
-                location_desc = f"{state}" if state else "All States"
-                year_text = f" in {year}" if year else " in 2025"
+                location_desc = f"{county}, {state}" if county and state else f"{state}" if state else "All States"
+                year_text = f" in {year}" if year else f" in {start_year}-{end_year}" if start_year and end_year else " in 2025"
+                
+                # Add note about county if specified
+                county_note = f"\nNote: Data shown is for {state} state level. County-specific data is not available in CDC BEAM dataset.\n" if county else ""
                 
-                report = f"""Infectious Disease Report for {location_desc}{year_text}:
+                report = f"""Infectious Disease Report for {location_desc}{year_text}:{county_note}
 (Data from CDC BEAM Dashboard via BigQuery)
 
 Total Cases Reported: {total_cases}
 
 Disease Breakdown:"""
                 
-                for data in report_data:
+                for disease_data in report_data:
                     report += f"""
-- {data['disease']}: {data['cases']} isolates (Source: {data['source']})"""
+- {disease_data['disease']}: {disease_data['cases']} isolates (Source: {disease_data['source']})"""
                 
                 report += f"""
 
@@ -157,51 +186,84 @@ def get_infectious_disease_data(county: Optional[str] = None, state: Optional[st
                     }
                 }
             else:
-                # Fallback to mock data if query fails
-                raise Exception("No data returned from BigQuery")
+                # No data returned - try to help user with available data
+                print(f"[DISEASE] Query returned no data rows")
+                
+                # Try to get available pathogens for this query
+                try:
+                    help_query = f"""
+                    SELECT DISTINCT Pathogen
+                    FROM `{project_id}.beam_report_data_folder.beam_report_data`
+                    WHERE {where_clause.split(' AND LOWER(Pathogen)')[0]}  -- Remove disease filter
+                    LIMIT 20
+                    """
+                    help_job = bq_client.query(help_query)
+                    available_pathogens = [row[0] for row in help_job.result()]
+                    
+                    if available_pathogens:
+                        print(f"[DISEASE] Available pathogens for this query: {', '.join(available_pathogens)}")
+                        available_msg = f" Available pathogens: {', '.join(available_pathogens)}"
+                    else:
+                        available_msg = ""
+                except Exception as e:
+                    print(f"[DISEASE] Could not fetch available pathogens: {e}")
+                    available_msg = ""
+                
+                raise Exception(f"No data returned from BigQuery.{available_msg}")
                 
         except Exception as query_error:
-            print(f"[DISEASE] BigQuery error: {query_error}")
+            error_str = str(query_error)
+            print(f"[DISEASE] BigQuery error: {error_str}")
             print(f"[DISEASE] Query was: {query[:200]}...")
+            
+            # Check if error contains available pathogens info
+            if "Available pathogens:" in error_str:
+                # Don't use mock data - return helpful error
+                return {
+                    "status": "error",
+                    "error_message": f"No data found for disease '{disease or 'specified disease'}' in the CDC BEAM database. {error_str}",
+                    "suggestion": "Try querying for: STEC, Salmonella, Campylobacter, Shigella, or Vibrio"
+                }
+            
             print(f"[DISEASE] Falling back to mock data")
             # Generate mock data as fallback
             location_desc = f"{county}, {state}" if county and state else state if state else "Demo Location"
             diseases_to_report = [disease] if disease else random.sample(INFECTIOUS_DISEASES, 3)
-        
-        report_data = []
-        total_cases = 0
-        
-        for disease_name in diseases_to_report:
-            cases = random.randint(15, 250)
-            report_data.append({
-                "disease": disease_name,
-                "cases": cases,
+            
+            report_data = []
+            total_cases = 0
+            
+            for disease_name in diseases_to_report:
+                cases = random.randint(15, 250)
+                report_data.append({
+                    "disease": disease_name,
+                    "cases": cases,
                     "source": "Mock Data"
-            })
-            total_cases += cases
-        
+                })
+                total_cases += cases
+            
             year_text = f" in {year}" if year else " (demo data)"
-        
-        report = f"""Infectious Disease Report for {location_desc}{year_text}:
+            
+            report = f"""Infectious Disease Report for {location_desc}{year_text}:
 (Demo Mode - Real data requires BigQuery access)
 
 Total Cases Reported: {total_cases}
 
 Disease Breakdown:"""
-        
-        for data in report_data:
-            report += f"""
-- {data['disease']}: {data['cases']} cases"""
-        
-        return {
-            "status": "success",
-            "report": report,
-            "data": {
-                "location": location_desc,
-                "total_cases": total_cases,
-                "diseases": report_data
+            
+            for disease_data in report_data:
+                report += f"""
+- {disease_data['disease']}: {disease_data['cases']} cases"""
+            
+            return {
+                "status": "success",
+                "report": report,
+                "data": {
+                    "location": location_desc,
+                    "total_cases": total_cases,
+                    "diseases": report_data
+                }
             }
-        }
         
     except Exception as e:
         return {

From 3fdf281fed4df4ccc70f99a41adf9f00a0803f83 Mon Sep 17 00:00:00 2001
From: abhiram304 <abhiram.304@gmail.com>
Date: Sun, 26 Oct 2025 12:03:38 -0700
Subject: [PATCH 5/5] Input from the UI form - location and time, default time
 set to 2024 to 2025; add a current time to the global prompt so it always uses
 the current time for all agents; fix the diseases agent to call BigQuery each
 time; analytics agent integration

---
 app.py                                 |  26 +++-
 multi_tool_agent_bquery_tools/agent.py | 165 ++++++++++++++++++-------
 static/js/app.js                       |  65 +++++++++-
 3 files changed, 207 insertions(+), 49 deletions(-)

diff --git a/app.py b/app.py
index ba73d7cc..0ca51053 100644
--- a/app.py
+++ b/app.py
@@ -303,6 +303,8 @@ def agent_chat():
         
         request_data = request.get_json()
         question = request_data.get('question', '')
+        location_context = request_data.get('location_context', None)
+        time_frame = request_data.get('time_frame', None)
         
         if not question:
             return jsonify({
@@ -310,6 +312,26 @@ def agent_chat():
                 'error': 'No question provided'
             }), 400
         
+        # Add location context to the question if available
+        if location_context:
+            location_info = []
+            if location_context.get('city'):
+                location_info.append(f"City: {location_context['city']}")
+            if location_context.get('state'):
+                location_info.append(f"State: {location_context['state']}")
+            if location_context.get('county'):
+                location_info.append(f"County: {location_context['county']}")
+            if location_context.get('zipCode'):
+                location_info.append(f"ZIP Code: {location_context['zipCode']}")
+            if location_context.get('formattedAddress'):
+                location_info.append(f"Address: {location_context['formattedAddress']}")
+            
+            if location_info:
+                location_text = " | ".join(location_info)
+                enhanced_question = f"User Location Context: {location_text}\n\nUser Question: {question}"
+                print(f"[CHAT] Enhanced question with location context: {enhanced_question}")
+                question = enhanced_question
+        
         # Check if user wants to generate PSA video
         video_keywords = ['create video', 'generate psa', 'make video', 'create psa', 'video psa', 'psa video']
         wants_video = any(keyword in question.lower() for keyword in video_keywords)
@@ -391,8 +413,8 @@ def agent_chat():
                 traceback.print_exc()
                 # Fall through to normal chat
         
-        # Normal chat flow
-        response = call_adk_agent(question)
+        # Normal chat flow - pass context to agent
+        response = call_adk_agent(question, location_context=location_context, time_frame=time_frame)
         
         return jsonify({
             'success': True,
diff --git a/multi_tool_agent_bquery_tools/agent.py b/multi_tool_agent_bquery_tools/agent.py
index 1dedf15e..7fcbac84 100644
--- a/multi_tool_agent_bquery_tools/agent.py
+++ b/multi_tool_agent_bquery_tools/agent.py
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 import os
 import asyncio
+from datetime import datetime
 from google.adk.agents import Agent
 from google.adk.runners import Runner
 from google.adk.sessions import InMemorySessionService
@@ -27,52 +28,116 @@
 # === Model configuration ===
 GEMINI_MODEL = "gemini-2.0-flash"
 
+def get_current_time_context():
+    """Return a prompt text block stating the date, time, and year at the moment of the call."""
+    now = datetime.now()  # naive datetime in the host's local time; no timezone is attached
+    current_time = now.strftime("%A, %B %d, %Y at %I:%M %p")  # weekday, month name, day, year, 12-hour clock
+    current_date = now.strftime("%Y-%m-%d")  # ISO-8601 calendar date
+    current_year = now.year
+    
+    return f"""
+CURRENT TIME CONTEXT:
+- Current Date & Time: {current_time}
+- Current Date (ISO): {current_date}
+- Current Year: {current_year}
+
+IMPORTANT: Always reference the current time when providing health advice, especially for:
+- Seasonal health recommendations
+- Time-sensitive health alerts
+- Current weather conditions affecting health
+- Recent data trends and patterns
+"""
+
+def create_root_agent_with_context(location_context=None, time_frame=None):
+    """Build the root routing Agent whose global_instruction carries the current time plus optional location/time-frame text."""
+    # location_context and time_frame are optional dicts read with .get(); absent values are skipped or shown as a default.
+    # NOTE(review): each call reuses the same module-level sub-agent objects - confirm ADK accepts them under more than one root.
+    time_context = get_current_time_context()
+    
+    # Build location context if provided
+    location_info = ""
+    if location_context:
+        location_parts = []
+        if location_context.get('city'):
+            location_parts.append(f"City: {location_context['city']}")
+        if location_context.get('state'):
+            location_parts.append(f"State: {location_context['state']}")
+        if location_context.get('county'):
+            location_parts.append(f"County: {location_context['county']}")
+        if location_context.get('zipCode'):
+            location_parts.append(f"ZIP Code: {location_context['zipCode']}")
+        if location_context.get('formattedAddress'):
+            location_parts.append(f"Address: {location_context['formattedAddress']}")
+        coords = location_context.get('coordinates') or {}  # a null or missing 'coordinates' must not crash the .get() calls below
+        if location_parts:
+            location_info = f"""
+USER LOCATION CONTEXT:
+- {', '.join(location_parts)}
+- Coordinates: {coords.get('lat', 'N/A')}, {coords.get('lng', 'N/A')}
+"""
+    
+    # Build time frame context if provided
+    time_frame_info = ""
+    if time_frame:
+        time_frame_info = f"""
+DATA TIME FRAME CONTEXT:
+- Start Date: {time_frame.get('start_date', 'Not specified')}
+- End Date: {time_frame.get('end_date', 'Not specified')}
+- Analysis Period: {time_frame.get('period', 'Not specified')}
+"""
+    
+    # Combine all context
+    global_context = f"{time_context}{location_info}{time_frame_info}"
+    
+    return Agent(
+        name="community_health_assistant",
+        model=GEMINI_MODEL,
+        description="Main community health assistant that routes queries to specialized sub-agents.",
+        global_instruction=global_context,
+        instruction=(
+            "You are a friendly Community Health & Wellness Assistant. "
+            "When a user greets you, respond warmly with this menu:\n\n"
+            "\"Welcome to the Community Health & Wellness Assistant!\n\n"
+            "I can help you with:\n"
+            "1. [LIVE AIR QUALITY] Check current air quality via the AirNow API\n"
+            "2. [HISTORICAL AIR QUALITY] View past PM2.5 data from EPA BigQuery\n"
+            "3. [DISEASES] Infectious Disease Tracking - County-level CDC data\n"
+            "4. [CLINICS] Find nearby clinics or doctors using Google Search\n"
+            "5. [HEALTH] General wellness, hygiene, and preventive care advice\n"
+            "6. [ANALYTICS] Cross-dataset analysis across air quality and disease data\n"
+            "7. [PSA VIDEOS] Generate and share public health announcement videos\n\n"
+            "What would you like to know about today?\"\n\n"
+            "Routing Rules:\n"
+            "- Mentions of 'live', 'today', 'current', or 'now' → live_air_quality_agent.\n"
+            "- Questions mentioning years, months, or historical data → air_quality_agent.\n"
+            "- Mentions of infections, outbreaks, or diseases → infectious_diseases_agent.\n"
+            "- If the user describes symptoms or feeling unwell "
+            "(e.g., 'I have a rash', 'I feel dizzy', 'my tooth hurts', 'I cut my hand', "
+            "'my child is sick'), route to clinic_finder_agent.\n"
+            "- General health, hygiene, prevention, wellness, or safety advice → health_faq_agent.\n"
+            "- Analytical questions spanning multiple datasets, correlations, trends, or complex analysis → analytics_agent.\n"
+            "- Requests to create PSA videos, announcements, or post to social media → PSA video agents.\n\n"
+            "Process:\n"
+            "1. If clinic_finder_agent provides a search phrase (e.g., 'dermatologist near San Jose'), "
+            "use google_search with that phrase.\n"
+            "2. Summarize the top 3–5 results clearly with clinic names and addresses.\n\n"
+            "After any response (from you or a sub-agent), always end with: "
+            "'Is there anything else I can help you with today?'"
+        ),
+        sub_agents=[
+            air_quality_agent,
+            live_air_quality_agent,
+            infectious_diseases_agent,
+            clinic_finder_agent,
+            health_faq_agent,
+        ] + ([analytics_agent] if analytics_agent else []) + psa_agents  # Add PSA video agents (ActionLine, VeoPrompt, Twitter)
+    )
+
 # === Create PSA Video Agents ===
 psa_agents = create_psa_video_agents(model=GEMINI_MODEL, tools_module=None)
 
-# === Root Agent Definition ===
-root_agent = Agent(
-    name="community_health_assistant",
-    model=GEMINI_MODEL,
-    description="Main community health assistant that routes queries to specialized sub-agents.",
-    instruction=(
-        "You are a friendly Community Health & Wellness Assistant. "
-        "When a user greets you, respond warmly with this menu:\n\n"
-        "\"Welcome to the Community Health & Wellness Assistant!\n\n"
-        "I can help you with:\n"
-        "1. [LIVE AIR QUALITY] Check current air quality via the AirNow API\n"
-        "2. [HISTORICAL AIR QUALITY] View past PM2.5 data from EPA BigQuery\n"
-        "3. [DISEASES] Infectious Disease Tracking - County-level CDC data\n"
-        "4. [CLINICS] Find nearby clinics or doctors using Google Search\n"
-        "5. [HEALTH] General wellness, hygiene, and preventive care advice\n"
-        "6. [ANALYTICS] Cross-dataset analysis across air quality and disease data\n"
-        "7. [PSA VIDEOS] Generate and share public health announcement videos\n\n"
-        "What would you like to know about today?\"\n\n"
-        "Routing Rules:\n"
-        "- Mentions of 'live', 'today', 'current', or 'now' → live_air_quality_agent.\n"
-        "- Questions mentioning years, months, or historical data → air_quality_agent.\n"
-        "- Mentions of infections, outbreaks, or diseases → infectious_diseases_agent.\n"
-        "- If the user describes symptoms or feeling unwell "
-        "(e.g., 'I have a rash', 'I feel dizzy', 'my tooth hurts', 'I cut my hand', "
-        "'my child is sick'), route to clinic_finder_agent."
-        "- General health, hygiene, prevention, wellness, or safety advice → health_faq_agent.\n"
-        "- Analytical questions spanning multiple datasets, correlations, trends, or complex analysis → analytics_agent.\n"
-        "- Requests to create PSA videos, announcements, or post to social media → PSA video agents.\n\n"
-        "Process:\n"
-        "1. If clinic_finder_agent provides a search phrase (e.g., 'dermatologist near San Jose'), "
-        "use google_search with that phrase.\n"
-        "2. Summarize the top 3–5 results clearly with clinic names and addresses.\n\n"
-        "After any response (from you or a sub-agent), always end with: "
-        "'Is there anything else I can help you with today?'"
-    ),
-    sub_agents=[
-        air_quality_agent,
-        live_air_quality_agent,
-        infectious_diseases_agent,
-        clinic_finder_agent,
-        health_faq_agent,
-    ] + ([analytics_agent] if analytics_agent else []) + psa_agents  # Add PSA video agents (ActionLine, VeoPrompt, Twitter)
-)
+# === Default Root Agent (for backward compatibility) ===
+root_agent = create_root_agent_with_context()
 
 # === Runner & Session Setup ===
 APP_NAME = "community_health_app"
@@ -95,11 +160,21 @@ def _initialize_session_and_runner():
         )
         _runner = Runner(agent=root_agent, app_name=APP_NAME, session_service=_session_service)
 
-def call_agent(query: str) -> str:
+def call_agent(query: str, location_context=None, time_frame=None) -> str:
     """Helper function to call the agent with a query and return the response."""
     _initialize_session_and_runner()
+    
+    # Create agent with context if provided
+    if location_context or time_frame:
+        agent_with_context = create_root_agent_with_context(location_context, time_frame)
+        # Create a new runner with the context-aware agent
+        context_runner = Runner(agent=agent_with_context, app_name=APP_NAME, session_service=_session_service)
+        runner_to_use = context_runner
+    else:
+        runner_to_use = _runner
+    
     content = types.Content(role="user", parts=[types.Part(text=query)])
-    events = _runner.run(user_id=USER_ID, session_id=SESSION_ID, new_message=content)
+    events = runner_to_use.run(user_id=USER_ID, session_id=SESSION_ID, new_message=content)
 
     for event in events:
         if event.is_final_response():
diff --git a/static/js/app.js b/static/js/app.js
index 1fda60bd..1e57f6cc 100644
--- a/static/js/app.js
+++ b/static/js/app.js
@@ -335,6 +335,33 @@ async function askAI() {
     const loadingMsg = addMessage('Thinking...', 'bot');
     
     try {
+        // Get stored location data for chat agent
+        const storedLocationData = localStorage.getItem('currentLocationData');
+        let locationContext = null;
+        
+        if (storedLocationData) {
+            try {
+                locationContext = JSON.parse(storedLocationData);
+                console.log('[Chat] Using stored location data:', locationContext);
+            } catch (e) {
+                console.warn('[Chat] Failed to parse stored location data:', e);
+            }
+        }
+        
+        // Get time frame from date inputs
+        const startDateInput = document.getElementById('startDate');
+        const endDateInput = document.getElementById('endDate');
+        let timeFrame = null;
+        
+        if (startDateInput && endDateInput && startDateInput.value && endDateInput.value) {
+            timeFrame = {
+                start_date: startDateInput.value,
+                end_date: endDateInput.value,
+                period: `${startDateInput.value} to ${endDateInput.value}`
+            };
+            console.log('[Chat] Using time frame:', timeFrame);
+        }
+        
         // Try ADK agent first
         const response = await fetch('/api/agent-chat', {
             method: 'POST',
@@ -344,7 +371,9 @@ async function askAI() {
             body: JSON.stringify({
                 question: question,
                 state: currentState,
-                days: currentDays
+                days: currentDays,
+                location_context: locationContext,
+                time_frame: timeFrame
             })
         });
 
@@ -356,7 +385,39 @@ async function askAI() {
         if (data.success) {
             // Add agent badge if available
             const agentBadge = data.agent ? `<div class="text-xs text-gray-500 mt-1">via ${data.agent}</div>` : '';
-            addMessage(data.response + agentBadge, 'bot');
+            
+            // Add context indicators if available
+            let contextIndicators = '';
+            if (locationContext) {
+                const locationText = [locationContext.city, locationContext.state, locationContext.zipCode].filter(Boolean).join(', ');
+                contextIndicators += `<div class="text-xs text-emerald-600 mt-1 flex items-center">
+                    <i class="fas fa-map-marker-alt mr-1"></i>
+                    Using location: ${locationText}
+                </div>`;
+            }
+            
+            if (timeFrame) {
+                contextIndicators += `<div class="text-xs text-blue-600 mt-1 flex items-center">
+                    <i class="fas fa-calendar-alt mr-1"></i>
+                    Using time frame: ${timeFrame.period}
+                </div>`;
+            }
+            
+            // Add current time indicator
+            const now = new Date();
+            const currentTime = now.toLocaleString('en-US', { 
+                weekday: 'short', 
+                month: 'short', 
+                day: 'numeric', 
+                hour: '2-digit', 
+                minute: '2-digit' 
+            });
+            contextIndicators += `<div class="text-xs text-purple-600 mt-1 flex items-center">
+                <i class="fas fa-clock mr-1"></i>
+                Current time: ${currentTime}
+            </div>`;
+            
+            addMessage(data.response + agentBadge + contextIndicators, 'bot');
             
             // If video generation started, begin polling
             if (data.task_id) {