From 0fd1f7f3d278b296ece116b096c9f44e97c685a7 Mon Sep 17 00:00:00 2001 From: abhiram304 Date: Sat, 25 Oct 2025 22:50:28 -0700 Subject: [PATCH 1/5] With the analytics Agent --- docs/ANALYTICS_AGENT.md | 61 ++++++++++++++++ multi_tool_agent_bquery_tools/.env | 8 +- multi_tool_agent_bquery_tools/agent.py | 14 +++- .../agents/analytics_agent.py | 41 +++++++++++ .../agents/analytics_prompts.py | 73 +++++++++++++++++++ .../tools/air_quality_tool.py | 35 +++++++-- .../tools/disease_tools.py | 15 +++- 7 files changed, 232 insertions(+), 15 deletions(-) create mode 100644 docs/ANALYTICS_AGENT.md create mode 100644 multi_tool_agent_bquery_tools/agents/analytics_agent.py create mode 100644 multi_tool_agent_bquery_tools/agents/analytics_prompts.py diff --git a/docs/ANALYTICS_AGENT.md b/docs/ANALYTICS_AGENT.md new file mode 100644 index 00000000..e4411a75 --- /dev/null +++ b/docs/ANALYTICS_AGENT.md @@ -0,0 +1,61 @@ +# Analytics Agent Documentation + +## Overview + +The **Analytics Agent** is a specialized sub-agent that performs cross-dataset analysis on air quality and infectious disease data using Python code execution. It can handle complex analytical queries that span multiple data sources. + +## Capabilities + +### Data Sources +1. **Historical Air Quality** - EPA Historical Air Quality dataset (PM2.5, AQI) +2. **Live Air Quality** - Real-time AirNow API data +3. 
**Infectious Disease** - CDC BEAM disease surveillance data + +### Analysis Types +- Cross-dataset correlations +- Temporal pattern analysis +- Geographic comparisons +- Trend analysis and forecasting +- Statistical modeling +- Data visualization + +## Architecture + +### Tools +The agent has access to three data retrieval tools: +- `get_air_quality()` - Historical EPA data +- `get_live_air_quality()` - Current air quality readings +- `get_infectious_disease_data()` - CDC disease data + +### Code Execution +Available only when `GOOGLE_GENAI_USE_VERTEXAI` is `true` (otherwise the agent runs without code execution, data retrieval only). Uses `VertexAiCodeExecutor` to: +- Execute Python code in a stateful environment +- Run data analysis and visualizations +- Maintain state across multiple code executions + +## Example Queries + +### Cross-Dataset Analysis +``` +"Analyze the correlation between air quality and respiratory diseases in California" +``` + +### Temporal Analysis +``` +"Show me trends in air quality vs disease rates over the past year" +``` + +### Geographic Comparison +``` +"Compare air quality and disease patterns across different states" +``` + +## Files Created + +1. `multi_tool_agent_bquery_tools/agents/analytics_agent.py` - Main agent definition +2. `multi_tool_agent_bquery_tools/agents/analytics_prompts.py` - Instruction prompts +3. `multi_tool_agent_bquery_tools/agent.py` - Updated to include analytics agent + +## Integration + +The analytics agent is added to the root agent's sub-agents list when it imports successfully, and the root agent routes analytical questions spanning multiple datasets to it. 
diff --git a/multi_tool_agent_bquery_tools/.env b/multi_tool_agent_bquery_tools/.env index 737b1adf..90743284 100644 --- a/multi_tool_agent_bquery_tools/.env +++ b/multi_tool_agent_bquery_tools/.env @@ -2,17 +2,17 @@ # Choose one of the following options: # Option 1: Google AI Studio (Free tier) - RECOMMENDED FOR QUICKSTART -GOOGLE_GENAI_USE_VERTEXAI=FALSE +GOOGLE_GENAI_USE_VERTEXAI=TRUE GOOGLE_API_KEY=AIzaSyAcq1AUFa-n4l_vmwtb3-DP1YpXzOj-zGM # Option 2: Google Cloud Vertex AI # GOOGLE_GENAI_USE_VERTEXAI=TRUE -# GOOGLE_CLOUD_PROJECT=YOUR_PROJECT_ID -# GOOGLE_CLOUD_LOCATION=LOCATION +GOOGLE_CLOUD_PROJECT=qwiklabs-gcp-00-4a7d408c735c +GOOGLE_CLOUD_LOCATION=us-central1 # Option 3: Vertex AI Express Mode (Free tier) # GOOGLE_GENAI_USE_VERTEXAI=TRUE -# GOOGLE_API_KEY=PASTE_YOUR_ACTUAL_EXPRESS_MODE_API_KEY_HERE +GOOGLE_API_KEY=PASTE_YOUR_ACTUAL_EXPRESS_MOGOOGLE_API_KEY=AIzaSyALQGawG7iVNjJhG8v5w3Z_eyt5oRdMCvk # BigQuery Configuration (for air quality data) # If using Google AI Studio, you can still access BigQuery public datasets diff --git a/multi_tool_agent_bquery_tools/agent.py b/multi_tool_agent_bquery_tools/agent.py index b8a6ceb2..1dedf15e 100644 --- a/multi_tool_agent_bquery_tools/agent.py +++ b/multi_tool_agent_bquery_tools/agent.py @@ -1,4 +1,5 @@ # ./agent.py +# -*- coding: utf-8 -*- import os import asyncio from google.adk.agents import Agent @@ -16,6 +17,13 @@ from .agents.psa_video import create_psa_video_agents from .tools.health_tools import get_health_faq +# Try to import analytics agent, use None if it fails +try: + from .agents.analytics_agent import analytics_agent +except Exception as e: + print(f"[WARNING] Analytics agent not available: {e}") + analytics_agent = None + # === Model configuration === GEMINI_MODEL = "gemini-2.0-flash" @@ -37,7 +45,8 @@ "3. [DISEASES] Infectious Disease Tracking - County-level CDC data\n" "4. [CLINICS] Find nearby clinics or doctors using Google Search\n" "5. 
[HEALTH] General wellness, hygiene, and preventive care advice\n" - "6. [PSA VIDEOS] Generate and share public health announcement videos\n\n" + "6. [ANALYTICS] Cross-dataset analysis across air quality and disease data\n" + "7. [PSA VIDEOS] Generate and share public health announcement videos\n\n" "What would you like to know about today?\"\n\n" "Routing Rules:\n" "- Mentions of 'live', 'today', 'current', or 'now' → live_air_quality_agent.\n" @@ -47,6 +56,7 @@ "(e.g., 'I have a rash', 'I feel dizzy', 'my tooth hurts', 'I cut my hand', " "'my child is sick'), route to clinic_finder_agent." "- General health, hygiene, prevention, wellness, or safety advice → health_faq_agent.\n" + "- Analytical questions spanning multiple datasets, correlations, trends, or complex analysis → analytics_agent.\n" "- Requests to create PSA videos, announcements, or post to social media → PSA video agents.\n\n" "Process:\n" "1. If clinic_finder_agent provides a search phrase (e.g., 'dermatologist near San Jose'), " @@ -61,7 +71,7 @@ infectious_diseases_agent, clinic_finder_agent, health_faq_agent, - ] + psa_agents, # Add PSA video agents (ActionLine, VeoPrompt, Twitter) + ] + ([analytics_agent] if analytics_agent else []) + psa_agents # Add PSA video agents (ActionLine, VeoPrompt, Twitter) ) # === Runner & Session Setup === diff --git a/multi_tool_agent_bquery_tools/agents/analytics_agent.py b/multi_tool_agent_bquery_tools/agents/analytics_agent.py new file mode 100644 index 00000000..0e813868 --- /dev/null +++ b/multi_tool_agent_bquery_tools/agents/analytics_agent.py @@ -0,0 +1,41 @@ +import os +from google.adk.agents import Agent +from .analytics_prompts import return_instructions_analytics + +# Import tools from other agents to get data +from ..tools.air_quality_tool import get_air_quality +from ..tools.live_air_quality_tool import get_live_air_quality +from ..tools.disease_tools import get_infectious_disease_data + +GEMINI_MODEL = "gemini-2.0-flash" + +# Try to use VertexAI code 
executor, fall back to None if not available +# (Agent will still work without code executor for basic data retrieval) +code_executor = None +try: + # Check if running with Google AI Studio (no VertexAI needed) + use_vertex_ai = os.getenv('GOOGLE_GENAI_USE_VERTEXAI', '').lower() == 'true' + + if use_vertex_ai: + from google.adk.code_executors import VertexAiCodeExecutor + code_executor = VertexAiCodeExecutor( + optimize_data_file=True, + stateful=True, + ) + print("[OK] Analytics agent using VertexAI code executor") + else: + print("[INFO] Running with Google AI Studio - code executor disabled") + print("[INFO] Analytics agent will work without code execution (data retrieval only)") +except Exception as e: + print(f"[WARNING] VertexAI code executor not available: {e}") + print("[INFO] Analytics agent will work without code execution (data retrieval only)") + code_executor = None + +analytics_agent = Agent( + name="analytics_agent", + model=GEMINI_MODEL, + description="Analytics agent that performs cross-dataset analysis across air quality and disease data. Provides statistical analysis, correlations, and insights.", + instruction=return_instructions_analytics(), + code_executor=code_executor, + tools=[get_air_quality, get_live_air_quality, get_infectious_disease_data], +) diff --git a/multi_tool_agent_bquery_tools/agents/analytics_prompts.py b/multi_tool_agent_bquery_tools/agents/analytics_prompts.py new file mode 100644 index 00000000..10ad506b --- /dev/null +++ b/multi_tool_agent_bquery_tools/agents/analytics_prompts.py @@ -0,0 +1,73 @@ +"""Module for storing analytics agent instructions.""" + +def return_instructions_analytics() -> str: + instruction_prompt_analytics = """ +# Guidelines + +**Objective:** Assist the user in achieving their data analysis goals by performing +analytics across multiple datasets (air quality and infectious disease data), with +emphasis on avoiding assumptions and ensuring accuracy. + +**Available Data Sources:** +1. 
**Historical Air Quality Data** - EPA Historical Air Quality dataset via `get_air_quality()` +2. **Live Air Quality Data** - Real-time data via AirNow API using `get_live_air_quality()` +3. **Infectious Disease Data** - CDC BEAM data via `get_infectious_disease_data()` + +**CRITICAL: DO NOT GENERATE PYTHON CODE** +You do NOT have Python code execution available. You can ONLY: +1. Use TOOL CALLS to fetch data (get_air_quality, get_infectious_disease_data, etc.) +2. Analyze the data returned from tools using text-based analysis +3. Provide insights based on the tool responses + +**IMPORTANT:** You CANNOT call functions like `default_api.get_air_quality()` in code. +You MUST use the provided tools (not Python functions) to fetch data. + +**Data Acquisition - USE TOOLS ONLY:** +1. Use tool `get_air_quality()` to fetch historical air quality data +2. Use tool `get_live_air_quality()` for current air quality readings +3. Use tool `get_infectious_disease_data()` for CDC disease data + +**How to work with data:** +- Call tools to get data (they return structured responses) +- Read the returned data from tool responses +- Perform manual analysis (summarize, compare, identify trends) +- Look for patterns: correlations, seasonal trends, geographic differences +- Present findings in clear, organized text + +**No Assumptions:** **Crucially, avoid making assumptions about the nature of +the data or column names.** Base findings solely on the data itself. Always +explore the data structure first before analysis. + +**Answerability:** Some queries may not be answerable with the available data. +In those cases, inform the user why you cannot process their query and +suggest what type of data would be needed to fulfill their request. + +TASK: +You need to assist the user with their queries by: +1. Fetching data from available sources using TOOL CALLS (not Python code) +2. Analyzing the returned data using text-based analysis +3. 
Looking for patterns, correlations, trends in the data +4. Presenting clear, actionable insights and recommendations + +**IMPORTANT:** After gathering sufficient data, you MUST provide analysis and insights. +Do NOT ask the user for more data or years repeatedly. Instead: +- If you have data, analyze it immediately +- Look for patterns, trends, and correlations +- Present your findings clearly +- Make recommendations based on the data + +**Tool Response Format:** +Tool responses are in JSON format. Access data like: +- `response['data']['total_cases']` - total cases +- `response['data']['diseases'][0]['cases']` - first disease cases +- Look for 'report' field for formatted text summaries + +**Cross-Dataset Analysis:** +When analyzing relationships between air quality and disease data: +- Correlate air quality metrics (AQI, PM2.5) with disease rates +- Identify temporal patterns across both datasets +- Compare geographic trends +- Look for causal relationships (be cautious about claiming causation) + +""" + return instruction_prompt_analytics diff --git a/multi_tool_agent_bquery_tools/tools/air_quality_tool.py b/multi_tool_agent_bquery_tools/tools/air_quality_tool.py index 1cb04f52..ee37813b 100644 --- a/multi_tool_agent_bquery_tools/tools/air_quality_tool.py +++ b/multi_tool_agent_bquery_tools/tools/air_quality_tool.py @@ -39,9 +39,14 @@ def infer_state_from_county(county): def get_air_quality(county: Optional[str] = None, state: Optional[str] = None, city: Optional[str] = None, - year: Optional[int] = None, month: Optional[int] = None, day: Optional[int] = None, + year: Optional[int] = None, start_year: Optional[int] = None, end_year: Optional[int] = None, + month: Optional[int] = None, day: Optional[int] = None, days_back: Optional[int] = None) -> dict: - """Retrieves air quality data from EPA Historical Air Quality BigQuery dataset.""" + """Retrieves air quality data from EPA Historical Air Quality BigQuery dataset. 
+ + Supports both single year (year parameter) and year ranges (start_year/end_year). + Use year for a single year, or start_year + end_year for a range (e.g., 2019-2021). + """ try: # Handle state inference from county if county and not state: @@ -66,8 +71,19 @@ def get_air_quality(county: Optional[str] = None, state: Optional[str] = None, c if days_back is not None: year, month, day = handle_relative_dates(days_back) - # Set default year if not provided - if year is None: + # Handle year range vs single year + if start_year and end_year: + # Year range provided + year_range = list(range(start_year, end_year + 1)) + use_year_range = True + elif year: + # Single year provided + year_range = [year] + use_year_range = False + else: + # No year provided, default to recent + year_range = [2020] + use_year_range = False year = 2020 # Query real EPA data from public BigQuery dataset @@ -78,13 +94,20 @@ def get_air_quality(county: Optional[str] = None, state: Optional[str] = None, c where_conditions.append(f"county_name = '{county}'") if city: where_conditions.append(f"city_name = '{city}'") + + # Date conditions if year and month and day: where_conditions.append(f"date_local = DATE({year}, {month}, {day})") elif year and month: where_conditions.append(f"EXTRACT(YEAR FROM date_local) = {year}") where_conditions.append(f"EXTRACT(MONTH FROM date_local) = {month}") - elif year: - where_conditions.append(f"EXTRACT(YEAR FROM date_local) = {year}") + elif use_year_range and len(year_range) > 1: + # Multiple years - use IN clause + year_list = ','.join(map(str, year_range)) + where_conditions.append(f"EXTRACT(YEAR FROM date_local) IN ({year_list})") + elif use_year_range or year: + # Single year or first year in range + where_conditions.append(f"EXTRACT(YEAR FROM date_local) = {year_range[0]}") where_clause = " AND ".join(where_conditions) if where_conditions else f"EXTRACT(YEAR FROM date_local) = {year}" diff --git a/multi_tool_agent_bquery_tools/tools/disease_tools.py 
b/multi_tool_agent_bquery_tools/tools/disease_tools.py index 84021ebe..ec698613 100644 --- a/multi_tool_agent_bquery_tools/tools/disease_tools.py +++ b/multi_tool_agent_bquery_tools/tools/disease_tools.py @@ -11,8 +11,13 @@ def get_infectious_disease_data(county: Optional[str] = None, state: Optional[str] = None, - disease: Optional[str] = None, year: Optional[int] = None) -> dict: - """Retrieves infectious disease data from CDC BEAM BigQuery dataset.""" + disease: Optional[str] = None, year: Optional[int] = None, + start_year: Optional[int] = None, end_year: Optional[int] = None) -> dict: + """Retrieves infectious disease data from CDC BEAM BigQuery dataset. + + Supports both single year (year parameter) and year ranges (start_year/end_year). + Use year for a single year, or start_year + end_year for a range (e.g., 2019-2021). + """ try: # Handle state inference from county if county and not state: @@ -60,7 +65,11 @@ def get_infectious_disease_data(county: Optional[str] = None, state: Optional[st where_conditions.append(f"State = '{state_abbrev}'") if disease: where_conditions.append(f"LOWER(Pathogen) LIKE LOWER('%{disease}%')") - if year: + + # Handle year range vs single year + if start_year and end_year: + where_conditions.append(f"Year >= {start_year} AND Year <= {end_year}") + elif year: where_conditions.append(f"Year = {year}") else: where_conditions.append("Year = 2025") # Default to recent data From 2e57a651adfd97d049affaf7310df5cf223bc1c9 Mon Sep 17 00:00:00 2001 From: abhiram304 Date: Sat, 25 Oct 2025 23:04:50 -0700 Subject: [PATCH 2/5] Remove .env from tracking and add to .gitignore --- .gitignore | 3 ++- multi_tool_agent_bquery_tools/.env | 25 ------------------------- 2 files changed, 2 insertions(+), 26 deletions(-) delete mode 100644 multi_tool_agent_bquery_tools/.env diff --git a/.gitignore b/.gitignore index 58126d92..dd57e48f 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ env/ venv/ ENV/ .env +/.env .venv *.log .DS_Store @@ -22,4 +23,4 
@@ TWITTER_INTEGRATION_COMPLETE.md TWITTER_QUICK_START.md INTEGRATION_PLAN.md INTEGRATION_SUMMARY.md -test_integration.py \ No newline at end of file +test_integration.py diff --git a/multi_tool_agent_bquery_tools/.env b/multi_tool_agent_bquery_tools/.env deleted file mode 100644 index 90743284..00000000 --- a/multi_tool_agent_bquery_tools/.env +++ /dev/null @@ -1,25 +0,0 @@ -# Model configuration -# Choose one of the following options: - -# Option 1: Google AI Studio (Free tier) - RECOMMENDED FOR QUICKSTART -GOOGLE_GENAI_USE_VERTEXAI=TRUE -GOOGLE_API_KEY=AIzaSyAcq1AUFa-n4l_vmwtb3-DP1YpXzOj-zGM - -# Option 2: Google Cloud Vertex AI -# GOOGLE_GENAI_USE_VERTEXAI=TRUE -GOOGLE_CLOUD_PROJECT=qwiklabs-gcp-00-4a7d408c735c -GOOGLE_CLOUD_LOCATION=us-central1 - -# Option 3: Vertex AI Express Mode (Free tier) -# GOOGLE_GENAI_USE_VERTEXAI=TRUE -GOOGLE_API_KEY=PASTE_YOUR_ACTUAL_EXPRESS_MOGOOGLE_API_KEY=AIzaSyALQGawG7iVNjJhG8v5w3Z_eyt5oRdMCvk - -# BigQuery Configuration (for air quality data) -# If using Google AI Studio, you can still access BigQuery public datasets -# If using Vertex AI, set your project ID: -# GOOGLE_CLOUD_PROJECT=YOUR_PROJECT_ID - -# For BigQuery authentication, you can either: -# 1. Use the same API key (if using Google AI Studio) -# 2. Use Application Default Credentials: gcloud auth application-default login -# 3. 
Set service account key: GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json From 25e94c9f9d1b713deada4056279025d23750e0fc Mon Sep 17 00:00:00 2001 From: abhiram304 Date: Sat, 25 Oct 2025 23:08:21 -0700 Subject: [PATCH 3/5] Add .env to .gitignore and ensure it's untracked --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index dd57e48f..5fb8d959 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,7 @@ env/ venv/ ENV/ .env -/.env +*.env .venv *.log .DS_Store From a1af7f1f1d0becdd92610cee1e76bcbf79d9c76a Mon Sep 17 00:00:00 2001 From: abhiram304 Date: Sun, 26 Oct 2025 11:32:38 -0700 Subject: [PATCH 4/5] Fix --- .../tools/disease_tools.py | 186 ++++++++++++------ 1 file changed, 124 insertions(+), 62 deletions(-) diff --git a/multi_tool_agent_bquery_tools/tools/disease_tools.py b/multi_tool_agent_bquery_tools/tools/disease_tools.py index ec698613..df2ed0e8 100644 --- a/multi_tool_agent_bquery_tools/tools/disease_tools.py +++ b/multi_tool_agent_bquery_tools/tools/disease_tools.py @@ -1,14 +1,25 @@ import os import random import google.auth -from google.adk.tools.bigquery import BigQueryCredentialsConfig, BigQueryToolset -from google.adk.tools.bigquery.config import BigQueryToolConfig, WriteMode +from google.cloud import bigquery from ..tools.common_utils import COUNTY_STATE_MAPPING, infer_state_from_county from typing import Optional, Tuple, Dict, List INFECTIOUS_DISEASES = ["Salmonella", "E. coli", "Norovirus", "Hepatitis A", "Giardia", "Cryptosporidium"] +# Disease synonym mapping - maps user-friendly names to actual CDC BEAM database names +DISEASE_SYNONYMS = { + "e. coli": "STEC", + "e coli": "STEC", + "escherichia coli": "STEC", + "e.coli": "STEC", + "stec": "STEC", + "stec o157": "STEC", + "shiga toxin-producing e. 
coli": "STEC", + "shiga toxin producing e coli": "STEC", +} + def get_infectious_disease_data(county: Optional[str] = None, state: Optional[str] = None, disease: Optional[str] = None, year: Optional[int] = None, @@ -21,6 +32,8 @@ def get_infectious_disease_data(county: Optional[str] = None, state: Optional[st try: # Handle state inference from county if county and not state: + print(f"[DISEASE] Received county query: {county}") + print(f"[DISEASE] Note: CDC BEAM data is at STATE level, not county level. Will query state data for county's state.") inferred_state, is_ambiguous = infer_state_from_county(county) if is_ambiguous: county_lower = county.lower().strip() @@ -33,6 +46,12 @@ def get_infectious_disease_data(county: Optional[str] = None, state: Optional[st } elif inferred_state: state = inferred_state + print(f"[DISEASE] Mapped county '{county}' to state '{state}'") + else: + print(f"[DISEASE] Warning: Could not infer state from county '{county}'") + + if county: + print(f"[DISEASE] Warning: County '{county}' specified but CDC data is state-level only. 
Querying state-level data.") # Get state abbreviation for query state_abbrev = None @@ -54,7 +73,7 @@ def get_infectious_disease_data(county: Optional[str] = None, state: Optional[st 'Wisconsin': 'WI', 'Wyoming': 'WY' } state_abbrev = state_map.get(state, state[:2].upper() if len(state) > 2 else state.upper()) - print(f"[DISEASE] Querying for state: {state} -> {state_abbrev}") + print(f"[DISEASE] Query parameters: state={state} ({state_abbrev}), county={county}, disease={disease}, year={year}, start={start_year}, end={end_year}") # Query CDC BEAM dataset project_id = os.getenv("GOOGLE_CLOUD_PROJECT", "qwiklabs-gcp-00-4a7d408c735c") @@ -64,6 +83,13 @@ def get_infectious_disease_data(county: Optional[str] = None, state: Optional[st if state_abbrev: where_conditions.append(f"State = '{state_abbrev}'") if disease: + # Check for disease synonym + disease_lower = disease.lower().strip() + mapped_disease = DISEASE_SYNONYMS.get(disease_lower, disease) + if mapped_disease != disease: + print(f"[DISEASE] Mapped disease '{disease}' to '{mapped_disease}'") + disease = mapped_disease + where_conditions.append(f"LOWER(Pathogen) LIKE LOWER('%{disease}%')") # Handle year range vs single year @@ -81,66 +107,69 @@ def get_infectious_disease_data(county: Optional[str] = None, state: Optional[st Year, Month, State, - Source_Type, + `Source Type`, Pathogen, - Serotype_or_Species, - SUM(Number_of_isolates) as total_cases + `Serotype or Species`, + SUM(`Number of isolates`) as total_cases FROM `{project_id}.beam_report_data_folder.beam_report_data` WHERE {where_clause} - GROUP BY Year, Month, State, Source_Type, Pathogen, Serotype_or_Species + GROUP BY Year, Month, State, `Source Type`, Pathogen, `Serotype or Species` ORDER BY total_cases DESC LIMIT 50 """ - # Execute query + # Execute query using standard BigQuery client try: - application_default_credentials, _ = google.auth.default() - credentials_config = BigQueryCredentialsConfig( - credentials=application_default_credentials - ) - 
tool_config = BigQueryToolConfig(write_mode=WriteMode.BLOCKED) + # Debug: Print the full query + print(f"[DISEASE] Executing query on project: {project_id}") + print(f"[DISEASE] Full query:") + print(query) - bigquery_toolset = BigQueryToolset( - credentials_config=credentials_config, - bigquery_tool_config=tool_config - ) + # Initialize BigQuery client + bq_client = bigquery.Client(project=project_id) - result = bigquery_toolset.execute_sql( - project_id=project_id, - query=query - ) + # Execute query + query_job = bq_client.query(query) + results = query_job.result() + data = list(results) - if result.status == "success" and result.data: + print(f"[DISEASE] Query returned {len(data)} rows") + + if data: # Process real data report_data = [] total_cases = 0 - for row in result.data[:10]: # Top 10 pathogens - cases = int(row.get('total_cases', 0)) - pathogen = row.get('Pathogen', 'Unknown') - source = row.get('Source_Type', 'Unknown') + for row in data[:10]: # Top 10 pathogens + row_dict = dict(row) + cases = int(row_dict.get('total_cases', 0)) + pathogen = row_dict.get('Pathogen', 'Unknown') + source = row_dict.get('Source Type', 'Unknown') report_data.append({ "disease": pathogen, "cases": cases, "source": source, - "serotype": row.get('Serotype_or_Species', 'N/A') + "serotype": row_dict.get('Serotype or Species', 'N/A') }) total_cases += cases - location_desc = f"{state}" if state else "All States" - year_text = f" in {year}" if year else " in 2025" + location_desc = f"{county}, {state}" if county and state else f"{state}" if state else "All States" + year_text = f" in {year}" if year else f" in {start_year}-{end_year}" if start_year and end_year else " in 2025" + + # Add note about county if specified + county_note = f"\nNote: Data shown is for {state} state level. 
County-specific data is not available in CDC BEAM dataset.\n" if county else "" - report = f"""Infectious Disease Report for {location_desc}{year_text}: + report = f"""Infectious Disease Report for {location_desc}{year_text}:{county_note} (Data from CDC BEAM Dashboard via BigQuery) Total Cases Reported: {total_cases} Disease Breakdown:""" - for data in report_data: + for disease_data in report_data: report += f""" -- {data['disease']}: {data['cases']} isolates (Source: {data['source']})""" +- {disease_data['disease']}: {disease_data['cases']} isolates (Source: {disease_data['source']})""" report += f""" @@ -157,51 +186,84 @@ def get_infectious_disease_data(county: Optional[str] = None, state: Optional[st } } else: - # Fallback to mock data if query fails - raise Exception("No data returned from BigQuery") + # No data returned - try to help user with available data + print(f"[DISEASE] Query returned no data rows") + + # Try to get available pathogens for this query + try: + help_query = f""" + SELECT DISTINCT Pathogen + FROM `{project_id}.beam_report_data_folder.beam_report_data` + WHERE {where_clause.split(' AND LOWER(Pathogen)')[0]} -- Remove disease filter + LIMIT 20 + """ + help_job = bq_client.query(help_query) + available_pathogens = [row[0] for row in help_job.result()] + + if available_pathogens: + print(f"[DISEASE] Available pathogens for this query: {', '.join(available_pathogens)}") + available_msg = f" Available pathogens: {', '.join(available_pathogens)}" + else: + available_msg = "" + except Exception as e: + print(f"[DISEASE] Could not fetch available pathogens: {e}") + available_msg = "" + + raise Exception(f"No data returned from BigQuery.{available_msg}") except Exception as query_error: - print(f"[DISEASE] BigQuery error: {query_error}") + error_str = str(query_error) + print(f"[DISEASE] BigQuery error: {error_str}") print(f"[DISEASE] Query was: {query[:200]}...") + + # Check if error contains available pathogens info + if "Available pathogens:" 
in error_str: + # Don't use mock data - return helpful error + return { + "status": "error", + "error_message": f"No data found for disease '{disease or 'specified disease'}' in the CDC BEAM database. {error_str}", + "suggestion": "Try querying for: STEC, Salmonella, Campylobacter, Shigella, or Vibrio" + } + print(f"[DISEASE] Falling back to mock data") # Generate mock data as fallback location_desc = f"{county}, {state}" if county and state else state if state else "Demo Location" diseases_to_report = [disease] if disease else random.sample(INFECTIOUS_DISEASES, 3) - - report_data = [] - total_cases = 0 - - for disease_name in diseases_to_report: - cases = random.randint(15, 250) - report_data.append({ - "disease": disease_name, - "cases": cases, + + report_data = [] + total_cases = 0 + + for disease_name in diseases_to_report: + cases = random.randint(15, 250) + report_data.append({ + "disease": disease_name, + "cases": cases, "source": "Mock Data" - }) - total_cases += cases - + }) + total_cases += cases + year_text = f" in {year}" if year else " (demo data)" - - report = f"""Infectious Disease Report for {location_desc}{year_text}: + + report = f"""Infectious Disease Report for {location_desc}{year_text}: (Demo Mode - Real data requires BigQuery access) Total Cases Reported: {total_cases} Disease Breakdown:""" - - for data in report_data: - report += f""" -- {data['disease']}: {data['cases']} cases""" - - return { - "status": "success", - "report": report, - "data": { - "location": location_desc, - "total_cases": total_cases, - "diseases": report_data + + for disease_data in report_data: + report += f""" +- {disease_data['disease']}: {disease_data['cases']} cases""" + + return { + "status": "success", + "report": report, + "data": { + "location": location_desc, + "total_cases": total_cases, + "diseases": report_data + } } - } except Exception as e: return { From 3fdf281fed4df4ccc70f99a41adf9f00a0803f83 Mon Sep 17 00:00:00 2001 From: abhiram304 Date: Sun, 26 Oct 
2025 12:03:38 -0700 Subject: [PATCH 5/5] Input from the UI form - location and time, default time set to 2024 to 2025 Add a current time to the global prompt so it always uses the current time for all Agents Fix the diseases agent to call Bigquery each time Analytics agent integration --- app.py | 26 +++- multi_tool_agent_bquery_tools/agent.py | 165 ++++++++++++++++++------- static/js/app.js | 65 +++++++++- 3 files changed, 207 insertions(+), 49 deletions(-) diff --git a/app.py b/app.py index ba73d7cc..0ca51053 100644 --- a/app.py +++ b/app.py @@ -303,6 +303,8 @@ def agent_chat(): request_data = request.get_json() question = request_data.get('question', '') + location_context = request_data.get('location_context', None) + time_frame = request_data.get('time_frame', None) if not question: return jsonify({ @@ -310,6 +312,26 @@ def agent_chat(): 'error': 'No question provided' }), 400 + # Add location context to the question if available + if location_context: + location_info = [] + if location_context.get('city'): + location_info.append(f"City: {location_context['city']}") + if location_context.get('state'): + location_info.append(f"State: {location_context['state']}") + if location_context.get('county'): + location_info.append(f"County: {location_context['county']}") + if location_context.get('zipCode'): + location_info.append(f"ZIP Code: {location_context['zipCode']}") + if location_context.get('formattedAddress'): + location_info.append(f"Address: {location_context['formattedAddress']}") + + if location_info: + location_text = " | ".join(location_info) + enhanced_question = f"User Location Context: {location_text}\n\nUser Question: {question}" + print(f"[CHAT] Enhanced question with location context: {enhanced_question}") + question = enhanced_question + # Check if user wants to generate PSA video video_keywords = ['create video', 'generate psa', 'make video', 'create psa', 'video psa', 'psa video'] wants_video = any(keyword in question.lower() for keyword in 
video_keywords) @@ -391,8 +413,8 @@ def agent_chat(): traceback.print_exc() # Fall through to normal chat - # Normal chat flow - response = call_adk_agent(question) + # Normal chat flow - pass context to agent + response = call_adk_agent(question, location_context=location_context, time_frame=time_frame) return jsonify({ 'success': True, diff --git a/multi_tool_agent_bquery_tools/agent.py b/multi_tool_agent_bquery_tools/agent.py index 1dedf15e..7fcbac84 100644 --- a/multi_tool_agent_bquery_tools/agent.py +++ b/multi_tool_agent_bquery_tools/agent.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import os import asyncio +from datetime import datetime from google.adk.agents import Agent from google.adk.runners import Runner from google.adk.sessions import InMemorySessionService @@ -27,52 +28,116 @@ # === Model configuration === GEMINI_MODEL = "gemini-2.0-flash" +def get_current_time_context(): + """Generate current time context for the agent""" + now = datetime.now() + current_time = now.strftime("%A, %B %d, %Y at %I:%M %p") + current_date = now.strftime("%Y-%m-%d") + current_year = now.year + + return f""" +CURRENT TIME CONTEXT: +- Current Date & Time: {current_time} +- Current Date (ISO): {current_date} +- Current Year: {current_year} + +IMPORTANT: Always reference the current time when providing health advice, especially for: +- Seasonal health recommendations +- Time-sensitive health alerts +- Current weather conditions affecting health +- Recent data trends and patterns +""" + +def create_root_agent_with_context(location_context=None, time_frame=None): + """Create the root agent with dynamic context including current time, location, and time frame""" + + # Get current time context + time_context = get_current_time_context() + + # Build location context if provided + location_info = "" + if location_context: + location_parts = [] + if location_context.get('city'): + location_parts.append(f"City: {location_context['city']}") + if location_context.get('state'): + 
location_parts.append(f"State: {location_context['state']}") + if location_context.get('county'): + location_parts.append(f"County: {location_context['county']}") + if location_context.get('zipCode'): + location_parts.append(f"ZIP Code: {location_context['zipCode']}") + if location_context.get('formattedAddress'): + location_parts.append(f"Address: {location_context['formattedAddress']}") + + if location_parts: + location_info = f""" +USER LOCATION CONTEXT: +- {', '.join(location_parts)} +- Coordinates: {location_context.get('coordinates', {}).get('lat', 'N/A')}, {location_context.get('coordinates', {}).get('lng', 'N/A')} +""" + + # Build time frame context if provided + time_frame_info = "" + if time_frame: + time_frame_info = f""" +DATA TIME FRAME CONTEXT: +- Start Date: {time_frame.get('start_date', 'Not specified')} +- End Date: {time_frame.get('end_date', 'Not specified')} +- Analysis Period: {time_frame.get('period', 'Not specified')} +""" + + # Combine all context + global_context = f"{time_context}{location_info}{time_frame_info}" + + return Agent( + name="community_health_assistant", + model=GEMINI_MODEL, + description="Main community health assistant that routes queries to specialized sub-agents.", + global_instruction=global_context, + instruction=( + "You are a friendly Community Health & Wellness Assistant. " + "When a user greets you, respond warmly with this menu:\n\n" + "\"Welcome to the Community Health & Wellness Assistant!\n\n" + "I can help you with:\n" + "1. [LIVE AIR QUALITY] Check current air quality via the AirNow API\n" + "2. [HISTORICAL AIR QUALITY] View past PM2.5 data from EPA BigQuery\n" + "3. [DISEASES] Infectious Disease Tracking - County-level CDC data\n" + "4. [CLINICS] Find nearby clinics or doctors using Google Search\n" + "5. [HEALTH] General wellness, hygiene, and preventive care advice\n" + "6. [ANALYTICS] Cross-dataset analysis across air quality and disease data\n" + "7. 
[PSA VIDEOS] Generate and share public health announcement videos\n\n" + "What would you like to know about today?\"\n\n" + "Routing Rules:\n" + "- Mentions of 'live', 'today', 'current', or 'now' → live_air_quality_agent.\n" + "- Questions mentioning years, months, or historical data → air_quality_agent.\n" + "- Mentions of infections, outbreaks, or diseases → infectious_diseases_agent.\n" + "- If the user describes symptoms or feeling unwell " + "(e.g., 'I have a rash', 'I feel dizzy', 'my tooth hurts', 'I cut my hand', " + "'my child is sick'), route to clinic_finder_agent." + "- General health, hygiene, prevention, wellness, or safety advice → health_faq_agent.\n" + "- Analytical questions spanning multiple datasets, correlations, trends, or complex analysis → analytics_agent.\n" + "- Requests to create PSA videos, announcements, or post to social media → PSA video agents.\n\n" + "Process:\n" + "1. If clinic_finder_agent provides a search phrase (e.g., 'dermatologist near San Jose'), " + "use google_search with that phrase.\n" + "2. Summarize the top 3–5 results clearly with clinic names and addresses.\n\n" + "After any response (from you or a sub-agent), always end with: " + "'Is there anything else I can help you with today?'" + ), + sub_agents=[ + air_quality_agent, + live_air_quality_agent, + infectious_diseases_agent, + clinic_finder_agent, + health_faq_agent, + ] + ([analytics_agent] if analytics_agent else []) + psa_agents # Add PSA video agents (ActionLine, VeoPrompt, Twitter) + ) + # === Create PSA Video Agents === psa_agents = create_psa_video_agents(model=GEMINI_MODEL, tools_module=None) -# === Root Agent Definition === -root_agent = Agent( - name="community_health_assistant", - model=GEMINI_MODEL, - description="Main community health assistant that routes queries to specialized sub-agents.", - instruction=( - "You are a friendly Community Health & Wellness Assistant. 
" - "When a user greets you, respond warmly with this menu:\n\n" - "\"Welcome to the Community Health & Wellness Assistant!\n\n" - "I can help you with:\n" - "1. [LIVE AIR QUALITY] Check current air quality via the AirNow API\n" - "2. [HISTORICAL AIR QUALITY] View past PM2.5 data from EPA BigQuery\n" - "3. [DISEASES] Infectious Disease Tracking - County-level CDC data\n" - "4. [CLINICS] Find nearby clinics or doctors using Google Search\n" - "5. [HEALTH] General wellness, hygiene, and preventive care advice\n" - "6. [ANALYTICS] Cross-dataset analysis across air quality and disease data\n" - "7. [PSA VIDEOS] Generate and share public health announcement videos\n\n" - "What would you like to know about today?\"\n\n" - "Routing Rules:\n" - "- Mentions of 'live', 'today', 'current', or 'now' → live_air_quality_agent.\n" - "- Questions mentioning years, months, or historical data → air_quality_agent.\n" - "- Mentions of infections, outbreaks, or diseases → infectious_diseases_agent.\n" - "- If the user describes symptoms or feeling unwell " - "(e.g., 'I have a rash', 'I feel dizzy', 'my tooth hurts', 'I cut my hand', " - "'my child is sick'), route to clinic_finder_agent." - "- General health, hygiene, prevention, wellness, or safety advice → health_faq_agent.\n" - "- Analytical questions spanning multiple datasets, correlations, trends, or complex analysis → analytics_agent.\n" - "- Requests to create PSA videos, announcements, or post to social media → PSA video agents.\n\n" - "Process:\n" - "1. If clinic_finder_agent provides a search phrase (e.g., 'dermatologist near San Jose'), " - "use google_search with that phrase.\n" - "2. 
Summarize the top 3–5 results clearly with clinic names and addresses.\n\n" - "After any response (from you or a sub-agent), always end with: " - "'Is there anything else I can help you with today?'" - ), - sub_agents=[ - air_quality_agent, - live_air_quality_agent, - infectious_diseases_agent, - clinic_finder_agent, - health_faq_agent, - ] + ([analytics_agent] if analytics_agent else []) + psa_agents # Add PSA video agents (ActionLine, VeoPrompt, Twitter) -) +# === Default Root Agent (for backward compatibility) === +root_agent = create_root_agent_with_context() # === Runner & Session Setup === APP_NAME = "community_health_app" @@ -95,11 +160,21 @@ def _initialize_session_and_runner(): ) _runner = Runner(agent=root_agent, app_name=APP_NAME, session_service=_session_service) -def call_agent(query: str) -> str: +def call_agent(query: str, location_context=None, time_frame=None) -> str: """Helper function to call the agent with a query and return the response.""" _initialize_session_and_runner() + + # Create agent with context if provided + if location_context or time_frame: + agent_with_context = create_root_agent_with_context(location_context, time_frame) + # Create a new runner with the context-aware agent + context_runner = Runner(agent=agent_with_context, app_name=APP_NAME, session_service=_session_service) + runner_to_use = context_runner + else: + runner_to_use = _runner + content = types.Content(role="user", parts=[types.Part(text=query)]) - events = _runner.run(user_id=USER_ID, session_id=SESSION_ID, new_message=content) + events = runner_to_use.run(user_id=USER_ID, session_id=SESSION_ID, new_message=content) for event in events: if event.is_final_response(): diff --git a/static/js/app.js b/static/js/app.js index 1fda60bd..1e57f6cc 100644 --- a/static/js/app.js +++ b/static/js/app.js @@ -335,6 +335,33 @@ async function askAI() { const loadingMsg = addMessage('Thinking...', 'bot'); try { + // Get stored location data for chat agent + const storedLocationData 
= localStorage.getItem('currentLocationData'); + let locationContext = null; + + if (storedLocationData) { + try { + locationContext = JSON.parse(storedLocationData); + console.log('[Chat] Using stored location data:', locationContext); + } catch (e) { + console.warn('[Chat] Failed to parse stored location data:', e); + } + } + + // Get time frame from date inputs + const startDateInput = document.getElementById('startDate'); + const endDateInput = document.getElementById('endDate'); + let timeFrame = null; + + if (startDateInput && endDateInput && startDateInput.value && endDateInput.value) { + timeFrame = { + start_date: startDateInput.value, + end_date: endDateInput.value, + period: `${startDateInput.value} to ${endDateInput.value}` + }; + console.log('[Chat] Using time frame:', timeFrame); + } + // Try ADK agent first const response = await fetch('/api/agent-chat', { method: 'POST', @@ -344,7 +371,9 @@ async function askAI() { body: JSON.stringify({ question: question, state: currentState, - days: currentDays + days: currentDays, + location_context: locationContext, + time_frame: timeFrame }) }); @@ -356,7 +385,39 @@ async function askAI() { if (data.success) { // Add agent badge if available const agentBadge = data.agent ? `
via ${data.agent}
` : ''; - addMessage(data.response + agentBadge, 'bot'); + + // Add context indicators if available + let contextIndicators = ''; + if (locationContext) { + const locationText = [locationContext.city, locationContext.state, locationContext.zipCode].filter(Boolean).join(', '); + contextIndicators += `
+ + Using location: ${locationText} +
`; + } + + if (timeFrame) { + contextIndicators += `
+ + Using time frame: ${timeFrame.period} +
`; + } + + // Add current time indicator + const now = new Date(); + const currentTime = now.toLocaleString('en-US', { + weekday: 'short', + month: 'short', + day: 'numeric', + hour: '2-digit', + minute: '2-digit' + }); + contextIndicators += `
+ + Current time: ${currentTime} +
`; + + addMessage(data.response + agentBadge + contextIndicators, 'bot'); // If video generation started, begin polling if (data.task_id) {