From 92e05f53c63b77c0ccaf6569b8aa9a37363f94d2 Mon Sep 17 00:00:00 2001
From: Matheus Buniotto <matheus.buniotto@gmail.com>
Date: Mon, 2 Feb 2026 21:58:47 -0300
Subject: [PATCH] fix image input: accept multimodal (text + image) topics in council tools

---
 llm_council_tool/llm_council.py       | 53 +++++++++++++--
 llm_council_tool/llm_council_pt.py    | 52 +++++++++++++--
 llm_council_tool/test_council_mock.py | 93 ++++++++++++++++++++++++---
 3 files changed, 178 insertions(+), 20 deletions(-)

diff --git a/llm_council_tool/llm_council.py b/llm_council_tool/llm_council.py
index 31e424d..c050b83 100644
--- a/llm_council_tool/llm_council.py
+++ b/llm_council_tool/llm_council.py
@@ -9,7 +9,7 @@
 import os
 import asyncio
 import re
-from typing import List, Dict, Any, Tuple, Optional
+from typing import List, Dict, Any, Tuple, Optional, Union
 from pydantic import BaseModel, Field
 import requests
 
@@ -192,6 +192,42 @@ async def _query_model_async(
         )
         return model, result
 
+    def _normalize_topic_to_content(
+        self, topic: Union[str, List[Dict[str, Any]]]
+    ) -> Union[str, List[Dict[str, Any]]]:
+        """
+        Build the "content" value of the user message sent to the API.
+        Strings and non-empty part lists pass through as-is, so image parts survive.
+        """
+        # None and the empty list both collapse to an empty string.
+        if topic is None or (isinstance(topic, list) and not topic):
+            return ""
+        if isinstance(topic, (str, list)):
+            return topic
+        return str(topic)  # unexpected type: coerce to text
+
+    def _topic_to_text(self, topic: Union[str, List[Dict[str, Any]]]) -> str:
+        """
+        Return a plain-text view of topic for text-only prompts (ranking, chairman).
+        None -> "", str -> unchanged, any other non-list -> str(topic). For a list,
+        "text" parts contribute their text and "image_url" parts a placeholder;
+        other items are skipped. An empty result becomes "[No text content]".
+        """
+        if topic is None:
+            return ""
+        if not isinstance(topic, list):
+            return topic if isinstance(topic, str) else str(topic)
+        parts: List[str] = []
+        for item in topic:
+            if not isinstance(item, dict):
+                continue
+            if item.get("type") == "image_url":
+                parts.append("[Image attached]")
+            elif item.get("type") == "text" and item.get("text"):
+                # Skip null/empty text: None would crash join(), "" would pad it.
+                parts.append(str(item["text"]))
+        return " ".join(parts).strip() or "[No text content]"
+
     def _parse_ranking_from_text(self, ranking_text: str) -> List[str]:
         """
         Extracts the ranking list from the model's text response.
@@ -241,7 +277,7 @@ def _get_available_models(self, api_key: str, base_url: str) -> List[str]:
 
     async def consult_council(
         self,
-        topic: str,
+        topic: Union[str, List[Dict[str, Any]]],
         __user__: Optional[dict] = None,
         __event_emitter__: Any = None,
     ) -> str:
@@ -250,7 +286,14 @@ async def consult_council(
         1. Council provides individual responses.
         2. Council ranks peer responses.
         3. Chairperson synthesizes the final answer.
+
+        topic: User input as plain text (str) or OpenWebUI multimodal content (list of
+               parts with "type": "text" and/or "type": "image_url"). Supports images.
         """
+        # Normalize topic for API (preserve images) and for text-only prompts
+        user_content = self._normalize_topic_to_content(topic)
+        topic_text = self._topic_to_text(topic)
+
         # Resolve API key and base URL (try OpenWebUI first, then fallback)
         api_key = self._resolve_api_key(__user__)
         base_url = self._resolve_base_url()
@@ -358,7 +401,7 @@ async def consult_council(
             False,
         )
 
-        stage1_messages = [{"role": "user", "content": topic}]
+        stage1_messages = [{"role": "user", "content": user_content}]
         tasks = [
             self._query_model_async(model, stage1_messages, api_key, base_url)
             for model in council_models_list
@@ -406,7 +449,7 @@ async def consult_council(
 
         ranking_prompt = f"""You are evaluating different responses to the following question:
 
-Question: {topic}
+Question: {topic_text}
 
 Here are the responses from different models (anonymized):
 
@@ -466,7 +509,7 @@ async def consult_council(
 
         chairman_prompt = f"""You are the Chairperson of an LLM Council.
         
-Original Question: {topic}
+Original Question: {topic_text}
 
 STAGE 1 - Individual Responses:
 {stage1_summary}
diff --git a/llm_council_tool/llm_council_pt.py b/llm_council_tool/llm_council_pt.py
index e879d72..2f4d678 100644
--- a/llm_council_tool/llm_council_pt.py
+++ b/llm_council_tool/llm_council_pt.py
@@ -9,7 +9,7 @@
 import os
 import asyncio
 import re
-from typing import List, Dict, Any, Tuple, Optional
+from typing import List, Dict, Any, Tuple, Optional, Union
 from pydantic import BaseModel, Field
 import requests
 
@@ -190,6 +190,41 @@ async def _query_model_async(
         )
         return model, result
 
+    def _normalize_topic_to_content(
+        self, topic: Union[str, List[Dict[str, Any]]]
+    ) -> Union[str, List[Dict[str, Any]]]:
+        """
+        Build the "content" value of the user message sent to the API.
+        Strings and non-empty part lists pass through as-is, so image parts survive.
+        """
+        if topic is None or (isinstance(topic, list) and not topic):
+            return ""  # None and the empty list both collapse to an empty string
+        if isinstance(topic, (str, list)):
+            return topic
+        return str(topic)  # unexpected type: coerce to text
+
+    def _topic_to_text(self, topic: Union[str, List[Dict[str, Any]]]) -> str:
+        """
+        Return a plain-text view of topic for text-only prompts (ranking, chairman).
+        None -> "", str -> unchanged, any other non-list -> str(topic). For a list,
+        "text" parts contribute their text and "image_url" parts a placeholder;
+        other items are skipped. An empty result becomes "[Sem conteudo de texto]".
+        """
+        if topic is None:
+            return ""
+        if not isinstance(topic, list):
+            return topic if isinstance(topic, str) else str(topic)
+        parts: List[str] = []
+        for item in topic:
+            if not isinstance(item, dict):
+                continue
+            if item.get("type") == "image_url":
+                parts.append("[Imagem anexada]")
+            elif item.get("type") == "text" and item.get("text"):
+                # Skip null/empty text: None would crash join(), "" would pad it.
+                parts.append(str(item["text"]))
+        return " ".join(parts).strip() or "[Sem conteudo de texto]"
+
     def _parse_ranking_from_text(self, ranking_text: str) -> List[str]:
         """
         Extrai a lista de ranking da resposta de texto do modelo.
@@ -235,7 +270,7 @@ def _get_available_models(self, api_key: str, base_url: str) -> List[str]:
 
     async def consultar_conselho(
         self,
-        topico: str,
+        topico: Union[str, List[Dict[str, Any]]],
         __user__: Optional[dict] = None,
         __event_emitter__: Any = None,
     ) -> str:
@@ -244,7 +279,14 @@ async def consultar_conselho(
         1. Conselho fornece respostas individuais.
         2. Conselho classifica as respostas dos pares.
         3. Presidente sintetiza a resposta final.
+
+        topico: Entrada do usuario como texto (str) ou conteudo multimodal do OpenWebUI
+                (lista de partes com "type": "text" e/ou "type": "image_url"). Suporta imagens.
         """
+        # Normaliza topico para API (preserva imagens) e para prompts so texto
+        user_content = self._normalize_topic_to_content(topico)
+        topic_text = self._topic_to_text(topico)
+
         # Resolve chave de API e URL base (tenta OpenWebUI primeiro, depois fallback)
         api_key = self._resolve_api_key(__user__)
         base_url = self._resolve_base_url()
@@ -346,7 +388,7 @@ async def consultar_conselho(
             False,
         )
 
-        stage1_messages = [{"role": "user", "content": topico}]
+        stage1_messages = [{"role": "user", "content": user_content}]
         tasks = [
             self._query_model_async(model, stage1_messages, api_key, base_url)
             for model in council_models_list
@@ -391,7 +433,7 @@ async def consultar_conselho(
 
         ranking_prompt = f"""Voce esta avaliando diferentes respostas para a seguinte pergunta:
 
-Pergunta: {topico}
+Pergunta: {topic_text}
 
 Aqui estao as respostas de diferentes modelos (anonimizadas):
 
@@ -452,7 +494,7 @@ async def consultar_conselho(
 
         chairman_prompt = f"""Voce e o Presidente de um Conselho de LLMs.
 
-Pergunta Original: {topico}
+Pergunta Original: {topic_text}
 
 ETAPA 1 - Respostas Individuais:
 {stage1_summary}
diff --git a/llm_council_tool/test_council_mock.py b/llm_council_tool/test_council_mock.py
index 3a9ee25..23b2ad7 100644
--- a/llm_council_tool/test_council_mock.py
+++ b/llm_council_tool/test_council_mock.py
@@ -25,7 +25,7 @@
     ]
 }
 
-def mock_requests_get(url, headers, timeout):
+def mock_requests_get(url, *args, **kwargs):
     mock_resp = MagicMock()
     if "/models" in url:
         mock_resp.status_code = 200
@@ -33,16 +33,32 @@ def mock_requests_get(url, headers, timeout):
         return mock_resp
     return mock_resp
 
+def _content_to_str(content):
+    """Flatten message content (plain str or multimodal part list) into one string."""
+    if content is None:
+        return ""
+    if not isinstance(content, list):
+        return content if isinstance(content, str) else str(content)
+    # Only dict parts tagged "text" contribute; images and stray items are dropped.
+    texts = [
+        part.get("text", "")
+        for part in content
+        if isinstance(part, dict) and part.get("type") == "text"
+    ]
+    return " ".join(texts)
+
+
 def mock_requests_post(url, headers, json, timeout):
     mock_resp = MagicMock()
     mock_resp.status_code = 200
-    
+
     model = json.get("model")
     messages = json.get("messages", [])
-    last_msg = messages[-1]["content"] if messages else ""
-    
+    raw_content = messages[-1]["content"] if messages else ""
+    last_msg = _content_to_str(raw_content)
+
     content = ""
-    
+
     # Simple heuristic to determine stage
     if "FINAL RANKING:" in last_msg: 
         content = MOCK_STAGE_2_RANKINGS.get(model, "FINAL RANKING:\n1. Response A")
@@ -75,19 +91,76 @@ async def async_emitter(x):
         
         print(f"\nResult: {result}")
         
-        assert result == MOCK_CHAIRMAN_RESPONSE
+        assert MOCK_CHAIRMAN_RESPONSE in result
+        assert "Stage 1" in result and "Stage 3" in result
         
         # Verification:
-        # 1. GET /models called once
-        mock_get.assert_called_once()
+        # 1. GET was used (base URL probe + /models, or just /models)
+        assert mock_get.called
         
         # 2. Check that 'invalid-model' was NOT queried in Stage 1
         # Extract all models called in POST requests
         called_models = [call.kwargs['json']['model'] for call in mock_post.mock_calls]
         assert "invalid-model" not in called_models
         assert "llama3:latest" in called_models
-        assert "gpt-4o" in called_models 
-        
+        assert "gpt-4o" in called_models
+
+
+@pytest.mark.asyncio
+async def test_consult_council_with_image_input():
+    """consult_council accepts a multimodal topic (text + image parts) and sends that list unchanged in Stage 1."""
+    tools = Tools()
+    tools.valves.council_models = "llama3:latest,gpt-4o"
+    tools.valves.chairperson_model = "gpt-4o"
+    tools.valves.openwebui_api_key = "test-key"
+
+    mock_emitter = MagicMock()
+    async def async_emitter(x):
+        mock_emitter(x)
+
+    # Simulate OpenWebUI passing content with an image: a text part followed by an image_url part
+    multimodal_topic = [
+        {"type": "text", "text": "What is in this image?"},
+        {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc123"}},
+    ]
+
+    with patch("requests.post", side_effect=mock_requests_post) as mock_post, \
+         patch("requests.get", side_effect=mock_requests_get) as mock_get:
+        result = await tools.consult_council(multimodal_topic, __event_emitter__=async_emitter)
+        assert result
+        # Stage 1 calls = POSTs whose last message text contains neither "FINAL RANKING:" nor "Chairperson"; their first message must carry the part list (with image)
+        stage1_calls = [c for c in mock_post.mock_calls if c.kwargs.get("json") and "FINAL RANKING:" not in _content_to_str((c.kwargs["json"].get("messages") or [{}])[-1].get("content", "")) and "Chairperson" not in _content_to_str((c.kwargs["json"].get("messages") or [{}])[-1].get("content", ""))]
+        assert stage1_calls, "Expected at least one Stage 1 request"
+        first_msg_content = stage1_calls[0].kwargs["json"]["messages"][0]["content"]
+        assert first_msg_content == multimodal_topic
+
+
+@pytest.mark.asyncio
+async def test_consult_council_empty_list_topic():
+    """An empty-list topic still yields a result, and no POST carries the literal '[]' as its first message content."""
+    tools = Tools()
+    tools.valves.council_models = "llama3:latest,gpt-4o"
+    tools.valves.chairperson_model = "gpt-4o"
+    tools.valves.openwebui_api_key = "test-key"
+    mock_emitter = MagicMock()
+    async def async_emitter(x):
+        mock_emitter(x)
+
+    with patch("requests.post", side_effect=mock_requests_post) as mock_post, \
+         patch("requests.get", side_effect=mock_requests_get) as mock_get:
+        result = await tools.consult_council([], __event_emitter__=async_emitter)
+        assert result
+        # Every recorded POST is checked (not only Stage 1): its first message must not be the stringified empty list
+        for call in mock_post.mock_calls:
+            if not call.kwargs.get("json"):
+                continue
+            messages = call.kwargs["json"].get("messages", [])
+            if not messages:
+                continue
+            content = messages[0].get("content", "")
+            assert content != "[]", "Empty list should normalize to '' not '[]'"
+
+
 if __name__ == "__main__":
     # Manually running the async test if executed as script
     loop = asyncio.new_event_loop()