Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions examples/12_resale_advisor_example/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Project metadata for the resale advisor example.
[project]
name = "resale-advisor-example"
version = "0.0.0"
requires-python = ">=3.10"

# mlx-vlm requires Apple Silicon; the [mlx-vlm,transformers] extras pull in
# the backends the huggingface plugin can run the VLM on.
dependencies = [
    "python-dotenv>=1.0",
    "vision-agents-plugins-huggingface[mlx-vlm,transformers]",
    "vision-agents-plugins-getstream",
    "vision-agents-plugins-deepgram",
    "vision-agents",
    "mlx-vlm",
    "torchvision",
]

# Resolve the workspace packages from local paths (editable installs) so the
# example runs against the in-repo sources rather than PyPI releases.
[tool.uv.sources]
"vision-agents-plugins-huggingface" = {path = "../../plugins/huggingface", editable=true}
"vision-agents-plugins-getstream" = {path = "../../plugins/getstream", editable=true}
"vision-agents-plugins-deepgram" = {path = "../../plugins/deepgram", editable=true}
"vision-agents" = {path = "../../agents-core", editable=true}
75 changes: 75 additions & 0 deletions examples/12_resale_advisor_example/resale_advisor_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""
Vision AI with Gemma 4 - Local VLM Agent (MLX)

A real-time vision + voice assistant powered by Gemma 4 E4B running on Apple
Silicon via MLX. Demonstrates how to build a multimodal AI agent that can see
the user's video feed and respond with voice:

- Gemma 4 E4B (8-bit quantized) via mlx-vlm for vision-language inference
- Deepgram for speech-to-text and text-to-speech
- GetStream for real-time communication

The user speaks naturally and the agent responds with voice, describing what
it sees and answering questions about the video feed.

Requirements:
- STREAM_API_KEY and STREAM_API_SECRET environment variables
- DEEPGRAM_API_KEY environment variable
- Apple Silicon Mac with 16GB+ unified memory

First run will download the MLX model (~8GB).
"""

import asyncio
import logging

from dotenv import load_dotenv
from vision_agents.core import Agent, Runner, User
from vision_agents.core.agents import AgentLauncher
from vision_agents.plugins import deepgram, getstream, huggingface

logger = logging.getLogger(__name__)

load_dotenv()

# Instructions given to the VLM. Responses are spoken aloud via TTS, so the
# prompt forbids lists, formatting, and emojis and caps the length — plain
# short sentences read far better through a speech synthesizer.
SYSTEM_PROMPT = (
    "You are a vision assistant running on a local Gemma 4 model. "
    "You can see the user's camera feed. Describe what you see concisely. "
    "Speak naturally, as if having a conversation. No lists or formatting. "
    "Never use emojis or special characters. Keep responses under 50 words."
)


async def create_agent(**kwargs) -> Agent:
    """Assemble the vision assistant agent.

    Wires together the GetStream edge transport, the MLX-hosted Gemma 4
    E4B vision-language model, and Deepgram speech-to-text / text-to-speech.

    Returns:
        A configured ``Agent`` ready to join a call.
    """
    # Local VLM via mlx-vlm; the model is fetched from the HF hub on first use.
    local_vlm = huggingface.MlxVLM(
        model="mlx-community/gemma-4-e4b-it-8bit",
        max_new_tokens=150,
    )

    return Agent(
        edge=getstream.Edge(),
        agent_user=User(name="Vision Assistant", id="agent"),
        instructions=SYSTEM_PROMPT,
        llm=local_vlm,
        tts=deepgram.TTS(),
        stt=deepgram.STT(),
    )


async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None:
    """Join the given call, greet the user, and run until the agent finishes.

    Args:
        agent: The agent produced by ``create_agent``.
        call_type: GetStream call type identifier.
        call_id: GetStream call id to join.
    """
    greeting_instruction = (
        "Greet the user briefly. Tell them you can see their camera and can describe what you see."
    )

    call = await agent.create_call(call_type, call_id)

    logger.info("Starting Vision Assistant...")

    async with agent.join(call):
        # Give the media pipeline a moment to come up before speaking.
        await asyncio.sleep(2)
        await agent.llm.simple_response(text=greeting_instruction)
        await agent.finish()


if __name__ == "__main__":
    # Hand the factory and join hooks to the launcher, then start the CLI.
    launcher = AgentLauncher(create_agent=create_agent, join_call=join_call)
    Runner(launcher).cli()
14 changes: 12 additions & 2 deletions plugins/huggingface/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ requires-python = ">=3.10"
license = "MIT"
dependencies = [
"vision-agents",
"huggingface_hub<1.0",
"huggingface_hub>=0.20.0,<2",
]

[project.optional-dependencies]
transformers = [
"transformers>=4.45.0,<5",
"transformers>=5.3.0,<6",
"torch>=2.0.0,<3",
"accelerate>=0.25.0,<2",
"supervision>=0.21.0,<1",
Expand All @@ -27,6 +27,16 @@ transformers-quantized = [
"vision-agents-plugins-huggingface[transformers]",
"bitsandbytes>=0.41.0",
]
mlx = [
"mlx>=0.22.0",
"mlx-lm>=0.22.0",
]
mlx-vlm = [
"mlx>=0.22.0",
"mlx-vlm>=0.4.0",
"av",
"aiortc",
]

[project.urls]
Documentation = "https://visionagents.ai/"
Expand Down
6 changes: 3 additions & 3 deletions plugins/huggingface/tests/test_transformers_vlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ async def test_processor_fallback(self, vlm):
}

messages = [{"role": "user", "content": "describe this"}]
result = vlm._build_processor_inputs(messages, [])
result = vlm._build_processor_inputs(processor, messages, [], None)
assert "input_ids" in result

call_kwargs = processor.call_args.kwargs
Expand All @@ -174,7 +174,7 @@ async def test_build_processor_inputs_passes_tools(self, vlm):
}
]
messages = [{"role": "user", "content": "hi"}]
vlm._build_processor_inputs(messages, [], tools)
vlm._build_processor_inputs(vlm._resources.processor, messages, [], tools)

call_kwargs = vlm._resources.processor.apply_chat_template.call_args.kwargs
assert call_kwargs["tools"] is tools
Expand Down Expand Up @@ -205,7 +205,7 @@ def _side_effect(*args, **kwargs):
}
]
result = vlm._build_processor_inputs(
[{"role": "user", "content": "hi"}], [], tools
vlm._resources.processor, [{"role": "user", "content": "hi"}], [], tools
)
assert "input_ids" in result
assert call_count == 2
Expand Down
32 changes: 32 additions & 0 deletions plugins/huggingface/vision_agents/plugins/huggingface/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,35 @@
)
else:
raise

# MlxLLM is optional: it depends on the MLX stack, which is only available on
# Apple Silicon. Expose it when importable, warn (instead of failing package
# import) when the known optional dependencies are absent.
try:
    from .mlx_llm import MlxLLM

    __all__ += ["MlxLLM"]
except ImportError as e:
    import warnings

    # Only swallow the error when the missing top-level module is one of the
    # known optional deps; any other ImportError is a real bug, so re-raise.
    # NOTE(review): a failed submodule import (e.g. e.name == "mlx.core")
    # would not match this set and would re-raise — confirm that is intended.
    if e.name in {"mlx", "mlx_lm"}:
        warnings.warn(
            f"Optional dependency '{e.name}' is not installed. "
            "Install the [mlx] extra to enable MLX plugins.",
            stacklevel=2,
        )
    else:
        raise

# MlxVLM is optional: besides the MLX stack it needs av/aiortc for media
# handling. Mirror the MlxLLM guard: export when importable, warn when one of
# the known optional dependencies is missing, re-raise anything else.
try:
    from .mlx_vlm import MlxVLM

    __all__ += ["MlxVLM"]
except ImportError as e:
    import warnings

    # e.name is the top-level module that failed to import; only the declared
    # [mlx-vlm] extra's packages are treated as "optional and absent".
    if e.name in {"mlx", "mlx_vlm", "av", "aiortc"}:
        warnings.warn(
            f"Optional dependency '{e.name}' is not installed. "
            "Install the [mlx-vlm] extra to enable MLX VLM plugins.",
            stacklevel=2,
        )
    else:
        raise
Loading
Loading