From 5834126dd026740c729cececf2454147a4803707 Mon Sep 17 00:00:00 2001
From: lokesh12344 <ls172880@gmail.com>
Date: Tue, 31 Mar 2026 01:17:54 +0530
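Subject: [PATCH 1/2] fix: resolve NameError and import pathing in main.py

Import Controller through the package-qualified path (src.controller)
instead of the bare module name, add the missing typing.Union import, and
drop the commented-out backend import.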
---
 src/main.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/main.py b/src/main.py
index 5bb632b..1c12af7 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,8 +1,8 @@
 import os
-# from backend import Fill  
-from commonforms import prepare_form 
+from typing import Union
+from commonforms import prepare_form
 from pypdf import PdfReader
-from controller import Controller
+from src.controller import Controller
 
 def input_fields(num_fields: int):
     fields = []

From 1e8a9903db445f86517c9dce04652c55ec411d62 Mon Sep 17 00:00:00 2001
From: lokesh12344 <ls172880@gmail.com>
Date: Tue, 31 Mar 2026 01:47:09 +0530
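Subject: [PATCH 2/2] feat: implement batch LLM extraction with JSON mode

src/llm.py: replace the per-field prompt loop with a single batch request
that asks Ollama for all target fields at once as one JSON object
("format": "json"). build_prompt() is replaced by build_batch_prompt(), the
model name is read from OLLAMA_MODEL (default "mistral"), and the request
now has a 300 second timeout.

api/main.py: give the FastAPI app a title and add an HTML landing page at
"/" that links to /docs.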
---
 api/main.py | 26 +++++++++++++-
 src/llm.py  | 98 ++++++++++++++++++++++++++++++++---------------------
 2 files changed, 84 insertions(+), 40 deletions(-)

diff --git a/api/main.py b/api/main.py
index d0b8c79..35fba4b 100644
--- a/api/main.py
+++ b/api/main.py
@@ -1,7 +1,31 @@
 from fastapi import FastAPI
+from fastapi.responses import HTMLResponse
 from api.routes import templates, forms
 
-app = FastAPI()
+app = FastAPI(title="FireForm API")
+
+@app.get("/", response_class=HTMLResponse)
+def root():  # Landing page: returns a static HTML document that links to the interactive docs at /docs.
+    return """
+    <html>
+        <head>
+            <title>FireForm API</title>
+            <style>
+                body { font-family: sans-serif; display: flex; flex-direction: column; align-items: center; justify-content: center; height: 100vh; background-color: #f0f2f5; }
+                .container { background: white; padding: 2rem; border-radius: 8px; box-shadow: 0 4px 6px rgba(0,0,0,0.1); text-align: center; }
+                h1 { color: #ff4b2b; }
+                a { color: #007bff; text-decoration: none; font-weight: bold; }
+            </style>
+        </head>
+        <body>
+            <div class="container">
+                <h1>🔥 FireForm API</h1>
+                <p>Digital Public Good for First Responders</p>
+                <p>Visit the <a href="/docs">Interactive API Documentation</a> to test endpoints.</p>
+            </div>
+        </body>
+    </html>
+    """
 
 app.include_router(templates.router)
 app.include_router(forms.router)
\ No newline at end of file
diff --git a/src/llm.py b/src/llm.py
index 70937f9..5a3dbde 100644
--- a/src/llm.py
+++ b/src/llm.py
@@ -23,58 +23,78 @@ def type_check_all(self):
                 Target fields must be a list. Input:\n\ttarget_fields: {self._target_fields}"
             )
 
-    def build_prompt(self, current_field):
+    def build_batch_prompt(self, field_list):
         """
-        This method is in charge of the prompt engineering. It creates a specific prompt for each target field.
-        @params: current_field -> represents the current element of the json that is being prompted.
+        Build one prompt asking for every name in field_list (any iterable of field names) as a single JSON object.
         """
-        prompt = f""" 
+        prompt = f"""
             SYSTEM PROMPT:
-            You are an AI assistant designed to help fillout json files with information extracted from transcribed voice recordings. 
-            You will receive the transcription, and the name of the JSON field whose value you have to identify in the context. Return 
-            only a single string containing the identified value for the JSON field. 
-            If the field name is plural, and you identify more than one possible value in the text, return both separated by a ";".
-            If you don't identify the value in the provided text, return "-1".
+            You are an AI assistant designed to extract information from transcribed voice recordings and format it as JSON.
+            You will receive the transcription and a list of JSON fields to identify.
+            Your output MUST be a valid JSON object where the keys are the field names and the values are the identified data.
+            If a value is not identified, use "-1".
+            If a field name is plural and you identify more than one value, use a ";" separated string.
+
+            Example format:
+            {{
+                "Field1": "value",
+                "Field2": "value1; value2",
+                "Field3": "-1"
+            }}
+
             ---
             DATA:
-            Target JSON field to find in text: {current_field}
-            
+            Target JSON fields: {json.dumps(list(field_list), ensure_ascii=False)}
+
             TEXT: {self._transcript_text}
             """
-
         return prompt
 
     def main_loop(self):
         # self.type_check_all()
-        for field in self._target_fields.keys():
-            prompt = self.build_prompt(field)
-            # print(prompt)
-            # ollama_url = "http://localhost:11434/api/generate"
-            ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434").rstrip("/")
-            ollama_url = f"{ollama_host}/api/generate"
-
-            payload = {
-                "model": "mistral",
-                "prompt": prompt,
-                "stream": False,  # don't really know why --> look into this later.
-            }
+        """Extract all target fields from the transcript with one Ollama call.
+
+        Raises ConnectionError if Ollama is unreachable; RuntimeError on an HTTP error or an unparseable reply.
+        """
+        ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434").rstrip("/")
+        ollama_url = f"{ollama_host}/api/generate"
+        model_name = os.getenv("OLLAMA_MODEL", "mistral")
+        payload = {
+            "model": model_name,
+            "prompt": self.build_batch_prompt(self._target_fields),
+            "stream": False,
+            "format": "json",
+        }
+        print(f"\t[LOG] Sending batch request to Ollama ({model_name})...")
+        try:
+            response = requests.post(ollama_url, json=payload, timeout=300)
+            response.raise_for_status()
+            raw_response = response.json()["response"]
 
+            # Strict parse first; otherwise fall back to the outermost {...} span of the reply.
             try:
-                response = requests.post(ollama_url, json=payload)
-                response.raise_for_status()
-            except requests.exceptions.ConnectionError:
-                raise ConnectionError(
-                    f"Could not connect to Ollama at {ollama_url}. "
-                    "Please ensure Ollama is running and accessible."
-                )
-            except requests.exceptions.HTTPError as e:
-                raise RuntimeError(f"Ollama returned an error: {e}")
-
-            # parse response
-            json_data = response.json()
-            parsed_response = json_data["response"]
-            # print(parsed_response)
-            self.add_response_to_json(field, parsed_response)
+                extracted_data = json.loads(raw_response)
+            except json.JSONDecodeError:
+                start, end = raw_response.find("{"), raw_response.rfind("}")
+                if start == -1 or end < start:
+                    raise ValueError("Could not parse JSON from LLM response.")
+                extracted_data = json.loads(raw_response[start:end + 1])
+            if not isinstance(extracted_data, dict):
+                raise ValueError("LLM response is not a JSON object.")
+        except requests.exceptions.ConnectionError as e:
+            raise ConnectionError(
+                f"Could not connect to Ollama at {ollama_url}. "
+                "Please ensure Ollama is running and accessible."
+            ) from e
+        except requests.exceptions.HTTPError as e:
+            raise RuntimeError(f"Ollama returned an error: {e}") from e
+        except (requests.exceptions.RequestException, KeyError, TypeError, ValueError) as e:
+            raise RuntimeError(f"Ollama/Extraction error: {e}") from e
+
+        # Walk the requested fields, not the model's keys: extras are ignored, gaps/nulls become "-1".
+        for field in self._target_fields:
+            value = extracted_data.get(field)
+            self.add_response_to_json(field, "-1" if value is None else str(value))
 
         print("----------------------------------")
         print("\t[LOG] Resulting JSON created from the input text:")