From 77f0700c1b7f1b4e06f2d172afaf2c1e102a57c1 Mon Sep 17 00:00:00 2001
From: John Trujillo <johnandrestrujillo@gmail.com>
Date: Fri, 19 Jun 2026 14:26:33 -0400
Subject: [PATCH 1/3] fix(ADFA-4388): Prevent crash when loading embedding
 models for chat

Added multi-layer protection to detect and reject embedding models:

**Native Layer (C++):**
- Check pooling_type in new_context() - reject if not LLAMA_POOLING_TYPE_NONE
- Added get_pooling_type() JNI function for Kotlin validation
- Clear error messages explaining embedding vs generative models

**Kotlin Layer:**
- Validate model during load() in LLamaAndroid.kt
- Catch IllegalStateException and wrap with user-friendly message
- File format validation for ONNX, PyTorch, TensorFlow, etc.

**UI Layer:**
- Proper exception handling in AiSettingsViewModel
- Display error in ModelLoadingState.Error instead of crashing
- Keep bottom sheet expanded after file picker to show error/status

**Infrastructure:**
- Rebuilt llama.cpp AAR with updated native code (v8)
- Updated LLAMA_LIB_VERSION to 8 in DynamicLibraryLoader

The app now gracefully handles embedding models with clear error
messages instead of crashing with SIGABRT.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 .../agent/fragments/AiSettingsFragment.kt     |  21 ++
 .../agent/repository/LlmInferenceEngine.kt    | 108 +++++++-
 .../agent/viewmodel/AiSettingsViewModel.kt    |  26 +-
 .../androidide/utils/DynamicLibraryLoader.kt  |   2 +-
 llama-impl/src/main/cpp/llama-android.cpp     | 251 ++++++++++++++++--
 .../java/android/llama/cpp/LLamaAndroid.kt    | 229 +++++++++++++++-
 6 files changed, 603 insertions(+), 34 deletions(-)

diff --git a/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt b/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt
index f73cbae499..79dd25d5eb 100644
--- a/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt
+++ b/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt
@@ -16,10 +16,12 @@ import androidx.core.net.toUri
 import androidx.fragment.app.Fragment
 import androidx.fragment.app.viewModels
 import androidx.navigation.fragment.findNavController
+import com.google.android.material.bottomsheet.BottomSheetBehavior
 import com.google.android.material.textfield.TextInputEditText
 import com.google.android.material.textfield.TextInputLayout
 import com.google.android.material.materialswitch.MaterialSwitch
 import com.itsaky.androidide.R
+import com.itsaky.androidide.activities.editor.BaseEditorActivity
 import com.itsaky.androidide.agent.repository.AiBackend
 import com.itsaky.androidide.agent.repository.Util.getCurrentBackend
 import com.itsaky.androidide.agent.viewmodel.AiSettingsViewModel
@@ -28,6 +30,7 @@ import com.itsaky.androidide.agent.viewmodel.ModelLoadingState
 import com.itsaky.androidide.databinding.FragmentAiSettingsBinding
 import com.itsaky.androidide.utils.flashInfo
 import com.itsaky.androidide.utils.getFileName
+import com.itsaky.androidide.viewmodel.BottomSheetViewModel
 import java.text.SimpleDateFormat
 import java.util.Date
 import java.util.Locale
@@ -50,6 +53,15 @@ class AiSettingsFragment : Fragment(R.layout.fragment_ai_settings) {
                 val uriString = it.toString()
                 viewModel.loadModelFromUri(uriString, requireContext())
                 flashInfo("Attempting to load selected model...")
+
+                // Keep the bottom sheet expanded after file picker returns
+                // Post with delay to ensure it happens after all lifecycle callbacks
+                view?.postDelayed({
+                    (activity as? BaseEditorActivity)?.bottomSheetViewModel?.setSheetState(
+                        sheetState = BottomSheetBehavior.STATE_EXPANDED,
+                        currentTab = BottomSheetViewModel.TAB_AGENT
+                    )
+                }, 100)
             }
         }
 
@@ -204,6 +216,15 @@ class AiSettingsFragment : Fragment(R.layout.fragment_ai_settings) {
             }
             if (hasPermission) {
                 viewModel.loadModelFromUri(savedUri, requireContext())
+
+                // Keep the bottom sheet expanded when loading from saved
+                // Post with delay to ensure it happens after all lifecycle callbacks
+                view?.postDelayed({
+                    (activity as? BaseEditorActivity)?.bottomSheetViewModel?.setSheetState(
+                        sheetState = BottomSheetBehavior.STATE_EXPANDED,
+                        currentTab = BottomSheetViewModel.TAB_AGENT
+                    )
+                }, 100)
             } else {
                 requireActivity().getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE).edit {
                     remove(SAVED_MODEL_URI_KEY)
diff --git a/app/src/main/java/com/itsaky/androidide/agent/repository/LlmInferenceEngine.kt b/app/src/main/java/com/itsaky/androidide/agent/repository/LlmInferenceEngine.kt
index 10e2bde6ea..5c3f58e219 100644
--- a/app/src/main/java/com/itsaky/androidide/agent/repository/LlmInferenceEngine.kt
+++ b/app/src/main/java/com/itsaky/androidide/agent/repository/LlmInferenceEngine.kt
@@ -291,9 +291,13 @@ class LlmInferenceEngine(
         modelUriString: String,
         expectedSha256: String?
     ): Boolean {
+        val modelUri = modelUriString.toUri()
+        val displayName = resolveModelDisplayName(context, modelUri)
+
         return try {
-            val modelUri = modelUriString.toUri()
-            val displayName = resolveModelDisplayName(context, modelUri)
+            // Validate file format before attempting to load
+            validateModelFormat(displayName)
+
             val destinationFile = File(context.cacheDir, "local_model.gguf")
 
             if (!copyModelToCache(context, modelUri, destinationFile)) {
@@ -313,6 +317,25 @@ class LlmInferenceEngine(
             currentModelFamily = detectModelFamily(displayName)
             log.info("Successfully loaded local model: {}", loadedModelName)
             true
+        } catch (e: IllegalStateException) {
+            // Clear stale model state first, as the sibling catch blocks do, then classify the failure.
+            resetLoadedModelState()
+            if (e.message?.contains("embedding model") == true) {
+                log.error("Cannot use embedding model for chat: {}", displayName, e)
+                throw IllegalArgumentException(
+                    "The selected model '$displayName' is an embedding model designed for semantic " +
+                    "search and similarity tasks. It cannot be used for chat or text generation.\n\n" +
+                    "Please select a chat/instruct model instead (e.g., models with 'chat', 'instruct', " +
+                    "'conversational' in their name).", e
+                )
+            }
+            log.error("Failed to load model", e)
+            throw e
+        } catch (e: IllegalArgumentException) {
+            // Re-throw validation errors (file format, etc.)
+            log.error("Model validation failed: {}", displayName, e)
+            resetLoadedModelState()
+            throw e
         } catch (e: Exception) {
             log.error("Failed to initialize or load model from file", e)
             resetLoadedModelState()
@@ -458,6 +481,87 @@ class LlmInferenceEngine(
         }
     }
 
+    /**
+     * Rejects model files whose extension names a known non-GGUF format; other names only log warnings.
+     * Decisions are based on [filename] alone; the file contents are not inspected here.
+     *
+     * @throws IllegalArgumentException if the extension is .onnx, .pt, .pth, .bin, .safetensors, .pb, .tflite or .ggml
+     */
+    private fun validateModelFormat(filename: String) {
+        val lowerName = filename.lowercase()
+
+        // Match on the extension only, so a .gguf file is never rejected because of words in its name
+        when {
+            lowerName.endsWith(".onnx") -> {
+                throw IllegalArgumentException(
+                    "ONNX models (.onnx) are not supported.\n\n" +
+                    "This app uses llama.cpp which only supports GGUF format (.gguf).\n\n" +
+                    "To use this model:\n" +
+                    "1. Convert it to GGUF format using llama.cpp conversion tools\n" +
+                    "2. Or download a pre-converted GGUF version from Hugging Face"
+                )
+            }
+            lowerName.endsWith(".pt") || lowerName.endsWith(".pth") || lowerName.endsWith(".bin") -> {
+                throw IllegalArgumentException(
+                    "PyTorch models (.pt, .pth, .bin) are not supported.\n\n" +
+                    "This app uses llama.cpp which only supports GGUF format (.gguf).\n\n" +
+                    "To use this model:\n" +
+                    "1. Convert it to GGUF format using convert_hf_to_gguf.py\n" +
+                    "2. Or download a pre-converted GGUF version from Hugging Face"
+                )
+            }
+            lowerName.endsWith(".safetensors") -> {
+                throw IllegalArgumentException(
+                    "SafeTensors models (.safetensors) are not directly supported.\n\n" +
+                    "This app uses llama.cpp which only supports GGUF format (.gguf).\n\n" +
+                    "To use this model:\n" +
+                    "1. Convert it to GGUF format using convert_hf_to_gguf.py\n" +
+                    "2. Or download a pre-converted GGUF version from Hugging Face"
+                )
+            }
+            lowerName.endsWith(".pb") -> {
+                throw IllegalArgumentException(
+                    "TensorFlow models (.pb) are not supported.\n\n" +
+                    "This app uses llama.cpp which only supports GGUF format (.gguf).\n\n" +
+                    "To use this model:\n" +
+                    "1. Convert it to GGUF format using appropriate conversion tools\n" +
+                    "2. Or download a pre-converted GGUF version from Hugging Face"
+                )
+            }
+            lowerName.endsWith(".tflite") -> {
+                throw IllegalArgumentException(
+                    "TensorFlow Lite models (.tflite) are not supported.\n\n" +
+                    "This app uses llama.cpp which only supports GGUF format (.gguf).\n\n" +
+                    "Please select a GGUF format model."
+                )
+            }
+            lowerName.endsWith(".ggml") -> {
+                throw IllegalArgumentException(
+                    "GGML models (.ggml) are deprecated.\n\n" +
+                    "This app uses the newer GGUF format (.gguf).\n\n" +
+                    "To use this model:\n" +
+                    "1. Convert it to GGUF using convert_llama_ggml_to_gguf.py\n" +
+                    "2. Or download a GGUF version from Hugging Face"
+                )
+            }
+            !lowerName.endsWith(".gguf") -> {
+                log.warn("Model file '{}' doesn't have .gguf extension. May fail to load.", filename)
+                // Don't throw - maybe it's a GGUF file with wrong extension
+            }
+        }
+
+        // Filename heuristic for embedding models: advisory only, it logs a warning and never throws
+        if (lowerName.contains("all-mini") ||
+            lowerName.contains("all-mpnet") ||
+            lowerName.contains("e5-") ||
+            (lowerName.contains("embed") && !lowerName.contains("llama"))) {
+            log.warn(
+                "Model '{}' appears to be an embedding model based on filename. " +
+                "This may not work for chat. Will validate during load.", filename
+            )
+        }
+    }
+
     private fun detectModelFamily(path: String): ModelFamily {
         val lowerPath = path.lowercase()
         return when {
diff --git a/app/src/main/java/com/itsaky/androidide/agent/viewmodel/AiSettingsViewModel.kt b/app/src/main/java/com/itsaky/androidide/agent/viewmodel/AiSettingsViewModel.kt
index db379a08c9..a6af5f7eee 100644
--- a/app/src/main/java/com/itsaky/androidide/agent/viewmodel/AiSettingsViewModel.kt
+++ b/app/src/main/java/com/itsaky/androidide/agent/viewmodel/AiSettingsViewModel.kt
@@ -105,14 +105,24 @@ class AiSettingsViewModel(application: Application) : AndroidViewModel(applicati
 
         viewModelScope.launch {
             _modelLoadingState.value = ModelLoadingState.Loading
-            val expectedHash = getLocalModelSha256()
-            val success = llmInferenceEngine.initModelFromFile(context, path, expectedHash)
-            if (success && llmInferenceEngine.loadedModelName != null) {
-                _modelLoadingState.value = ModelLoadingState.Loaded(llmInferenceEngine.loadedModelName!!)
-                // Also save the path on successful load
-                saveLocalModelPath(path)
-            } else {
-                _modelLoadingState.value = ModelLoadingState.Error("Failed to load model file.")
+            try {
+                val expectedHash = getLocalModelSha256()
+                val success = llmInferenceEngine.initModelFromFile(context, path, expectedHash)
+                if (success && llmInferenceEngine.loadedModelName != null) {
+                    _modelLoadingState.value = ModelLoadingState.Loaded(llmInferenceEngine.loadedModelName!!)
+                    // Also save the path on successful load
+                    saveLocalModelPath(path)
+                } else {
+                    _modelLoadingState.value = ModelLoadingState.Error("Failed to load model file.")
+                }
+            } catch (e: IllegalArgumentException) {
+                // Handle validation errors (embedding models, unsupported formats, etc.)
+                _modelLoadingState.value = ModelLoadingState.Error(e.message ?: "Model validation failed.")
+                Log.e("ModelLoad", "Model validation error: ${e.message}", e)
+            } catch (e: Exception) {
+                // Handle any other unexpected errors
+                _modelLoadingState.value = ModelLoadingState.Error("Failed to load model: ${e.message}")
+                Log.e("ModelLoad", "Unexpected error loading model", e)
             }
         }
     }
diff --git a/app/src/main/java/com/itsaky/androidide/utils/DynamicLibraryLoader.kt b/app/src/main/java/com/itsaky/androidide/utils/DynamicLibraryLoader.kt
index e34abefcb7..ef720ed27e 100644
--- a/app/src/main/java/com/itsaky/androidide/utils/DynamicLibraryLoader.kt
+++ b/app/src/main/java/com/itsaky/androidide/utils/DynamicLibraryLoader.kt
@@ -10,7 +10,7 @@ import java.util.zip.ZipInputStream
 
 object DynamicLibraryLoader {
 
-    private const val LLAMA_LIB_VERSION = 5 // Increment this if you update the AAR
+    private const val LLAMA_LIB_VERSION = 8 // Increment this if you update the AAR
     private const val PREFS_NAME = "dynamic_libs"
     private const val PREFS_KEY = "llama_lib_version"
 
diff --git a/llama-impl/src/main/cpp/llama-android.cpp b/llama-impl/src/main/cpp/llama-android.cpp
index 5c970621b0..9d01e68d7f 100644
--- a/llama-impl/src/main/cpp/llama-android.cpp
+++ b/llama-impl/src/main/cpp/llama-android.cpp
@@ -238,20 +238,54 @@ JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, void *reserved) {
     return JNI_VERSION_1_6;
 }
 
+// Returns true when the first four bytes of the file at `path` equal the GGUF magic value.
+static bool is_valid_gguf_file(const char *path) {
+    // "GGUF" read as one 32-bit word on a little-endian host
+    constexpr uint32_t kGgufMagic = 0x46554747;
+
+    FILE *fp = fopen(path, "rb");
+    if (fp == nullptr) {
+        LOGe("Cannot open file: %s", path);
+        return false;
+    }
+
+    // Only the leading word is needed, so read it and close the file right away.
+    uint32_t header_word = 0;
+    const size_t items_read = fread(&header_word, sizeof(header_word), 1, fp);
+    fclose(fp);
+
+    if (items_read != 1) {
+        LOGe("Failed to read magic number from file");
+        return false;
+    }
+    return header_word == kGgufMagic;
+}
+
 extern "C"
 JNIEXPORT jlong JNICALL
 Java_android_llama_cpp_LLamaAndroid_load_1model(JNIEnv *env, jobject, jstring filename) {
-    llama_model_params model_params = llama_model_default_params();
-
     auto path_to_model = env->GetStringUTFChars(filename, 0);
     LOGi("Loading model from %s", path_to_model);
 
+    // Validate file format before attempting to load
+    if (!is_valid_gguf_file(path_to_model)) {
+        LOGe("Invalid GGUF file format: %s", path_to_model);
+        env->ReleaseStringUTFChars(filename, path_to_model);
+        env->ThrowNew(env->FindClass("java/lang/IllegalStateException"),
+                      "Invalid model file format. This app only supports GGUF format models. "
+                      "Please ensure you have selected a valid .gguf model file.");
+        return 0;
+    }
+
+    llama_model_params model_params = llama_model_default_params();
     auto model = llama_model_load_from_file(path_to_model, model_params);
     env->ReleaseStringUTFChars(filename, path_to_model);
 
     if (!model) {
-        LOGe("load_model() failed");
-        env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), "load_model() failed");
+        LOGe("load_model() failed - model loading returned null");
+        env->ThrowNew(env->FindClass("java/lang/IllegalStateException"),
+                      "Failed to load model. The file may be corrupted, incompatible, or require "
+                      "more memory than available. Please try a smaller model or restart the app.");
         return 0;
     }
 
@@ -286,6 +320,18 @@ Java_android_llama_cpp_LLamaAndroid_new_1context(JNIEnv *env, jobject, jlong jmo
     }
     LOGi("Using %d threads (batch=%d)", n_threads, n_threads_batch);
 
+    // Validate model parameters before creating context
+    int32_t model_n_ctx_train = llama_model_n_ctx_train(model);
+
+    LOGi("Model info: ctx_train=%d", model_n_ctx_train);
+
+    if (model_n_ctx_train <= 0) {
+        LOGe("Invalid model training context: %d", model_n_ctx_train);
+        env->ThrowNew(env->FindClass("java/lang/IllegalStateException"),
+                      "Model has invalid training context. The model file may be corrupted.");
+        return 0;
+    }
+
     llama_context_params ctx_params = llama_context_default_params();
 
     const int configured_ctx = g_n_ctx.load();
@@ -293,15 +339,44 @@ Java_android_llama_cpp_LLamaAndroid_new_1context(JNIEnv *env, jobject, jlong jmo
     ctx_params.n_threads = n_threads;
     ctx_params.n_threads_batch = n_threads_batch;
 
+    // Clamp context size to model's training context
+    if (ctx_params.n_ctx > model_n_ctx_train) {
+        LOGi("Clamping requested context %d to model's training context %d",
+             ctx_params.n_ctx, model_n_ctx_train);
+        ctx_params.n_ctx = model_n_ctx_train;
+    }
+
+    LOGi("Creating context with n_ctx=%d, n_threads=%d, n_threads_batch=%d",
+         ctx_params.n_ctx, ctx_params.n_threads, ctx_params.n_threads_batch);
+
     llama_context *context = llama_init_from_model(model, ctx_params);
 
     if (!context) {
-        LOGe("llama_new_context_with_model() returned null)");
+        LOGe("llama_init_from_model() returned null");
+        env->ThrowNew(env->FindClass("java/lang/IllegalStateException"),
+                      "Failed to create model context. This may indicate:\n"
+                      "1. Insufficient memory (try freeing memory or using a smaller model)\n"
+                      "2. Incompatible model architecture\n"
+                      "3. Corrupted model file\n"
+                      "Try restarting the app or selecting a different model.");
+        return 0;
+    }
+
+    // CRITICAL: Verify this is not an embedding model IMMEDIATELY after context creation
+    const auto pooling_type = llama_pooling_type(context);
+    LOGi("Context pooling_type: %d (0=none/generative, 1=mean/embed, 2=cls, 3=last, 4=rank)", pooling_type);
+
+    if (pooling_type != LLAMA_POOLING_TYPE_NONE) {
+        LOGe("REJECTED: Model is configured for embeddings (pooling_type=%d), cannot generate text", pooling_type);
+        llama_free(context);
         env->ThrowNew(env->FindClass("java/lang/IllegalStateException"),
-                      "llama_new_context_with_model() returned null)");
+                      "This model is an embedding model and cannot be used for text generation. "
+                      "Embedding models use 'encode' operations, not 'decode'. "
+                      "Please select a chat/instruct model (Llama, Qwen, Gemma, etc.) for conversation.");
         return 0;
     }
 
+    LOGi("Context created successfully - model is suitable for text generation");
     return reinterpret_cast<jlong>(context);
 }
 
@@ -585,6 +660,20 @@ Java_android_llama_cpp_LLamaAndroid_completion_1init(
         jboolean format_chat,
         jint n_len, jobjectArray stop) {
 
+    const auto context = reinterpret_cast<llama_context *>(context_pointer);
+    const auto batch = reinterpret_cast<llama_batch *>(batch_pointer);
+
+    // Safety check: Verify this is not an embedding model
+    if (context) {
+        const auto pooling_type = llama_pooling_type(context);
+        if (pooling_type != LLAMA_POOLING_TYPE_NONE) {
+            LOGe("completion_init failed: Model has pooling_type=%d, cannot generate text", pooling_type);
+            env->ThrowNew(env->FindClass("java/lang/IllegalStateException"),
+                          "This model is configured for embeddings and cannot generate text. Please use a generative model for chat.");
+            return 0;
+        }
+    }
+
     {
         std::lock_guard<std::mutex> lock(g_globals_mutex);
         cached_token_chars.clear();
@@ -611,21 +700,57 @@ Java_android_llama_cpp_LLamaAndroid_completion_1init(
         }
     }
 
+    if (!context) {
+        LOGe("completion_init: context is null");
+        env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), "Model context is null");
+        return 0;
+    }
+
+    if (!batch) {
+        LOGe("completion_init: batch is null");
+        env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), "Batch is null");
+        return 0;
+    }
+
     const auto text = env->GetStringUTFChars(jtext, 0);
-    const auto context = reinterpret_cast<llama_context *>(context_pointer);
-    const auto batch = reinterpret_cast<llama_batch *>(batch_pointer);
+    if (!text) {
+        LOGe("completion_init: failed to get text string");
+        env->ThrowNew(env->FindClass("java/lang/IllegalArgumentException"), "Invalid text input");
+        return 0;
+    }
 
     bool parse_special = (format_chat == JNI_TRUE);
+    LOGi("Tokenizing input (parse_special=%d)...", parse_special);
+
     const auto tokens_list = common_tokenize(context, text, true, parse_special);
+    LOGi("Tokenized %zu tokens", tokens_list.size());
+
+    if (tokens_list.empty()) {
+        LOGe("Tokenization produced no tokens");
+        env->ReleaseStringUTFChars(jtext, text);
+        env->ThrowNew(env->FindClass("java/lang/IllegalArgumentException"),
+                      "Failed to tokenize input text. The text may be empty or invalid.");
+        return 0;
+    }
 
     int n_ctx = llama_n_ctx(context);
+    if (n_ctx <= 0) {
+        LOGe("Invalid context size: %d", n_ctx);
+        env->ReleaseStringUTFChars(jtext, text);
+        env->ThrowNew(env->FindClass("java/lang/IllegalStateException"),
+                      "Model context size is invalid. Model may be corrupted.");
+        return 0;
+    }
+
     size_t n_kv_req = tokens_list.size() + static_cast<size_t>(n_len);
-    LOGi("n_len = %d, n_ctx = %d, n_kv_req = %zu", n_len, n_ctx, n_kv_req);
+    LOGi("n_len = %d, n_ctx = %d, n_tokens = %zu, n_kv_req = %zu", n_len, n_ctx, tokens_list.size(), n_kv_req);
 
     if (n_kv_req > n_ctx) {
-        LOGe("error: n_kv_req > n_ctx, the required KV cache size is not big enough");
+        LOGe("error: n_kv_req (%zu) > n_ctx (%d), the required KV cache size is not big enough", n_kv_req, n_ctx);
+        env->ReleaseStringUTFChars(jtext, text);
         env->ThrowNew(env->FindClass("java/lang/IllegalArgumentException"),
-                      "Prompt is too long for the model's context size.");
+                      "Prompt is too long for the model's context size. "
+                      "Try a shorter message or reduce max output tokens.");
         return 0;
     }
 
@@ -684,11 +809,60 @@ Java_android_llama_cpp_LLamaAndroid_completion_1init(
     }
 
     if (batch->n_tokens > 0) {
+        LOGi("Processing batch with %d tokens", batch->n_tokens);
+
+        // Validate batch before decode
+        if (batch->n_tokens < 0) {
+            LOGe("Invalid batch token count: %d", batch->n_tokens);
+            env->ReleaseStringUTFChars(jtext, text);
+            env->ThrowNew(env->FindClass("java/lang/IllegalStateException"),
+                          "Batch state corrupted. Token count is negative.");
+            return 0;
+        }
+
         // llama_decode will output logits only for the last token of the prompt
         batch->logits[batch->n_tokens - 1] = true;
-        if (llama_decode(context, *batch) != 0) {
-            LOGe("llama_decode() failed");
+
+        LOGi("Calling llama_decode for initial prompt processing...");
+
+        // Double-check pooling type before decode (belt and suspenders)
+        const auto pooling_check = llama_pooling_type(context);
+        if (pooling_check != LLAMA_POOLING_TYPE_NONE) {
+            LOGe("CRITICAL: Attempted decode on embedding model (pooling=%d)", pooling_check);
+            env->ReleaseStringUTFChars(jtext, text);
+            env->ThrowNew(env->FindClass("java/lang/IllegalStateException"),
+                          "Cannot decode with embedding model. This model only supports 'encode' operations.");
+            return 0;
         }
+
+        int decode_result = llama_decode(context, *batch);
+
+        if (decode_result != 0) {
+            LOGe("llama_decode() failed with error code: %d", decode_result);
+            env->ReleaseStringUTFChars(jtext, text);
+
+            const char* error_msg;
+            switch (decode_result) {
+                case -1:
+                    error_msg = "Model decode failed (error -1). This may indicate:\n"
+                                "1. Insufficient memory for model operations\n"
+                                "2. Incompatible model architecture (possibly an embedding model)\n"
+                                "3. Corrupted model file\n"
+                                "Try: Restart app, use smaller model, or select a chat/instruct model";
+                    break;
+                case -2:
+                    error_msg = "Model decode failed (error -2). Context or batch state is invalid.";
+                    break;
+                default:
+                    error_msg = "Model decode failed with unknown error. Model may be incompatible.";
+            }
+
+            env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), error_msg);
+            return 0;
+        }
+        LOGi("Initial decode completed successfully");
+    } else {
+        LOGi("Batch is empty, skipping decode");
     }
 
     env->ReleaseStringUTFChars(jtext, text);
@@ -816,8 +990,24 @@ Java_android_llama_cpp_LLamaAndroid_completion_1loop(
 
     env->CallVoidMethod(intvar_ncur, la_int_var_inc);
 
-    if (llama_decode(context, *batch) != 0) {
-        LOGe("llama_decode() returned null");
+    // Safety check before decode
+    if (!batch || batch->n_tokens <= 0) {
+        LOGe("Invalid batch state before decode: n_tokens=%d", batch ? batch->n_tokens : -1);
+        return nullptr;
+    }
+
+    // Verify not an embedding model before each decode
+    const auto pooling_check = llama_pooling_type(context);
+    if (pooling_check != LLAMA_POOLING_TYPE_NONE) {
+        LOGe("CRITICAL: Detected embedding model during generation (pooling=%d)", pooling_check);
+        log_info_to_kt("Cannot continue generation: model is for embeddings, not text generation.");
+        return nullptr;
+    }
+
+    int decode_result = llama_decode(context, *batch);
+    if (decode_result != 0) {
+        LOGe("llama_decode() failed during generation with error: %d", decode_result);
+        log_info_to_kt("Generation decode failed with error %d. Stopping generation.", decode_result);
         return nullptr;
     }
 
@@ -851,6 +1041,37 @@ Java_android_llama_cpp_LLamaAndroid_model_1n_1ctx(
     return llama_n_ctx(context);
 }
 
+// JNI: returns llama_pooling_type() for the context handle as a jint, or -1 for a null handle.
+extern "C" JNIEXPORT jint JNICALL
+Java_android_llama_cpp_LLamaAndroid_get_1pooling_1type(
+        JNIEnv *env,
+        jobject /* this */,
+        jlong context_ptr) {
+    llama_context *const ctx = reinterpret_cast<llama_context *>(context_ptr);
+    if (ctx == nullptr) {
+        LOGe("get_pooling_type: context is null");
+        return -1; // LLAMA_POOLING_TYPE_UNSPECIFIED
+    }
+    return static_cast<jint>(llama_pooling_type(ctx));
+}
+
+// JNI: returns the llama_model_desc() text for the model handle, or "unknown" for a null handle.
+extern "C" JNIEXPORT jstring JNICALL
+Java_android_llama_cpp_LLamaAndroid_get_1model_1desc(
+        JNIEnv *env,
+        jobject /* this */,
+        jlong model_ptr) {
+    llama_model *const mdl = reinterpret_cast<llama_model *>(model_ptr);
+    if (mdl == nullptr) {
+        LOGe("get_model_desc: model is null");
+        return env->NewStringUTF("unknown");
+    }
+
+    char description[256];
+    llama_model_desc(mdl, description, sizeof(description));
+    return new_jstring_utf8(env, description);
+}
+
 extern "C" JNIEXPORT jintArray JNICALL
 Java_android_llama_cpp_LLamaAndroid_tokenize(
         JNIEnv *env,
diff --git a/llama-impl/src/main/java/android/llama/cpp/LLamaAndroid.kt b/llama-impl/src/main/java/android/llama/cpp/LLamaAndroid.kt
index 0ae80d2398..b8f3698820 100644
--- a/llama-impl/src/main/java/android/llama/cpp/LLamaAndroid.kt
+++ b/llama-impl/src/main/java/android/llama/cpp/LLamaAndroid.kt
@@ -17,6 +17,8 @@ class LLamaAndroid : ILlamaController {
     private val log = LoggerFactory.getLogger(LLamaAndroid::class.java)
 
     private external fun model_n_ctx(context: Long): Int
+    private external fun get_pooling_type(context: Long): Int
+    private external fun get_model_desc(model: Long): String
 
     private external fun tokenize(context: Long, text: String, add_bos: Boolean): IntArray
     suspend fun getContextSize(): Int {
@@ -147,8 +149,50 @@ class LLamaAndroid : ILlamaController {
                     val model = load_model(pathToModel)
                     if (model == 0L) throw IllegalStateException("load_model() failed")
 
+                    // Log model information for diagnostics
+                    val modelDesc = try {
+                        get_model_desc(model)
+                    } catch (e: Exception) {
+                        log.warn("Failed to get model description", e)
+                        "unknown"
+                    }
+                    log.info("Model description: {}", modelDesc)
+
                     val context = new_context(model)
-                    if (context == 0L) throw IllegalStateException("new_context() failed")
+                    if (context == 0L) {
+                        free_model(model)
+                        throw IllegalStateException("new_context() failed")
+                    }
+
+                    // Check if this is an embedding-only model (not suitable for text generation)
+                    val poolingType = try {
+                        get_pooling_type(context)
+                    } catch (e: UnsatisfiedLinkError) {
+                        // Function not available in pre-built AAR - will detect via decode error instead
+                        log.warn("get_pooling_type() not available (old AAR), will validate during inference")
+                        -1 // Unknown
+                    } catch (e: Exception) {
+                        log.warn("Failed to get pooling type", e)
+                        -1 // Unknown
+                    }
+
+                    if (poolingType >= 0) {
+                        log.info("Model pooling type: {} (0=generative, 1=mean, 2=cls, 3=last, 4=rank)", poolingType)
+
+                        // Pooling types: NONE=0, MEAN=1, CLS=2, LAST=3, RANK=4
+                        // Models with pooling (1-4) are embedding models, not suitable for chat
+                        if (poolingType != 0) {
+                            free_context(context)
+                            free_model(model)
+                            throw IllegalStateException(
+                                "This model is an embedding model (pooling_type=$poolingType) and cannot be used for text generation. " +
+                                "Please select a chat/instruct model instead. Embedding models are designed for " +
+                                "semantic search and similarity tasks, not conversational AI."
+                            )
+                        }
+                    } else {
+                        log.warn("Could not determine pooling type - will attempt inference and catch errors")
+                    }
 
                     val batch = new_batch(2048, 0, 1)
                     if (batch == 0L) throw IllegalStateException("new_batch() failed")
@@ -156,6 +200,52 @@ class LLamaAndroid : ILlamaController {
                     val sampler = new_sampler()
                     if (sampler == 0L) throw IllegalStateException("new_sampler() failed")
 
+                    // CRITICAL: Test the model with a tiny inference to validate it's not an embedding model
+                    // This prevents crashes during actual user interaction
+                    log.info("Validating model can perform text generation (dry run test)...")
+                    try {
+                        val testResult = completion_init(
+                            context,
+                            batch,
+                            "Hi",  // Minimal test prompt
+                            false,  // No chat formatting
+                            1,      // Only 1 token output
+                            emptyArray()  // No stop strings
+                        )
+
+                        if (testResult <= 0) {
+                            throw IllegalStateException("Validation failed: model returned $testResult tokens")
+                        }
+
+                        log.info("Model validation passed - {} tokens processed", testResult)
+
+                        // Clear the test from KV cache
+                        kv_cache_clear(context)
+
+                    } catch (e: Exception) {
+                        // Model failed validation - clean up and reject
+                        log.error("Model validation failed - this is likely an embedding model or incompatible architecture", e)
+
+                        free_sampler(sampler)
+                        free_batch(batch)
+                        free_context(context)
+                        free_model(model)
+
+                        throw IllegalStateException(
+                            "Model validation failed: Cannot perform text generation.\n\n" +
+                            "This typically indicates:\n" +
+                            "• An embedding model (e.g., all-MiniLM, e5, bge, mpnet)\n" +
+                            "• Incompatible model architecture\n" +
+                            "• Corrupted model file\n\n" +
+                            "Please use a chat/instruct model instead:\n" +
+                            "• Llama-3.2-1B-Instruct-Q4_K_M.gguf\n" +
+                            "• Qwen2.5-0.5B-Instruct-Q4_K_M.gguf\n" +
+                            "• gemma-2-2b-it-Q4_K_M.gguf\n\n" +
+                            "Error: ${e.message}",
+                            e
+                        )
+                    }
+
                     log.info("Loaded model {}", pathToModel)
                     threadLocalState.set(State.Loaded(model, context, batch, sampler))
                 }
@@ -180,14 +270,54 @@ class LLamaAndroid : ILlamaController {
     ): Flow<String> = flow {
         when (val state = threadLocalState.get()) {
             is State.Loaded -> {
+                log.debug("Starting inference - formatChat={}, clearCache={}, nlen={}", formatChat, clearCache, nlen)
+
+                // Defensive check: verify this is not an embedding model
+                try {
+                    val poolingType = get_pooling_type(state.context)
+                    log.debug("Model pooling_type: {}", poolingType)
+                    if (poolingType != 0) {
+                        log.error("Attempted to use embedding model (pooling_type={}) for text generation", poolingType)
+                        throw IllegalStateException(
+                            "Cannot perform text generation with an embedding model (pooling_type=$poolingType). " +
+                            "This model is designed for embeddings, not chat."
+                        )
+                    }
+                } catch (e: UnsatisfiedLinkError) {
+                    log.warn("Unable to check pooling type, proceeding with generation", e)
+                } catch (e: IllegalStateException) {
+                    log.error("Embedding model check failed", e)
+                    throw e
+                }
+
+                // Check context size vs message length
+                try {
+                    val contextSize = model_n_ctx(state.context)
+                    val tokenCount = tokenize(state.context, message, true).size
+                    log.debug("Context size: {}, message tokens: {}, max output: {}", contextSize, tokenCount, nlen)
+
+                    if (tokenCount + nlen > contextSize) {
+                        log.error("Message too long: {} tokens + {} max output > {} context", tokenCount, nlen, contextSize)
+                        throw IllegalStateException(
+                            "Message is too long for the model's context window. " +
+                            "Message requires $tokenCount tokens plus $nlen for output, but context is only $contextSize tokens."
+                        )
+                    }
+                } catch (e: Exception) {
+                    log.error("Failed to validate context size", e)
+                    throw IllegalStateException("Failed to validate message length: ${e.message}", e)
+                }
+
                 isStopped.set(false)
 
                 if (clearCache) {
+                    log.debug("Clearing KV cache")
                     kv_cache_clear(state.context)
                 }
 
-                val ncur = IntVar(
-                    completion_init(
+                log.debug("Calling completion_init")
+                val ncur = try {
+                    val result = completion_init(
                         state.context,
                         state.batch,
                         message,
@@ -195,7 +325,43 @@ class LLamaAndroid : ILlamaController {
                         nlen,
                         stop.toTypedArray()
                     )
-                )
+
+                    if (result <= 0) {
+                        log.error("completion_init returned invalid token count: {}", result)
+                        throw IllegalStateException(
+                            "Model failed to initialize text generation. " +
+                            "This may indicate an embedding model or incompatible model architecture. " +
+                            "Please ensure you're using a chat/instruct model, not an embedding model."
+                        )
+                    }
+
+                    log.debug("completion_init succeeded with {} tokens", result)
+                    IntVar(result)
+                } catch (e: IllegalStateException) {
+                    // Re-throw our own exceptions
+                    throw e
+                } catch (e: Exception) {
+                    log.error("completion_init failed", e)
+                    val errorMsg = e.message ?: ""
+
+                    // Check for embedding model indicators in error message
+                    if (errorMsg.contains("embed", ignoreCase = true) ||
+                        errorMsg.contains("encode", ignoreCase = true) ||
+                        errorMsg.contains("pooling", ignoreCase = true)) {
+                        throw IllegalStateException(
+                            "This appears to be an embedding model and cannot be used for text generation. " +
+                            "Please select a chat/instruct model (Llama, Qwen, Gemma, etc.) instead.",
+                            e
+                        )
+                    }
+
+                    throw IllegalStateException("Failed to initialize text generation: ${e.message}", e)
+                }
+
+                log.debug("Starting generation loop")
+                var loopCount = 0
+                var consecutiveNullOrEmpty = 0
+                var totalEmitted = 0
 
                 while (true) {
                     if (isStopped.get()) {
@@ -203,11 +369,58 @@ class LLamaAndroid : ILlamaController {
                         break
                     }
 
-                    val str = completion_loop(state.context, state.batch, state.sampler, nlen, ncur)
-                    if (str == null) {
-                        break
+                    try {
+                        val str = completion_loop(state.context, state.batch, state.sampler, nlen, ncur)
+
+                        if (str == null) {
+                            log.debug("Generation completed after {} iterations ({} tokens emitted)", loopCount, totalEmitted)
+                            break
+                        }
+
+                        if (str.isEmpty()) {
+                            consecutiveNullOrEmpty++
+                            if (consecutiveNullOrEmpty > 10 && totalEmitted == 0) {
+                                log.error("Model producing only empty strings - likely embedding model")
+                                throw IllegalStateException(
+                                    "Model is not generating text properly. This is typically caused by using " +
+                                    "an embedding model for text generation. Please use a chat/instruct model."
+                                )
+                            }
+                        } else {
+                            consecutiveNullOrEmpty = 0
+                            totalEmitted++
+                        }
+
+                        emit(str)
+                        loopCount++
+
+                        // Safety limit for infinite loops
+                        if (loopCount > 10000) {
+                            log.error("Generation loop exceeded 10000 iterations, stopping")
+                            break
+                        }
+
+                    } catch (e: IllegalStateException) {
+                        // Re-throw our own error messages
+                        throw e
+                    } catch (e: Exception) {
+                        log.error("Error during generation loop at iteration {} ({} tokens emitted)", loopCount, totalEmitted, e)
+
+                        val errorMsg = e.message ?: ""
+                        if (totalEmitted == 0 || errorMsg.contains("decode", ignoreCase = true)) {
+                            throw IllegalStateException(
+                                "Text generation failed before producing output. " +
+                                "This often indicates an embedding model or incompatible architecture. " +
+                                "Please use a chat/instruct model, not an embedding model.",
+                                e
+                            )
+                        }
+
+                        throw IllegalStateException(
+                            "Text generation failed: ${e.message}",
+                            e
+                        )
                     }
-                    emit(str)
                 }
             }
 

From 7ef3e809b4a3e945879f25c1b97d83cefea1331d Mon Sep 17 00:00:00 2001
From: John Trujillo <johnandrestrujillo@gmail.com>
Date: Fri, 19 Jun 2026 14:44:38 -0400
Subject: [PATCH 2/3] chore(ADFA-4388): Remove explanatory comments from Kotlin
 files

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 .../agent/fragments/AiSettingsFragment.kt     |  2 +-
 .../agent/repository/LlmInferenceEngine.kt    |  6 ----
 .../agent/viewmodel/AiSettingsViewModel.kt    |  5 ---
 .../java/android/llama/cpp/LLamaAndroid.kt    | 34 ++++---------------
 4 files changed, 7 insertions(+), 40 deletions(-)

diff --git a/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt b/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt
index 79dd25d5eb..d3c61c4c33 100644
--- a/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt
+++ b/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt
@@ -123,7 +123,7 @@ class AiSettingsFragment : Fragment(R.layout.fragment_ai_settings) {
         val browseButton = view.findViewById<Button>(R.id.btn_browse_model)
         val loadSavedButton = view.findViewById<Button>(R.id.loadSavedButton)
         val modelStatusTextView = view.findViewById<TextView>(R.id.model_status_text_view)
-        val engineStatusTextView = view.findViewById<TextView>(R.id.engine_status_text) // <-- NEW: Get reference to the new TextView
+        val engineStatusTextView = view.findViewById<TextView>(R.id.engine_status_text)
         val simplePromptSwitch = view.findViewById<MaterialSwitch>(R.id.switch_simple_local_prompt)
         val shaInput = view.findViewById<TextInputEditText>(R.id.local_model_sha_input)
 
diff --git a/app/src/main/java/com/itsaky/androidide/agent/repository/LlmInferenceEngine.kt b/app/src/main/java/com/itsaky/androidide/agent/repository/LlmInferenceEngine.kt
index 5c3f58e219..88d265917d 100644
--- a/app/src/main/java/com/itsaky/androidide/agent/repository/LlmInferenceEngine.kt
+++ b/app/src/main/java/com/itsaky/androidide/agent/repository/LlmInferenceEngine.kt
@@ -295,7 +295,6 @@ class LlmInferenceEngine(
         val displayName = resolveModelDisplayName(context, modelUri)
 
         return try {
-            // Validate file format before attempting to load
             validateModelFormat(displayName)
 
             val destinationFile = File(context.cacheDir, "local_model.gguf")
@@ -318,7 +317,6 @@ class LlmInferenceEngine(
             log.info("Successfully loaded local model: {}", loadedModelName)
             true
         } catch (e: IllegalStateException) {
-            // Check if this is an embedding model error
             if (e.message?.contains("embedding model") == true) {
                 log.error("Cannot use embedding model for chat: {}", displayName, e)
                 throw IllegalArgumentException(
@@ -332,7 +330,6 @@ class LlmInferenceEngine(
                 throw e
             }
         } catch (e: IllegalArgumentException) {
-            // Re-throw validation errors (file format, etc.)
             log.error("Model validation failed: {}", displayName, e)
             resetLoadedModelState()
             throw e
@@ -490,7 +487,6 @@ class LlmInferenceEngine(
     private fun validateModelFormat(filename: String) {
         val lowerName = filename.lowercase()
 
-        // Check for unsupported formats
         when {
             lowerName.endsWith(".onnx") -> {
                 throw IllegalArgumentException(
@@ -546,11 +542,9 @@ class LlmInferenceEngine(
             }
             !lowerName.endsWith(".gguf") -> {
                 log.warn("Model file '{}' doesn't have .gguf extension. May fail to load.", filename)
-                // Don't throw - maybe it's a GGUF file with wrong extension
             }
         }
 
-        // Additional check for common embedding model patterns in filename
         if (lowerName.contains("all-mini") ||
             lowerName.contains("all-mpnet") ||
             lowerName.contains("e5-") ||
diff --git a/app/src/main/java/com/itsaky/androidide/agent/viewmodel/AiSettingsViewModel.kt b/app/src/main/java/com/itsaky/androidide/agent/viewmodel/AiSettingsViewModel.kt
index a6af5f7eee..e22373e704 100644
--- a/app/src/main/java/com/itsaky/androidide/agent/viewmodel/AiSettingsViewModel.kt
+++ b/app/src/main/java/com/itsaky/androidide/agent/viewmodel/AiSettingsViewModel.kt
@@ -38,18 +38,15 @@ class AiSettingsViewModel(application: Application) : AndroidViewModel(applicati
     private val llmInferenceEngine: LlmInferenceEngine = LlmInferenceEngineProvider.instance
     private var pendingModelUri: String? = null
 
-    // --- State LiveData ---
     private val _savedModelPath = MutableLiveData<String?>(null)
     val savedModelPath: LiveData<String?> get() = _savedModelPath
 
     private val _modelLoadingState = MutableLiveData<ModelLoadingState>()
     val modelLoadingState: LiveData<ModelLoadingState> get() = _modelLoadingState
 
-    // NEW: LiveData to track if the engine library is ready
     private val _engineState = MutableLiveData<EngineState>(EngineState.Uninitialized)
     val engineState: LiveData<EngineState> get() = _engineState
 
-    // --- Initialization ---
     init {
         initializeLlmEngine()
         checkInitialSavedModel()
@@ -139,8 +136,6 @@ class AiSettingsViewModel(application: Application) : AndroidViewModel(applicati
         _savedModelPath.value = getLocalModelPath()
     }
 
-    // --- Preference and Key Management (No changes needed here) ---
-
     fun getAvailableBackends(): List<AiBackend> = AiBackend.entries
 
     fun saveBackend(backend: AiBackend) {
diff --git a/llama-impl/src/main/java/android/llama/cpp/LLamaAndroid.kt b/llama-impl/src/main/java/android/llama/cpp/LLamaAndroid.kt
index b8f3698820..02f329d84a 100644
--- a/llama-impl/src/main/java/android/llama/cpp/LLamaAndroid.kt
+++ b/llama-impl/src/main/java/android/llama/cpp/LLamaAndroid.kt
@@ -149,7 +149,6 @@ class LLamaAndroid : ILlamaController {
                     val model = load_model(pathToModel)
                     if (model == 0L) throw IllegalStateException("load_model() failed")
 
-                    // Log model information for diagnostics
                     val modelDesc = try {
                         get_model_desc(model)
                     } catch (e: Exception) {
@@ -164,23 +163,19 @@ class LLamaAndroid : ILlamaController {
                         throw IllegalStateException("new_context() failed")
                     }
 
-                    // Check if this is an embedding-only model (not suitable for text generation)
                     val poolingType = try {
                         get_pooling_type(context)
                     } catch (e: UnsatisfiedLinkError) {
-                        // Function not available in pre-built AAR - will detect via decode error instead
                         log.warn("get_pooling_type() not available (old AAR), will validate during inference")
-                        -1 // Unknown
+                        -1
                     } catch (e: Exception) {
                         log.warn("Failed to get pooling type", e)
-                        -1 // Unknown
+                        -1
                     }
 
                     if (poolingType >= 0) {
                         log.info("Model pooling type: {} (0=generative, 1=mean, 2=cls, 3=last, 4=rank)", poolingType)
 
-                        // Pooling types: NONE=0, MEAN=1, CLS=2, LAST=3, RANK=4
-                        // Models with pooling (1-4) are embedding models, not suitable for chat
                         if (poolingType != 0) {
                             free_context(context)
                             free_model(model)
@@ -200,17 +195,15 @@ class LLamaAndroid : ILlamaController {
                     val sampler = new_sampler()
                     if (sampler == 0L) throw IllegalStateException("new_sampler() failed")
 
-                    // CRITICAL: Test the model with a tiny inference to validate it's not an embedding model
-                    // This prevents crashes during actual user interaction
                     log.info("Validating model can perform text generation (dry run test)...")
                     try {
                         val testResult = completion_init(
                             context,
                             batch,
-                            "Hi",  // Minimal test prompt
-                            false,  // No chat formatting
-                            1,      // Only 1 token output
-                            emptyArray()  // No stop strings
+                            "Hi",
+                            false,
+                            1,
+                            emptyArray()
                         )
 
                         if (testResult <= 0) {
@@ -219,11 +212,9 @@ class LLamaAndroid : ILlamaController {
 
                         log.info("Model validation passed - {} tokens processed", testResult)
 
-                        // Clear the test from KV cache
                         kv_cache_clear(context)
 
                     } catch (e: Exception) {
-                        // Model failed validation - clean up and reject
                         log.error("Model validation failed - this is likely an embedding model or incompatible architecture", e)
 
                         free_sampler(sampler)
@@ -255,13 +246,6 @@ class LLamaAndroid : ILlamaController {
         }
     }
 
-
-    /*
-
-        formatChat: Boolean = false,
-        stop: List<String> = emptyList(),
-        clearCache: Boolean = false
-     */
     override fun send(
         message: String,
         formatChat: Boolean,
@@ -272,7 +256,6 @@ class LLamaAndroid : ILlamaController {
             is State.Loaded -> {
                 log.debug("Starting inference - formatChat={}, clearCache={}, nlen={}", formatChat, clearCache, nlen)
 
-                // Defensive check: verify this is not an embedding model
                 try {
                     val poolingType = get_pooling_type(state.context)
                     log.debug("Model pooling_type: {}", poolingType)
@@ -290,7 +273,6 @@ class LLamaAndroid : ILlamaController {
                     throw e
                 }
 
-                // Check context size vs message length
                 try {
                     val contextSize = model_n_ctx(state.context)
                     val tokenCount = tokenize(state.context, message, true).size
@@ -338,13 +320,11 @@ class LLamaAndroid : ILlamaController {
                     log.debug("completion_init succeeded with {} tokens", result)
                     IntVar(result)
                 } catch (e: IllegalStateException) {
-                    // Re-throw our own exceptions
                     throw e
                 } catch (e: Exception) {
                     log.error("completion_init failed", e)
                     val errorMsg = e.message ?: ""
 
-                    // Check for embedding model indicators in error message
                     if (errorMsg.contains("embed", ignoreCase = true) ||
                         errorMsg.contains("encode", ignoreCase = true) ||
                         errorMsg.contains("pooling", ignoreCase = true)) {
@@ -394,14 +374,12 @@ class LLamaAndroid : ILlamaController {
                         emit(str)
                         loopCount++
 
-                        // Safety limit for infinite loops
                         if (loopCount > 10000) {
                             log.error("Generation loop exceeded 10000 iterations, stopping")
                             break
                         }
 
                     } catch (e: IllegalStateException) {
-                        // Re-throw our own error messages
                         throw e
                     } catch (e: Exception) {
                         log.error("Error during generation loop at iteration {} ({} tokens emitted)", loopCount, totalEmitted, e)

From 8200a6b947080f046d4bd426815dc98a564481be Mon Sep 17 00:00:00 2001
From: John Trujillo <johnandrestrujillo@gmail.com>
Date: Fri, 19 Jun 2026 14:44:38 -0400
Subject: [PATCH 3/3] chore(ADFA-4388): Remove comments and extract magic
 strings to constants

- Removed inline comments explaining bottom sheet behavior
- Extracted file extension strings to named constants (EXT_*)
- Extracted keyword strings to named constants (KEYWORD_*)
- Improved code maintainability and reduced duplication

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 .../agent/fragments/AiSettingsFragment.kt     |  4 -
 .../agent/repository/LlmInferenceEngine.kt    | 81 ++++++++++++-------
 2 files changed, 52 insertions(+), 33 deletions(-)

diff --git a/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt b/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt
index d3c61c4c33..2ed9ed6219 100644
--- a/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt
+++ b/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt
@@ -54,8 +54,6 @@ class AiSettingsFragment : Fragment(R.layout.fragment_ai_settings) {
                 viewModel.loadModelFromUri(uriString, requireContext())
                 flashInfo("Attempting to load selected model...")
 
-                // Keep the bottom sheet expanded after file picker returns
-                // Post with delay to ensure it happens after all lifecycle callbacks
                 view?.postDelayed({
                     (activity as? BaseEditorActivity)?.bottomSheetViewModel?.setSheetState(
                         sheetState = BottomSheetBehavior.STATE_EXPANDED,
@@ -217,8 +215,6 @@ class AiSettingsFragment : Fragment(R.layout.fragment_ai_settings) {
             if (hasPermission) {
                 viewModel.loadModelFromUri(savedUri, requireContext())
 
-                // Keep the bottom sheet expanded when loading from saved
-                // Post with delay to ensure it happens after all lifecycle callbacks
                 view?.postDelayed({
                     (activity as? BaseEditorActivity)?.bottomSheetViewModel?.setSheetState(
                         sheetState = BottomSheetBehavior.STATE_EXPANDED,
diff --git a/app/src/main/java/com/itsaky/androidide/agent/repository/LlmInferenceEngine.kt b/app/src/main/java/com/itsaky/androidide/agent/repository/LlmInferenceEngine.kt
index 88d265917d..93f5dd1cb2 100644
--- a/app/src/main/java/com/itsaky/androidide/agent/repository/LlmInferenceEngine.kt
+++ b/app/src/main/java/com/itsaky/androidide/agent/repository/LlmInferenceEngine.kt
@@ -63,6 +63,29 @@ class LlmInferenceEngine(
         private const val CONTEXT_SIZE_MID_MEM = 2048
         private const val CONTEXT_SIZE_HIGH_MEM = 3072
         private const val CONTEXT_SIZE_MAX = 4096
+
+        private const val EXT_ONNX = ".onnx"
+        private const val EXT_PT = ".pt"
+        private const val EXT_PTH = ".pth"
+        private const val EXT_BIN = ".bin"
+        private const val EXT_SAFETENSORS = ".safetensors"
+        private const val EXT_PB = ".pb"
+        private const val EXT_TFLITE = ".tflite"
+        private const val EXT_GGML = ".ggml"
+        private const val EXT_GGUF = ".gguf"
+
+        private const val KEYWORD_TENSORFLOW = "tensorflow"
+        private const val KEYWORD_ALL_MINI = "all-mini"
+        private const val KEYWORD_ALL_MPNET = "all-mpnet"
+        private const val KEYWORD_E5 = "e5-"
+        private const val KEYWORD_EMBED = "embed"
+        private const val KEYWORD_LLAMA = "llama"
+        private const val KEYWORD_H2O = "h2o"
+        private const val KEYWORD_DANUBE = "danube"
+        private const val KEYWORD_QWEN = "qwen"
+        private const val KEYWORD_GEMMA3 = "gemma3"
+        private const val KEYWORD_GEMMA_3 = "gemma-3"
+        private const val KEYWORD_GEMMA = "gemma"
     }
 
     /**
@@ -488,67 +511,67 @@ class LlmInferenceEngine(
         val lowerName = filename.lowercase()
 
         when {
-            lowerName.endsWith(".onnx") -> {
+            lowerName.endsWith(EXT_ONNX) -> {
                 throw IllegalArgumentException(
-                    "ONNX models (.onnx) are not supported.\n\n" +
-                    "This app uses llama.cpp which only supports GGUF format (.gguf).\n\n" +
+                    "ONNX models ($EXT_ONNX) are not supported.\n\n" +
+                    "This app uses llama.cpp which only supports GGUF format ($EXT_GGUF).\n\n" +
                     "To use this model:\n" +
                     "1. Convert it to GGUF format using llama.cpp conversion tools\n" +
                     "2. Or download a pre-converted GGUF version from Hugging Face"
                 )
             }
-            lowerName.endsWith(".pt") || lowerName.endsWith(".pth") || lowerName.endsWith(".bin") -> {
+            lowerName.endsWith(EXT_PT) || lowerName.endsWith(EXT_PTH) || lowerName.endsWith(EXT_BIN) -> {
                 throw IllegalArgumentException(
-                    "PyTorch models (.pt, .pth, .bin) are not supported.\n\n" +
-                    "This app uses llama.cpp which only supports GGUF format (.gguf).\n\n" +
+                    "PyTorch models ($EXT_PT, $EXT_PTH, $EXT_BIN) are not supported.\n\n" +
+                    "This app uses llama.cpp which only supports GGUF format ($EXT_GGUF).\n\n" +
                     "To use this model:\n" +
                     "1. Convert it to GGUF format using convert_hf_to_gguf.py\n" +
                     "2. Or download a pre-converted GGUF version from Hugging Face"
                 )
             }
-            lowerName.endsWith(".safetensors") -> {
+            lowerName.endsWith(EXT_SAFETENSORS) -> {
                 throw IllegalArgumentException(
-                    "SafeTensors models (.safetensors) are not directly supported.\n\n" +
-                    "This app uses llama.cpp which only supports GGUF format (.gguf).\n\n" +
+                    "SafeTensors models ($EXT_SAFETENSORS) are not directly supported.\n\n" +
+                    "This app uses llama.cpp which only supports GGUF format ($EXT_GGUF).\n\n" +
                     "To use this model:\n" +
                     "1. Convert it to GGUF format using convert_hf_to_gguf.py\n" +
                     "2. Or download a pre-converted GGUF version from Hugging Face"
                 )
             }
-            lowerName.endsWith(".pb") || lowerName.contains("tensorflow") -> {
+            lowerName.endsWith(EXT_PB) || lowerName.contains(KEYWORD_TENSORFLOW) -> {
                 throw IllegalArgumentException(
-                    "TensorFlow models (.pb) are not supported.\n\n" +
-                    "This app uses llama.cpp which only supports GGUF format (.gguf).\n\n" +
+                    "TensorFlow models ($EXT_PB) are not supported.\n\n" +
+                    "This app uses llama.cpp which only supports GGUF format ($EXT_GGUF).\n\n" +
                     "To use this model:\n" +
                     "1. Convert it to GGUF format using appropriate conversion tools\n" +
                     "2. Or download a pre-converted GGUF version from Hugging Face"
                 )
             }
-            lowerName.endsWith(".tflite") -> {
+            lowerName.endsWith(EXT_TFLITE) -> {
                 throw IllegalArgumentException(
-                    "TensorFlow Lite models (.tflite) are not supported.\n\n" +
-                    "This app uses llama.cpp which only supports GGUF format (.gguf).\n\n" +
+                    "TensorFlow Lite models ($EXT_TFLITE) are not supported.\n\n" +
+                    "This app uses llama.cpp which only supports GGUF format ($EXT_GGUF).\n\n" +
                     "Please select a GGUF format model."
                 )
             }
-            lowerName.endsWith(".ggml") -> {
+            lowerName.endsWith(EXT_GGML) -> {
                 throw IllegalArgumentException(
-                    "GGML models (.ggml) are deprecated.\n\n" +
-                    "This app uses the newer GGUF format (.gguf).\n\n" +
+                    "GGML models ($EXT_GGML) are deprecated.\n\n" +
+                    "This app uses the newer GGUF format ($EXT_GGUF).\n\n" +
                     "To use this model:\n" +
                     "1. Convert it to GGUF using convert_llama_ggml_to_gguf.py\n" +
                     "2. Or download a GGUF version from Hugging Face"
                 )
             }
-            !lowerName.endsWith(".gguf") -> {
-                log.warn("Model file '{}' doesn't have .gguf extension. May fail to load.", filename)
+            !lowerName.endsWith(EXT_GGUF) -> {
+                log.warn("Model file '{}' doesn't have $EXT_GGUF extension. May fail to load.", filename)
             }
         }
 
-        if (lowerName.contains("all-mini") ||
-            lowerName.contains("all-mpnet") ||
-            lowerName.contains("e5-") ||
-            (lowerName.contains("embed") && !lowerName.contains("llama"))) {
+        if (lowerName.contains(KEYWORD_ALL_MINI) ||
+            lowerName.contains(KEYWORD_ALL_MPNET) ||
+            lowerName.contains(KEYWORD_E5) ||
+            (lowerName.contains(KEYWORD_EMBED) && !lowerName.contains(KEYWORD_LLAMA))) {
             log.warn(
                 "Model '{}' appears to be an embedding model based on filename. " +
                 "This may not work for chat. Will validate during load.", filename
@@ -559,11 +582,11 @@ class LlmInferenceEngine(
     private fun detectModelFamily(path: String): ModelFamily {
         val lowerPath = path.lowercase()
         return when {
-            lowerPath.contains("h2o") || lowerPath.contains("danube") -> ModelFamily.H2O
-            lowerPath.contains("qwen") -> ModelFamily.QWEN
-            lowerPath.contains("gemma-3") || lowerPath.contains("gemma3") -> ModelFamily.GEMMA3
-            lowerPath.contains("gemma") -> ModelFamily.GEMMA2
-            lowerPath.contains("llama") -> ModelFamily.LLAMA3
+            lowerPath.contains(KEYWORD_H2O) || lowerPath.contains(KEYWORD_DANUBE) -> ModelFamily.H2O
+            lowerPath.contains(KEYWORD_QWEN) -> ModelFamily.QWEN
+            lowerPath.contains(KEYWORD_GEMMA_3) || lowerPath.contains(KEYWORD_GEMMA3) -> ModelFamily.GEMMA3
+            lowerPath.contains(KEYWORD_GEMMA) -> ModelFamily.GEMMA2
+            lowerPath.contains(KEYWORD_LLAMA) -> ModelFamily.LLAMA3
             else -> ModelFamily.UNKNOWN
         }
     }