From 77f0700c1b7f1b4e06f2d172afaf2c1e102a57c1 Mon Sep 17 00:00:00 2001
From: John Trujillo
Date: Fri, 19 Jun 2026 14:26:33 -0400
Subject: [PATCH 1/3] fix(ADFA-4388): Prevent crash when loading embedding models for chat

Added multi-layer protection to detect and reject embedding models:

**Native Layer (C++):**
- Check pooling_type in new_context() - reject if not LLAMA_POOLING_TYPE_NONE
- Added get_pooling_type() JNI function for Kotlin validation
- Clear error messages explaining embedding vs generative models

**Kotlin Layer:**
- Validate model during load() in LLamaAndroid.kt
- Catch IllegalStateException and wrap with user-friendly message
- File format validation for ONNX, PyTorch, TensorFlow, etc.

**UI Layer:**
- Proper exception handling in AiSettingsViewModel
- Display error in ModelLoadingState.Error instead of crashing
- Keep bottom sheet expanded after file picker to show error/status

**Infrastructure:**
- Rebuilt llama.cpp AAR with updated native code (v8)
- Updated LLAMA_LIB_VERSION to 8 in DynamicLibraryLoader

The app now gracefully handles embedding models with clear error messages instead of crashing with SIGABRT.

Co-Authored-By: Claude Sonnet 4.5 --- .../agent/fragments/AiSettingsFragment.kt | 21 ++ .../agent/repository/LlmInferenceEngine.kt | 108 +++++++- .../agent/viewmodel/AiSettingsViewModel.kt | 26 +- .../androidide/utils/DynamicLibraryLoader.kt | 2 +- llama-impl/src/main/cpp/llama-android.cpp | 251 ++++++++++++++++-- .../java/android/llama/cpp/LLamaAndroid.kt | 229 +++++++++++++++- 6 files changed, 603 insertions(+), 34 deletions(-) diff --git a/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt b/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt index f73cbae499..79dd25d5eb 100644 --- a/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt +++ b/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt @@ -16,10 +16,12 @@ import androidx.core.net.toUri import androidx.fragment.app.Fragment import androidx.fragment.app.viewModels import androidx.navigation.fragment.findNavController +import com.google.android.material.bottomsheet.BottomSheetBehavior import com.google.android.material.textfield.TextInputEditText import com.google.android.material.textfield.TextInputLayout import com.google.android.material.materialswitch.MaterialSwitch import com.itsaky.androidide.R +import com.itsaky.androidide.activities.editor.BaseEditorActivity import com.itsaky.androidide.agent.repository.AiBackend import com.itsaky.androidide.agent.repository.Util.getCurrentBackend import com.itsaky.androidide.agent.viewmodel.AiSettingsViewModel @@ -28,6 +30,7 @@ import com.itsaky.androidide.agent.viewmodel.ModelLoadingState import com.itsaky.androidide.databinding.FragmentAiSettingsBinding import com.itsaky.androidide.utils.flashInfo import com.itsaky.androidide.utils.getFileName +import com.itsaky.androidide.viewmodel.BottomSheetViewModel import java.text.SimpleDateFormat import java.util.Date import java.util.Locale @@ -50,6 +53,15 @@ class AiSettingsFragment : 
Fragment(R.layout.fragment_ai_settings) { val uriString = it.toString() viewModel.loadModelFromUri(uriString, requireContext()) flashInfo("Attempting to load selected model...") + + // Keep the bottom sheet expanded after file picker returns + // Post with delay to ensure it happens after all lifecycle callbacks + view?.postDelayed({ + (activity as? BaseEditorActivity)?.bottomSheetViewModel?.setSheetState( + sheetState = BottomSheetBehavior.STATE_EXPANDED, + currentTab = BottomSheetViewModel.TAB_AGENT + ) + }, 100) } } @@ -204,6 +216,15 @@ class AiSettingsFragment : Fragment(R.layout.fragment_ai_settings) { } if (hasPermission) { viewModel.loadModelFromUri(savedUri, requireContext()) + + // Keep the bottom sheet expanded when loading from saved + // Post with delay to ensure it happens after all lifecycle callbacks + view?.postDelayed({ + (activity as? BaseEditorActivity)?.bottomSheetViewModel?.setSheetState( + sheetState = BottomSheetBehavior.STATE_EXPANDED, + currentTab = BottomSheetViewModel.TAB_AGENT + ) + }, 100) } else { requireActivity().getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE).edit { remove(SAVED_MODEL_URI_KEY) diff --git a/app/src/main/java/com/itsaky/androidide/agent/repository/LlmInferenceEngine.kt b/app/src/main/java/com/itsaky/androidide/agent/repository/LlmInferenceEngine.kt index 10e2bde6ea..5c3f58e219 100644 --- a/app/src/main/java/com/itsaky/androidide/agent/repository/LlmInferenceEngine.kt +++ b/app/src/main/java/com/itsaky/androidide/agent/repository/LlmInferenceEngine.kt @@ -291,9 +291,13 @@ class LlmInferenceEngine( modelUriString: String, expectedSha256: String? 
): Boolean { + val modelUri = modelUriString.toUri() + val displayName = resolveModelDisplayName(context, modelUri) + return try { - val modelUri = modelUriString.toUri() - val displayName = resolveModelDisplayName(context, modelUri) + // Validate file format before attempting to load + validateModelFormat(displayName) + val destinationFile = File(context.cacheDir, "local_model.gguf") if (!copyModelToCache(context, modelUri, destinationFile)) { @@ -313,6 +317,25 @@ class LlmInferenceEngine( currentModelFamily = detectModelFamily(displayName) log.info("Successfully loaded local model: {}", loadedModelName) true + } catch (e: IllegalStateException) { + // Check if this is an embedding model error + if (e.message?.contains("embedding model") == true) { + log.error("Cannot use embedding model for chat: {}", displayName, e) + throw IllegalArgumentException( + "The selected model '$displayName' is an embedding model designed for semantic " + + "search and similarity tasks. It cannot be used for chat or text generation.\n\n" + + "Please select a chat/instruct model instead (e.g., models with 'chat', 'instruct', " + + "'conversational' in their name).", e + ) + } else { + log.error("Failed to load model", e) + throw e + } + } catch (e: IllegalArgumentException) { + // Re-throw validation errors (file format, etc.) + log.error("Model validation failed: {}", displayName, e) + resetLoadedModelState() + throw e } catch (e: Exception) { log.error("Failed to initialize or load model from file", e) resetLoadedModelState() @@ -458,6 +481,87 @@ class LlmInferenceEngine( } } + /** + * Validates that the model file format is supported. + * This app uses llama.cpp which only supports GGUF format. 
+ * + * @throws IllegalArgumentException if the model format is not supported + */ + private fun validateModelFormat(filename: String) { + val lowerName = filename.lowercase() + + // Check for unsupported formats + when { + lowerName.endsWith(".onnx") -> { + throw IllegalArgumentException( + "ONNX models (.onnx) are not supported.\n\n" + + "This app uses llama.cpp which only supports GGUF format (.gguf).\n\n" + + "To use this model:\n" + + "1. Convert it to GGUF format using llama.cpp conversion tools\n" + + "2. Or download a pre-converted GGUF version from Hugging Face" + ) + } + lowerName.endsWith(".pt") || lowerName.endsWith(".pth") || lowerName.endsWith(".bin") -> { + throw IllegalArgumentException( + "PyTorch models (.pt, .pth, .bin) are not supported.\n\n" + + "This app uses llama.cpp which only supports GGUF format (.gguf).\n\n" + + "To use this model:\n" + + "1. Convert it to GGUF format using convert_hf_to_gguf.py\n" + + "2. Or download a pre-converted GGUF version from Hugging Face" + ) + } + lowerName.endsWith(".safetensors") -> { + throw IllegalArgumentException( + "SafeTensors models (.safetensors) are not directly supported.\n\n" + + "This app uses llama.cpp which only supports GGUF format (.gguf).\n\n" + + "To use this model:\n" + + "1. Convert it to GGUF format using convert_hf_to_gguf.py\n" + + "2. Or download a pre-converted GGUF version from Hugging Face" + ) + } + lowerName.endsWith(".pb") || lowerName.contains("tensorflow") -> { + throw IllegalArgumentException( + "TensorFlow models (.pb) are not supported.\n\n" + + "This app uses llama.cpp which only supports GGUF format (.gguf).\n\n" + + "To use this model:\n" + + "1. Convert it to GGUF format using appropriate conversion tools\n" + + "2. 
Or download a pre-converted GGUF version from Hugging Face" + ) + } + lowerName.endsWith(".tflite") -> { + throw IllegalArgumentException( + "TensorFlow Lite models (.tflite) are not supported.\n\n" + + "This app uses llama.cpp which only supports GGUF format (.gguf).\n\n" + + "Please select a GGUF format model." + ) + } + lowerName.endsWith(".ggml") -> { + throw IllegalArgumentException( + "GGML models (.ggml) are deprecated.\n\n" + + "This app uses the newer GGUF format (.gguf).\n\n" + + "To use this model:\n" + + "1. Convert it to GGUF using convert_llama_ggml_to_gguf.py\n" + + "2. Or download a GGUF version from Hugging Face" + ) + } + !lowerName.endsWith(".gguf") -> { + log.warn("Model file '{}' doesn't have .gguf extension. May fail to load.", filename) + // Don't throw - maybe it's a GGUF file with wrong extension + } + } + + // Additional check for common embedding model patterns in filename + if (lowerName.contains("all-mini") || + lowerName.contains("all-mpnet") || + lowerName.contains("e5-") || + (lowerName.contains("embed") && !lowerName.contains("llama"))) { + log.warn( + "Model '{}' appears to be an embedding model based on filename. " + + "This may not work for chat. 
Will validate during load.", filename + ) + } + } + private fun detectModelFamily(path: String): ModelFamily { val lowerPath = path.lowercase() return when { diff --git a/app/src/main/java/com/itsaky/androidide/agent/viewmodel/AiSettingsViewModel.kt b/app/src/main/java/com/itsaky/androidide/agent/viewmodel/AiSettingsViewModel.kt index db379a08c9..a6af5f7eee 100644 --- a/app/src/main/java/com/itsaky/androidide/agent/viewmodel/AiSettingsViewModel.kt +++ b/app/src/main/java/com/itsaky/androidide/agent/viewmodel/AiSettingsViewModel.kt @@ -105,14 +105,24 @@ class AiSettingsViewModel(application: Application) : AndroidViewModel(applicati viewModelScope.launch { _modelLoadingState.value = ModelLoadingState.Loading - val expectedHash = getLocalModelSha256() - val success = llmInferenceEngine.initModelFromFile(context, path, expectedHash) - if (success && llmInferenceEngine.loadedModelName != null) { - _modelLoadingState.value = ModelLoadingState.Loaded(llmInferenceEngine.loadedModelName!!) - // Also save the path on successful load - saveLocalModelPath(path) - } else { - _modelLoadingState.value = ModelLoadingState.Error("Failed to load model file.") + try { + val expectedHash = getLocalModelSha256() + val success = llmInferenceEngine.initModelFromFile(context, path, expectedHash) + if (success && llmInferenceEngine.loadedModelName != null) { + _modelLoadingState.value = ModelLoadingState.Loaded(llmInferenceEngine.loadedModelName!!) + // Also save the path on successful load + saveLocalModelPath(path) + } else { + _modelLoadingState.value = ModelLoadingState.Error("Failed to load model file.") + } + } catch (e: IllegalArgumentException) { + // Handle validation errors (embedding models, unsupported formats, etc.) 
+ _modelLoadingState.value = ModelLoadingState.Error(e.message ?: "Model validation failed.") + Log.e("ModelLoad", "Model validation error: ${e.message}", e) + } catch (e: Exception) { + // Handle any other unexpected errors + _modelLoadingState.value = ModelLoadingState.Error("Failed to load model: ${e.message}") + Log.e("ModelLoad", "Unexpected error loading model", e) } } } diff --git a/app/src/main/java/com/itsaky/androidide/utils/DynamicLibraryLoader.kt b/app/src/main/java/com/itsaky/androidide/utils/DynamicLibraryLoader.kt index e34abefcb7..ef720ed27e 100644 --- a/app/src/main/java/com/itsaky/androidide/utils/DynamicLibraryLoader.kt +++ b/app/src/main/java/com/itsaky/androidide/utils/DynamicLibraryLoader.kt @@ -10,7 +10,7 @@ import java.util.zip.ZipInputStream object DynamicLibraryLoader { - private const val LLAMA_LIB_VERSION = 5 // Increment this if you update the AAR + private const val LLAMA_LIB_VERSION = 8 // Increment this if you update the AAR private const val PREFS_NAME = "dynamic_libs" private const val PREFS_KEY = "llama_lib_version" diff --git a/llama-impl/src/main/cpp/llama-android.cpp b/llama-impl/src/main/cpp/llama-android.cpp index 5c970621b0..9d01e68d7f 100644 --- a/llama-impl/src/main/cpp/llama-android.cpp +++ b/llama-impl/src/main/cpp/llama-android.cpp @@ -238,20 +238,54 @@ JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, void *reserved) { return JNI_VERSION_1_6; } +// Helper function to validate GGUF file format +static bool is_valid_gguf_file(const char *path) { + FILE *file = fopen(path, "rb"); + if (!file) { + LOGe("Cannot open file: %s", path); + return false; + } + + // GGUF magic number: "GGUF" (0x46554747) + uint32_t magic = 0; + size_t read = fread(&magic, sizeof(uint32_t), 1, file); + fclose(file); + + if (read != 1) { + LOGe("Failed to read magic number from file"); + return false; + } + + // Check for GGUF magic (little-endian: 0x46554747) + const uint32_t GGUF_MAGIC = 0x46554747; + return magic == GGUF_MAGIC; +} + extern "C" 
JNIEXPORT jlong JNICALL Java_android_llama_cpp_LLamaAndroid_load_1model(JNIEnv *env, jobject, jstring filename) { - llama_model_params model_params = llama_model_default_params(); - auto path_to_model = env->GetStringUTFChars(filename, 0); LOGi("Loading model from %s", path_to_model); + // Validate file format before attempting to load + if (!is_valid_gguf_file(path_to_model)) { + LOGe("Invalid GGUF file format: %s", path_to_model); + env->ReleaseStringUTFChars(filename, path_to_model); + env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), + "Invalid model file format. This app only supports GGUF format models. " + "Please ensure you have selected a valid .gguf model file."); + return 0; + } + + llama_model_params model_params = llama_model_default_params(); auto model = llama_model_load_from_file(path_to_model, model_params); env->ReleaseStringUTFChars(filename, path_to_model); if (!model) { - LOGe("load_model() failed"); - env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), "load_model() failed"); + LOGe("load_model() failed - model loading returned null"); + env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), + "Failed to load model. The file may be corrupted, incompatible, or require " + "more memory than available. Please try a smaller model or restart the app."); return 0; } @@ -286,6 +320,18 @@ Java_android_llama_cpp_LLamaAndroid_new_1context(JNIEnv *env, jobject, jlong jmo } LOGi("Using %d threads (batch=%d)", n_threads, n_threads_batch); + // Validate model parameters before creating context + int32_t model_n_ctx_train = llama_model_n_ctx_train(model); + + LOGi("Model info: ctx_train=%d", model_n_ctx_train); + + if (model_n_ctx_train <= 0) { + LOGe("Invalid model training context: %d", model_n_ctx_train); + env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), + "Model has invalid training context. 
The model file may be corrupted."); + return 0; + } + llama_context_params ctx_params = llama_context_default_params(); const int configured_ctx = g_n_ctx.load(); @@ -293,15 +339,44 @@ Java_android_llama_cpp_LLamaAndroid_new_1context(JNIEnv *env, jobject, jlong jmo ctx_params.n_threads = n_threads; ctx_params.n_threads_batch = n_threads_batch; + // Clamp context size to model's training context + if (ctx_params.n_ctx > model_n_ctx_train) { + LOGi("Clamping requested context %d to model's training context %d", + ctx_params.n_ctx, model_n_ctx_train); + ctx_params.n_ctx = model_n_ctx_train; + } + + LOGi("Creating context with n_ctx=%d, n_threads=%d, n_threads_batch=%d", + ctx_params.n_ctx, ctx_params.n_threads, ctx_params.n_threads_batch); + llama_context *context = llama_init_from_model(model, ctx_params); if (!context) { - LOGe("llama_new_context_with_model() returned null)"); + LOGe("llama_init_from_model() returned null"); + env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), + "Failed to create model context. This may indicate:\n" + "1. Insufficient memory (try freeing memory or using a smaller model)\n" + "2. Incompatible model architecture\n" + "3. Corrupted model file\n" + "Try restarting the app or selecting a different model."); + return 0; + } + + // CRITICAL: Verify this is not an embedding model IMMEDIATELY after context creation + const auto pooling_type = llama_pooling_type(context); + LOGi("Context pooling_type: %d (0=none/generative, 1=mean/embed, 2=cls, 3=last, 4=rank)", pooling_type); + + if (pooling_type != LLAMA_POOLING_TYPE_NONE) { + LOGe("REJECTED: Model is configured for embeddings (pooling_type=%d), cannot generate text", pooling_type); + llama_free(context); env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), - "llama_new_context_with_model() returned null)"); + "This model is an embedding model and cannot be used for text generation. " + "Embedding models use 'encode' operations, not 'decode'. 
" + "Please select a chat/instruct model (Llama, Qwen, Gemma, etc.) for conversation."); return 0; } + LOGi("Context created successfully - model is suitable for text generation"); return reinterpret_cast(context); } @@ -585,6 +660,20 @@ Java_android_llama_cpp_LLamaAndroid_completion_1init( jboolean format_chat, jint n_len, jobjectArray stop) { + const auto context = reinterpret_cast(context_pointer); + const auto batch = reinterpret_cast(batch_pointer); + + // Safety check: Verify this is not an embedding model + if (context) { + const auto pooling_type = llama_pooling_type(context); + if (pooling_type != LLAMA_POOLING_TYPE_NONE) { + LOGe("completion_init failed: Model has pooling_type=%d, cannot generate text", pooling_type); + env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), + "This model is configured for embeddings and cannot generate text. Please use a generative model for chat."); + return 0; + } + } + { std::lock_guard lock(g_globals_mutex); cached_token_chars.clear(); @@ -611,21 +700,57 @@ Java_android_llama_cpp_LLamaAndroid_completion_1init( } } + if (!context) { + LOGe("completion_init: context is null"); + env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), "Model context is null"); + return 0; + } + + if (!batch) { + LOGe("completion_init: batch is null"); + env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), "Batch is null"); + return 0; + } + const auto text = env->GetStringUTFChars(jtext, 0); - const auto context = reinterpret_cast(context_pointer); - const auto batch = reinterpret_cast(batch_pointer); + if (!text) { + LOGe("completion_init: failed to get text string"); + env->ThrowNew(env->FindClass("java/lang/IllegalArgumentException"), "Invalid text input"); + return 0; + } bool parse_special = (format_chat == JNI_TRUE); + LOGi("Tokenizing input (parse_special=%d)...", parse_special); + const auto tokens_list = common_tokenize(context, text, true, parse_special); + LOGi("Tokenized %zu tokens", 
tokens_list.size()); + + if (tokens_list.empty()) { + LOGe("Tokenization produced no tokens"); + env->ReleaseStringUTFChars(jtext, text); + env->ThrowNew(env->FindClass("java/lang/IllegalArgumentException"), + "Failed to tokenize input text. The text may be empty or invalid."); + return 0; + } int n_ctx = llama_n_ctx(context); + if (n_ctx <= 0) { + LOGe("Invalid context size: %d", n_ctx); + env->ReleaseStringUTFChars(jtext, text); + env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), + "Model context size is invalid. Model may be corrupted."); + return 0; + } + size_t n_kv_req = tokens_list.size() + static_cast(n_len); - LOGi("n_len = %d, n_ctx = %d, n_kv_req = %zu", n_len, n_ctx, n_kv_req); + LOGi("n_len = %d, n_ctx = %d, n_tokens = %zu, n_kv_req = %zu", n_len, n_ctx, tokens_list.size(), n_kv_req); if (n_kv_req > n_ctx) { - LOGe("error: n_kv_req > n_ctx, the required KV cache size is not big enough"); + LOGe("error: n_kv_req (%zu) > n_ctx (%d), the required KV cache size is not big enough", n_kv_req, n_ctx); + env->ReleaseStringUTFChars(jtext, text); env->ThrowNew(env->FindClass("java/lang/IllegalArgumentException"), - "Prompt is too long for the model's context size."); + "Prompt is too long for the model's context size. " + "Try a shorter message or reduce max output tokens."); return 0; } @@ -684,11 +809,60 @@ Java_android_llama_cpp_LLamaAndroid_completion_1init( } if (batch->n_tokens > 0) { + LOGi("Processing batch with %d tokens", batch->n_tokens); + + // Validate batch before decode + if (batch->n_tokens < 0) { + LOGe("Invalid batch token count: %d", batch->n_tokens); + env->ReleaseStringUTFChars(jtext, text); + env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), + "Batch state corrupted. 
Token count is negative."); + return 0; + } + // llama_decode will output logits only for the last token of the prompt batch->logits[batch->n_tokens - 1] = true; - if (llama_decode(context, *batch) != 0) { - LOGe("llama_decode() failed"); + + LOGi("Calling llama_decode for initial prompt processing..."); + + // Double-check pooling type before decode (belt and suspenders) + const auto pooling_check = llama_pooling_type(context); + if (pooling_check != LLAMA_POOLING_TYPE_NONE) { + LOGe("CRITICAL: Attempted decode on embedding model (pooling=%d)", pooling_check); + env->ReleaseStringUTFChars(jtext, text); + env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), + "Cannot decode with embedding model. This model only supports 'encode' operations."); + return 0; } + + int decode_result = llama_decode(context, *batch); + + if (decode_result != 0) { + LOGe("llama_decode() failed with error code: %d", decode_result); + env->ReleaseStringUTFChars(jtext, text); + + const char* error_msg; + switch (decode_result) { + case -1: + error_msg = "Model decode failed (error -1). This may indicate:\n" + "1. Insufficient memory for model operations\n" + "2. Incompatible model architecture (possibly an embedding model)\n" + "3. Corrupted model file\n" + "Try: Restart app, use smaller model, or select a chat/instruct model"; + break; + case -2: + error_msg = "Model decode failed (error -2). Context or batch state is invalid."; + break; + default: + error_msg = "Model decode failed with unknown error. 
Model may be incompatible."; + } + + env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), error_msg); + return 0; + } + LOGi("Initial decode completed successfully"); + } else { + LOGi("Batch is empty, skipping decode"); } env->ReleaseStringUTFChars(jtext, text); @@ -816,8 +990,24 @@ Java_android_llama_cpp_LLamaAndroid_completion_1loop( env->CallVoidMethod(intvar_ncur, la_int_var_inc); - if (llama_decode(context, *batch) != 0) { - LOGe("llama_decode() returned null"); + // Safety check before decode + if (!batch || batch->n_tokens <= 0) { + LOGe("Invalid batch state before decode: n_tokens=%d", batch ? batch->n_tokens : -1); + return nullptr; + } + + // Verify not an embedding model before each decode + const auto pooling_check = llama_pooling_type(context); + if (pooling_check != LLAMA_POOLING_TYPE_NONE) { + LOGe("CRITICAL: Detected embedding model during generation (pooling=%d)", pooling_check); + log_info_to_kt("Cannot continue generation: model is for embeddings, not text generation."); + return nullptr; + } + + int decode_result = llama_decode(context, *batch); + if (decode_result != 0) { + LOGe("llama_decode() failed during generation with error: %d", decode_result); + log_info_to_kt("Generation decode failed with error %d. 
Stopping generation.", decode_result); return nullptr; } @@ -851,6 +1041,37 @@ Java_android_llama_cpp_LLamaAndroid_model_1n_1ctx( return llama_n_ctx(context); } +extern "C" +JNIEXPORT jint JNICALL +Java_android_llama_cpp_LLamaAndroid_get_1pooling_1type( + JNIEnv *env, + jobject /* this */, + jlong context_ptr) { + auto *context = reinterpret_cast(context_ptr); + if (!context) { + LOGe("get_pooling_type: context is null"); + return -1; // LLAMA_POOLING_TYPE_UNSPECIFIED + } + return static_cast(llama_pooling_type(context)); +} + +extern "C" +JNIEXPORT jstring JNICALL +Java_android_llama_cpp_LLamaAndroid_get_1model_1desc( + JNIEnv *env, + jobject /* this */, + jlong model_ptr) { + auto *model = reinterpret_cast(model_ptr); + if (!model) { + LOGe("get_model_desc: model is null"); + return env->NewStringUTF("unknown"); + } + + char desc[256]; + llama_model_desc(model, desc, sizeof(desc)); + return new_jstring_utf8(env, desc); +} + extern "C" JNIEXPORT jintArray JNICALL Java_android_llama_cpp_LLamaAndroid_tokenize( JNIEnv *env, diff --git a/llama-impl/src/main/java/android/llama/cpp/LLamaAndroid.kt b/llama-impl/src/main/java/android/llama/cpp/LLamaAndroid.kt index 0ae80d2398..b8f3698820 100644 --- a/llama-impl/src/main/java/android/llama/cpp/LLamaAndroid.kt +++ b/llama-impl/src/main/java/android/llama/cpp/LLamaAndroid.kt @@ -17,6 +17,8 @@ class LLamaAndroid : ILlamaController { private val log = LoggerFactory.getLogger(LLamaAndroid::class.java) private external fun model_n_ctx(context: Long): Int + private external fun get_pooling_type(context: Long): Int + private external fun get_model_desc(model: Long): String private external fun tokenize(context: Long, text: String, add_bos: Boolean): IntArray suspend fun getContextSize(): Int { @@ -147,8 +149,50 @@ class LLamaAndroid : ILlamaController { val model = load_model(pathToModel) if (model == 0L) throw IllegalStateException("load_model() failed") + // Log model information for diagnostics + val modelDesc = try { + 
get_model_desc(model) + } catch (e: Exception) { + log.warn("Failed to get model description", e) + "unknown" + } + log.info("Model description: {}", modelDesc) + val context = new_context(model) - if (context == 0L) throw IllegalStateException("new_context() failed") + if (context == 0L) { + free_model(model) + throw IllegalStateException("new_context() failed") + } + + // Check if this is an embedding-only model (not suitable for text generation) + val poolingType = try { + get_pooling_type(context) + } catch (e: UnsatisfiedLinkError) { + // Function not available in pre-built AAR - will detect via decode error instead + log.warn("get_pooling_type() not available (old AAR), will validate during inference") + -1 // Unknown + } catch (e: Exception) { + log.warn("Failed to get pooling type", e) + -1 // Unknown + } + + if (poolingType >= 0) { + log.info("Model pooling type: {} (0=generative, 1=mean, 2=cls, 3=last, 4=rank)", poolingType) + + // Pooling types: NONE=0, MEAN=1, CLS=2, LAST=3, RANK=4 + // Models with pooling (1-4) are embedding models, not suitable for chat + if (poolingType != 0) { + free_context(context) + free_model(model) + throw IllegalStateException( + "This model is an embedding model (pooling_type=$poolingType) and cannot be used for text generation. " + + "Please select a chat/instruct model instead. Embedding models are designed for " + + "semantic search and similarity tasks, not conversational AI." 
+ ) + } + } else { + log.warn("Could not determine pooling type - will attempt inference and catch errors") + } val batch = new_batch(2048, 0, 1) if (batch == 0L) throw IllegalStateException("new_batch() failed") @@ -156,6 +200,52 @@ class LLamaAndroid : ILlamaController { val sampler = new_sampler() if (sampler == 0L) throw IllegalStateException("new_sampler() failed") + // CRITICAL: Test the model with a tiny inference to validate it's not an embedding model + // This prevents crashes during actual user interaction + log.info("Validating model can perform text generation (dry run test)...") + try { + val testResult = completion_init( + context, + batch, + "Hi", // Minimal test prompt + false, // No chat formatting + 1, // Only 1 token output + emptyArray() // No stop strings + ) + + if (testResult <= 0) { + throw IllegalStateException("Validation failed: model returned $testResult tokens") + } + + log.info("Model validation passed - {} tokens processed", testResult) + + // Clear the test from KV cache + kv_cache_clear(context) + + } catch (e: Exception) { + // Model failed validation - clean up and reject + log.error("Model validation failed - this is likely an embedding model or incompatible architecture", e) + + free_sampler(sampler) + free_batch(batch) + free_context(context) + free_model(model) + + throw IllegalStateException( + "Model validation failed: Cannot perform text generation.\n\n" + + "This typically indicates:\n" + + "• An embedding model (e.g., all-MiniLM, e5, bge, mpnet)\n" + + "• Incompatible model architecture\n" + + "• Corrupted model file\n\n" + + "Please use a chat/instruct model instead:\n" + + "• Llama-3.2-1B-Instruct-Q4_K_M.gguf\n" + + "• Qwen2.5-0.5B-Instruct-Q4_K_M.gguf\n" + + "• gemma-2-2b-it-Q4_K_M.gguf\n\n" + + "Error: ${e.message}", + e + ) + } + log.info("Loaded model {}", pathToModel) threadLocalState.set(State.Loaded(model, context, batch, sampler)) } @@ -180,14 +270,54 @@ class LLamaAndroid : ILlamaController { ): Flow = flow { 
when (val state = threadLocalState.get()) { is State.Loaded -> { + log.debug("Starting inference - formatChat={}, clearCache={}, nlen={}", formatChat, clearCache, nlen) + + // Defensive check: verify this is not an embedding model + try { + val poolingType = get_pooling_type(state.context) + log.debug("Model pooling_type: {}", poolingType) + if (poolingType != 0) { + log.error("Attempted to use embedding model (pooling_type={}) for text generation", poolingType) + throw IllegalStateException( + "Cannot perform text generation with an embedding model (pooling_type=$poolingType). " + + "This model is designed for embeddings, not chat." + ) + } + } catch (e: UnsatisfiedLinkError) { + log.warn("Unable to check pooling type, proceeding with generation", e) + } catch (e: IllegalStateException) { + log.error("Embedding model check failed", e) + throw e + } + + // Check context size vs message length + try { + val contextSize = model_n_ctx(state.context) + val tokenCount = tokenize(state.context, message, true).size + log.debug("Context size: {}, message tokens: {}, max output: {}", contextSize, tokenCount, nlen) + + if (tokenCount + nlen > contextSize) { + log.error("Message too long: {} tokens + {} max output > {} context", tokenCount, nlen, contextSize) + throw IllegalStateException( + "Message is too long for the model's context window. " + + "Message requires $tokenCount tokens plus $nlen for output, but context is only $contextSize tokens." 
+ ) + } + } catch (e: Exception) { + log.error("Failed to validate context size", e) + throw IllegalStateException("Failed to validate message length: ${e.message}", e) + } + isStopped.set(false) if (clearCache) { + log.debug("Clearing KV cache") kv_cache_clear(state.context) } - val ncur = IntVar( - completion_init( + log.debug("Calling completion_init") + val ncur = try { + val result = completion_init( state.context, state.batch, message, @@ -195,7 +325,43 @@ class LLamaAndroid : ILlamaController { nlen, stop.toTypedArray() ) - ) + + if (result <= 0) { + log.error("completion_init returned invalid token count: {}", result) + throw IllegalStateException( + "Model failed to initialize text generation. " + + "This may indicate an embedding model or incompatible model architecture. " + + "Please ensure you're using a chat/instruct model, not an embedding model." + ) + } + + log.debug("completion_init succeeded with {} tokens", result) + IntVar(result) + } catch (e: IllegalStateException) { + // Re-throw our own exceptions + throw e + } catch (e: Exception) { + log.error("completion_init failed", e) + val errorMsg = e.message ?: "" + + // Check for embedding model indicators in error message + if (errorMsg.contains("embed", ignoreCase = true) || + errorMsg.contains("encode", ignoreCase = true) || + errorMsg.contains("pooling", ignoreCase = true)) { + throw IllegalStateException( + "This appears to be an embedding model and cannot be used for text generation. " + + "Please select a chat/instruct model (Llama, Qwen, Gemma, etc.) 
instead.", + e + ) + } + + throw IllegalStateException("Failed to initialize text generation: ${e.message}", e) + } + + log.debug("Starting generation loop") + var loopCount = 0 + var consecutiveNullOrEmpty = 0 + var totalEmitted = 0 while (true) { if (isStopped.get()) { @@ -203,11 +369,58 @@ class LLamaAndroid : ILlamaController { break } - val str = completion_loop(state.context, state.batch, state.sampler, nlen, ncur) - if (str == null) { - break + try { + val str = completion_loop(state.context, state.batch, state.sampler, nlen, ncur) + + if (str == null) { + log.debug("Generation completed after {} iterations ({} tokens emitted)", loopCount, totalEmitted) + break + } + + if (str.isEmpty()) { + consecutiveNullOrEmpty++ + if (consecutiveNullOrEmpty > 10 && totalEmitted == 0) { + log.error("Model producing only empty strings - likely embedding model") + throw IllegalStateException( + "Model is not generating text properly. This is typically caused by using " + + "an embedding model for text generation. Please use a chat/instruct model." + ) + } + } else { + consecutiveNullOrEmpty = 0 + totalEmitted++ + } + + emit(str) + loopCount++ + + // Safety limit for infinite loops + if (loopCount > 10000) { + log.error("Generation loop exceeded 10000 iterations, stopping") + break + } + + } catch (e: IllegalStateException) { + // Re-throw our own error messages + throw e + } catch (e: Exception) { + log.error("Error during generation loop at iteration {} ({} tokens emitted)", loopCount, totalEmitted, e) + + val errorMsg = e.message ?: "" + if (totalEmitted == 0 || errorMsg.contains("decode", ignoreCase = true)) { + throw IllegalStateException( + "Text generation failed before producing output. " + + "This often indicates an embedding model or incompatible architecture. 
" + + "Please use a chat/instruct model, not an embedding model.", + e + ) + } + + throw IllegalStateException( + "Text generation failed: ${e.message}", + e + ) } - emit(str) } } From 7ef3e809b4a3e945879f25c1b97d83cefea1331d Mon Sep 17 00:00:00 2001 From: John Trujillo Date: Fri, 19 Jun 2026 14:44:38 -0400 Subject: [PATCH 2/3] chore(ADFA-4388): Remove explanatory comments from Kotlin files Co-Authored-By: Claude Sonnet 4.5 --- .../agent/fragments/AiSettingsFragment.kt | 2 +- .../agent/repository/LlmInferenceEngine.kt | 6 ---- .../agent/viewmodel/AiSettingsViewModel.kt | 5 --- .../java/android/llama/cpp/LLamaAndroid.kt | 34 ++++--------------- 4 files changed, 7 insertions(+), 40 deletions(-) diff --git a/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt b/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt index 79dd25d5eb..d3c61c4c33 100644 --- a/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt +++ b/app/src/main/java/com/itsaky/androidide/agent/fragments/AiSettingsFragment.kt @@ -123,7 +123,7 @@ class AiSettingsFragment : Fragment(R.layout.fragment_ai_settings) { val browseButton = view.findViewById