Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
ab8940d
fix: Address import crash, enhance logging, and refine UI
google-labs-jules[bot] Jun 2, 2025
04f39c2
fix: Resolve compilation errors and revert button style
google-labs-jules[bot] Jun 2, 2025
6b24021
Okay, I've made some corrections to the import logic and Toasts to ad…
google-labs-jules[bot] Jun 2, 2025
91f714a
fix: Resolve critical compilation errors
google-labs-jules[bot] Jun 2, 2025
6fe23ec
fix: Resolve "Expecting a top level declaration" and other compilatio…
google-labs-jules[bot] Jun 2, 2025
cb76cf5
Update PhotoReasoningScreen.kt
Android-PowerUser Jun 2, 2025
f4732e2
Fix: Make SystemMessageEntry Parcelable to prevent crash
google-labs-jules[bot] Jun 2, 2025
66260df
Update local.properties
Android-PowerUser Jun 2, 2025
4408c6b
Fix: Add kotlin-parcelize plugin to app build.gradle
google-labs-jules[bot] Jun 2, 2025
5801fba
Refactor: Align kotlin.plugin.serialization version with Kotlin version
google-labs-jules[bot] Jun 2, 2025
b660081
Feature: Reorder AI prompt components
google-labs-jules[bot] Jun 3, 2025
9c543c1
I've made some changes to `PhotoReasoningViewModel.kt` to include the…
google-labs-jules[bot] Jun 3, 2025
9115e8c
Feature: Populate default entries in System Message Database on first…
google-labs-jules[bot] Jun 3, 2025
33b7a02
Support percentage-based coordinates for input actions
google-labs-jules[bot] Jun 3, 2025
1e87d92
Support percentage-based distance for scroll commands
google-labs-jules[bot] Jun 3, 2025
fccd072
Delete app/src/test/kotlin directory
Android-PowerUser Jun 3, 2025
7c9fbe1
Add enhanced logging for debugging percentage scrolls
google-labs-jules[bot] Jun 4, 2025
457a07f
The message you provided appears to be a commit message, which is a d…
google-labs-jules[bot] Jun 4, 2025
17105dd
Fix: Correct inverted horizontal scroll directions
google-labs-jules[bot] Jun 4, 2025
9a950bc
Fix: Correct direction for basic horizontal scroll commands
google-labs-jules[bot] Jun 4, 2025
7c20372
Fix: Ensure all horizontal scroll commands have correct direction
google-labs-jules[bot] Jun 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion app/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
plugins {
id("com.android.application")
id("org.jetbrains.kotlin.android")
id("org.jetbrains.kotlin.plugin.serialization") version "1.9.0"
id("org.jetbrains.kotlin.plugin.serialization") version "1.9.20"
id("com.google.android.libraries.mapsplatform.secrets-gradle-plugin")
id("kotlin-parcelize")
}

android {
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.google.ai.sample.feature.multimodal

import android.content.Context
import android.graphics.Bitmap
import android.graphics.drawable.BitmapDrawable
import android.net.Uri
Expand All @@ -23,6 +24,8 @@ import com.google.ai.sample.util.ChatHistoryPreferences
import com.google.ai.sample.util.Command
import com.google.ai.sample.util.CommandParser
import com.google.ai.sample.util.SystemMessagePreferences
import com.google.ai.sample.util.SystemMessageEntryPreferences // Added import
import com.google.ai.sample.util.SystemMessageEntry // Added import
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.StateFlow
Expand Down Expand Up @@ -90,15 +93,7 @@ class PhotoReasoningViewModel(
) {
_uiState.value = PhotoReasoningUiState.Loading

// Get the system message
val systemMessageText = _systemMessage.value

// Create the prompt with system message if available
val prompt = if (systemMessageText.isNotBlank()) {
"System Message: $systemMessageText\n\nFOLLOW THE INSTRUCTIONS STRICTLY: $userInput"
} else {
"FOLLOW THE INSTRUCTIONS STRICTLY: $userInput"
}
val prompt = "FOLLOW THE INSTRUCTIONS STRICTLY: $userInput"

// Store the current user input and selected images
currentUserInput = userInput
Expand Down Expand Up @@ -452,7 +447,7 @@ class PhotoReasoningViewModel(
/**
* Update the system message
*/
fun updateSystemMessage(message: String, context: android.content.Context) {
fun updateSystemMessage(message: String, context: Context) {
_systemMessage.value = message

// Save to SharedPreferences for persistence
Expand All @@ -462,13 +457,31 @@ class PhotoReasoningViewModel(
/**
* Load the system message from SharedPreferences
*/
fun loadSystemMessage(context: android.content.Context) {
fun loadSystemMessage(context: Context) {
    // Restore the persisted system message straight into the backing state holder.
    _systemMessage.value = SystemMessagePreferences.loadSystemMessage(context)

    // Loading the system message also restores the saved chat history.
    loadChatHistory(context)
}

/**
 * Renders every stored system-message entry as one plain-text block.
 *
 * Entries are read through [SystemMessageEntryPreferences.loadEntries]. The output starts with an
 * "Available System Guides:" header and a "---" rule, then lists each entry's title and guide,
 * each entry followed by another "---" rule.
 *
 * @param context Context handed to the preferences loader.
 * @return The formatted text, or an empty string when no entries are stored.
 */
private fun formatDatabaseEntriesAsText(context: Context): String {
    val storedGuides = SystemMessageEntryPreferences.loadEntries(context)
    if (storedGuides.isEmpty()) return ""

    return buildString {
        append("Available System Guides:\n---\n")
        storedGuides.forEach { stored ->
            append("Title: ${stored.title}\n")
            append("Guide: ${stored.guide}\n")
            append("---\n")
        }
    }
}

/**
* Process commands found in the AI response
Expand Down Expand Up @@ -513,7 +526,7 @@ class PhotoReasoningViewModel(
/**
* Save chat history to SharedPreferences
*/
private fun saveChatHistory(context: android.content.Context?) {
private fun saveChatHistory(context: Context?) {
context?.let {
ChatHistoryPreferences.saveChatMessages(it, chatMessages)
}
Expand All @@ -522,7 +535,7 @@ class PhotoReasoningViewModel(
/**
* Load chat history from SharedPreferences
*/
fun loadChatHistory(context: android.content.Context) {
fun loadChatHistory(context: Context) {
val savedMessages = ChatHistoryPreferences.loadChatMessages(context)
if (savedMessages.isNotEmpty()) {
_chatState.clearMessages()
Expand All @@ -532,18 +545,29 @@ class PhotoReasoningViewModel(
_chatMessagesFlow.value = chatMessages

// Rebuild the chat history for the AI
rebuildChatHistory()
rebuildChatHistory(context) // Pass context here
}
}

/**
* Rebuild the chat history for the AI based on the current messages
*/
private fun rebuildChatHistory() {
private fun rebuildChatHistory(context: Context) { // Added context parameter
// Convert the current chat messages to Content objects for the chat history
val history = mutableListOf<Content>()

// 1. Active System Message
if (_systemMessage.value.isNotBlank()) {
history.add(content(role = "user") { text(_systemMessage.value) })
}

// 2. Formatted Database Entries
val formattedDbEntries = formatDatabaseEntriesAsText(context)
if (formattedDbEntries.isNotBlank()) {
history.add(content(role = "user") { text(formattedDbEntries) })
}

// Group messages by participant to create proper conversation turns
// 3. Group messages by participant to create proper conversation turns
var currentUserContent = ""
var currentModelContent = ""

Expand Down Expand Up @@ -597,20 +621,30 @@ class PhotoReasoningViewModel(
chat = generativeModel.startChat(
history = history
)
} else {
// Ensure chat is reset even if history is empty (e.g. only system message was there and it's now blank)
chat = generativeModel.startChat(history = emptyList())
}
}

/**
* Clear the chat history
*/
fun clearChatHistory(context: android.content.Context? = null) {
fun clearChatHistory(context: Context? = null) {
_chatState.clearMessages()
_chatMessagesFlow.value = emptyList()

// Reset the chat with empty history
chat = generativeModel.startChat(
history = emptyList()
)
val initialHistory = mutableListOf<Content>()
if (_systemMessage.value.isNotBlank()) {
initialHistory.add(content(role = "user") { text(_systemMessage.value) })
}
context?.let { ctx ->
val formattedDbEntries = formatDatabaseEntriesAsText(ctx)
if (formattedDbEntries.isNotBlank()) {
initialHistory.add(content(role = "user") { text(formattedDbEntries) })
}
}
chat = generativeModel.startChat(history = initialHistory.toList())

// Also clear from SharedPreferences if context is provided
context?.let {
Expand All @@ -627,7 +661,7 @@ class PhotoReasoningViewModel(
*/
fun addScreenshotToConversation(
screenshotUri: Uri,
context: android.content.Context,
context: Context,
screenInfo: String? = null
) {
PhotoReasoningApplication.applicationScope.launch(Dispatchers.Main) {
Expand Down
79 changes: 40 additions & 39 deletions app/src/main/kotlin/com/google/ai/sample/util/CommandParser.kt
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,13 @@ object CommandParser {
// Tap coordinates patterns - expanded to catch more variations
private val TAP_COORDINATES_PATTERNS = listOf(
// Standard patterns
Regex("(?i)\\b(?:tap|click|press|tippe|klicke|tippe auf|klicke auf) (?:at|on|auf) (?:coordinates?|koordinaten|position|stelle|punkt)[:\\s]\\s*\\(?\\s*(\\d+(?:\\.\\d+)?)\\s*,\\s*(\\d+(?:\\.\\d+)?)\\s*\\)?"),
Regex("(?i)\\b(?:tap|click|press|tippe|klicke|tippe auf|klicke auf) (?:at|on|auf) \\(?\\s*(\\d+(?:\\.\\d+)?)\\s*,\\s*(\\d+(?:\\.\\d+)?)\\s*\\)?"),
Regex("(?i)\\b(?:tap|click|press|tippe|klicke|tippe auf|klicke auf) (?:at|on|auf) (?:coordinates?|koordinaten|position|stelle|punkt)[:\\s]\\s*\\(?\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*\\)?"),
Regex("(?i)\\b(?:tap|click|press|tippe|klicke|tippe auf|klicke auf) (?:at|on|auf) \\(?\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*\\)?"),

// Function-like patterns
Regex("(?i)\\btapAtCoordinates\\(\\s*(\\d+(?:\\.\\d+)?)\\s*,\\s*(\\d+(?:\\.\\d+)?)\\s*\\)"),
Regex("(?i)\\bclickAtPosition\\(\\s*(\\d+(?:\\.\\d+)?)\\s*,\\s*(\\d+(?:\\.\\d+)?)\\s*\\)"),
Regex("(?i)\\btapAt\\(\\s*(\\d+(?:\\.\\d+)?)\\s*,\\s*(\\d+(?:\\.\\d+)?)\\s*\\)")
Regex("(?i)\\btapAtCoordinates\\(\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*\\)"),
Regex("(?i)\\bclickAtPosition\\(\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*\\)"),
Regex("(?i)\\btapAt\\(\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*\\)")
)

// Screenshot patterns - expanded for consistency
Expand Down Expand Up @@ -475,13 +475,14 @@ object CommandParser {
for (match in matches) {
try {
if (match.groupValues.size > 2) {
val x = match.groupValues[1].trim().toFloat()
val y = match.groupValues[2].trim().toFloat()
val xString = match.groupValues[1].trim()
val yString = match.groupValues[2].trim()

// Check if this command is already in the list (avoid duplicates)
if (!commands.any { it is Command.TapCoordinates && it.x == x && it.y == y }) {
Log.d(TAG, "Found tap coordinates command with pattern ${pattern.pattern}: ($x, $y)")
commands.add(Command.TapCoordinates(x, y))
// Note: Comparison now happens with strings directly.
if (!commands.any { it is Command.TapCoordinates && it.x == xString && it.y == yString }) {
Log.d(TAG, "Found tap coordinates command with pattern ${pattern.pattern}: ($xString, $yString)")
commands.add(Command.TapCoordinates(xString, yString))
}
}
} catch (e: Exception) {
Expand Down Expand Up @@ -568,19 +569,19 @@ object CommandParser {
*/
private fun findScrollDownCommands(text: String, commands: MutableList<Command>) {
// First check for coordinate-based scroll down commands
val coordPattern = Regex("(?i)\\bscrollDown\\s*\\(\\s*(\\d+)\\s*,\\s*(\\d+)\\s*,\\s*(\\d+)\\s*,\\s*(\\d+)\\s*\\)")
val coordPattern = Regex("(?i)\\bscrollDown\\s*\\(\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*,\\s*(\\d+)\\s*\\)")
val matches = coordPattern.findAll(text)

for (match in matches) {
if (match.groupValues.size >= 5) {
try {
val x = match.groupValues[1].toFloat()
val y = match.groupValues[2].toFloat()
val distance = match.groupValues[3].toFloat()
val xString = match.groupValues[1].trim()
val yString = match.groupValues[2].trim()
val distanceString = match.groupValues[3].trim()
val duration = match.groupValues[4].toLong()

Log.d(TAG, "Found coordinate-based scroll down command: scrollDown($x, $y, $distance, $duration)")
commands.add(Command.ScrollDownFromCoordinates(x, y, distance, duration))
Log.d(TAG, "Found coordinate-based scroll down command: scrollDown($xString, $yString, $distanceString, $duration)")
commands.add(Command.ScrollDownFromCoordinates(xString, yString, distanceString, duration))
} catch (e: Exception) {
Log.e(TAG, "Error parsing coordinate-based scroll down command: ${e.message}")
}
Expand Down Expand Up @@ -609,19 +610,19 @@ object CommandParser {
*/
private fun findScrollUpCommands(text: String, commands: MutableList<Command>) {
// First check for coordinate-based scroll up commands
val coordPattern = Regex("(?i)\\bscrollUp\\s*\\(\\s*(\\d+)\\s*,\\s*(\\d+)\\s*,\\s*(\\d+)\\s*,\\s*(\\d+)\\s*\\)")
val coordPattern = Regex("(?i)\\bscrollUp\\s*\\(\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*,\\s*(\\d+)\\s*\\)")
val matches = coordPattern.findAll(text)

for (match in matches) {
if (match.groupValues.size >= 5) {
try {
val x = match.groupValues[1].toFloat()
val y = match.groupValues[2].toFloat()
val distance = match.groupValues[3].toFloat()
val xString = match.groupValues[1].trim()
val yString = match.groupValues[2].trim()
val distanceString = match.groupValues[3].trim()
val duration = match.groupValues[4].toLong()

Log.d(TAG, "Found coordinate-based scroll up command: scrollUp($x, $y, $distance, $duration)")
commands.add(Command.ScrollUpFromCoordinates(x, y, distance, duration))
Log.d(TAG, "Found coordinate-based scroll up command: scrollUp($xString, $yString, $distanceString, $duration)")
commands.add(Command.ScrollUpFromCoordinates(xString, yString, distanceString, duration))
} catch (e: Exception) {
Log.e(TAG, "Error parsing coordinate-based scroll up command: ${e.message}")
}
Expand Down Expand Up @@ -650,19 +651,19 @@ object CommandParser {
*/
private fun findScrollLeftCommands(text: String, commands: MutableList<Command>) {
// First check for coordinate-based scroll left commands
val coordPattern = Regex("(?i)\\bscrollLeft\\s*\\(\\s*(\\d+)\\s*,\\s*(\\d+)\\s*,\\s*(\\d+)\\s*,\\s*(\\d+)\\s*\\)")
val coordPattern = Regex("(?i)\\bscrollLeft\\s*\\(\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*,\\s*(\\d+)\\s*\\)")
val matches = coordPattern.findAll(text)

for (match in matches) {
if (match.groupValues.size >= 5) {
try {
val x = match.groupValues[1].toFloat()
val y = match.groupValues[2].toFloat()
val distance = match.groupValues[3].toFloat()
val xString = match.groupValues[1].trim()
val yString = match.groupValues[2].trim()
val distanceString = match.groupValues[3].trim()
val duration = match.groupValues[4].toLong()

Log.d(TAG, "Found coordinate-based scroll left command: scrollLeft($x, $y, $distance, $duration)")
commands.add(Command.ScrollLeftFromCoordinates(x, y, distance, duration))
Log.d(TAG, "Found coordinate-based scroll left command: scrollLeft($xString, $yString, $distanceString, $duration)")
commands.add(Command.ScrollLeftFromCoordinates(xString, yString, distanceString, duration))
} catch (e: Exception) {
Log.e(TAG, "Error parsing coordinate-based scroll left command: ${e.message}")
}
Expand Down Expand Up @@ -691,19 +692,19 @@ object CommandParser {
*/
private fun findScrollRightCommands(text: String, commands: MutableList<Command>) {
// First check for coordinate-based scroll right commands
val coordPattern = Regex("(?i)\\bscrollRight\\s*\\(\\s*(\\d+)\\s*,\\s*(\\d+)\\s*,\\s*(\\d+)\\s*,\\s*(\\d+)\\s*\\)")
val coordPattern = Regex("(?i)\\bscrollRight\\s*\\(\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*,\\s*([\\d\\.%]+)\\s*,\\s*(\\d+)\\s*\\)")
val matches = coordPattern.findAll(text)

for (match in matches) {
if (match.groupValues.size >= 5) {
try {
val x = match.groupValues[1].toFloat()
val y = match.groupValues[2].toFloat()
val distance = match.groupValues[3].toFloat()
val xString = match.groupValues[1].trim()
val yString = match.groupValues[2].trim()
val distanceString = match.groupValues[3].trim()
val duration = match.groupValues[4].toLong()

Log.d(TAG, "Found coordinate-based scroll right command: scrollRight($x, $y, $distance, $duration)")
commands.add(Command.ScrollRightFromCoordinates(x, y, distance, duration))
Log.d(TAG, "Found coordinate-based scroll right command: scrollRight($xString, $yString, $distanceString, $duration)")
commands.add(Command.ScrollRightFromCoordinates(xString, yString, distanceString, duration))
} catch (e: Exception) {
Log.e(TAG, "Error parsing coordinate-based scroll right command: ${e.message}")
}
Expand Down Expand Up @@ -796,7 +797,7 @@ sealed class Command {
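/**
 * Command to tap at the specified coordinates.
 *
 * [x] and [y] hold the raw text captured by the parser: a run of digits, dots and '%' such as
 * "540", "12.5" or "50%". The parser does not validate or convert these values, so consumers
 * must interpret percentages and reject malformed input (e.g. "1.2.3") themselves.
 */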
data class TapCoordinates(val x: Float, val y: Float) : Command()
data class TapCoordinates(val x: String, val y: String) : Command()

/**
* Command to take a screenshot
Expand Down Expand Up @@ -846,22 +847,22 @@ sealed class Command {
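/**
 * Command to scroll down from specific coordinates with custom distance and duration.
 *
 * [x], [y] and [distance] hold the raw text captured by the parser (digits, dots and '%', e.g.
 * "540" or "50%") and are not validated or converted there; [duration] is parsed from a plain
 * integer.
 */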
data class ScrollDownFromCoordinates(val x: Float, val y: Float, val distance: Float, val duration: Long) : Command()
data class ScrollDownFromCoordinates(val x: String, val y: String, val distance: String, val duration: Long) : Command()

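/**
 * Command to scroll up from specific coordinates with custom distance and duration.
 *
 * [x], [y] and [distance] hold the raw text captured by the parser (digits, dots and '%', e.g.
 * "540" or "50%") and are not validated or converted there; [duration] is parsed from a plain
 * integer.
 */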
data class ScrollUpFromCoordinates(val x: Float, val y: Float, val distance: Float, val duration: Long) : Command()
data class ScrollUpFromCoordinates(val x: String, val y: String, val distance: String, val duration: Long) : Command()

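/**
 * Command to scroll left from specific coordinates with custom distance and duration.
 *
 * [x], [y] and [distance] hold the raw text captured by the parser (digits, dots and '%', e.g.
 * "540" or "50%") and are not validated or converted there; [duration] is parsed from a plain
 * integer.
 */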
data class ScrollLeftFromCoordinates(val x: Float, val y: Float, val distance: Float, val duration: Long) : Command()
data class ScrollLeftFromCoordinates(val x: String, val y: String, val distance: String, val duration: Long) : Command()

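/**
 * Command to scroll right from specific coordinates with custom distance and duration.
 *
 * [x], [y] and [distance] hold the raw text captured by the parser (digits, dots and '%', e.g.
 * "540" or "50%") and are not validated or converted there; [duration] is parsed from a plain
 * integer.
 */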
data class ScrollRightFromCoordinates(val x: Float, val y: Float, val distance: Float, val duration: Long) : Command()
data class ScrollRightFromCoordinates(val x: String, val y: String, val distance: String, val duration: Long) : Command()

/**
* Command to open an app by package name
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package com.google.ai.sample.util

import android.os.Parcelable
import kotlinx.parcelize.Parcelize
import kotlinx.serialization.Serializable

@Parcelize
@Serializable
data class SystemMessageEntry(
val title: String,
val guide: String
)
) : Parcelable
Loading