|
| 1 | +/** |
| 2 | + * ChatGPT web browser automation helpers for image generation. |
| 3 | + * Cross-platform: works on Linux/macOS/Windows via OpenCLI's CDP browser automation. |
| 4 | + */ |
| 5 | + |
/** Host name of the ChatGPT web app. */
export const CHATGPT_DOMAIN = 'chatgpt.com';

/** Origin URL of the ChatGPT web app (HTTPS scheme + {@link CHATGPT_DOMAIN}). */
export const CHATGPT_URL = `https://${CHATGPT_DOMAIN}`;

// DOM selectors. Both key on the aria-label of the control they target.

/** Selector for the prompt composer element. */
const COMPOSER_SELECTOR = '[aria-label="Chat with ChatGPT"]';

/** Selector for the button that submits the composed prompt. */
const SEND_BTN_SELECTOR = 'button[aria-label="Send prompt"]';
| 12 | + |
/**
 * Build the source text of a function body that locates the ChatGPT composer
 * inside the page.
 *
 * The returned text is NOT a standalone expression: it ends with a `return`
 * statement, so it has to be placed inside its own function wrapper, e.g.
 * `(() => { <script> })()`, which evaluates to `{ findComposer, markerAttr }`.
 *
 * In-page behaviour of `findComposer()`:
 *  - an element already tagged with the marker attribute is reused while it is
 *    still visible;
 *  - otherwise the first visible element matching COMPOSER_SELECTOR is tagged
 *    and returned, and the marker is removed from every other element so that
 *    only one element carries it at a time;
 *  - `null` is returned when no visible composer exists.
 *
 * @returns {string} JavaScript source to be wrapped in a function and evaluated in the page.
 */
function buildComposerLocatorScript() {
  const selectorsJson = JSON.stringify([COMPOSER_SELECTOR]);
  const markerAttrJson = JSON.stringify('data-opencli-chatgpt-composer');

  return `
    const isVisible = (el) => {
      if (!(el instanceof HTMLElement)) return false;
      const style = window.getComputedStyle(el);
      if (style.display === 'none' || style.visibility === 'hidden') return false;
      const rect = el.getBoundingClientRect();
      return rect.width > 0 && rect.height > 0;
    };

    const markerAttr = ${markerAttrJson};
    const clearMarkers = (active) => {
      document.querySelectorAll('[' + markerAttr + ']').forEach(node => {
        if (node !== active) node.removeAttribute(markerAttr);
      });
    };

    const findComposer = () => {
      const marked = document.querySelector('[' + markerAttr + '="1"]');
      if (marked instanceof HTMLElement && isVisible(marked)) return marked;

      for (const selector of ${selectorsJson}) {
        const node = Array.from(document.querySelectorAll(selector)).find(c => c instanceof HTMLElement && isVisible(c));
        if (node instanceof HTMLElement) {
          clearMarkers(node);
          node.setAttribute(markerAttr, '1');
          return node;
        }
      }
      return null;
    };

    findComposer.toString = () => 'findComposer';
    return { findComposer, markerAttr };
  `;
}
| 50 | + |
/**
 * Type a prompt into the ChatGPT composer and submit it.
 *
 * Steps: close the sidebar if it is open, locate/focus/clear the composer,
 * enter the text (via `page.nativeType` when the page object provides it,
 * otherwise via `document.execCommand('insertText')` in the page), then click
 * the send button.
 *
 * @param {object} page - Automation page handle. This function calls
 *   `page.evaluate(script)`, `page.wait(seconds)` and, when present,
 *   `page.nativeType(text)`.
 * @param {string} text - Prompt text to enter.
 * @returns {Promise<boolean>} `true` only when the send button was found,
 *   enabled and clicked; `false` when the composer or the button is missing.
 */
export async function sendChatGPTMessage(page, text) {
  // The locator script is a function body that ends in `return { findComposer, ... }`.
  // It therefore gets its own wrapper: inlining it directly into another IIFE
  // would make that IIFE return before the composer is ever touched.
  const locateComposer = `(() => { ${buildComposerLocatorScript()} })().findComposer()`;

  // Close the sidebar if it is open (it can cover the chat composer).
  await page.evaluate(`
    (() => {
      const closeBtn = document.querySelector('button[aria-label="Close sidebar"]');
      if (closeBtn) closeBtn.click();
    })()
  `);
  await page.wait(0.5);

  // Give the composer time to become ready.
  await page.wait(1.5);

  const composerReady = await page.evaluate(`
    (() => {
      const composer = ${locateComposer};
      if (!composer) return false;
      composer.focus();
      composer.textContent = '';
      return true;
    })()
  `);
  if (!composerReady) return false;

  // Prefer native typing into the focused composer when the page supports it.
  let typed = false;
  if (typeof page.nativeType === 'function') {
    try {
      await page.nativeType(text);
      typed = true;
    } catch (err) {
      // Deliberate best effort: any native typing failure falls back to execCommand below.
    }
  }

  if (!typed) {
    const composerStillThere = await page.evaluate(`
      (() => {
        const composer = ${locateComposer};
        if (!composer) return false;
        composer.focus();
        document.execCommand('insertText', false, ${JSON.stringify(text)});
        return true;
      })()
    `);
    if (!composerStillThere) return false;
  }

  // Wait for the send button to appear (it only shows when there is text).
  await page.wait(1.5);

  // Look up and click in a single evaluate so the result reflects an actual click.
  const clicked = await page.evaluate(`
    (() => {
      const sendBtn = document.querySelector(${JSON.stringify(SEND_BTN_SELECTOR)});
      if (!sendBtn || sendBtn.disabled) return false;
      sendBtn.click();
      return true;
    })()
  `);
  return Boolean(clicked);
}
| 124 | + |
/**
 * Report whether ChatGPT still appears to be producing a response.
 *
 * The page counts as busy when any button has an aria-label that is exactly
 * 'Stop generating' or that contains 'Thinking'.
 *
 * @param {object} page - Automation page handle exposing `evaluate(script)`.
 * @returns {Promise<boolean>} Result of the in-page check.
 */
export async function isGenerating(page) {
  const probeScript = `
    (() => {
      const isBusyLabel = (label) => label === 'Stop generating' || label.includes('Thinking');
      for (const button of Array.from(document.querySelectorAll('button'))) {
        if (isBusyLabel(button.getAttribute('aria-label') || '')) return true;
      }
      return false;
    })()
  `;
  return await page.evaluate(probeScript);
}
| 138 | + |
/**
 * Collect the URLs of images currently rendered on the ChatGPT page.
 *
 * An image is reported when it is rendered larger than 32x32 CSS pixels, has a
 * source URL, does not look like an avatar/profile/logo/icon (judged by its
 * alt text and class name), and is at least 128 pixels in one dimension.
 * Each URL is reported once, in first-seen order.
 *
 * @param {object} page - Automation page handle exposing `evaluate(script)`.
 * @returns {Promise<string[]>} Result of the in-page collection.
 */
export async function getChatGPTVisibleImageUrls(page) {
  const collectScript = `
    (() => {
      const ALT_BLOCKLIST = ['avatar', 'profile', 'logo', 'icon'];
      const CLASS_BLOCKLIST = ['avatar', 'profile', 'icon'];
      const mentionsAny = (text, words) => words.some(word => text.includes(word));

      const isRendered = (el) => {
        if (!(el instanceof HTMLElement)) return false;
        const style = window.getComputedStyle(el);
        if (style.display === 'none' || style.visibility === 'hidden') return false;
        const box = el.getBoundingClientRect();
        return box.width > 32 && box.height > 32;
      };

      const found = new Set();
      Array.from(document.querySelectorAll('img')).forEach(img => {
        if (!(img instanceof HTMLImageElement) || !isRendered(img)) return;

        const src = img.currentSrc || img.src || '';
        if (!src) return;

        const alt = (img.getAttribute('alt') || '').toLowerCase();
        const cls = (img.className || '').toLowerCase();
        if (mentionsAny(alt, ALT_BLOCKLIST) || mentionsAny(cls, CLASS_BLOCKLIST)) return;

        const w = img.naturalWidth || img.width || 0;
        const h = img.naturalHeight || img.height || 0;
        if (w < 128 && h < 128) return;

        found.add(src);
      });
      return Array.from(found);
    })()
  `;
  return await page.evaluate(collectScript);
}
| 180 | + |
/**
 * Poll the page until newly generated images have appeared and settled.
 *
 * Every 3 seconds the page is checked. Polls during which ChatGPT is still
 * generating, or during which no image outside `beforeUrls` is visible, are
 * skipped. The result is returned as soon as the same non-empty list of new
 * URLs has been observed on two counted polls in a row; when the poll budget
 * runs out, the most recently observed list (possibly empty) is returned.
 *
 * @param {object} page - Automation page handle exposing `wait(seconds)`;
 *   it is also passed to `isGenerating` and `getChatGPTVisibleImageUrls`.
 * @param {Iterable<string>|null|undefined} beforeUrls - URLs that were already
 *   visible before the prompt was sent; these are never reported.
 * @param {number} timeoutSeconds - Approximate time budget. At least one poll
 *   is always performed, including when this value is missing or not a number.
 * @returns {Promise<string[]>} New image URLs, or `[]` when none showed up.
 */
export async function waitForChatGPTImages(page, beforeUrls, timeoutSeconds) {
  const pollIntervalSeconds = 3;
  const knownUrls = new Set(beforeUrls);

  // Math.max(1, NaN) is NaN, which would skip the loop entirely; treat an
  // unusable timeout as "poll once" so the at-least-one-poll guarantee holds.
  const requestedPolls = Math.ceil(timeoutSeconds / pollIntervalSeconds);
  const maxPolls = Number.isNaN(requestedPolls) ? 1 : Math.max(1, requestedPolls);

  let lastUrls = [];
  let stableCount = 0;

  for (let poll = 0; poll < maxPolls; poll++) {
    await page.wait(pollIntervalSeconds);

    if (await isGenerating(page)) continue;

    const visibleUrls = await getChatGPTVisibleImageUrls(page);
    const freshUrls = visibleUrls.filter((url) => !knownUrls.has(url));
    if (freshUrls.length === 0) continue;

    if (freshUrls.join('\n') === lastUrls.join('\n')) {
      stableCount += 1;
    } else {
      lastUrls = freshUrls;
      stableCount = 1;
    }

    if (stableCount >= 2) return lastUrls;
  }

  // Poll budget exhausted: report whatever was seen last.
  return lastUrls;
}
| 216 | + |
/**
 * Export images by URL as base64 data URLs.
 *
 * For each URL the in-page script:
 *  1. looks for an `<img>` whose current source equals the URL, to read its size;
 *  2. passes `data:` URLs through unchanged, otherwise fetches the URL with
 *     credentials and converts the response blob to a data URL;
 *  3. if that produced nothing, draws the matching `<img>` onto a canvas and
 *     exports it as PNG.
 * URLs for which every approach fails are omitted from the result.
 *
 * @param {object} page - Automation page handle exposing `evaluate(script)`.
 * @param {string[]} urls - Image URLs to export. Anything that is not an
 *   array (including null/undefined) is treated as an empty list.
 * @returns {Promise<Array<{url: string, dataUrl: string, mimeType: string, width: number, height: number}>>}
 *   One entry per successfully exported URL, in input order.
 */
export async function getChatGPTImageAssets(page, urls) {
  // A non-array value would be inlined as `null`/`undefined` and make the
  // in-page `for...of` throw, so normalise before building the script.
  const targets = Array.isArray(urls) ? urls : [];
  if (targets.length === 0) return [];

  const urlsJson = JSON.stringify(targets);
  // The script reads the URLs from the inlined JSON; the second argument is
  // kept only to preserve the shape of the original evaluate call.
  return await page.evaluate(`
    (async (targetUrls) => {
      const blobToDataUrl = (blob) => new Promise((resolve, reject) => {
        const reader = new FileReader();
        reader.onloadend = () => resolve(String(reader.result || ''));
        reader.onerror = () => reject(new Error('Failed to read blob'));
        reader.readAsDataURL(blob);
      });

      const inferMime = (value, fallbackUrl) => {
        if (value) return value;
        const lower = String(fallbackUrl || '').toLowerCase();
        if (lower.includes('.png')) return 'image/png';
        if (lower.includes('.webp')) return 'image/webp';
        if (lower.includes('.gif')) return 'image/gif';
        return 'image/jpeg';
      };

      const results = [];

      for (const targetUrl of targetUrls) {
        let dataUrl = '';
        let mimeType = 'image/jpeg';
        let width = 0;
        let height = 0;

        // Try to find the img element for size info
        const img = Array.from(document.querySelectorAll('img')).find(el =>
          (el.currentSrc || el.src || '') === targetUrl
        );
        if (img) {
          width = img.naturalWidth || img.width || 0;
          height = img.naturalHeight || img.height || 0;
        }

        try {
          if (String(targetUrl).startsWith('data:')) {
            dataUrl = String(targetUrl);
            mimeType = (String(targetUrl).match(/^data:([^;]+);/i) || [])[1] || 'image/png';
          } else {
            // Try to fetch via CORS from the page's origin
            const res = await fetch(targetUrl, { credentials: 'include' });
            if (res.ok) {
              const blob = await res.blob();
              mimeType = inferMime(blob.type, targetUrl);
              dataUrl = await blobToDataUrl(blob);
            }
          }
        } catch (e) {
          // Deliberate best effort: a failed fetch (e.g. CORS) leaves dataUrl
          // empty so the canvas fallback below gets a chance.
        }

        // Fallback: draw img to canvas
        if (!dataUrl && img && img instanceof HTMLImageElement) {
          try {
            const canvas = document.createElement('canvas');
            canvas.width = img.naturalWidth || img.width || 512;
            canvas.height = img.naturalHeight || img.height || 512;
            const ctx = canvas.getContext('2d');
            if (ctx) {
              ctx.drawImage(img, 0, 0);
              dataUrl = canvas.toDataURL('image/png');
              mimeType = 'image/png';
            }
          } catch (e) {
            // Deliberate best effort: if the canvas export throws, this URL is
            // simply left out of the results.
          }
        }

        if (dataUrl) {
          results.push({ url: String(targetUrl), dataUrl, mimeType, width, height });
        }
      }

      return results;
    })(${urlsJson})
  `, targets);
}
0 commit comments