Skip to content

Commit 4d1fa8a

Browse files
asimons81, Tony Simons, jackwener
authored
feat(clis/chatgptweb): add ChatGPT web image generation command (#973)
* feat(clis/chatgptweb): add ChatGPT web image generation command

  Add `opencli chatgptweb image` command that generates images using ChatGPT web (GPT-4o image generation) and saves them locally.

  Features:
  - Navigates to chatgpt.com/new with full page reload to ensure clean state
  - Uses Playwright's page.type() for reliable text input in TipTap editor
  - Closes sidebar if open (covers the chat composer on some layouts)
  - Polls for response completion (handles thinking/throttling states)
  - Extracts generated images from DOM (backend-api/estuary/content URLs)
  - Downloads and saves as PNG/JPEG files to user-specified directory
  - Supports --op for output directory and --sd to skip download

  Files:
  - clis/chatgptweb/image.js: CLI command definition
  - clis/chatgptweb/utils.js: DOM helpers, send/wait/export functions

  Works cross-platform (Linux/macOS/Windows) via OpenCLI browser automation.

* fix(chatgptweb): stabilize image generation flow

* docs(chatgptweb): add browser adapter guide

---------

Co-authored-by: Tony Simons <tony@tonysimons.dev>
Co-authored-by: jackwener <jakevingoo@gmail.com>
1 parent aa47de7 commit 4d1fa8a

File tree

3 files changed

+443
-0
lines changed

3 files changed

+443
-0
lines changed

clis/chatgptweb/image.js

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import * as os from 'node:os';
2+
import * as path from 'node:path';
3+
import { cli, Strategy } from '@jackwener/opencli/registry';
4+
import { saveBase64ToFile } from '@jackwener/opencli/utils';
5+
import { getChatGPTVisibleImageUrls, sendChatGPTMessage, waitForChatGPTImages, getChatGPTImageAssets } from './utils.js';
6+
7+
const CHATGPT_DOMAIN = 'chatgpt.com';
8+
9+
/**
 * Map an image MIME type to a file extension (leading dot included).
 *
 * Matching is case-insensitive and substring-based, so a value such as
 * `IMAGE/PNG` or `image/png; charset=binary` still resolves to `.png`.
 * Anything unrecognised, including a missing or non-string value, falls
 * back to `.jpg` instead of throwing.
 *
 * @param {string} mime - MIME type reported for the image.
 * @returns {string} One of `.png`, `.webp`, `.gif` or `.jpg`.
 */
function extFromMime(mime) {
  const normalized = String(mime ?? '').toLowerCase();
  // Checked in this order so the first matching token wins.
  for (const kind of ['png', 'webp', 'gif']) {
    if (normalized.includes(kind)) return `.${kind}`;
  }
  return '.jpg';
}
15+
16+
/**
 * Interpret a loosely-typed flag value as a boolean.
 *
 * Real booleans pass through untouched. Every other value is stringified
 * (null/undefined become the empty string), trimmed and lower-cased; only
 * `true`, `1`, `yes` and `on` count as enabled.
 *
 * @param {unknown} value - Raw flag value.
 * @returns {boolean} Whether the flag is enabled.
 */
function normalizeBooleanFlag(value) {
  if (typeof value === 'boolean') {
    return value;
  }
  const text = String(value ?? '').trim().toLowerCase();
  return ['true', '1', 'yes', 'on'].includes(text);
}
21+
22+
/**
 * Abbreviate a path for display by replacing the user's home directory
 * with `~`.
 *
 * The home directory is only matched on a path-separator boundary, so a
 * sibling such as `/home/bob2/x` is NOT rewritten to `~2/x` when the home
 * directory is `/home/bob`. Paths outside the home directory are returned
 * unchanged.
 *
 * @param {string} filePath - Path to abbreviate.
 * @returns {string} `~`-prefixed path when inside the home directory,
 *   otherwise `filePath` as given.
 */
function displayPath(filePath) {
  const home = os.homedir();
  if (!home) return filePath;
  if (filePath === home) return '~';
  // Compare against "home + separator" so only true descendants match.
  const prefix = home.endsWith(path.sep) ? home : `${home}${path.sep}`;
  return filePath.startsWith(prefix)
    ? `~${path.sep}${filePath.slice(prefix.length)}`
    : filePath;
}
26+
27+
/**
 * Read the URL currently shown in the automated page.
 *
 * A rejected evaluation, a non-string result or an empty string all yield
 * the ChatGPT home URL instead.
 *
 * @param {object} page - Browser page exposing `evaluate(script)`.
 * @returns {Promise<string>} The page URL, or `https://chatgpt.com`.
 */
async function currentChatGPTLink(page) {
  const fallback = 'https://chatgpt.com';
  const href = await page.evaluate('window.location.href').catch(() => '');
  if (typeof href !== 'string' || href === '') {
    return fallback;
  }
  return href;
}
31+
32+
/** Default download directory: `<home>/Pictures/chatgpt`. */
function defaultOutputDir() {
  return path.join(os.homedir(), 'Pictures', 'chatgpt');
}

/**
 * `chatgptweb image` command.
 *
 * Opens a new ChatGPT conversation, sends an explicit image-generation
 * prompt, waits for images that were not on the page beforehand, and
 * (unless `sd` is set) exports them to the `op` directory.
 *
 * Rows carry `status`, `file` and `link`. A failure yields a single row
 * with status `send-failed`, `no-images` or `export-failed`; a skipped
 * download yields a single `generated` row; otherwise one `saved` row is
 * returned per exported image.
 */
export const imageCommand = cli({
  site: 'chatgptweb',
  name: 'image',
  description: 'Generate images with ChatGPT web and save them locally',
  domain: CHATGPT_DOMAIN,
  strategy: Strategy.COOKIE,
  browser: true,
  navigateBefore: false,
  defaultFormat: 'plain',
  timeoutSeconds: 240,
  args: [
    { name: 'prompt', positional: true, required: true, help: 'Image prompt to send to ChatGPT' },
    { name: 'op', default: defaultOutputDir(), help: 'Output directory' },
    { name: 'sd', type: 'boolean', default: false, help: 'Skip download shorthand; only show ChatGPT link' },
  ],
  columns: ['status', 'file', 'link'],
  func: async (page, kwargs) => {
    const { prompt, op, sd } = kwargs;
    const outputDir = op || defaultOutputDir();
    // An empty string also counts as "on" (presumably how a bare `--sd`
    // can arrive; confirm against the CLI argument parser).
    const skipDownload = sd === '' || normalizeBooleanFlag(sd);
    // Budget handed to the image poller; kept below `timeoutSeconds` above.
    const timeout = 120;

    // Builds one output row; `file` defaults to the "nothing saved" marker.
    const report = (status, link, file = '📁 -') => ({ status, file, link: `🔗 ${link}` });

    // Start from /new with a full reload to clear React sidebar state.
    await page.goto(`https://${CHATGPT_DOMAIN}/new`, { settleMs: 2000 });

    // Snapshot images already on the page so only new ones are reported.
    const beforeUrls = await getChatGPTVisibleImageUrls(page);

    // The prompt is prefixed so the request is explicitly for an image.
    const sent = await sendChatGPTMessage(page, `Generate an image of: ${prompt}`);
    if (!sent) {
      return [report('⚠️ send-failed', await currentChatGPTLink(page))];
    }

    const urls = await waitForChatGPTImages(page, beforeUrls, timeout);
    const link = await currentChatGPTLink(page);

    if (!urls.length) {
      return [report('⚠️ no-images', link)];
    }
    if (skipDownload) {
      return [report('🎨 generated', link)];
    }

    const assets = await getChatGPTImageAssets(page, urls);
    if (!assets.length) {
      return [report('⚠️ export-failed', link)];
    }

    // All files from one run share a timestamp; a numeric suffix is added
    // only when more than one image was exported.
    const stamp = Date.now();
    const numbered = assets.length > 1;
    const results = [];
    for (const [index, asset] of assets.entries()) {
      const base64 = asset.dataUrl.replace(/^data:[^;]+;base64,/, '');
      const suffix = numbered ? `_${index + 1}` : '';
      const ext = extFromMime(asset.mimeType);
      const filePath = path.join(outputDir, `chatgpt_${stamp}${suffix}${ext}`);
      await saveBase64ToFile(base64, filePath);
      results.push(report('✅ saved', link, `📁 ${displayPath(filePath)}`));
    }
    return results;
  },
});

clis/chatgptweb/utils.js

Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
/**
2+
* ChatGPT web browser automation helpers for image generation.
3+
* Cross-platform: works on Linux/macOS/Windows via OpenCLI's CDP browser automation.
4+
*/
5+
6+
// Host name of the ChatGPT web app (no scheme).
export const CHATGPT_DOMAIN = 'chatgpt.com';
7+
// Base URL of the ChatGPT web app.
export const CHATGPT_URL = 'https://chatgpt.com';
8+
9+
// Selectors
10+
// CSS selector matching the chat composer by its aria-label.
const COMPOSER_SELECTOR = '[aria-label="Chat with ChatGPT"]';
11+
// CSS selector matching the send button by its aria-label.
const SEND_BTN_SELECTOR = 'button[aria-label="Send prompt"]';
12+
13+
/**
 * Build a JavaScript snippet (as source text) that locates the chat
 * composer inside the page.
 *
 * The snippet is a sequence of statements ending in
 * `return { findComposer, markerAttr };`, so it must be placed inside a
 * function body and its result captured, e.g.
 * `const { findComposer } = (() => { <snippet> })();`. Inlining it ahead of
 * other statements makes those statements unreachable.
 *
 * In-page behaviour of `findComposer()`:
 * - returns the element already tagged with the marker attribute if it is
 *   still visible;
 * - otherwise returns the first visible match of the composer selector,
 *   tags it, and removes the marker from any other element so at most one
 *   element stays tagged;
 * - returns `null` when nothing visible matches.
 *
 * @returns {string} Source text of the locator snippet.
 */
function buildComposerLocatorScript() {
  const selectorsJson = JSON.stringify([COMPOSER_SELECTOR]);
  const markerAttrJson = JSON.stringify('data-opencli-chatgpt-composer');
  return `
    const isVisible = (el) => {
      if (!(el instanceof HTMLElement)) return false;
      const style = window.getComputedStyle(el);
      if (style.display === 'none' || style.visibility === 'hidden') return false;
      const rect = el.getBoundingClientRect();
      return rect.width > 0 && rect.height > 0;
    };

    const markerAttr = ${markerAttrJson};
    const clearMarkers = (active) => {
      document.querySelectorAll('[' + markerAttr + ']').forEach(node => {
        if (node !== active) node.removeAttribute(markerAttr);
      });
    };

    const findComposer = () => {
      const marked = document.querySelector('[' + markerAttr + '="1"]');
      if (marked instanceof HTMLElement && isVisible(marked)) return marked;

      for (const selector of ${selectorsJson}) {
        const node = Array.from(document.querySelectorAll(selector)).find(c => c instanceof HTMLElement && isVisible(c));
        if (node instanceof HTMLElement) {
          node.setAttribute(markerAttr, '1');
          clearMarkers(node);
          return node;
        }
      }
      return null;
    };

    findComposer.toString = () => 'findComposer';
    return { findComposer, markerAttr };
  `;
}
50+
51+
/**
52+
* Send a message to the ChatGPT composer and submit it.
53+
* Returns true if the message was sent successfully.
54+
*/
55+
/**
 * Type a message into the ChatGPT composer and submit it.
 *
 * Steps: close the sidebar if its close button is present, locate and
 * focus the composer, enter the text (via `page.nativeType` when the page
 * object provides it, otherwise via `document.execCommand('insertText')`),
 * then click the send button.
 *
 * The locator snippet from `buildComposerLocatorScript()` ends in a
 * `return`, so it is wrapped in its own function and its result is
 * destructured. Inlining it directly would return before the composer is
 * ever focused.
 *
 * @param {object} page - Browser page exposing `evaluate(script)`,
 *   `wait(n)` and optionally `nativeType(text)`. The unit of `wait` is
 *   presumably seconds; confirm against the page API.
 * @param {string} text - Message to send.
 * @returns {Promise<boolean>} `true` when the send button was found and
 *   clicked; `false` when the composer or the send button is missing.
 */
export async function sendChatGPTMessage(page, text) {
  // Close sidebar if open (it can cover the chat composer).
  await page.evaluate(`
    (() => {
      const closeBtn = Array.from(document.querySelectorAll('button')).find(b => b.getAttribute('aria-label') === 'Close sidebar');
      if (closeBtn) closeBtn.click();
    })()
  `);
  await page.wait(0.5);

  // Give the composer time to become ready.
  await page.wait(1.5);

  const locatorScript = buildComposerLocatorScript();

  const composerReady = await page.evaluate(`
    (() => {
      const { findComposer } = (() => { ${locatorScript} })();
      const composer = findComposer();
      if (!composer) return false;
      composer.focus();
      // Only clear leftover text; an empty editor's DOM is left untouched.
      if (composer.textContent) composer.textContent = '';
      return true;
    })()
  `);
  if (!composerReady) return false;

  // Prefer the page's native typing. If it is unavailable or fails, fall
  // back to execCommand (deliberately best-effort, so the error is dropped).
  let typed = false;
  if (typeof page.nativeType === 'function') {
    try {
      await page.nativeType(text);
      typed = true;
    } catch {
      typed = false;
    }
  }
  if (!typed) {
    await page.evaluate(`
      (() => {
        const { findComposer } = (() => { ${locatorScript} })();
        const composer = findComposer();
        if (!composer) return;
        composer.focus();
        document.execCommand('insertText', false, ${JSON.stringify(text)});
      })()
    `);
  }

  // Wait for send button to appear (it only shows when there's text).
  await page.wait(1.5);

  // Find and click in a single evaluation so the button cannot disappear
  // between the check and the click.
  const clicked = await page.evaluate(`
    (() => {
      const sendBtn = document.querySelector(${JSON.stringify(SEND_BTN_SELECTOR)});
      if (!sendBtn) return false;
      sendBtn.click();
      return true;
    })()
  `);
  return clicked === true;
}
124+
125+
/**
126+
* Check if ChatGPT is still generating a response.
127+
*/
128+
/**
 * Report whether the page shows a button indicating a response is still
 * in progress.
 *
 * The in-page probe looks for any `<button>` whose `aria-label` is exactly
 * `Stop generating` or contains `Thinking`.
 *
 * @param {object} page - Browser page exposing `evaluate(script)`.
 * @returns {Promise<*>} Whatever the probe evaluates to (a boolean when it
 *   runs in a DOM).
 */
export async function isGenerating(page) {
  const probe = `
    (() => {
      return Array.from(document.querySelectorAll('button')).some(b => {
        const label = b.getAttribute('aria-label') || '';
        return label === 'Stop generating' || label.includes('Thinking');
      });
    })()
  `;
  return page.evaluate(probe);
}
138+
139+
/**
140+
* Get visible image URLs from the ChatGPT page (excluding profile/avatar images).
141+
*/
142+
/**
 * Collect the source URLs of content images currently visible on the page.
 *
 * An `<img>` is kept when all of the following hold:
 * - it is rendered (not `display: none` / `visibility: hidden`) and its
 *   box is larger than 32x32;
 * - it has a source (`currentSrc` or `src`);
 * - neither its `alt` nor its class name suggests an avatar, profile
 *   picture or icon (`alt` additionally excludes "logo");
 * - at least one intrinsic dimension is 128 px or more;
 * - its source has not already been collected.
 *
 * @param {object} page - Browser page exposing `evaluate(script)`.
 * @returns {Promise<*>} Result of the in-page script: an array of unique
 *   URLs in document order when it runs in a DOM.
 */
export async function getChatGPTVisibleImageUrls(page) {
  const collector = `
    (() => {
      const isVisible = (el) => {
        if (!(el instanceof HTMLElement)) return false;
        const style = window.getComputedStyle(el);
        if (style.display === 'none' || style.visibility === 'hidden') return false;
        const rect = el.getBoundingClientRect();
        return rect.width > 32 && rect.height > 32;
      };

      const imgs = Array.from(document.querySelectorAll('img')).filter(img =>
        img instanceof HTMLImageElement && isVisible(img)
      );

      const urls = [];
      const seen = new Set();

      for (const img of imgs) {
        const src = img.currentSrc || img.src || '';
        const alt = (img.getAttribute('alt') || '').toLowerCase();
        const cls = (img.className || '').toLowerCase();
        const width = img.naturalWidth || img.width || 0;
        const height = img.naturalHeight || img.height || 0;

        if (!src) continue;
        if (alt.includes('avatar') || alt.includes('profile') || alt.includes('logo') || alt.includes('icon')) continue;
        if (cls.includes('avatar') || cls.includes('profile') || cls.includes('icon')) continue;
        if (width < 128 && height < 128) continue;
        if (seen.has(src)) continue;

        seen.add(src);
        urls.push(src);
      }
      return urls;
    })()
  `;
  return page.evaluate(collector);
}
180+
181+
/**
182+
* Wait for new images to appear after sending a prompt.
183+
*/
184+
/**
 * Poll the page until new images have appeared and their URL list has
 * stopped changing.
 *
 * Every 3 units of `page.wait` the function checks `isGenerating`. While
 * generation is in progress nothing else is inspected. Otherwise it reads
 * the visible image URLs, drops those in `beforeUrls`, and compares the
 * remainder with the previous non-empty reading. The list is returned once
 * two such readings match, or on the final poll if any new URL was seen.
 *
 * The time budget is converted to a poll count up front, so time spent
 * inside the page evaluations is not counted against it.
 *
 * @param {object} page - Browser page exposing `wait(n)` and
 *   `evaluate(script)`.
 * @param {Iterable<string>|null|undefined} beforeUrls - URLs that were
 *   already visible before the prompt was sent.
 * @param {number} timeoutSeconds - Approximate polling budget. A
 *   non-finite value results in a single poll.
 * @returns {Promise<string[]>} Newly appeared image URLs, or an empty
 *   array when none showed up.
 */
export async function waitForChatGPTImages(page, beforeUrls, timeoutSeconds) {
  const known = new Set(beforeUrls ?? []);
  const pollIntervalSeconds = 3;
  const budget = Number(timeoutSeconds);
  // Always poll at least once, even for a missing or invalid timeout.
  const maxPolls = Number.isFinite(budget)
    ? Math.max(1, Math.ceil(budget / pollIntervalSeconds))
    : 1;

  let lastUrls = [];
  let stableCount = 0;

  for (let poll = 0; poll < maxPolls; poll += 1) {
    await page.wait(pollIntervalSeconds);

    if (await isGenerating(page)) continue;

    // The page may hand back a non-array (e.g. null); treat that as no
    // images yet.
    const visible = await getChatGPTVisibleImageUrls(page);
    const fresh = Array.isArray(visible) ? visible.filter((url) => !known.has(url)) : [];
    if (fresh.length === 0) continue;

    if (fresh.join('\n') === lastUrls.join('\n')) {
      stableCount += 1;
    } else {
      lastUrls = fresh;
      stableCount = 1;
    }

    if (stableCount >= 2 || poll === maxPolls - 1) {
      return lastUrls;
    }
  }
  return lastUrls;
}
216+
217+
/**
218+
* Export images by URL: fetch from ChatGPT backend API and convert to base64 data URLs.
219+
*/
220+
/**
 * Export images by URL as base64 data URLs, from inside the page.
 *
 * For each URL the in-page script:
 * 1. uses the URL as-is when it is already a `data:` URL;
 * 2. otherwise fetches it with credentials and converts the blob;
 * 3. if that produced nothing (failed request or thrown error), draws the
 *    matching `<img>` element onto a canvas and exports PNG.
 * URLs for which every step fails are omitted from the result.
 *
 * @param {object} page - Browser page exposing `evaluate(script, ...)`.
 * @param {string[]} urls - Image URLs to export.
 * @returns {Promise<Array<{url: string, dataUrl: string, mimeType: string,
 *   width: number, height: number}>>} One entry per exported image. Always
 *   an array: empty when `urls` is empty or not an array, or when the page
 *   returns a non-array.
 */
export async function getChatGPTImageAssets(page, urls) {
  // Nothing to export: skip the round-trip to the page.
  if (!Array.isArray(urls) || urls.length === 0) return [];

  const urlsJson = JSON.stringify(urls);
  // NOTE(review): the URL list is already inlined as JSON below; the second
  // argument is kept from the original call and looks redundant. Confirm
  // that page.evaluate ignores it before removing.
  const assets = await page.evaluate(`
    (async (targetUrls) => {
      const blobToDataUrl = (blob) => new Promise((resolve, reject) => {
        const reader = new FileReader();
        // Resolve only on a successful read; failures go through onerror.
        reader.onload = () => resolve(String(reader.result || ''));
        reader.onerror = () => reject(new Error('Failed to read blob'));
        reader.readAsDataURL(blob);
      });

      const inferMime = (value, fallbackUrl) => {
        if (value) return value;
        const lower = String(fallbackUrl || '').toLowerCase();
        if (lower.includes('.png')) return 'image/png';
        if (lower.includes('.webp')) return 'image/webp';
        if (lower.includes('.gif')) return 'image/gif';
        return 'image/jpeg';
      };

      // Query the image elements once instead of once per URL.
      const pageImages = Array.from(document.querySelectorAll('img'));
      const results = [];

      for (const targetUrl of targetUrls) {
        let dataUrl = '';
        let mimeType = 'image/jpeg';
        let width = 0;
        let height = 0;

        // Matching element, used for size info and the canvas fallback.
        const img = pageImages.find(el => (el.currentSrc || el.src || '') === targetUrl);
        if (img) {
          width = img.naturalWidth || img.width || 0;
          height = img.naturalHeight || img.height || 0;
        }

        try {
          if (String(targetUrl).startsWith('data:')) {
            dataUrl = String(targetUrl);
            mimeType = (String(targetUrl).match(/^data:([^;]+);/i) || [])[1] || 'image/png';
          } else {
            const res = await fetch(targetUrl, { credentials: 'include' });
            if (res.ok) {
              const blob = await res.blob();
              mimeType = inferMime(blob.type, targetUrl);
              dataUrl = await blobToDataUrl(blob);
            }
          }
        } catch (e) {
          // Best effort: a failed fetch/read falls through to the canvas path.
          dataUrl = '';
        }

        if (!dataUrl && img && img instanceof HTMLImageElement) {
          try {
            const canvas = document.createElement('canvas');
            canvas.width = img.naturalWidth || img.width || 512;
            canvas.height = img.naturalHeight || img.height || 512;
            const ctx = canvas.getContext('2d');
            if (ctx) {
              ctx.drawImage(img, 0, 0);
              dataUrl = canvas.toDataURL('image/png');
              mimeType = 'image/png';
            }
          } catch (e) {
            // Best effort: if the canvas export throws, this URL is skipped.
            dataUrl = '';
          }
        }

        if (dataUrl) {
          results.push({ url: String(targetUrl), dataUrl, mimeType, width, height });
        }
      }

      return results;
    })(${urlsJson})
  `, urls);

  return Array.isArray(assets) ? assets : [];
}

0 commit comments

Comments
 (0)