Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion app/api/generate/scene-content/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import {
generateSceneContent,
buildVisionUserContent,
} from '@/lib/generation/generation-pipeline';
import type { Locale } from '@/lib/i18n/types';
import type { AgentInfo } from '@/lib/generation/generation-pipeline';
import type { SceneOutline, PdfImage, ImageMapping } from '@/lib/types/generation';
import { createLogger } from '@/lib/logger';
Expand Down Expand Up @@ -69,7 +70,7 @@ export async function POST(req: NextRequest) {
// Ensure outline has language from stageInfo (fallback for older outlines)
const outline: SceneOutline = {
...rawOutline,
language: rawOutline.language || (stageInfo?.language as 'zh-CN' | 'en-US') || 'zh-CN',
language: rawOutline.language || (stageInfo?.language as Locale) || 'zh-CN',
};

// ── Model resolution from request headers ──
Expand Down
19 changes: 14 additions & 5 deletions app/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ import { useTheme } from '@/lib/hooks/use-theme';
import { nanoid } from 'nanoid';
import { storePdfBlob } from '@/lib/utils/image-storage';
import type { UserRequirements } from '@/lib/types/generation';
import type { Locale } from '@/lib/i18n/types';
import { supportedLocales } from '@/lib/i18n/locales';
import { useSettingsStore } from '@/lib/store/settings';
import { useUserProfileStore, AVATAR_OPTIONS } from '@/lib/store/user-profile';
import {
Expand All @@ -58,7 +60,7 @@ const RECENT_OPEN_STORAGE_KEY = 'recentClassroomsOpen';
interface FormState {
pdfFile: File | null;
requirement: string;
language: 'zh-CN' | 'en-US';
language: Locale;
webSearch: boolean;
}

Expand Down Expand Up @@ -98,14 +100,21 @@ function HomePage() {
}
try {
const savedWebSearch = localStorage.getItem(WEB_SEARCH_STORAGE_KEY);
const savedLanguage = localStorage.getItem(LANGUAGE_STORAGE_KEY);
const savedLanguage = localStorage.getItem(LANGUAGE_STORAGE_KEY) as Locale | null;
const updates: Partial<FormState> = {};
if (savedWebSearch === 'true') updates.webSearch = true;
if (savedLanguage === 'zh-CN' || savedLanguage === 'en-US') {

const isSupported = (lang: string | null): lang is Locale =>
!!lang && supportedLocales.some((l) => l.code === lang);

if (isSupported(savedLanguage)) {
updates.language = savedLanguage;
} else {
const detected = navigator.language?.startsWith('zh') ? 'zh-CN' : 'en-US';
updates.language = detected;
const browserLang = navigator.language;
const matched =
supportedLocales.find((l) => browserLang.startsWith(l.code.split('-')[0]))?.code ||
(browserLang.startsWith('zh') ? 'zh-CN' : 'en-US');
updates.language = matched as Locale;
}
if (Object.keys(updates).length > 0) {
setForm((prev) => ({ ...prev, ...updates }));
Expand Down
24 changes: 17 additions & 7 deletions components/generation/generation-toolbar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,17 @@ import type { ProviderId } from '@/lib/ai/providers';
import type { SettingsSection } from '@/lib/types/settings';
import { MediaPopover } from '@/components/generation/media-popover';

import type { Locale } from '@/lib/i18n/types';
import { supportedLocales } from '@/lib/i18n/locales';

// ─── Constants ───────────────────────────────────────────────
const MAX_PDF_SIZE_MB = 50;
const MAX_PDF_SIZE_BYTES = MAX_PDF_SIZE_MB * 1024 * 1024;

// ─── Types ───────────────────────────────────────────────────
export interface GenerationToolbarProps {
language: 'zh-CN' | 'en-US';
onLanguageChange: (lang: 'zh-CN' | 'en-US') => void;
language: Locale;
onLanguageChange: (lang: Locale) => void;
webSearch: boolean;
onWebSearchChange: (v: boolean) => void;
onSettingsOpen: (section?: SettingsSection) => void;
Expand Down Expand Up @@ -64,6 +67,16 @@ export function GenerationToolbar({
const fileInputRef = useRef<HTMLInputElement>(null);
const [isDragging, setIsDragging] = useState(false);

// Step to the locale listed after the current one in supportedLocales,
// wrapping back to the first entry once the end of the list is reached.
const cycleLanguage = () => {
  const position = supportedLocales.findIndex((entry) => entry.code === language);
  // findIndex yields -1 when the current language is not listed, so the
  // successor computed below is then the first supported locale.
  const successor = supportedLocales[(position + 1) % supportedLocales.length];
  onLanguageChange(successor.code);
};

const currentLocaleInfo =
supportedLocales.find((l) => l.code === language) || supportedLocales[0];

// Check if the selected web search provider has a valid config (API key or server-configured)
const webSearchProvider = WEB_SEARCH_PROVIDERS[webSearchProviderId];
const webSearchConfig = webSearchProvidersConfig[webSearchProviderId];
Expand Down Expand Up @@ -360,12 +373,9 @@ export function GenerationToolbar({
{/* ── Language pill ── */}
<Tooltip>
<TooltipTrigger asChild>
<button
onClick={() => onLanguageChange(language === 'zh-CN' ? 'en-US' : 'zh-CN')}
className={pillMuted}
>
<button onClick={cycleLanguage} className={pillMuted}>
<Globe className="size-3.5" />
<span>{language === 'zh-CN' ? '中文' : 'EN'}</span>
<span>{currentLocaleInfo.shortLabel}</span>
</button>
</TooltipTrigger>
<TooltipContent>{t('toolbar.languageHint')}</TooltipContent>
Expand Down
134 changes: 4 additions & 130 deletions components/generation/media-popover.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
'use client';

import { useState, useCallback, useMemo, useEffect, Fragment } from 'react';

Check warning on line 3 in components/generation/media-popover.tsx

View workflow job for this annotation

GitHub Actions / Lint, Typecheck & Unit Tests

'useEffect' is defined but never used. Allowed unused vars must match /^_/u

Check warning on line 3 in components/generation/media-popover.tsx

View workflow job for this annotation

GitHub Actions / Lint, Typecheck & Unit Tests

'useCallback' is defined but never used. Allowed unused vars must match /^_/u
import type { LucideIcon } from 'lucide-react';
import {
Image as ImageIcon,
Expand All @@ -9,10 +9,8 @@
Mic,
SlidersHorizontal,
ChevronRight,
Play,
Loader2,
} from 'lucide-react';
import { toast } from 'sonner';

Check warning on line 13 in components/generation/media-popover.tsx

View workflow job for this annotation

GitHub Actions / Lint, Typecheck & Unit Tests

'toast' is defined but never used. Allowed unused vars must match /^_/u
import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover';
import {
Select,
Expand All @@ -24,18 +22,17 @@
SelectTrigger,
SelectValue,
} from '@/components/ui/select';
import { Slider } from '@/components/ui/slider';
import { Switch } from '@/components/ui/switch';
import { cn } from '@/lib/utils';
import { useI18n } from '@/lib/hooks/use-i18n';
import { useSettingsStore } from '@/lib/store/settings';
import { useTTSPreview } from '@/lib/audio/use-tts-preview';
import { IMAGE_PROVIDERS } from '@/lib/media/image-providers';
import { VIDEO_PROVIDERS } from '@/lib/media/video-providers';
import { TTS_PROVIDERS, getTTSVoices } from '@/lib/audio/constants';
import { TTS_PROVIDERS } from '@/lib/audio/constants';

Check warning on line 32 in components/generation/media-popover.tsx

View workflow job for this annotation

GitHub Actions / Lint, Typecheck & Unit Tests

'TTS_PROVIDERS' is defined but never used. Allowed unused vars must match /^_/u
import { ASR_PROVIDERS, getASRSupportedLanguages } from '@/lib/audio/constants';
import type { ImageProviderId, VideoProviderId } from '@/lib/media/types';
import type { TTSProviderId, ASRProviderId } from '@/lib/audio/types';
import type { ASRProviderId } from '@/lib/audio/types';
import type { SettingsSection } from '@/lib/types/settings';

interface MediaPopoverProps {
Expand All @@ -59,7 +56,7 @@

type TabId = 'image' | 'video' | 'tts' | 'asr';

const LANG_LABELS: Record<string, string> = {

Check warning on line 59 in components/generation/media-popover.tsx

View workflow job for this annotation

GitHub Actions / Lint, Typecheck & Unit Tests

'LANG_LABELS' is assigned a value but never used. Allowed unused vars must match /^_/u
zh: '中文',
en: 'English',
ja: '日本語',
Expand All @@ -81,35 +78,11 @@
{ id: 'asr', icon: Mic, label: 'ASR' },
];

/** Translation key for each TTS provider's display name (mirrors audio-settings.tsx). */
const TTS_PROVIDER_NAME_KEYS: Record<TTSProviderId, string> = {
  'openai-tts': 'settings.providerOpenAITTS',
  'azure-tts': 'settings.providerAzureTTS',
  'glm-tts': 'settings.providerGLMTTS',
  'qwen-tts': 'settings.providerQwenTTS',
  'doubao-tts': 'settings.providerDoubaoTTS',
  'elevenlabs-tts': 'settings.providerElevenLabsTTS',
  'minimax-tts': 'settings.providerMiniMaxTTS',
  'browser-native-tts': 'settings.providerBrowserNativeTTS',
};

/**
 * Localized TTS provider name (mirrors audio-settings.tsx).
 *
 * @param providerId - Provider whose display name is wanted.
 * @param t - Translation lookup; called once, with the provider's key.
 * @returns The translated name, or `providerId` itself when the id has no
 *   entry in the key table or the translation comes back empty.
 */
function getTTSProviderName(providerId: TTSProviderId, t: (key: string) => string): string {
  // Own-property check: a plain object lookup would otherwise resolve inherited
  // members such as `toString` for ids that are not real providers.
  if (!Object.prototype.hasOwnProperty.call(TTS_PROVIDER_NAME_KEYS, providerId)) {
    return providerId;
  }
  // `||` (not `??`) on purpose: an empty translation also falls back to the id.
  return t(TTS_PROVIDER_NAME_KEYS[providerId]) || providerId;
}

/**
 * Pick the label to show for a voice whose name follows "ChineseName (English)".
 *
 * @param name - Raw voice name.
 * @param lang - Current UI locale code.
 * @returns For `en-US`, the text inside the first parenthesised group of
 *   `name`; in every other case (different locale, or no non-empty
 *   parenthesised group) `name` unchanged.
 */
function getVoiceDisplayName(name: string, lang: string): string {
  if (lang !== 'en-US') return name;
  const parenthesised = /\(([^)]+)\)/.exec(name);
  return parenthesised?.[1] ?? name;
}

export function MediaPopover({ onSettingsOpen }: MediaPopoverProps) {
const { t, locale } = useI18n();
const { t } = useI18n();
const [open, setOpen] = useState(false);
const [activeTab, setActiveTab] = useState<TabId>('image');
const { previewing, startPreview, stopPreview } = useTTSPreview();
const { stopPreview } = useTTSPreview();

// ─── Store ───
const imageGenerationEnabled = useSettingsStore((s) => s.imageGenerationEnabled);
Expand All @@ -133,14 +106,6 @@
const setVideoProvider = useSettingsStore((s) => s.setVideoProvider);
const setVideoModelId = useSettingsStore((s) => s.setVideoModelId);

const ttsProviderId = useSettingsStore((s) => s.ttsProviderId);
const ttsVoice = useSettingsStore((s) => s.ttsVoice);
const ttsSpeed = useSettingsStore((s) => s.ttsSpeed);
const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig);
const setTTSProvider = useSettingsStore((s) => s.setTTSProvider);
const setTTSVoice = useSettingsStore((s) => s.setTTSVoice);
const setTTSSpeed = useSettingsStore((s) => s.setTTSSpeed);

const asrProviderId = useSettingsStore((s) => s.asrProviderId);
const asrLanguage = useSettingsStore((s) => s.asrLanguage);
const asrProvidersConfig = useSettingsStore((s) => s.asrProvidersConfig);
Expand All @@ -167,18 +132,6 @@
needsKey: boolean,
) => !needsKey || !!configs[id]?.apiKey || !!configs[id]?.isServerConfigured;

const ttsSpeedRange = TTS_PROVIDERS[ttsProviderId]?.speedRange;

// ─── Dynamic browser voices ───
const [browserVoices, setBrowserVoices] = useState<SpeechSynthesisVoice[]>([]);
useEffect(() => {
if (typeof window === 'undefined' || !window.speechSynthesis) return;
const load = () => setBrowserVoices(window.speechSynthesis.getVoices());
load();
window.speechSynthesis.addEventListener('voiceschanged', load);
return () => window.speechSynthesis.removeEventListener('voiceschanged', load);
}, []);

// ─── Grouped select data (only available providers) ───
const imageGroups = useMemo(
() =>
Expand Down Expand Up @@ -214,85 +167,6 @@
[videoProvidersConfig],
);

// TTS: grouped by provider, voices as items (matching Image/Video pattern)
// Browser-native voices are split into sub-groups by language.
const ttsGroups = useMemo(() => {
const groups: SelectGroupData[] = [];

for (const p of Object.values(TTS_PROVIDERS)) {
if (p.requiresApiKey && !cfgOk(ttsProvidersConfig, p.id, p.requiresApiKey)) continue;

const providerName = getTTSProviderName(p.id, t);

// For browser-native-tts, split voices by language
if (p.id === 'browser-native-tts' && browserVoices.length > 0) {
const byLang = new Map<string, SpeechSynthesisVoice[]>();
for (const v of browserVoices) {
const langKey = v.lang.split('-')[0]; // "zh-CN" → "zh"
if (!byLang.has(langKey)) byLang.set(langKey, []);
byLang.get(langKey)!.push(v);
}
for (const [langKey, voices] of byLang) {
const langLabel = LANG_LABELS[langKey] || langKey;
groups.push({
groupId: p.id,
groupName: `${providerName} · ${langLabel}`,
groupIcon: p.icon,
available: true,
items: voices.map((v) => ({ id: v.voiceURI, name: v.name })),
});
}
continue;
}

groups.push({
groupId: p.id,
groupName: providerName,
groupIcon: p.icon,
available: true,
items: getTTSVoices(p.id).map((v) => ({
id: v.id,
name: getVoiceDisplayName(v.name, locale),
})),
});
}

return groups;
}, [ttsProvidersConfig, locale, browserVoices, t]);

// TTS preview
const handlePreview = useCallback(async () => {
if (previewing) {
stopPreview();
return;
}
try {
const providerConfig = ttsProvidersConfig[ttsProviderId];
await startPreview({
text: t('settings.ttsTestTextDefault'),
providerId: ttsProviderId,
modelId: providerConfig?.modelId,
voice: ttsVoice,
speed: ttsSpeed,
apiKey: providerConfig?.apiKey,
baseUrl: providerConfig?.baseUrl,
});
} catch (error) {
const message =
error instanceof Error && error.message ? error.message : t('settings.ttsTestFailed');
toast.error(message);
}
}, [
previewing,
startPreview,
stopPreview,
t,
ttsProviderId,
ttsProvidersConfig,
ttsSpeed,
ttsVoice,
]);

// ASR: only available providers
const asrGroups = useMemo(
() =>
Expand Down
3 changes: 3 additions & 0 deletions components/roundtable/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@
setIsInputOpen(false);
};

const handleToggleInput = () => {

Check warning on line 369 in components/roundtable/index.tsx

View workflow job for this annotation

GitHub Actions / Lint, Typecheck & Unit Tests

The 'handleToggleInput' function makes the dependencies of useEffect Hook (at line 453) change on every render. To fix this, wrap the definition of 'handleToggleInput' in its own useCallback() Hook
if (isSendCooldown) return;
if (!isInputOpen) {
onInputActivate?.();
Expand All @@ -379,7 +379,7 @@
}
};

const handleToggleVoice = () => {

Check warning on line 382 in components/roundtable/index.tsx

View workflow job for this annotation

GitHub Actions / Lint, Typecheck & Unit Tests

The 'handleToggleVoice' function makes the dependencies of useEffect Hook (at line 453) change on every render. To fix this, wrap the definition of 'handleToggleVoice' in its own useCallback() Hook
if (isVoiceOpen) {
if (isRecording) {
stopRecording();
Expand Down Expand Up @@ -462,6 +462,9 @@
isVoiceOpen,
isRecording,
isProcessing,
cancelRecording,
handleToggleInput,
handleToggleVoice,
]);

const isPresentationInteractionActive = isInputOpen || isVoiceOpen || isRecording || isProcessing;
Expand Down
12 changes: 12 additions & 0 deletions lib/audio/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,18 @@ export const TTS_PROVIDERS: Record<TTSProviderId, TTSProviderConfig> = {
gender: 'female',
},
{ id: 'en-US-GuyNeural', name: 'Guy', language: 'en-US', gender: 'male' },
{
id: 'de-DE-KatjaNeural',
name: 'Katja (女)',
language: 'de-DE',
gender: 'female',
},
{
id: 'de-DE-ConradNeural',
name: 'Conrad (男)',
language: 'de-DE',
gender: 'male',
},
],
supportedFormats: ['mp3', 'wav', 'ogg'],
speedRange: { min: 0.5, max: 2.0, default: 1.0 },
Expand Down
9 changes: 7 additions & 2 deletions lib/audio/tts-providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -212,11 +212,16 @@ async function generateAzureTTS(
): Promise<TTSGenerationResult> {
const baseUrl = config.baseUrl || TTS_PROVIDERS['azure-tts'].defaultBaseUrl;

// Extract language from voice ID (e.g., "zh-CN-XiaoxiaoNeural" -> "zh-CN")
// Fallback to "zh-CN" if no match
const langMatch = config.voice.match(/^[a-z]{2}-[A-Z]{2}/);
const lang = langMatch ? langMatch[0] : 'zh-CN';

// Build SSML
const rate = config.speed ? `${((config.speed - 1) * 100).toFixed(0)}%` : '0%';
const ssml = `
<speak version='1.0' xml:lang='zh-CN'>
<voice xml:lang='zh-CN' name='${config.voice}'>
<speak version='1.0' xml:lang='${lang}'>
<voice xml:lang='${lang}' name='${config.voice}'>
<prosody rate='${rate}'>${escapeXml(text)}</prosody>
</voice>
</speak>
Expand Down
21 changes: 17 additions & 4 deletions lib/generation/outline-generator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,10 @@ export async function generateSceneOutlinesFromRequirements(
},
): Promise<GenerationResult<SceneOutline[]>> {
// Build available images description for the prompt
let availableImagesText =
requirements.language === 'zh-CN' ? '无可用图片' : 'No images available';
let availableImagesText = 'No images available';
if (requirements.language === 'zh-CN') availableImagesText = '无可用图片';
if (requirements.language === 'ja-JP') availableImagesText = '利用可能な画像はありません';
if (requirements.language === 'de-DE') availableImagesText = 'Keine Bilder verfügbar';
let visionImages: Array<{ id: string; src: string }> | undefined;

if (pdfImages && pdfImages.length > 0) {
Expand Down Expand Up @@ -103,12 +105,23 @@ export async function generateSceneOutlinesFromRequirements(
? pdfText.substring(0, MAX_PDF_CONTENT_CHARS)
: requirements.language === 'zh-CN'
? '无'
: 'None',
: requirements.language === 'ja-JP'
? 'なし'
: requirements.language === 'de-DE'
? 'Keine'
: 'None',
availableImages: availableImagesText,
userProfile: userProfileText,
mediaGenerationPolicy,
researchContext:
options?.researchContext || (requirements.language === 'zh-CN' ? '无' : 'None'),
options?.researchContext ||
(requirements.language === 'zh-CN'
? '无'
: requirements.language === 'ja-JP'
? 'なし'
: requirements.language === 'de-DE'
? 'Keine'
: 'None'),
// Server-side generation populates this via options; client-side populates via formatTeacherPersonaForPrompt
teacherContext: options?.teacherContext || '',
});
Expand Down
Loading
Loading