Skip to content

Commit c2e41e6

Browse files
committed
el: Convert input language
1 parent 02f1cde commit c2e41e6

6 files changed

Lines changed: 97 additions & 5 deletions

File tree

.github/copilot-instructions.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@
1111
## Code Minimalism
1212
- Avoid defensive code unless there is concrete evidence it is necessary.
1313
- Avoid redundant logic and repeated calls; keep only the minimal behavior required for correctness.
14+
- Do not add tests unless explicitly requested by the user.

core/Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,6 @@ derive_more = { workspace = true }
1111

1212
serde = { workspace = true }
1313
# For function calling parameters.
14-
serde_json = { workspace = true }
14+
serde_json = { workspace = true }
15+
isolang = "2.4.0"
16+
oxilangtag = "0.1.5"

core/src/language.rs

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
use std::fmt;
2+
3+
use isolang::Language;
4+
use oxilangtag::LanguageTag;
5+
6+
/// Errors produced while converting a BCP 47 language tag to an ISO 639-3 code.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LanguageCodeError {
    /// The input could not be parsed as a BCP 47 language tag.
    InvalidBcp47Tag {
        /// The tag exactly as it was passed in.
        tag: String,
        /// The parser's description of why the tag was rejected.
        message: String,
    },
    /// The tag parsed, but its primary language subtag has no known ISO 639-3 code.
    UnsupportedLanguage {
        /// The primary language subtag that could not be resolved.
        language: String,
    },
}
11+
12+
impl fmt::Display for LanguageCodeError {
13+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
14+
match self {
15+
LanguageCodeError::InvalidBcp47Tag { tag, message } => {
16+
write!(f, "Invalid BCP 47 tag '{tag}': {message}")
17+
}
18+
LanguageCodeError::UnsupportedLanguage { language } => {
19+
write!(f, "Unsupported language subtag '{language}'")
20+
}
21+
}
22+
}
23+
}
24+
25+
// Relies on the trait's default methods, so no underlying source error is exposed;
// the parser's message is carried as a plain string inside `InvalidBcp47Tag`.
impl std::error::Error for LanguageCodeError {}
26+
27+
/// Converts a BCP 47 language tag into its ISO 639-3 language code.
28+
///
29+
/// The conversion uses the primary language subtag only and ignores script, region, variant,
30+
/// and extension subtags.
31+
pub fn bcp47_to_iso639_3(tag: &str) -> Result<&'static str, LanguageCodeError> {
32+
let parsed = LanguageTag::parse(tag).map_err(|error| LanguageCodeError::InvalidBcp47Tag {
33+
tag: tag.to_string(),
34+
message: error.to_string(),
35+
})?;
36+
37+
let primary_language = parsed.primary_language();
38+
let language = match primary_language.len() {
39+
2 => Language::from_639_1(primary_language),
40+
3 => Language::from_639_3(primary_language),
41+
_ => None,
42+
};
43+
44+
language
45+
.map(|x| x.to_639_3())
46+
.ok_or_else(|| LanguageCodeError::UnsupportedLanguage {
47+
language: primary_language.to_string(),
48+
})
49+
}
50+
51+
#[cfg(test)]
mod tests {
    use super::*;

    /// Bare primary language subtags map to their ISO 639-3 codes.
    #[test]
    fn bcp47_to_iso639_3_for_primary_language_tags() {
        for (tag, expected) in [("en", "eng"), ("de", "deu"), ("fr", "fra")] {
            assert_eq!(bcp47_to_iso639_3(tag).unwrap(), expected);
        }
    }

    /// Script and region subtags do not influence the result.
    #[test]
    fn bcp47_to_iso639_3_ignores_non_primary_subtags() {
        for (tag, expected) in [("en-US", "eng"), ("zh-Hant-TW", "zho")] {
            assert_eq!(bcp47_to_iso639_3(tag).unwrap(), expected);
        }
    }

    /// A syntactically broken tag is reported as an invalid BCP 47 tag.
    #[test]
    fn bcp47_to_iso639_3_rejects_malformed_tags() {
        let result = bcp47_to_iso639_3("en--US");
        assert!(matches!(
            result,
            Err(LanguageCodeError::InvalidBcp47Tag { .. })
        ));
    }

    /// A well-formed tag whose primary subtag has no language is reported with that subtag.
    #[test]
    fn bcp47_to_iso639_3_rejects_unsupported_primary_language() {
        let expected = LanguageCodeError::UnsupportedLanguage {
            language: "qaa".to_string(),
        };
        assert_eq!(bcp47_to_iso639_3("qaa"), Err(expected));
    }
}

core/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ pub mod billing_collector;
33
mod billing_context;
44
pub mod conversation;
55
mod duration;
6+
pub mod language;
67
mod protocol;
78
mod registry;
89
pub mod service;

examples/elevenlabs-transcribe.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use context_switch_core::{
1717
service::Service,
1818
};
1919

20-
const LANGUAGE: &str = "de";
20+
const LANGUAGE: &str = "de-DE";
2121

2222
#[tokio::main]
2323
async fn main() -> Result<()> {

services/elevenlabs/src/transcribe.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use url::Url;
2121
use context_switch_core::{
2222
AudioFormat, OutputPath, Service,
2323
conversation::{Conversation, ConversationInput, ConversationOutput, Input},
24+
language::bcp47_to_iso639_3,
2425
};
2526

2627
const DEFAULT_REALTIME_HOST: &str = "wss://api.elevenlabs.io/v1/speech-to-text/realtime";
@@ -38,7 +39,7 @@ pub struct Params {
3839
pub model: Option<String>,
3940
/// Optional websocket endpoint override.
4041
pub host: Option<String>,
41-
/// Optional language hint (ISO 639-1 or ISO 639-3).
42+
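/// Optional language hint as a BCP 47 tag (for example `en-US`). Only the primary language subtag is used; it is converted to an ISO 639-3 code for the `language_code` query parameter.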
4243
pub language: Option<String>,
4344
/// Include detected language in timestamped output.
4445
/// When omitted, this integration defaults it to `false`.
@@ -262,8 +263,11 @@ fn build_endpoint(params: &Params, audio_encoding: AudioEncoding) -> Result<Url>
262263
q.append_pair("audio_format", audio_encoding.as_str());
263264
q.append_pair("commit_strategy", "vad");
264265

265-
if let Some(language) = &params.language {
266-
q.append_pair("language_code", language);
266+
if let Some(language) = params.language.as_deref() {
267+
let language_code = bcp47_to_iso639_3(language).map_err(|error| {
268+
anyhow!("Invalid ElevenLabs params.language '{language}': {error}")
269+
})?;
270+
q.append_pair("language_code", language_code);
267271
}
268272
if let Some(vad_silence_threshold_secs) = params.vad_silence_threshold_secs {
269273
q.append_pair(

0 commit comments

Comments
 (0)