diff --git a/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs b/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs index 2ea0f4271..6d16d02d5 100644 --- a/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/ArpasingPlusPhonemizer.cs @@ -31,6 +31,15 @@ public ArpasingPlusPhonemizer() { "eu", "oe", "yw", "yx", "wx", "ox", "ex", "ea", "ia", "oa", "ua", "ean", "eam", "eang" }; this.consonants = "b,ch,d,dh,dr,dx,f,g,hh,jh,k,l,m,n,ng,p,q,r,s,sh,t,th,tr,v,w,y,z".Split(','); + this.diphthongTails = new Dictionary() { + { "ay", "y" }, + { "ey", "y" }, + { "oy", "y" }, + { "aw", "w" }, + { "ow", "w" }, + { "er", "r" }, + { "iy", "y" }, + }; } protected override string[] GetVowels() => vowels; @@ -72,28 +81,6 @@ public ArpasingPlusPhonemizer() { //{"er","ah"}, }; - private readonly Dictionary vvDiphthongExceptions = - new Dictionary() { - {"aw","ah"}, - {"ow","ao"}, - {"uw","uh"}, - {"ay","ah"}, - {"ey","eh"}, - {"oy","ao"}, - }; - - private readonly Dictionary vvExceptions = - new Dictionary() { - {"aw","w"}, - {"ow","w"}, - {"uw","w"}, - {"ay","y"}, - {"ey","y"}, - {"oy","y"}, - {"iy","y"}, - {"er","r"}, - }; - private readonly string[] ccvException = { "ng", "dh" }; private readonly string[] RomajiException = { "a", "e", "i", "o", "u" }; @@ -175,28 +162,8 @@ protected override string[] GetSymbols(Note note) { return finalProcessedPhonemes.ToArray(); } - protected override IG2p LoadBaseDictionary() { - var g2ps = new List(); - // LOAD DICTIONARY FROM FOLDER - string path = Path.Combine(PluginDir, YamlFileName); - if (!File.Exists(path)) { - Directory.CreateDirectory(PluginDir); - File.WriteAllBytes(path, YamlTemplate); - } - // LOAD DICTIONARY FROM SINGER FOLDER - if (singer != null && singer.Found && singer.Loaded) { - string file = Path.Combine(singer.Location, YamlFileName); - if (File.Exists(file)) { - try { - g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(file)).Build()); - } catch (Exception e) { - Log.Error(e, $"Failed to load 
{file}"); - } - } - } - g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(path)).Build()); - g2ps.Add(new ArpabetPlusG2p()); - return new G2pFallbacks(g2ps.ToArray()); + protected override IG2p[] GetBaseG2ps() { + return new IG2p[] { new ArpabetPlusG2p() }; } public override void SetSinger(USinger singer) { @@ -279,37 +246,49 @@ protected override List ProcessSyllable(Syllable syllable) { // [V V] or [V C][C V]/[V] else if (syllable.IsVV) { if (!CanMakeAliasExtension(syllable)) { - basePhoneme = $"{prevV} {v}"; - if (!HasOto(basePhoneme, syllable.vowelTone) && !HasOto(ValidateAlias(basePhoneme), syllable.vowelTone) && vvExceptions.ContainsKey(prevV) && prevV != v) { - // VV IS NOT PRESENT, CHECKS VVEXCEPTIONS LOGIC - //var vc = $"{prevV}{vvExceptions[prevV]}"; - var vc = AliasFormat($"{vvExceptions[prevV]}", "vcEx", syllable.vowelTone, prevV); - phonemes.Add(vc); - basePhoneme = ValidateAlias(AliasFormat($"{vvExceptions[prevV]} {v}", "dynMid", syllable.vowelTone, "")); + if (HasOto($"{prevV} {v}", syllable.vowelTone) || HasOto(ValidateAlias($"{prevV} {v}"), syllable.vowelTone)) { + basePhoneme = $"{prevV} {v}"; + } else if (HasOto($"{prevV}{v}", syllable.vowelTone) || HasOto(ValidateAlias($"{prevV}{v}"), syllable.vowelTone)) { + basePhoneme = $"{prevV}{v}"; + } + + // Diphthong Fallbacks + else if (diphthongSplits.ContainsKey(prevV) || diphthongTails.ContainsKey(prevV)) { + string cv = ""; + if (diphthongSplits.ContainsKey(prevV)) { + var splitOverride = diphthongSplits[prevV]; + var vc = AliasFormat(splitOverride[0].Replace("{v}", v), "vcEx", syllable.tone, prevV); + cv = AliasFormat(splitOverride[1].Replace("{v}", v), "dynMid", syllable.vowelTone, ""); + TryAddPhoneme(phonemes, syllable.tone, vc, ValidateAlias(vc)); + } + else { // Default YAML diphthong logic + var tail = diphthongTails[prevV]; + var vcSpace = AliasFormat($"{prevV} {tail}", "vcEx", syllable.tone, prevV); + var vcNoSpace = AliasFormat($"{prevV}{tail}", "vcEx", syllable.tone, prevV); + cv 
= AliasFormat($"{tail} {v}", "dynMid", syllable.vowelTone, ""); + TryAddPhoneme(phonemes, syllable.tone, vcSpace, ValidateAlias(vcSpace), vcNoSpace, ValidateAlias(vcNoSpace)); + } + + if (HasOto(cv, syllable.vowelTone) || HasOto(ValidateAlias(cv), syllable.vowelTone)) { + basePhoneme = cv; + } else if (HasOto(v, syllable.vowelTone) || HasOto(ValidateAlias(v), syllable.vowelTone)) { + basePhoneme = v; + } else { + basePhoneme = ValidateAlias(AliasFormat($"- {v}", "dynMid", syllable.vowelTone, "")); + phonemes.Add(AliasFormat($"{prevV} -", "dynMid", syllable.tone, "")); + } } else { - { - if (HasOto($"{prevV} {v}", syllable.vowelTone) || HasOto(ValidateAlias($"{prevV} {v}"), syllable.vowelTone)) { - basePhoneme = $"{prevV} {v}"; - } else if (HasOto($"{prevV}{v}", syllable.vowelTone) || HasOto(ValidateAlias($"{prevV}{v}"), syllable.vowelTone)) { - basePhoneme = $"{prevV}{v}"; - } else if (HasOto(v, syllable.vowelTone) || HasOto(ValidateAlias(v), syllable.vowelTone)) { - basePhoneme = v; - } else { - basePhoneme = AliasFormat($"- {v}", "dynMid", syllable.vowelTone, ""); - phonemes.Add(AliasFormat($"{prevV} -", "dynMid", syllable.vowelTone, "")); - } + if (HasOto(v, syllable.vowelTone) || HasOto(ValidateAlias(v), syllable.vowelTone)) { + basePhoneme = v; + } else { + basePhoneme = ValidateAlias(AliasFormat($"- {v}", "dynMid", syllable.vowelTone, "")); + phonemes.Add(AliasFormat($"{prevV} -", "dynMid", syllable.tone, "")); } } - // EXTEND AS [V] - } else if (HasOto($"{v}", syllable.vowelTone) && HasOto(ValidateAlias($"{v}"), syllable.vowelTone) || missingVphonemes.ContainsKey(prevV)) { - basePhoneme = v; - } else if (!HasOto(v, syllable.vowelTone) && !HasOto(ValidateAlias(v), syllable.vowelTone) && vvDiphthongExceptions.ContainsKey(prevV)) { - basePhoneme = $"{vvDiphthongExceptions[prevV]} {vvDiphthongExceptions[prevV]}"; - } else { - // PREVIOUS ALIAS WILL EXTEND as [V V] + } + else { basePhoneme = null; } - // [- CV/C V] or [- C][CV/C V] } else if 
(syllable.IsStartingCVWithOneConsonant) { var rcv = $"- {cc[0]} {v}"; @@ -510,6 +489,9 @@ protected override List ProcessSyllable(Syllable syllable) { cc1 = $"{cc[i]} {string.Join("", cc.Skip(i + 1))}"; lastC = i; } + if (liquid.Contains(cc[i + 1]) || semivowel.Contains(cc[i + 1])) { + glides(cc1); + } // CV } else if (CurrentWordCc.Length == 1 && PreviousWordCc.Length == 1) { basePhoneme = (AliasFormat($"{cc.Last()} {v}", "dynMid", syllable.vowelTone, "")); @@ -571,6 +553,9 @@ protected override List ProcessSyllable(Syllable syllable) { if (!phoneticHint && (HasOto($"{cc[i]} {string.Join("", cc.Skip(i + 1))}", syllable.tone))) { cc1 = $"{cc[i]} {string.Join("", cc.Skip(i + 1))}"; } + if (liquid.Contains(cc[i + 1]) || semivowel.Contains(cc[i + 1])) { + glides(cc1); + } // CV } else if (CurrentWordCc.Length == 1 && PreviousWordCc.Length == 1) { basePhoneme = (AliasFormat($"{cc.Last()} {v}", "dynMid", syllable.vowelTone, "")); @@ -862,7 +847,6 @@ protected override List ProcessEnding(Ending ending) { } private string AliasFormat(string alias, string type, int tone, string prevV, string t = "-") { var aliasFormats = new Dictionary { - // Define alias formats for different types { "dynStart", new string[] { "" } }, { "dynMid", new string[] { "" } }, { "dynMid_vv", new string[] { "" } }, @@ -883,12 +867,10 @@ private string AliasFormat(string alias, string type, int tone, string prevV, st { "cc1_mix", new string[] { "", " -", "-", " R", "_", "- ", "-" } }, }; - // Check if the given type exists in the aliasFormats dictionary if (!aliasFormats.ContainsKey(type) && !type.Contains("dynamic")) { return alias; } - // Handle dynamic variations when type contains "dynamic" if (type.Contains("dynStart")) { string consonant = ""; string vowel = ""; @@ -900,10 +882,7 @@ private string AliasFormat(string alias, string type, int tone, string prevV, st } else { consonant = alias; } - - // Handle the alias with space and without space var dynamicVariations = new List { - // 
Variations with space, dash, and underscore $"- {consonant}{vowel}", // "- CV" $"- {consonant} {vowel}", // "- C V" $"-{consonant} {vowel}", // "-C V" @@ -911,10 +890,12 @@ private string AliasFormat(string alias, string type, int tone, string prevV, st $"-{consonant}_{vowel}", // "-C_V" $"- {consonant}_{vowel}", // "- C_V" }; - // Check each dynamically generated format + foreach (var variation in dynamicVariations) { - if (HasOto(variation, tone) || HasOto(ValidateAlias(variation), tone)) { + if (HasOto(variation, tone)) { return variation; + } else if (HasOto(ValidateAlias(variation), tone)) { + return ValidateAlias(variation); } } } @@ -922,7 +903,7 @@ private string AliasFormat(string alias, string type, int tone, string prevV, st if (type.Contains("dynMid")) { string consonant = ""; string vowel = ""; - // If the alias contains a space, split it into consonant and vowel + if (alias.Contains(" ")) { var parts = alias.Split(' '); consonant = parts[0]; @@ -935,10 +916,12 @@ private string AliasFormat(string alias, string type, int tone, string prevV, st $"{consonant} {vowel}", // "C V" $"{consonant}_{vowel}", // "C_V" }; - // Check each dynamically generated format + foreach (var variation1 in dynamicVariations1) { - if (HasOto(variation1, tone) || HasOto(ValidateAlias(variation1), tone)) { + if (HasOto(variation1, tone)) { return variation1; + } else if (HasOto(ValidateAlias(variation1), tone)) { + return ValidateAlias(variation1); } } } @@ -946,7 +929,7 @@ private string AliasFormat(string alias, string type, int tone, string prevV, st if (type.Contains("dynEnd")) { string consonant = ""; string vowel = ""; - // If the alias contains a space, split it into consonant and vowel + if (alias.Contains(" ")) { var parts = alias.Split(' '); consonant = parts[1]; @@ -960,10 +943,12 @@ private string AliasFormat(string alias, string type, int tone, string prevV, st $"{vowel}{consonant}-", // "VC-" $"{vowel} {consonant} -", // "V C -" }; - // Check each dynamically 
generated format + foreach (var variation1 in dynamicVariations1) { - if (HasOto(variation1, tone) || HasOto(ValidateAlias(variation1), tone)) { + if (HasOto(variation1, tone)) { return variation1; + } else if (HasOto(ValidateAlias(variation1), tone)) { + return ValidateAlias(variation1); } } } @@ -981,9 +966,11 @@ private string AliasFormat(string alias, string type, int tone, string prevV, st } else { aliasFormat = $"{format}{alias}"; } - // Check if the formatted alias exists - if (HasOto(aliasFormat, tone) || HasOto(ValidateAlias(aliasFormat), tone)) { + + if (HasOto(aliasFormat, tone)) { return aliasFormat; + } else if (HasOto(ValidateAlias(aliasFormat), tone)) { + return ValidateAlias(aliasFormat); } } return alias; @@ -1699,7 +1686,6 @@ protected override string ValidateAlias(string alias) { } return base.ValidateAlias(alias); } - bool PhonemeIsPresent(string alias, string phoneme) { if (string.IsNullOrEmpty(alias) || string.IsNullOrEmpty(phoneme)) return false; @@ -1711,112 +1697,22 @@ bool PhonemeIsPresent(string alias, string phoneme) { return alias.EndsWith(phoneme); } - private bool PhonemeHasEndingSuffix(string alias, string phoneme) { - var escapedPhoneme = Regex.Escape(phoneme); - if (Regex.IsMatch(alias, $@"\b{escapedPhoneme}\b\s*-") || - Regex.IsMatch(alias, $@"\b{escapedPhoneme}\b-")) { - return true; - } - if (Regex.IsMatch(alias, $@"\b{escapedPhoneme}\b R")) { - return true; - } - return false; - } - - protected override double GetTransitionBasicLengthMs(string alias = "") { - //I wish these were automated instead :') - double transitionMultiplier = 1.0; // Default multiplier - - var fricative_def = 2.3; - var aspirate_def = 1.3; - var semivowel_def = 1.2; - var liquid_def = 1.5; - var nasal_def = 1.5; - var stop_def = 1.8; - var tap_def = 0.5; - var affricate_def = 1.5; - - var allConsonants = fricative.Concat(aspirate) - .Concat(semivowel) - .Concat(liquid) - .Concat(nasal) - .Concat(stop) - .Concat(tap) - .Concat(affricate) - .Distinct(); // 
Ensure no duplicates - - foreach (var c in allConsonants) { - if (PhonemeHasEndingSuffix(alias, c)) { - return base.GetTransitionBasicLengthMs() * 0.5; - } - } - - foreach (var v in vowels) { - if (alias.EndsWith("-")) { - return base.GetTransitionBasicLengthMs() * 0.5; - } - } - - // consonant timings + // Endings has 50 ticks gap + protected override bool NoGap => true; + protected override double GetTransitionBasicLengthMs(string alias, int tone, PhonemeAttributes attr) { + double otoLength = GetTransitionBasicLengthMsByOto(alias, tone, attr); var sortedOverrides = PhonemeOverrides.OrderByDescending(kv => kv.Key.Length); foreach (var kvp in sortedOverrides) { - var overridePhoneme = kvp.Key; - var overrideValue = kvp.Value; - if (PhonemeIsPresent(alias, overridePhoneme)) { - return base.GetTransitionBasicLengthMs() * overrideValue; - } - } - - foreach (var c in fricative) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * fricative_def; - } - } - - foreach (var c in aspirate) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * aspirate_def; - } - } - - foreach (var c in semivowel) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * semivowel_def; - } - } - - foreach (var c in liquid) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * liquid_def; - } - } - - foreach (var c in nasal) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * nasal_def; - } - } - - foreach (var c in stop) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * stop_def; - } - } - - foreach (var c in tap) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * tap_def; - } - } + var symbol = kvp.Key; + var value = kvp.Value; - foreach (var c in affricate) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * affricate_def; + if (Regex.IsMatch(alias, 
$@"(? true; + + protected override double GetTransitionBasicLengthMs(string alias, int tone, PhonemeAttributes attr) { + double otoLength = GetTransitionBasicLengthMsByOto(alias, tone, attr); + + var parts = alias.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries); + bool isVcv = false; + + if (parts.Length == 2) { + var startingVowels = new[] { "a", "i", "u", "e", "o", "n", "N", "-" }; + var endingVowels = vowels; + + // First part must be a vowel (or a rest) + if (startingVowels.Contains(parts[0])) { + string cv = parts[1]; + + // Second part must end in a vowel (Romaji CV) OR be Japanese (Hiragana/Katakana) + bool isRomajiVcv = endingVowels.Contains(cv.Last().ToString()); + bool isJapaneseVcv = cv.Any(c => c > 0xFF); + + if (isRomajiVcv || isJapaneseVcv) { + isVcv = true; + } + } + } + + if (isVcv) { + return GetTransitionBasicLengthMsByConstant() * 1.0; + } + + return otoLength; + } } } diff --git a/OpenUtau.Plugin.Builtin/EStoJAPhonemizer.cs b/OpenUtau.Plugin.Builtin/EStoJAPhonemizer.cs index 8fe5cca1d..2e837ed4f 100644 --- a/OpenUtau.Plugin.Builtin/EStoJAPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/EStoJAPhonemizer.cs @@ -669,5 +669,39 @@ private string ToHiragana(string romaji) { hiragana = hiragana.Replace("ゔ", "ヴ"); return hiragana; } + + // Endings has 50 ticks gap + protected override bool NoGap => true; + + protected override double GetTransitionBasicLengthMs(string alias, int tone, PhonemeAttributes attr) { + double otoLength = GetTransitionBasicLengthMsByOto(alias, tone, attr); + + var parts = alias.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries); + bool isVcv = false; + + if (parts.Length == 2) { + var startingVowels = new[] { "a", "i", "u", "e", "o", "n", "N", "-" }; + var endingVowels = vowels; + + // First part must be a vowel (or a rest) + if (startingVowels.Contains(parts[0])) { + string cv = parts[1]; + + // Second part must end in a vowel (Romaji CV) OR be Japanese (Hiragana/Katakana) + bool isRomajiVcv = 
endingVowels.Contains(cv.Last().ToString()); + bool isJapaneseVcv = cv.Any(c => c > 0xFF); + + if (isRomajiVcv || isJapaneseVcv) { + isVcv = true; + } + } + } + + if (isVcv) { + return GetTransitionBasicLengthMsByConstant() * 1.0; + } + + return otoLength; + } } } diff --git a/OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs b/OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs index 3c9f81aaa..e4dda5465 100644 --- a/OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs @@ -173,38 +173,21 @@ public EnXSampaPhonemizer() { {"@u","u"}, {"3", "r"} }; - protected override IG2p LoadBaseDictionary() { - var g2ps = new List(); - - // Load dictionary from plugin folder. - string path = Path.Combine(PluginDir, YamlFileName); - if (!File.Exists(path)) { - Directory.CreateDirectory(PluginDir); - File.WriteAllBytes(path, YamlTemplate); - } - g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(path)).Build()); - - // Load dictionary from singer folder. - if (singer != null && singer.Found && singer.Loaded) { - string file = Path.Combine(singer.Location, YamlFileName); - if (File.Exists(file)) { - try { - g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(file)).Build()); - } catch (Exception e) { - Log.Error(e, $"Failed to load {file}"); - } - } - } - g2ps.Add(new ArpabetG2p()); - return new G2pFallbacks(g2ps.ToArray()); + protected override IG2p[] GetBaseG2ps() { + return new IG2p[] { new ArpabetG2p() }; } - protected override string[] GetSymbols(Note note) { string[] original = base.GetSymbols(note); if (original == null) { return null; } List finalProcessedPhonemes = new List(); + + for (int i = 0; i < original.Length; i++) { + if (dictionaryReplacements.TryGetValue(original[i], out string replaced)) { + original[i] = replaced; + } + } // Splits diphthongs and affricates if not present in the bank string[] diphthongs = new[] { "aI", "eI", "OI", "aU", "oU", "VI", "VU", "@U", "ai", "ei", "Oi", "au", "ou", "Ou", "@u", }; @@ -227,10 
+210,6 @@ private string ReplacePhoneme(string phoneme, int tone) { if (HasOto(phoneme, tone) || HasOto(ValidateAlias(phoneme), tone)) { return phoneme; } - // Otherwise, try to apply the dictionary replacement. - if (dictionaryReplacements.TryGetValue(phoneme, out var replaced)) { - return replaced; - } return phoneme; } @@ -307,38 +286,49 @@ protected override List ProcessSyllable(Syllable syllable) { } } else if (syllable.IsVV) { if (!CanMakeAliasExtension(syllable)) { - var vv = $"{prevV} {v}"; - basePhoneme = vv; - if (!HasOto(vv, syllable.vowelTone) && !HasOto(ValidateAlias(vv), syllable.vowelTone) && (vvExceptions.ContainsKey(prevV) && prevV != v || Delta5vvExceptions.ContainsKey(prevV) && prevV != v)) { - // VV splits to [V C][CV] or [V][V] - var delta5vc = $"{Delta5vvExceptions[prevV]}"; - bool CV = false; - if ((!HasOto(delta5vc, syllable.vowelTone) && !HasOto(ValidateAlias(delta5vc), syllable.vowelTone))) { - delta5vc = $"{prevV} {vvExceptions[prevV]}"; - CV = true; + if (HasOto($"{prevV} {v}", syllable.vowelTone) || HasOto(ValidateAlias($"{prevV} {v}"), syllable.vowelTone)) { + basePhoneme = $"{prevV} {v}"; + } else if (HasOto($"{prevV}{v}", syllable.vowelTone) || HasOto(ValidateAlias($"{prevV}{v}"), syllable.vowelTone)) { + basePhoneme = $"{prevV}{v}"; + } + + // Diphthong Fallbacks + else if (diphthongSplits.ContainsKey(prevV) || diphthongTails.ContainsKey(prevV)) { + string cv = ""; + if (diphthongSplits.ContainsKey(prevV)) { + var splitOverride = diphthongSplits[prevV]; + var vc = splitOverride[0].Replace("{v}", v); + cv = splitOverride[1].Replace("{v}", v); + TryAddPhoneme(phonemes, syllable.tone, vc, ValidateAlias(vc)); + } + else { // Default YAML diphthong logic + var tail = diphthongTails[prevV]; + var vcSpace = $"{prevV} {tail}"; + var vcNoSpace = $"{prevV}{tail}"; + cv = $"{tail}{v}"; + TryAddPhoneme(phonemes, syllable.tone, vcSpace, ValidateAlias(vcSpace), vcNoSpace, ValidateAlias(vcNoSpace)); } - phonemes.Add(delta5vc); - // if delta5 vc is 
not available, turn v to cv - var cv = $"{vvExceptions[prevV]}{v}"; - basePhoneme = v; - if (CV && (HasOto(cv, syllable.vowelTone) || HasOto(ValidateAlias(cv), syllable.vowelTone))) { + + if (HasOto(cv, syllable.vowelTone) || HasOto(ValidateAlias(cv), syllable.vowelTone)) { basePhoneme = cv; + } else if (!HasOto(cv, syllable.vowelTone) || !HasOto(ValidateAlias(cv), syllable.vowelTone)) { + basePhoneme = $"{diphthongTails[prevV]} {v}"; + } else if (HasOto(v, syllable.vowelTone) || HasOto(ValidateAlias(v), syllable.vowelTone)) { + basePhoneme = v; + } else { + basePhoneme = ValidateAlias($"- {v}"); + phonemes.Add($"{prevV} -"); } } else { - // VV to V - if (HasOto($"{prevV} {v}", syllable.vowelTone) || HasOto(ValidateAlias($"{prevV} {v}"), syllable.vowelTone)) { - basePhoneme = $"{prevV} {v}"; - } else if (HasOto($"{prevV}{v}", syllable.vowelTone) || HasOto(ValidateAlias($"{prevV}{v}"), syllable.vowelTone)) { - basePhoneme = $"{prevV}{v}"; - } else if (HasOto(v, syllable.vowelTone) || HasOto(ValidateAlias(v), syllable.vowelTone)) { + if (HasOto(v, syllable.vowelTone) || HasOto(ValidateAlias(v), syllable.vowelTone)) { basePhoneme = v; + } else { + basePhoneme = ValidateAlias($"- {v}"); + phonemes.Add($"{prevV} -"); } } - // EXTEND AS [V] - } else if (HasOto($"{v}", syllable.vowelTone) && HasOto(ValidateAlias($"{v}"), syllable.vowelTone)) { - basePhoneme = v; - } else { - // PREVIOUS ALIAS WILL EXTEND as [V V] + } + else { basePhoneme = null; } } else if (syllable.IsStartingCVWithOneConsonant) { @@ -492,6 +482,11 @@ protected override List ProcessSyllable(Syllable syllable) { if (CurrentWordCc.Length >= 2 && !PreviousWordCc.Contains(cc1)) { cc1 = $"{string.Join("", cc.Skip(i))}"; } + if (CurrentWordCc.Length >= 2) { + if (liquid.Contains(cc[i + 1]) || semivowel.Contains(cc[i + 1])) { + glides(cc1); + } + } if (!HasOto(cc1, syllable.tone)) { cc1 = ValidateAlias(cc1); } @@ -525,6 +520,11 @@ protected override List ProcessSyllable(Syllable syllable) { if (!HasOto(cc2, 
syllable.tone)) { cc2 = ValidateAlias(cc2); } + if (CurrentWordCc.Length >= 2) { + if (liquid.Contains(cc[i + 1]) || semivowel.Contains(cc[i + 1])) { + glides(cc1); + } + } // Use [C2C3] when current word has 2 consonants or more and [C2C3C4...] does not exist if (!HasOto(cc2, syllable.tone) && CurrentWordCc.Length >= 2 && CurrentWordCc.Contains(cc2)) { cc2 = $"{cc[i + 1]}{cc[i + 2]}"; @@ -833,123 +833,23 @@ protected override string ValidateAlias(string alias) { return alias; } - bool PhonemeIsPresent(string alias, string phoneme) { - if (string.IsNullOrEmpty(alias) || string.IsNullOrEmpty(phoneme)) - return false; - - // Exact token match - if (alias == phoneme) - return true; - - return alias.EndsWith(phoneme); - } - - private bool PhonemeHasEndingSuffix(string alias, string phoneme) { - var escapedPhoneme = Regex.Escape(phoneme); - if (Regex.IsMatch(alias, $@"\b{escapedPhoneme}\b\s*-") || - Regex.IsMatch(alias, $@"\b{escapedPhoneme}\b-")) { - return true; - } - if (Regex.IsMatch(alias, $@"\b{escapedPhoneme}\b R")) { - return true; - } - return false; - } - - protected override double GetTransitionBasicLengthMs(string alias = "") { - //I wish these were automated instead :') - double transitionMultiplier = 1.0; // Default multiplier - - var fricative_def = 2.3; - var aspirate_def = 1.3; - var semivowel_def = 1.2; - var liquid_def = 1.5; - var nasal_def = 1.5; - var stop_def = 1.8; - var tap_def = 0.5; - var affricate_def = 1.5; + // Endings has 50 ticks gap + protected override bool NoGap => true; - var allConsonants = fricative.Concat(aspirate) - .Concat(semivowel) - .Concat(liquid) - .Concat(nasal) - .Concat(stop) - .Concat(tap) - .Concat(affricate) - .Distinct(); // Ensure no duplicates - - foreach (var c in allConsonants) { - if (PhonemeHasEndingSuffix(alias, c)) { - return base.GetTransitionBasicLengthMs() * 0.5; - } - } - - foreach (var v in vowels) { - if (alias.EndsWith("-")) { - return base.GetTransitionBasicLengthMs() * 0.5; - } - } - - // consonant 
timings + protected override double GetTransitionBasicLengthMs(string alias, int tone, PhonemeAttributes attr) { + double otoLength = GetTransitionBasicLengthMsByOto(alias, tone, attr); var sortedOverrides = PhonemeOverrides.OrderByDescending(kv => kv.Key.Length); foreach (var kvp in sortedOverrides) { - var overridePhoneme = kvp.Key; - var overrideValue = kvp.Value; - if (PhonemeIsPresent(alias, overridePhoneme)) { - return base.GetTransitionBasicLengthMs() * overrideValue; - } - } - - foreach (var c in fricative) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * fricative_def; - } - } - - foreach (var c in aspirate) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * aspirate_def; - } - } - - foreach (var c in semivowel) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * semivowel_def; - } - } - - foreach (var c in liquid) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * liquid_def; - } - } - - foreach (var c in nasal) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * nasal_def; - } - } - - foreach (var c in stop) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * stop_def; - } - } - - foreach (var c in tap) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * tap_def; - } - } + var symbol = kvp.Key; + var value = kvp.Value; - foreach (var c in affricate) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * affricate_def; + if (Regex.IsMatch(alias, $@"(?(); - - } - private string[] diphthongs = Array.Empty(); - private static string[] c_cR = { "n" }; - - protected override bool IsGroupKeyword(string rulePhoneme) { - string baseGroup = rulePhoneme.Split(new[] { '!', '+' })[0]; - return base.IsGroupKeyword(rulePhoneme) || new[] { "affricate", "fricative", "aspirate", "semivowel", "liquid", "nasal", "stop", "tap", 
"diphthong" }.Contains(baseGroup); - } - protected override bool IsGroupMatch(string rulePhoneme, string actualPhoneme) { - if (base.IsGroupMatch(rulePhoneme, actualPhoneme)) return true; - string baseGroup = rulePhoneme.Split(new[] { '!', '+' })[0]; + this.diphthongTails = new Dictionary() { + { "ay", "ay-" }, + { "ey", "ey-" }, + { "oy", "oy-" }, + { "aw", "aw-" }, + { "ow", "ow-" } + }; - if (rulePhoneme.Contains("!")) { - string[] exceptions = rulePhoneme.Split('!')[1].Split(','); - if (exceptions.Contains(actualPhoneme)) return false; - } - if (rulePhoneme.Contains("+")) { - string[] inclusions = rulePhoneme.Split('+')[1].Split(','); - if (!inclusions.Contains(actualPhoneme)) return false; - } - switch (baseGroup) { - case "affricate": return affricate.Contains(actualPhoneme); - case "fricative": return fricative.Contains(actualPhoneme); - case "aspirate": return aspirate.Contains(actualPhoneme); - case "semivowel": return semivowel.Contains(actualPhoneme); - case "liquid": return liquid.Contains(actualPhoneme); - case "nasal": return nasal.Contains(actualPhoneme); - case "stop": return stop.Contains(actualPhoneme); - case "tap": return tap.Contains(actualPhoneme); - case "diphthong": return diphthongs.Contains(actualPhoneme); - default: return false; - } } + private static string[] c_cR = Array.Empty(); protected override string[] GetVowels() => vowels; protected override string[] GetConsonants() => consonants; @@ -94,7 +69,7 @@ protected override bool IsGroupMatch(string rulePhoneme, string actualPhoneme) { .ToDictionary(parts => parts[0], parts => parts[1]); private bool isTimitPhonemes = false; - private Dictionary DiphthongExceptions = new Dictionary() { + private Dictionary diphthongTails = new Dictionary() { { "ay", "ay-" }, { "ey", "ey-" }, { "oy", "oy-" }, { "aw", "aw-" }, { "ow", "ow-" } }; @@ -185,28 +160,8 @@ protected override string[] GetSymbols(Note note) { return finalProcessedPhonemes.ToArray(); } - protected override IG2p LoadBaseDictionary() 
{ - var g2ps = new List(); - // LOAD DICTIONARY FROM FOLDER - string path = Path.Combine(PluginDir, YamlFileName); - if (!File.Exists(path)) { - Directory.CreateDirectory(PluginDir); - File.WriteAllBytes(path, YamlTemplate); - } - // LOAD DICTIONARY FROM SINGER FOLDER - if (singer != null || singer.Found || singer.Loaded) { - string file = Path.Combine(singer.Location, YamlFileName); - if (File.Exists(file)) { - try { - g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(file)).Build()); - } catch (Exception e) { - Log.Error(e, $"Failed to load {file}"); - } - } - } - g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(path)).Build()); - g2ps.Add(new ArpabetPlusG2p()); - return new G2pFallbacks(g2ps.ToArray()); + protected override IG2p[] GetBaseG2ps() { + return new IG2p[] { new ArpabetPlusG2p() }; } public override void SetSinger(USinger singer) { @@ -214,49 +169,49 @@ public override void SetSinger(USinger singer) { if (this.singer != null && this.singer.Loaded) { - string file = Path.Combine(this.singer.Location, YamlFileName); - if (!File.Exists(file)) { - file = Path.Combine(PluginDir, YamlFileName); - } + string globalFile = Path.Combine(PluginDir, YamlFileName); + string singerFile = Path.Combine(this.singer.Location, YamlFileName); - if (File.Exists(file)) { + var filesToParse = new List(); + if (File.Exists(globalFile)) filesToParse.Add(globalFile); + if (File.Exists(singerFile) && globalFile != singerFile) filesToParse.Add(singerFile); + + c_cR = Array.Empty(); + + foreach (var file in filesToParse) { try { - var data = Core.Yaml.DefaultDeserializer.Deserialize(File.ReadAllText(file)); - - if (data?.diphthongs != null && data.diphthongs.Any()) { - DiphthongExceptions.Clear(); - foreach (var df in data.diphthongs) { - if (!string.IsNullOrEmpty(df.from) && !string.IsNullOrEmpty(df.to)) { - DiphthongExceptions[df.from] = df.to; - } - } - } - - this.diphthongs = data.symbols?.Where(s => s.type == "diphthong").Select(s => 
s.symbol).Distinct().ToArray() ?? Array.Empty(); - DiphthongExceptions.Clear(); - foreach (var d in this.diphthongs) { - DiphthongExceptions[d] = d + "-"; - } - - if (data?.diphthongs != null && data.diphthongs.Any()) { - foreach (var df in data.diphthongs) { - if (!string.IsNullOrEmpty(df.from) && !string.IsNullOrEmpty(df.to)) { - DiphthongExceptions[df.from] = df.to; + var data = Core.Yaml.DefaultDeserializer.Deserialize(File.ReadAllText(file)); + + if (data?.symbols != null) { + + string[] targetTypes = { "nasal", "liquid", "semivowel", "fricative", "aspirate" }; + var newCcR = data.symbols + .Where(s => targetTypes.Contains(s.type?.ToLower())) + .Select(s => s.symbol) + .ToArray(); + + c_cR = c_cR.Concat(newCcR).Distinct().ToArray(); + + var yamlDiphthongs = data.symbols + .Where(s => s.type?.ToLower() == "diphthong") + .Select(s => s.symbol) + .Distinct() + .ToArray(); + + foreach (var d in yamlDiphthongs) { + if (!diphthongSplits.ContainsKey(d)) { + diphthongTails[d] = d + "-"; } } } } catch (Exception ex) { - Log.Error($"Failed to parse custom diphthongs from {YamlFileName}: {ex.Message}"); + Log.Error($"Failed to parse symbols from {file}: {ex.Message}"); } } } } - public class ArpabetYAMLData: YAMLData { - public Fallbacks[] diphthongs { get; set; } = Array.Empty(); - } - private string ReplacePhoneme(string phoneme, int tone) { // If the original phoneme has an OTO, use it directly. 
if (HasOto(phoneme, tone) || HasOto(ValidateAlias(phoneme), tone)) { @@ -323,28 +278,53 @@ protected override List ProcessSyllable(Syllable syllable) { // [V V] or [V C][- C/C][V]/[V] else if (syllable.IsVV) { if (!CanMakeAliasExtension(syllable)) { - basePhoneme = $"{prevV} {v}"; - if (!HasOto(basePhoneme, syllable.vowelTone) && !HasOto(ValidateAlias(basePhoneme), syllable.vowelTone) && DiphthongExceptions.ContainsKey(prevV)) { - // VV IS NOT PRESENT, CHECKS DiphthongExceptions LOGIC - var vc = $"{prevV} {DiphthongExceptions[prevV]}"; - if (!HasOto(vc, syllable.vowelTone) && !HasOto(ValidateAlias(vc), syllable.vowelTone)) { - vc = AliasFormat($"{DiphthongExceptions[prevV]}", "diph_mix", syllable.vowelTone, ""); - } - TryAddPhoneme(phonemes, syllable.tone, vc, ValidateAlias(vc)); - basePhoneme = AliasFormat(v, "vv", syllable.vowelTone, ""); - } else { - { - if (!HasOto($"{prevV} {v}", syllable.vowelTone) || !HasOto(ValidateAlias($"{prevV} {v}"), syllable.vowelTone)) { - basePhoneme = AliasFormat(v, "vv", syllable.vowelTone, ""); + if (HasOto($"{prevV} {v}", syllable.vowelTone) || HasOto(ValidateAlias($"{prevV} {v}"), syllable.vowelTone)) { + basePhoneme = $"{prevV} {v}"; + } + else if (diphthongSplits.ContainsKey(prevV) || diphthongTails.ContainsKey(prevV)) { + string cv = ""; + + if (diphthongSplits.ContainsKey(prevV)) { + var splitOverride = diphthongSplits[prevV]; + var vc = AliasFormat(splitOverride[0].Replace("{v}", v), "vcEx", syllable.tone, prevV); + cv = AliasFormat(splitOverride[1].Replace("{v}", v), "vv", syllable.vowelTone, ""); + TryAddPhoneme(phonemes, syllable.tone, vc, ValidateAlias(vc)); + } + else { + var tail = diphthongTails[prevV]; // gets e.g., "ay-" + var vcSpace = $"{prevV} {tail}"; + var vcNoSpace = $"{prevV}{tail}"; + var vcMix = AliasFormat(tail, "diph_mix", syllable.vowelTone, ""); + + if (HasOto(vcSpace, syllable.vowelTone) || HasOto(ValidateAlias(vcSpace), syllable.vowelTone)) { + TryAddPhoneme(phonemes, syllable.tone, vcSpace, 
ValidateAlias(vcSpace)); + } else if (HasOto(vcNoSpace, syllable.vowelTone) || HasOto(ValidateAlias(vcNoSpace), syllable.vowelTone)) { + TryAddPhoneme(phonemes, syllable.tone, vcNoSpace, ValidateAlias(vcNoSpace)); } else { - basePhoneme = $"{prevV} {v}"; + TryAddPhoneme(phonemes, syllable.tone, vcMix, ValidateAlias(vcMix)); } + cv = AliasFormat(v, "vv", syllable.vowelTone, ""); + } + + if (HasOto(cv, syllable.vowelTone) || HasOto(ValidateAlias(cv), syllable.vowelTone)) { + basePhoneme = cv; + } else if (HasOto(v, syllable.vowelTone) || HasOto(ValidateAlias(v), syllable.vowelTone)) { + basePhoneme = v; + } else { + basePhoneme = AliasFormat(v, "vv", syllable.vowelTone, ""); + } + } + else { + if (HasOto(v, syllable.vowelTone) || HasOto(ValidateAlias(v), syllable.vowelTone)) { + basePhoneme = v; + } else { + basePhoneme = AliasFormat(v, "vv", syllable.vowelTone, ""); } } - } else { + } + else { basePhoneme = null; } - } else if (syllable.IsStartingCVWithOneConsonant) { /// [- C/-C/C] basePhoneme = AliasFormat(v, "cv", syllable.vowelTone, ""); @@ -383,13 +363,13 @@ protected override List ProcessSyllable(Syllable syllable) { TryAddPhoneme(phonemes, syllable.tone, AliasFormat($"{cc[0]}", "cc", syllable.tone, "")); break; /// use vowel ending - } else if (DiphthongExceptions.ContainsKey(prevV) && ((HasOto(vr, syllable.tone) || HasOto(ValidateAlias(vr), syllable.tone) || (HasOto(vr1, syllable.tone) || HasOto(ValidateAlias(vr1), syllable.tone)) && !HasOto(vc, syllable.tone)))) { - TryAddPhoneme(phonemes, syllable.vowelTone, AliasFormat($"{DiphthongExceptions[prevV]}", "diph_mix", syllable.vowelTone, "")); + } else if (diphthongTails.ContainsKey(prevV) && ((HasOto(vr, syllable.tone) || HasOto(ValidateAlias(vr), syllable.tone) || (HasOto(vr1, syllable.tone) || HasOto(ValidateAlias(vr1), syllable.tone)) && !HasOto(vc, syllable.tone)))) { + TryAddPhoneme(phonemes, syllable.vowelTone, AliasFormat($"{diphthongTails[prevV]}", "diph_mix", syllable.vowelTone, "")); 
TryAddPhoneme(phonemes, syllable.tone, AliasFormat($"{cc[0]}", "cc", syllable.tone, "")); break; /// use consonants for diphthongs if the vb doesn't have vowel endings - } else if (DiphthongExceptions.ContainsKey(prevV) && (!(HasOto(vr, syllable.tone) || HasOto(ValidateAlias(vr), syllable.tone) || (HasOto(vr1, syllable.tone) || HasOto(ValidateAlias(vr1), syllable.tone)) && !HasOto(vc, syllable.tone)))) { - TryAddPhoneme(phonemes, syllable.vowelTone, AliasFormat($"{DiphthongExceptions[prevV]}", "diph_mix", syllable.vowelTone, "")); + } else if (diphthongTails.ContainsKey(prevV) && (!(HasOto(vr, syllable.tone) || HasOto(ValidateAlias(vr), syllable.tone) || (HasOto(vr1, syllable.tone) || HasOto(ValidateAlias(vr1), syllable.tone)) && !HasOto(vc, syllable.tone)))) { + TryAddPhoneme(phonemes, syllable.vowelTone, AliasFormat($"{diphthongTails[prevV]}", "diph_mix", syllable.vowelTone, "")); TryAddPhoneme(phonemes, syllable.tone, AliasFormat($"{cc[0]}", "cc", syllable.tone, "")); break; @@ -433,6 +413,9 @@ protected override List ProcessSyllable(Syllable syllable) { if (HasOto(AliasFormat($"{string.Join("", cc.Skip(i + 1))}", "cc", syllable.tone, ""), syllable.vowelTone)) { cc1 = AliasFormat($"{string.Join("", cc.Skip(i + 1))}", "cc", syllable.tone, ""); } + if (liquid.Contains(cc[i + 1]) || semivowel.Contains(cc[i + 1])) { + glides(cc1); + } // CV } else if (syllable.CurrentWordCc.Length == 1 && syllable.PreviousWordCc.Length == 1) { basePhoneme = AliasFormat(v, "cv", syllable.vowelTone, ""); @@ -465,6 +448,9 @@ protected override List ProcessSyllable(Syllable syllable) { if (HasOto(AliasFormat($"{string.Join("", cc.Skip(i + 1))}", "cc", syllable.tone, ""), syllable.vowelTone)) { cc1 = AliasFormat($"{string.Join("", cc.Skip(i + 1))}", "cc", syllable.tone, ""); } + if (liquid.Contains(cc[i + 1]) || semivowel.Contains(cc[i + 1])) { + glides(cc1); + } // CV } else if (syllable.CurrentWordCc.Length == 1 && syllable.PreviousWordCc.Length == 1) { basePhoneme = AliasFormat(v, 
"cv", syllable.vowelTone, ""); @@ -508,8 +494,8 @@ protected override List ProcessEnding(Ending ending) { if (HasOto(vR, ending.tone) || HasOto(ValidateAlias(vR), ending.tone) || (HasOto(vR2, ending.tone) || HasOto(ValidateAlias(vR2), ending.tone))) { TryAddPhoneme(phonemes, ending.tone, AliasFormat(v, "ending", ending.tone, "", t)); /// split diphthong vowels - } else if (DiphthongExceptions.ContainsKey(prevV) && !(HasOto(vR, ending.tone) && HasOto(ValidateAlias(vR), ending.tone) && (HasOto(vR2, ending.tone) || HasOto(ValidateAlias(vR2), ending.tone)))) { - TryAddPhoneme(phonemes, ending.tone, AliasFormat($"{DiphthongExceptions[prevV]}", "cv", ending.tone, "", t)); + } else if (diphthongTails.ContainsKey(prevV) && !(HasOto(vR, ending.tone) && HasOto(ValidateAlias(vR), ending.tone) && (HasOto(vR2, ending.tone) || HasOto(ValidateAlias(vR2), ending.tone)))) { + TryAddPhoneme(phonemes, ending.tone, AliasFormat($"{diphthongTails[prevV]}", "cv", ending.tone, "", t)); } } else if (ending.IsEndingVCWithOneConsonant) { var vc = $"{v} {cc[0]}"; @@ -523,17 +509,17 @@ protected override List ProcessEnding(Ending ending) { } else if (!HasOto(vcr, ending.tone) && !HasOto(ValidateAlias(vcr), ending.tone) && (HasOto(vcr2, ending.tone) || HasOto(ValidateAlias(vcr2), ending.tone))) { TryAddPhoneme(phonemes, ending.tone, vcr2); // double the consonants if has [C -]/[C-] - } else if (DiphthongExceptions.ContainsKey(prevV) && (c_cR.Contains(cc.Last())) && ((HasOto(AliasFormat(v, "ending_mix", ending.tone, ""), ending.tone) && (HasOto($"{c_cR[0]} {t}", ending.tone) || (HasOto($"{c_cR[0]}{t}", ending.tone)))))) { + } else if (diphthongTails.ContainsKey(prevV) && (c_cR.Contains(cc.Last())) && ((HasOto(AliasFormat(v, "ending_mix", ending.tone, ""), ending.tone) && (HasOto($"{c_cR[0]} {t}", ending.tone) || (HasOto($"{c_cR[0]}{t}", ending.tone)))))) { // ex: [ow][ow-][z][z -] - TryAddPhoneme(phonemes, ending.tone, AliasFormat($"{DiphthongExceptions[prevV]}", "diph_mix", ending.tone, "", 
t)); + TryAddPhoneme(phonemes, ending.tone, AliasFormat($"{diphthongTails[prevV]}", "diph_mix", ending.tone, "", t)); TryAddPhoneme(phonemes, ending.tone, AliasFormat($"{cc[0]}", "cc1_mix", ending.tone, "", t)); TryAddPhoneme(phonemes, ending.tone, AliasFormat($"{cc[0]}", "cc_mix", ending.tone, "", t)); - } else if (DiphthongExceptions.ContainsKey(prevV) && ((HasOto(AliasFormat(v, "ending_mix", ending.tone, ""), ending.tone)) && !HasOto(vc, ending.tone))) { - TryAddPhoneme(phonemes, ending.tone, AliasFormat($"{DiphthongExceptions[prevV]}", "diph_mix", ending.tone, "", t)); + } else if (diphthongTails.ContainsKey(prevV) && ((HasOto(AliasFormat(v, "ending_mix", ending.tone, ""), ending.tone)) && !HasOto(vc, ending.tone))) { + TryAddPhoneme(phonemes, ending.tone, AliasFormat($"{diphthongTails[prevV]}", "diph_mix", ending.tone, "", t)); TryAddPhoneme(phonemes, ending.tone, AliasFormat($"{cc[0]}", "cc_mix", ending.tone, "", t)); /// use consonants for diphthongs if the vb doesn't have vowel endings - } else if (DiphthongExceptions.ContainsKey(prevV) && (!(HasOto(AliasFormat(v, "ending_mix", ending.tone, "", t), ending.tone) && !HasOto(vc, ending.tone)))) { - TryAddPhoneme(phonemes, ending.tone, AliasFormat($"{DiphthongExceptions[prevV]}", "diph_mix", ending.tone, "", t)); + } else if (diphthongTails.ContainsKey(prevV) && (!(HasOto(AliasFormat(v, "ending_mix", ending.tone, "", t), ending.tone) && !HasOto(vc, ending.tone)))) { + TryAddPhoneme(phonemes, ending.tone, AliasFormat($"{diphthongTails[prevV]}", "diph_mix", ending.tone, "", t)); if (c_cR.Contains(cc.Last())) { if (HasOto(AliasFormat($"{c_cR[0]}", "cc_mix", ending.tone, ""), ending.tone)) { TryAddPhoneme(phonemes, ending.tone, AliasFormat($"{cc[0]}", "cc1_mix", ending.tone, "", t)); @@ -616,12 +602,12 @@ protected override List ProcessEnding(Ending ending) { } firstC = 1; break; - } else if (DiphthongExceptions.ContainsKey(prevV) && (HasOto(vr, ending.tone) || HasOto(ValidateAlias(vr), ending.tone)) || 
(HasOto(vr1, ending.tone) || HasOto(ValidateAlias(vr1), ending.tone)) && !HasOto(vc, ending.tone)) { + } else if (diphthongTails.ContainsKey(prevV) && (HasOto(vr, ending.tone) || HasOto(ValidateAlias(vr), ending.tone)) || (HasOto(vr1, ending.tone) || HasOto(ValidateAlias(vr1), ending.tone)) && !HasOto(vc, ending.tone)) { TryAddPhoneme(phonemes, ending.tone, vr1, vr); break; /// use consonants for diphthongs if the vb doesn't have vowel endings - } else if (DiphthongExceptions.ContainsKey(prevV) && (!(HasOto(vr, ending.tone) || HasOto(ValidateAlias(vr), ending.tone) || (HasOto(vr1, ending.tone) || HasOto(ValidateAlias(vr1), ending.tone)) && !HasOto(vc, ending.tone)))) { - TryAddPhoneme(phonemes, ending.tone, AliasFormat($"{DiphthongExceptions[prevV]}", "diph_mix", ending.tone, "", t)); + } else if (diphthongTails.ContainsKey(prevV) && (!(HasOto(vr, ending.tone) || HasOto(ValidateAlias(vr), ending.tone) || (HasOto(vr1, ending.tone) || HasOto(ValidateAlias(vr1), ending.tone)) && !HasOto(vc, ending.tone)))) { + TryAddPhoneme(phonemes, ending.tone, AliasFormat($"{diphthongTails[prevV]}", "diph_mix", ending.tone, "", t)); break; } else { TryAddPhoneme(phonemes, ending.tone, vc); @@ -733,28 +719,110 @@ private string AliasFormat(string alias, string type, int tone, string prevV, st }; - // Check if the given type exists in the aliasFormats dictionary - if (!aliasFormats.ContainsKey(type)) { + if (!aliasFormats.ContainsKey(type) && !type.Contains("dynamic")) { return alias; } - // Get the array of possible alias formats for the specified type + + if (type.Contains("dynStart")) { + string consonant = ""; + string vowel = ""; + // If the alias contains a space, split it into consonant and vowel + if (alias.Contains(" ")) { + var parts = alias.Split(' '); + consonant = parts[0]; + vowel = parts[1]; + } else { + consonant = alias; + } + var dynamicVariations = new List { + $"- {consonant}{vowel}", // "- CV" + $"- {consonant} {vowel}", // "- C V" + $"-{consonant} {vowel}", // 
"-C V" + $"-{consonant}{vowel}", // "-CV" + $"-{consonant}_{vowel}", // "-C_V" + $"- {consonant}_{vowel}", // "- C_V" + }; + + foreach (var variation in dynamicVariations) { + if (HasOto(variation, tone)) { + return variation; + } else if (HasOto(ValidateAlias(variation), tone)) { + return ValidateAlias(variation); + } + } + } + + if (type.Contains("dynMid")) { + string consonant = ""; + string vowel = ""; + + if (alias.Contains(" ")) { + var parts = alias.Split(' '); + consonant = parts[0]; + vowel = parts[1]; + } else { + consonant = alias; + } + var dynamicVariations1 = new List { + $"{consonant}{vowel}", // "CV" + $"{consonant} {vowel}", // "C V" + $"{consonant}_{vowel}", // "C_V" + }; + + foreach (var variation1 in dynamicVariations1) { + if (HasOto(variation1, tone)) { + return variation1; + } else if (HasOto(ValidateAlias(variation1), tone)) { + return ValidateAlias(variation1); + } + } + } + + if (type.Contains("dynEnd")) { + string consonant = ""; + string vowel = ""; + + if (alias.Contains(" ")) { + var parts = alias.Split(' '); + consonant = parts[1]; + vowel = parts[0]; + } else { + consonant = alias; + } + var dynamicVariations1 = new List { + $"{vowel}{consonant} -", // "VC -" + $"{vowel} {consonant}-", // "V C-" + $"{vowel}{consonant}-", // "VC-" + $"{vowel} {consonant} -", // "V C -" + }; + + foreach (var variation1 in dynamicVariations1) { + if (HasOto(variation1, tone)) { + return variation1; + } else if (HasOto(ValidateAlias(variation1), tone)) { + return ValidateAlias(variation1); + } + } + } + + // Get the array of possible alias formats for the specified type if not dynamic var formatsToTry = aliasFormats[type]; int counter = 0; foreach (var format in formatsToTry) { string aliasFormat; if (type.Contains("mix") && counter < 4) { - // Alternate between alias + format and format + alias for the first 4 iterations - aliasFormat = (counter % 2 == 0) ? alias + format : format + alias; + aliasFormat = (counter % 2 == 0) ? 
$"{alias}{format}" : $"{format}{alias}"; counter++; - } else if (type.Contains("end")) { - aliasFormat = alias + format; + } else if (type.Contains("end") || type.Contains("End") && !(type.Contains("dynEnd"))) { + aliasFormat = $"{alias}{format}"; } else { - aliasFormat = format + alias; + aliasFormat = $"{format}{alias}"; } - // Check if the formatted alias exists using HasOto and ValidateAlias - if (HasOto(aliasFormat, tone) || HasOto(ValidateAlias(aliasFormat), tone)) { - alias = aliasFormat; - return alias; + + if (HasOto(aliasFormat, tone)) { + return aliasFormat; + } else if (HasOto(ValidateAlias(aliasFormat), tone)) { + return ValidateAlias(aliasFormat); } } return alias; @@ -799,114 +867,50 @@ bool PhonemeIsPresent(string alias, string phoneme) { return alias.EndsWith(phoneme); } - private bool PhonemeHasEndingSuffix(string alias, string phoneme) { - var escapedPhoneme = Regex.Escape(phoneme); - if (Regex.IsMatch(alias, $@"\b{escapedPhoneme}\b\s*-") || - Regex.IsMatch(alias, $@"\b{escapedPhoneme}\b-")) { - return true; - } - if (Regex.IsMatch(alias, $@"\b{escapedPhoneme}\b R")) { - return true; - } - return false; - } - protected override double GetTransitionBasicLengthMs(string alias = "") { - //I wish these were automated instead :') - double transitionMultiplier = 1.0; // Default multiplier + protected override bool NoGap => true; + + protected override double GetTransitionMultiplier(string alias) { + double baseMultiplier = base.GetTransitionMultiplier(alias); + if (baseMultiplier != 1.0) { + return baseMultiplier; + } var fricative_def = 2.3; var aspirate_def = 1.3; var semivowel_def = 1.2; var liquid_def = 1.5; var nasal_def = 1.5; - var stop_def = 1.8; + var stop_def = 1.4; var tap_def = 0.5; var affricate_def = 1.5; - var allConsonants = fricative.Concat(aspirate) - .Concat(semivowel) - .Concat(liquid) - .Concat(nasal) - .Concat(stop) - .Concat(tap) - .Concat(affricate) - .Distinct(); // Ensure no duplicates - - - - // consonant timings - - var 
sortedOverrides = PhonemeOverrides.OrderByDescending(kv => kv.Key.Length); - foreach (var kvp in sortedOverrides) { - var overridePhoneme = kvp.Key; - var overrideValue = kvp.Value; - if (PhonemeIsPresent(alias, overridePhoneme)) { - return base.GetTransitionBasicLengthMs() * overrideValue; - } - } - - foreach (var c in allConsonants) { - if (PhonemeHasEndingSuffix(alias, c)) { - return base.GetTransitionBasicLengthMs() * 0.5; - } - } - - foreach (var v in vowels) { - if (alias.EndsWith("-")) { - return base.GetTransitionBasicLengthMs() * 0.5; - } - } - foreach (var c in fricative) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * fricative_def; - } + if (PhonemeIsPresent(alias, c)) return fricative_def; } - foreach (var c in aspirate) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * aspirate_def; - } + if (PhonemeIsPresent(alias, c)) return aspirate_def; } - foreach (var c in semivowel) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * semivowel_def; - } + if (PhonemeIsPresent(alias, c)) return semivowel_def; } - foreach (var c in liquid) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * liquid_def; - } + if (PhonemeIsPresent(alias, c)) return liquid_def; } - foreach (var c in nasal) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * nasal_def; - } + if (PhonemeIsPresent(alias, c)) return nasal_def; } - foreach (var c in stop) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * stop_def; - } + if (PhonemeIsPresent(alias, c)) return stop_def; } - foreach (var c in tap) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * tap_def; - } + if (PhonemeIsPresent(alias, c)) return tap_def; } - foreach (var c in affricate) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * affricate_def; - } + if 
(PhonemeIsPresent(alias, c)) return affricate_def; } - return base.GetTransitionBasicLengthMs() * transitionMultiplier; + return 1.0; } } } diff --git a/OpenUtau.Plugin.Builtin/EnglishVCCVPhonemizer.cs b/OpenUtau.Plugin.Builtin/EnglishVCCVPhonemizer.cs index d8f7952bc..0b0c21ba8 100644 --- a/OpenUtau.Plugin.Builtin/EnglishVCCVPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/EnglishVCCVPhonemizer.cs @@ -122,30 +122,8 @@ public EnglishVCCVPhonemizer() { protected override string[] GetVowels() => vowels; protected override string[] GetConsonants() => consonants; protected override string GetDictionaryName() => ""; - protected override IG2p LoadBaseDictionary() { - var g2ps = new List(); - - // Load dictionary from plugin folder. - string path = Path.Combine(PluginDir, YamlFileName); - if (!File.Exists(path)) { - Directory.CreateDirectory(PluginDir); - File.WriteAllBytes(path, YamlTemplate); - } - g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(path)).Build()); - - // Load dictionary from singer folder. 
- if (singer != null && singer.Found && singer.Loaded) { - string file = Path.Combine(singer.Location, YamlFileName); - if (File.Exists(file)) { - try { - g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(file)).Build()); - } catch (Exception e) { - Log.Error(e, $"Failed to load {file}"); - } - } - } - g2ps.Add(new ArpabetG2p()); - return new G2pFallbacks(g2ps.ToArray()); + protected override IG2p[] GetBaseG2ps() { + return new IG2p[] { new ArpabetG2p() }; } protected override string[] GetSymbols(Note note) { @@ -156,6 +134,11 @@ protected override string[] GetSymbols(Note note) { if (original == null) { return null; } + for (int i = 0; i < original.Length; i++) { + if (dictionaryReplacements.TryGetValue(original[i], out string replaced)) { + original[i] = replaced; + } + } List finalProcessedPhonemes = new List(); string[] tr_dr = new[] { "tr", "dr"}; foreach (string s in original) { @@ -175,7 +158,7 @@ protected override string[] GetSymbols(Note note) { public override void SetSinger(USinger singer) { base.SetSinger(singer); - if (this.singer == null) return; + if (this.singer == null || !this.singer.Loaded) return; string file = null; if (singer != null && singer.Found && singer.Loaded && !string.IsNullOrEmpty(singer.Location)) { @@ -184,24 +167,31 @@ public override void SetSinger(USinger singer) { file = Path.Combine(PluginDir, YamlFileName); } - if (string.IsNullOrEmpty(file) || !File.Exists(file)) return; + string yamlContent = null; + if (!string.IsNullOrEmpty(file) && File.Exists(file)) { + yamlContent = File.ReadAllText(file); + } else if (YamlTemplate != null) { + yamlContent = System.Text.Encoding.UTF8.GetString(YamlTemplate); + } - try { - var data = Core.Yaml.DefaultDeserializer.Deserialize(File.ReadAllText(file)); - if (data?.vcvowels != null) { - vcVowels.Clear(); - foreach (var kvp in data.vcvowels) { - if (!string.IsNullOrEmpty(kvp.Key) && !string.IsNullOrEmpty(kvp.Value)) { - vcVowels[kvp.Key] = kvp.Value; + if 
(!string.IsNullOrEmpty(yamlContent)) { + try { + var data = Core.Yaml.DefaultDeserializer.Deserialize(yamlContent); + if (data?.vcvowels != null) { + vcVowels.Clear(); + foreach (var kvp in data.vcvowels) { + if (!string.IsNullOrEmpty(kvp.Key) && !string.IsNullOrEmpty(kvp.Value)) { + vcVowels[kvp.Key] = kvp.Value; + } } } + } catch (Exception ex) { + Log.Error($"Failed to load vcvowels from {YamlFileName}: {ex.Message}"); } - } catch (Exception ex) { - Log.Error($"Failed to load vcvowels from {YamlFileName}: {ex.Message}"); } } - private class VcVowelYAMLData { + private class VcVowelYAMLData: YAMLData { public Dictionary vcvowels { get; set; } = new Dictionary(); } // prioritize yaml replacements over dictionary replacements @@ -210,10 +200,6 @@ private string ReplacePhoneme(string phoneme, int tone) { if (HasOto(phoneme, tone) || HasOto(ValidateAlias(phoneme), tone)) { return phoneme; } - // Otherwise, try to apply the dictionary replacement. - if (dictionaryReplacements.TryGetValue(phoneme, out var replaced)) { - return replaced; - } return phoneme; } @@ -900,5 +886,13 @@ protected override string ValidateAlias(string alias) { return alias; } + + // Endings has 50 ticks gap + protected override bool NoGap => true; + + protected override double GetTransitionBasicLengthMs(string alias, int tone, PhonemeAttributes attr) { + double otoLength = GetTransitionBasicLengthMsByOto(alias, tone, attr); + return otoLength; + } } } diff --git a/OpenUtau.Plugin.Builtin/FILtoJAPhonemizer.cs b/OpenUtau.Plugin.Builtin/FILtoJAPhonemizer.cs index 02e2e62b1..cd20f323b 100644 --- a/OpenUtau.Plugin.Builtin/FILtoJAPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/FILtoJAPhonemizer.cs @@ -513,5 +513,39 @@ private string ToHiragana(string romaji) { hiragana = hiragana.Replace("ゔ", "ヴ"); return hiragana; } + + // Endings has 50 ticks gap + protected override bool NoGap => true; + + protected override double GetTransitionBasicLengthMs(string alias, int tone, PhonemeAttributes attr) { + double 
otoLength = GetTransitionBasicLengthMsByOto(alias, tone, attr); + + var parts = alias.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries); + bool isVcv = false; + + if (parts.Length == 2) { + var startingVowels = new[] { "a", "i", "u", "e", "o", "n", "N", "-" }; + var endingVowels = vowels; + + // First part must be a vowel (or a rest) + if (startingVowels.Contains(parts[0])) { + string cv = parts[1]; + + // Second part must end in a vowel (Romaji CV) OR be Japanese (Hiragana/Katakana) + bool isRomajiVcv = endingVowels.Contains(cv.Last().ToString()); + bool isJapaneseVcv = cv.Any(c => c > 0xFF); + + if (isRomajiVcv || isJapaneseVcv) { + isVcv = true; + } + } + } + + if (isVcv) { + return GetTransitionBasicLengthMsByConstant() * 1.0; + } + + return otoLength; + } } } diff --git a/OpenUtau.Plugin.Builtin/FilipinoPhonemizer.cs b/OpenUtau.Plugin.Builtin/FilipinoPhonemizer.cs index 96171865e..46546542d 100644 --- a/OpenUtau.Plugin.Builtin/FilipinoPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/FilipinoPhonemizer.cs @@ -23,6 +23,16 @@ public FilipinoPhonemizer() { "a", "e", "i", "o", "u", "ay", "ey", "oy", "uy", "aw", "ew", "ow", "iw" }; this.consonants = Array.Empty(); + this.diphthongTails = new Dictionary() { + { "ay", "y" }, + { "ey", "y" }, + { "oy", "y" }, + { "uy", "y" }, + { "aw", "w" }, + { "ew", "w" }, + { "ow", "w" }, + { "iw", "w" }, + }; } protected override string[] GetVowels() => vowels; @@ -31,8 +41,6 @@ public FilipinoPhonemizer() { List consExceptions = new List(); - string[] diphthongs = new[] { "ay", "ey", "oy", "uy", "aw", "ew", "ow", "iw" }; - // For banks with missing vowels private readonly Dictionary missingVphonemes = "ax=a".Split(',') .Select(entry => entry.Split('=')) @@ -51,35 +59,8 @@ public FilipinoPhonemizer() { .ToDictionary(parts => parts[0], parts => parts[1]); private bool isMissingCPhonemes = false; private bool cPV_FallBack = false; - - private readonly Dictionary vvDiphthongExceptions = - new Dictionary() { - {"aw","a"}, - 
{"ow","o"}, - {"iw","i"}, - {"ay","a"}, - {"ey","e"}, - {"oy","o"}, - {"uy","u"}, - {"ew","e"}, - }; - - private readonly Dictionary vvExceptions = - new Dictionary() { - {"aw","w"}, - {"ow","w"}, - {"iw","w"}, - {"ay","y"}, - {"ey","y"}, - {"oy","y"}, - {"uy","y"}, - {"ew","w"}, - }; - private readonly string[] ccvException = { "ch", "dh", "dx", "fh", "gh", "hh", "jh", "kh", "ph", "ng", "sh", "th", "vh", "wh", "zh" }; private readonly string[] RomajiException = { "a", "e", "i", "o", "u" }; - private static readonly string[] FinalConsonants = { "w", "y", "r", "l", "m", "n", "ng" }; - protected override string[] GetSymbols(Note note) { string[] original = base.GetSymbols(note); @@ -156,29 +137,6 @@ protected override string[] GetSymbols(Note note) { return finalProcessedPhonemes.ToArray(); } - protected override IG2p LoadBaseDictionary() { - var g2ps = new List(); - // LOAD DICTIONARY FROM FOLDER - string path = Path.Combine(PluginDir, YamlFileName); - if (!File.Exists(path)) { - Directory.CreateDirectory(PluginDir); - File.WriteAllBytes(path, YamlTemplate); - } - // LOAD DICTIONARY FROM SINGER FOLDER - if (singer != null && singer.Found && singer.Loaded) { - string file = Path.Combine(singer.Location, YamlFileName); - if (File.Exists(file)) { - try { - g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(file)).Build()); - } catch (Exception e) { - Log.Error(e, $"Failed to load {file}"); - } - } - } - g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(path)).Build()); - //g2ps.Add(new ArpabetPlusG2p()); - return new G2pFallbacks(g2ps.ToArray()); - } public override void SetSinger(USinger singer) { base.SetSinger(singer); @@ -248,35 +206,48 @@ protected override List ProcessSyllable(Syllable syllable) { } // [V V] or [V C][C V]/[V] else if (syllable.IsVV) { - if (!CanMakeAliasExtension(syllable)) { - basePhoneme = $"{prevV} {v}"; - if (!HasOto(basePhoneme, syllable.vowelTone) && !HasOto(ValidateAlias(basePhoneme), syllable.vowelTone) && 
vvExceptions.ContainsKey(prevV) && prevV != v) { - // VV IS NOT PRESENT, CHECKS VVEXCEPTIONS LOGIC - //var vc = $"{prevV}{vvExceptions[prevV]}"; - var vc = AliasFormat($"{vvExceptions[prevV]}", "vcEx", syllable.vowelTone, prevV); - TryAddPhoneme(phonemes, syllable.vowelTone, vc); - basePhoneme = AliasFormat($"{vvExceptions[prevV]} {v}", "dynMid", syllable.vowelTone, ""); + if (CanMakeAliasExtension(syllable)) { + if (HasOto($"{prevV} {v}", syllable.vowelTone) || HasOto(ValidateAlias($"{prevV} {v}"), syllable.vowelTone)) { + basePhoneme = $"{prevV} {v}"; + } else if (HasOto($"{prevV}{v}", syllable.vowelTone) || HasOto(ValidateAlias($"{prevV}{v}"), syllable.vowelTone)) { + basePhoneme = $"{prevV}{v}"; + } + + // Diphthong Fallbacks + else if (diphthongSplits.ContainsKey(prevV) || diphthongTails.ContainsKey(prevV)) { + string cv = ""; + if (diphthongSplits.ContainsKey(prevV)) { + var splitOverride = diphthongSplits[prevV]; + var vc = AliasFormat(splitOverride[0].Replace("{v}", v), "vcEx", syllable.tone, prevV); + cv = AliasFormat(splitOverride[1].Replace("{v}", v), "dynMid", syllable.vowelTone, ""); + TryAddPhoneme(phonemes, syllable.tone, vc, ValidateAlias(vc)); + } + else { // Default YAML diphthong logic + var tail = diphthongTails[prevV]; + var vcSpace = AliasFormat($"{prevV} {tail}", "vcEx", syllable.tone, prevV); + var vcNoSpace = AliasFormat($"{prevV}{tail}", "vcEx", syllable.tone, prevV); + cv = AliasFormat($"{tail} {v}", "dynMid", syllable.vowelTone, ""); + TryAddPhoneme(phonemes, syllable.tone, vcSpace, ValidateAlias(vcSpace), vcNoSpace, ValidateAlias(vcNoSpace)); + } + + if (HasOto(cv, syllable.vowelTone) || HasOto(ValidateAlias(cv), syllable.vowelTone)) { + basePhoneme = cv; + } else if (HasOto(v, syllable.vowelTone) || HasOto(ValidateAlias(v), syllable.vowelTone)) { + basePhoneme = v; + } else { + basePhoneme = ValidateAlias(AliasFormat($"- {v}", "dynMid", syllable.vowelTone, "")); + phonemes.Add(AliasFormat($"{prevV} -", "dynMid", syllable.tone, "")); + 
} } else { - { - if (HasOto($"{prevV} {v}", syllable.vowelTone) || HasOto(ValidateAlias($"{prevV} {v}"), syllable.vowelTone)) { - basePhoneme = $"{prevV} {v}"; - } else if (HasOto($"{prevV}{v}", syllable.vowelTone) || HasOto(ValidateAlias($"{prevV}{v}"), syllable.vowelTone)) { - basePhoneme = $"{prevV}{v}"; - } else if (HasOto(v, syllable.vowelTone) || HasOto(ValidateAlias(v), syllable.vowelTone)) { - basePhoneme = v; - } else { - basePhoneme = AliasFormat($"- {v}", "dynMid", syllable.vowelTone, ""); - TryAddPhoneme(phonemes, syllable.vowelTone, AliasFormat($"{prevV} -", "dynMid", syllable.vowelTone, "")); - } + if (HasOto(v, syllable.vowelTone) || HasOto(ValidateAlias(v), syllable.vowelTone)) { + basePhoneme = v; + } else { + basePhoneme = ValidateAlias(AliasFormat($"- {v}", "dynMid", syllable.vowelTone, "")); + phonemes.Add(AliasFormat($"{prevV} -", "dynMid", syllable.tone, "")); } } - // EXTEND AS [V] - } else if (HasOto($"{v}", syllable.vowelTone) && HasOto(ValidateAlias($"{v}"), syllable.vowelTone) || missingVphonemes.ContainsKey(prevV)) { - basePhoneme = v; - } else if (!HasOto(v, syllable.vowelTone) && !HasOto(ValidateAlias(v), syllable.vowelTone) && vvDiphthongExceptions.ContainsKey(prevV)) { - basePhoneme = $"{vvDiphthongExceptions[prevV]} {vvDiphthongExceptions[prevV]}"; - } else { - // PREVIOUS ALIAS WILL EXTEND as [V V] + } + else { basePhoneme = null; } @@ -809,12 +780,10 @@ private string AliasFormat(string alias, string type, int tone, string prevV, st { "cc1_mix", new string[] { "", " -", "-", " R", "_", "- ", "-" } }, }; - // Check if the given type exists in the aliasFormats dictionary if (!aliasFormats.ContainsKey(type) && !type.Contains("dynamic")) { return alias; } - // Handle dynamic variations when type contains "dynamic" if (type.Contains("dynStart")) { string consonant = ""; string vowel = ""; @@ -826,10 +795,7 @@ private string AliasFormat(string alias, string type, int tone, string prevV, st } else { consonant = alias; } - - // Handle the 
alias with space and without space var dynamicVariations = new List { - // Variations with space, dash, and underscore $"- {consonant}{vowel}", // "- CV" $"- {consonant} {vowel}", // "- C V" $"-{consonant} {vowel}", // "-C V" @@ -837,10 +803,12 @@ private string AliasFormat(string alias, string type, int tone, string prevV, st $"-{consonant}_{vowel}", // "-C_V" $"- {consonant}_{vowel}", // "- C_V" }; - // Check each dynamically generated format + foreach (var variation in dynamicVariations) { - if (HasOto(variation, tone) || HasOto(ValidateAlias(variation), tone)) { + if (HasOto(variation, tone)) { return variation; + } else if (HasOto(ValidateAlias(variation), tone)) { + return ValidateAlias(variation); } } } @@ -848,7 +816,7 @@ private string AliasFormat(string alias, string type, int tone, string prevV, st if (type.Contains("dynMid")) { string consonant = ""; string vowel = ""; - // If the alias contains a space, split it into consonant and vowel + if (alias.Contains(" ")) { var parts = alias.Split(' '); consonant = parts[0]; @@ -861,10 +829,12 @@ private string AliasFormat(string alias, string type, int tone, string prevV, st $"{consonant} {vowel}", // "C V" $"{consonant}_{vowel}", // "C_V" }; - // Check each dynamically generated format + foreach (var variation1 in dynamicVariations1) { - if (HasOto(variation1, tone) || HasOto(ValidateAlias(variation1), tone)) { + if (HasOto(variation1, tone)) { return variation1; + } else if (HasOto(ValidateAlias(variation1), tone)) { + return ValidateAlias(variation1); } } } @@ -872,7 +842,7 @@ private string AliasFormat(string alias, string type, int tone, string prevV, st if (type.Contains("dynEnd")) { string consonant = ""; string vowel = ""; - // If the alias contains a space, split it into consonant and vowel + if (alias.Contains(" ")) { var parts = alias.Split(' '); consonant = parts[1]; @@ -886,10 +856,12 @@ private string AliasFormat(string alias, string type, int tone, string prevV, st $"{vowel}{consonant}-", // 
"VC-" $"{vowel} {consonant} -", // "V C -" }; - // Check each dynamically generated format + foreach (var variation1 in dynamicVariations1) { - if (HasOto(variation1, tone) || HasOto(ValidateAlias(variation1), tone)) { + if (HasOto(variation1, tone)) { return variation1; + } else if (HasOto(ValidateAlias(variation1), tone)) { + return ValidateAlias(variation1); } } } @@ -907,9 +879,11 @@ private string AliasFormat(string alias, string type, int tone, string prevV, st } else { aliasFormat = $"{format}{alias}"; } - // Check if the formatted alias exists - if (HasOto(aliasFormat, tone) || HasOto(ValidateAlias(aliasFormat), tone)) { + + if (HasOto(aliasFormat, tone)) { return aliasFormat; + } else if (HasOto(ValidateAlias(aliasFormat), tone)) { + return ValidateAlias(aliasFormat); } } return alias; @@ -937,123 +911,23 @@ protected override string ValidateAlias(string alias) { return base.ValidateAlias(alias); } - bool PhonemeIsPresent(string alias, string phoneme) { - if (string.IsNullOrEmpty(alias) || string.IsNullOrEmpty(phoneme)) - return false; + // Endings has 50 ticks gap + protected override bool NoGap => true; - // Exact token match - if (alias == phoneme) - return true; - - return alias.EndsWith(phoneme); - } - - private bool PhonemeHasEndingSuffix(string alias, string phoneme) { - var escapedPhoneme = Regex.Escape(phoneme); - if (Regex.IsMatch(alias, $@"\b{escapedPhoneme}\b\s*-") || - Regex.IsMatch(alias, $@"\b{escapedPhoneme}\b-")) { - return true; - } - if (Regex.IsMatch(alias, $@"\b{escapedPhoneme}\b R")) { - return true; - } - return false; - } - - protected override double GetTransitionBasicLengthMs(string alias = "") { - //I wish these were automated instead :') - double transitionMultiplier = 1.0; // Default multiplier - - var fricative_def = 2.3; - var aspirate_def = 1.3; - var semivowel_def = 1.2; - var liquid_def = 1.5; - var nasal_def = 1.5; - var stop_def = 1.8; - var tap_def = 0.5; - var affricate_def = 1.5; - - var allConsonants = 
fricative.Concat(aspirate) - .Concat(semivowel) - .Concat(liquid) - .Concat(nasal) - .Concat(stop) - .Concat(tap) - .Concat(affricate) - .Distinct(); // Ensure no duplicates - - foreach (var c in allConsonants) { - if (PhonemeHasEndingSuffix(alias, c)) { - return base.GetTransitionBasicLengthMs() * 0.5; - } - } - - foreach (var v in vowels) { - if (alias.EndsWith("-")) { - return base.GetTransitionBasicLengthMs() * 0.5; - } - } - - // consonant timings + protected override double GetTransitionBasicLengthMs(string alias, int tone, PhonemeAttributes attr) { + double otoLength = GetTransitionBasicLengthMsByOto(alias, tone, attr); var sortedOverrides = PhonemeOverrides.OrderByDescending(kv => kv.Key.Length); foreach (var kvp in sortedOverrides) { - var overridePhoneme = kvp.Key; - var overrideValue = kvp.Value; - if (PhonemeIsPresent(alias, overridePhoneme)) { - return base.GetTransitionBasicLengthMs() * overrideValue; - } - } - - foreach (var c in fricative) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * fricative_def; - } - } - - foreach (var c in aspirate) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * aspirate_def; - } - } - - foreach (var c in semivowel) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * semivowel_def; - } - } - - foreach (var c in liquid) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * liquid_def; - } - } - - foreach (var c in nasal) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * nasal_def; - } - } - - foreach (var c in stop) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * stop_def; - } - } - - foreach (var c in tap) { - if (PhonemeIsPresent(alias, c)) { - return base.GetTransitionBasicLengthMs() * tap_def; - } - } + var symbol = kvp.Key; + var value = kvp.Value; - foreach (var c in affricate) { - if (PhonemeIsPresent(alias, c)) { - return 
base.GetTransitionBasicLengthMs() * affricate_def; + if (Regex.IsMatch(alias, $@"(? true; + + protected override double GetTransitionBasicLengthMs(string alias, int tone, PhonemeAttributes attr) { + double otoLength = GetTransitionBasicLengthMsByOto(alias, tone, attr); + return otoLength; } private string CheckAliasFormatting(string alias, string type, int tone, string prevV) { diff --git a/OpenUtau.Plugin.Builtin/FrenchVCCVPhonemizer.cs b/OpenUtau.Plugin.Builtin/FrenchVCCVPhonemizer.cs index 15a36996e..9d03ca2f2 100644 --- a/OpenUtau.Plugin.Builtin/FrenchVCCVPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/FrenchVCCVPhonemizer.cs @@ -253,19 +253,13 @@ protected override List ProcessEnding(Ending ending) { return phonemes; } + // Endings has 50 ticks gap + protected override bool NoGap => true; - protected override double GetTransitionBasicLengthMs(string alias = "") { - foreach (var c in shortConsonants) { - if (alias.EndsWith(c)) { - return base.GetTransitionBasicLengthMs() * 0.75; - } - } - foreach (var c in longConsonants) { - if (alias.EndsWith(c)) { - return base.GetTransitionBasicLengthMs() * 1.5; - } - } - return base.GetTransitionBasicLengthMs() * 1.25; + protected override double GetTransitionBasicLengthMs(string alias, int tone, PhonemeAttributes attr) { + double otoLength = GetTransitionBasicLengthMsByOto(alias, tone, attr); + + return otoLength; } protected override string[] GetSymbols(Note note) { diff --git a/OpenUtau.Plugin.Builtin/GermanVCCVPhonemizer.cs b/OpenUtau.Plugin.Builtin/GermanVCCVPhonemizer.cs index 242b03a5c..56ca18db2 100644 --- a/OpenUtau.Plugin.Builtin/GermanVCCVPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/GermanVCCVPhonemizer.cs @@ -35,32 +35,8 @@ public GermanVCCVPhonemizer() { protected override string GetDictionaryName() => "cmudict_de.txt"; private bool isYamlFallbacks = false; - protected override IG2p LoadBaseDictionary() { - var g2ps = new List(); - - // Load dictionary from plugin folder. 
- string path = Path.Combine(PluginDir, YamlFileName); - if (!File.Exists(path)) { - Directory.CreateDirectory(PluginDir); - File.WriteAllBytes(path, YamlTemplate); - } - g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(path)).Build()); - - // Load dictionary from singer folder. - if (singer != null && singer.Found && singer.Loaded) { - string file = Path.Combine(singer.Location, YamlFileName); - if (File.Exists(file)) { - try { - g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(file)).Build()); - } catch (Exception e) { - Log.Error(e, $"Failed to load {file}"); - } - } - } - - // Load base g2p. - g2ps.Add(new GermanG2p()); - return new G2pFallbacks(g2ps.ToArray()); + protected override IG2p[] GetBaseG2ps() { + return new IG2p[] { new GermanG2p() }; } protected override string[] GetSymbols(Note note) { @@ -69,6 +45,12 @@ protected override string[] GetSymbols(Note note) { return null; } List finalProcessedPhonemes = new List(); + + for (int i = 0; i < original.Length; i++) { + if (dictionaryReplacements.TryGetValue(original[i], out string replaced)) { + original[i] = replaced; + } + } string[] diphthongs = new[] { "aU", "OY", "aI" }; foreach (string s in original) { @@ -87,10 +69,6 @@ private string ReplacePhoneme(string phoneme, int tone) { if (HasOto(phoneme, tone) || HasOto(ValidateAlias(phoneme), tone)) { return phoneme; } - // Otherwise, try to apply the dictionary replacement. 
- if (dictionaryReplacements.TryGetValue(phoneme, out var replaced)) { - return replaced; - } return phoneme; } @@ -456,9 +434,12 @@ protected override string ValidateAlias(string alias) { return alias; } + protected override bool NoGap => true; + + protected override double GetTransitionBasicLengthMs(string alias, int tone, PhonemeAttributes attr) { + double otoLength = GetTransitionBasicLengthMsByOto(alias, tone, attr); - protected override double GetTransitionBasicLengthMs(string alias = "") { - return base.GetTransitionBasicLengthMs(); + return otoLength; } } } \ No newline at end of file diff --git a/OpenUtau.Plugin.Builtin/ItalianSyllableBasedPhonemizer.cs b/OpenUtau.Plugin.Builtin/ItalianSyllableBasedPhonemizer.cs index 6cd8f0c2c..fea1e2304 100644 --- a/OpenUtau.Plugin.Builtin/ItalianSyllableBasedPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/ItalianSyllableBasedPhonemizer.cs @@ -226,5 +226,14 @@ protected override string ValidateAlias(string alias) { } return alias; } + + // Endings has 50 ticks gap + protected override bool NoGap => true; + + protected override double GetTransitionBasicLengthMs(string alias, int tone, PhonemeAttributes attr) { + double otoLength = GetTransitionBasicLengthMsByOto(alias, tone, attr); + + return otoLength; + } } } diff --git a/OpenUtau.Plugin.Builtin/PolishCVCPhonemizer.cs b/OpenUtau.Plugin.Builtin/PolishCVCPhonemizer.cs index ed6d96598..bb292b8c5 100644 --- a/OpenUtau.Plugin.Builtin/PolishCVCPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/PolishCVCPhonemizer.cs @@ -66,5 +66,14 @@ protected override List ProcessEnding(Ending ending) { return phonemes; } + + // Endings has 50 ticks gap + protected override bool NoGap => true; + + protected override double GetTransitionBasicLengthMs(string alias, int tone, PhonemeAttributes attr) { + double otoLength = GetTransitionBasicLengthMsByOto(alias, tone, attr); + + return otoLength; + } } } diff --git a/OpenUtau.Plugin.Builtin/RussianCVCPhonemizer.cs 
b/OpenUtau.Plugin.Builtin/RussianCVCPhonemizer.cs index 46f70d4ad..1a62dfaf8 100644 --- a/OpenUtau.Plugin.Builtin/RussianCVCPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/RussianCVCPhonemizer.cs @@ -105,18 +105,13 @@ protected override string ValidateAlias(string alias) { return aliasesFallback.ContainsKey(alias) ? aliasesFallback[alias] : alias; } - protected override double GetTransitionBasicLengthMs(string alias = "") { - foreach (var c in shortConsonants) { - if (alias.EndsWith(c)) { - return base.GetTransitionBasicLengthMs() * 0.75; - } - } - foreach (var c in longConsonants) { - if (alias.EndsWith(c)) { - return base.GetTransitionBasicLengthMs() * 1.5; - } - } - return base.GetTransitionBasicLengthMs(); + // Endings has 50 ticks gap + protected override bool NoGap => true; + + protected override double GetTransitionBasicLengthMs(string alias, int tone, PhonemeAttributes attr) { + double otoLength = GetTransitionBasicLengthMsByOto(alias, tone, attr); + + return otoLength; } } diff --git a/OpenUtau.Plugin.Builtin/RussianVCCVPhonemizer.cs b/OpenUtau.Plugin.Builtin/RussianVCCVPhonemizer.cs index 515bbed72..505246560 100644 --- a/OpenUtau.Plugin.Builtin/RussianVCCVPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/RussianVCCVPhonemizer.cs @@ -124,18 +124,13 @@ protected override string ValidateAlias(string alias) { return alias; } - protected override double GetTransitionBasicLengthMs(string alias = "") { - foreach (var c in shortConsonants) { - if (alias.EndsWith(c)) { - return base.GetTransitionBasicLengthMs() * 0.75; - } - } - foreach (var c in longConsonants) { - if (alias.EndsWith(c)) { - return base.GetTransitionBasicLengthMs() * 1.5; - } - } - return base.GetTransitionBasicLengthMs(); + // Endings has 50 ticks gap + protected override bool NoGap => true; + + protected override double GetTransitionBasicLengthMs(string alias, int tone, PhonemeAttributes attr) { + double otoLength = GetTransitionBasicLengthMsByOto(alias, tone, attr); + + return otoLength; } } } diff 
--git a/OpenUtau.Plugin.Builtin/SpanishMakkusanPhonemizer.cs b/OpenUtau.Plugin.Builtin/SpanishMakkusanPhonemizer.cs index ea5244d7e..9ff7e6357 100644 --- a/OpenUtau.Plugin.Builtin/SpanishMakkusanPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/SpanishMakkusanPhonemizer.cs @@ -248,5 +248,14 @@ protected override string ValidateAlias(string alias) { } return alias; } + + // Endings has 50 ticks gap + protected override bool NoGap => true; + + protected override double GetTransitionBasicLengthMs(string alias, int tone, PhonemeAttributes attr) { + double otoLength = GetTransitionBasicLengthMsByOto(alias, tone, attr); + + return otoLength; + } } } diff --git a/OpenUtau.Plugin.Builtin/SpanishSyllableBasedPhonemizer.cs b/OpenUtau.Plugin.Builtin/SpanishSyllableBasedPhonemizer.cs index e350abf21..76ac43fc0 100644 --- a/OpenUtau.Plugin.Builtin/SpanishSyllableBasedPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/SpanishSyllableBasedPhonemizer.cs @@ -460,18 +460,13 @@ protected override string ValidateAlias(string alias) { return alias; } - protected override double GetTransitionBasicLengthMs(string alias = "") { - foreach (var c in longConsonants) { - if (alias.EndsWith(c)) { - return base.GetTransitionBasicLengthMs() * 2.0; - } - } - foreach (var c in new[] { "r" }) { - if (alias.EndsWith(c)) { - return base.GetTransitionBasicLengthMs() * 0.75; - } - } - return base.GetTransitionBasicLengthMs(); + // Endings has 50 ticks gap + protected override bool NoGap => true; + + protected override double GetTransitionBasicLengthMs(string alias, int tone, PhonemeAttributes attr) { + double otoLength = GetTransitionBasicLengthMsByOto(alias, tone, attr); + + return otoLength; } } } diff --git a/OpenUtau.Plugin.Builtin/SpanishVCCVPhonemizer.cs b/OpenUtau.Plugin.Builtin/SpanishVCCVPhonemizer.cs index 98fd877d5..f3563c2bb 100644 --- a/OpenUtau.Plugin.Builtin/SpanishVCCVPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/SpanishVCCVPhonemizer.cs @@ -37,30 +37,8 @@ public SpanishVCCVPhonemizer() { protected 
override string[] GetVowels() => vowels; protected override string[] GetConsonants() => consonants; protected override string GetDictionaryName() => "cmudict_es.txt"; - protected override IG2p LoadBaseDictionary() { - var g2ps = new List(); - - // Load dictionary from plugin folder. - string path = Path.Combine(PluginDir, YamlFileName); - if (!File.Exists(path)) { - Directory.CreateDirectory(PluginDir); - File.WriteAllBytes(path, YamlTemplate); - } - g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(path)).Build()); - - // Load dictionary from singer folder. - if (singer != null && singer.Found && singer.Loaded) { - string file = Path.Combine(singer.Location, YamlFileName); - if (File.Exists(file)) { - try { - g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(file)).Build()); - } catch (Exception e) { - Log.Error(e, $"Failed to load {file}"); - } - } - } - g2ps.Add(new SpanishG2p()); - return new G2pFallbacks(g2ps.ToArray()); + protected override IG2p[] GetBaseG2ps() { + return new IG2p[] { new SpanishG2p() }; } protected override string[] GetSymbols(Note note) { @@ -68,6 +46,13 @@ protected override string[] GetSymbols(Note note) { if (original == null) { return null; } + + for (int i = 0; i < original.Length; i++) { + if (dictionaryReplacements.TryGetValue(original[i], out string replaced)) { + original[i] = replaced; + } + } + List finalProcessedPhonemes = new List(); foreach (string s in original) { switch (s) { @@ -85,10 +70,6 @@ private string ReplacePhoneme(string phoneme, int tone) { if (HasOto(phoneme, tone) || HasOto(ValidateAlias(phoneme), tone)) { return phoneme; } - // Otherwise, try to apply the dictionary replacement. 
- if (dictionaryReplacements.TryGetValue(phoneme, out var replaced)) { - return replaced; - } return phoneme; } @@ -477,8 +458,12 @@ protected override string ValidateAlias(string alias) { return base.ValidateAlias(alias); } - protected override double GetTransitionBasicLengthMs(string alias = "") { - return base.GetTransitionBasicLengthMs(); + protected override bool NoGap => true; + + protected override double GetTransitionBasicLengthMs(string alias, int tone, PhonemeAttributes attr) { + double otoLength = GetTransitionBasicLengthMsByOto(alias, tone, attr); + + return otoLength; } } } diff --git a/OpenUtau.Plugin.Builtin/SyllableBasedPhonemizer.cs b/OpenUtau.Plugin.Builtin/SyllableBasedPhonemizer.cs index c576430f0..4120dbbca 100644 --- a/OpenUtau.Plugin.Builtin/SyllableBasedPhonemizer.cs +++ b/OpenUtau.Plugin.Builtin/SyllableBasedPhonemizer.cs @@ -159,6 +159,8 @@ public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevN if (hasDictionary && isDictionaryLoading) { return MakeSimpleResult(""); } + + runtimeGlides.Clear(); var syllables = MakeSyllables(notes, MakeEnding(prevNeighbours)); if (syllables == null) { @@ -166,6 +168,8 @@ public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevN } var phonemes = new List(); + int globalPhonemeIndex = 0; // Track the exact index for OpenUtau's UI + foreach (var syllable in syllables) { var modifiedSyllable = ApplyBoundaryReplacements(syllable); @@ -183,11 +187,15 @@ public override Result Process(Note[] notes, Note? prev, Note? next, Note? 
prevN var endingPhonemes = ProcessEnding(ending); if (endingPhonemes != null) { - phonemes.AddRange(MakePhonemes(endingPhonemes, modifiedSyllable.duration, modifiedSyllable.position, false)); + phonemes.AddRange(MakePhonemes(endingPhonemes, modifiedSyllable.duration, modifiedSyllable.position, false, modifiedSyllable.tone, mainNote.phonemeAttributes, globalPhonemeIndex)); + globalPhonemeIndex += endingPhonemes.Count; } continue; } - phonemes.AddRange(MakePhonemes(ProcessSyllable(modifiedSyllable), modifiedSyllable.duration, modifiedSyllable.position, false)); + + var syllablePhonemes = ProcessSyllable(modifiedSyllable); + phonemes.AddRange(MakePhonemes(syllablePhonemes, modifiedSyllable.duration, modifiedSyllable.position, false, modifiedSyllable.tone, mainNote.phonemeAttributes, globalPhonemeIndex)); + globalPhonemeIndex += syllablePhonemes.Count; } if (!nextNeighbour.HasValue) { @@ -203,13 +211,17 @@ public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevN var endingPhonemes = ProcessEnding(modifiedEnding); if (endingPhonemes != null) { - phonemes.AddRange(MakePhonemes(endingPhonemes, modifiedEnding.duration, modifiedEnding.position, true)); + phonemes.AddRange(MakePhonemes(endingPhonemes, modifiedEnding.duration, modifiedEnding.position, true, ending.tone, mainNote.phonemeAttributes, globalPhonemeIndex)); + globalPhonemeIndex += endingPhonemes.Count; } } } + var phonemesArray = phonemes.ToArray(); + CustomParameters(notes, prev, next, prevNeighbour, nextNeighbour, prevNeighbours, phonemesArray); + var finalPhonemes = AssignAllAffixes(phonemesArray.ToList(), notes, prevNeighbours); return new Result() { - phonemes = AssignAllAffixes(phonemes, notes, prevNeighbours) + phonemes = finalPhonemes }; } @@ -224,11 +236,17 @@ protected virtual Phoneme[] AssignAllAffixes(List phonemes, Note[] note while (noteIndex < notes.Length - 1 && notes[noteIndex].position - notes[0].position < phoneme.position) { noteIndex++; } - var noteStartPosition = 
notes[noteIndex].position - notes[0].position; - int tone = (prevs != null && prevs.Length > 0 && phoneme.position < noteStartPosition) ? - prevs.Last().tone : (noteIndex > 0 && phoneme.position < noteStartPosition) ? - notes[noteIndex - 1].tone : notes[noteIndex].tone; + var noteStartPosition = notes[noteIndex].position - notes[0].position; + int tone; + if (phoneme.position < noteStartPosition) { + tone = (noteIndex > 0) ? notes[noteIndex - 1].tone : + (prevs != null && prevs.Length > 0) ? prevs.Last().tone : + notes[noteIndex].tone; + } else { + tone = notes[noteIndex].tone; + } + var validatedAlias = phoneme.phoneme; if (validatedAlias != null) { validatedAlias = ValidateAliasIfNeeded(validatedAlias, tone + toneShift); @@ -284,21 +302,21 @@ public override void SetSinger(USinger singer) { return; } - string file = null; - if (singer != null && singer.Found && singer.Loaded && !string.IsNullOrEmpty(singer.Location)) { - file = Path.Combine(singer.Location, YamlFileName); - } else if (!string.IsNullOrEmpty(PluginDir)) { - file = Path.Combine(PluginDir, YamlFileName); - } + // file paths + string globalFile = Path.Combine(PluginDir, YamlFileName); + string singerFile = (singer != null && singer.Found && singer.Loaded && !string.IsNullOrEmpty(singer.Location)) + ? Path.Combine(singer.Location, YamlFileName) + : null; - if (!string.IsNullOrEmpty(file)) { + // template creation/backup for the Global File ONLY + if (!string.IsNullOrEmpty(globalFile)) { bool shouldWriteTemplate = false; bool shouldBackupOldFile = false; - if (File.Exists(file)) { + if (File.Exists(globalFile)) { if (YamlTemplate != null && !string.IsNullOrEmpty(YamlVersion)) { try { - var checkData = Core.Yaml.DefaultDeserializer.Deserialize(File.ReadAllText(file)); + var checkData = Core.Yaml.DefaultDeserializer.Deserialize(File.ReadAllText(globalFile)); string currentVersion = checkData?.version?.Trim() ?? 
""; if (string.IsNullOrEmpty(currentVersion) || currentVersion != YamlVersion) { @@ -306,7 +324,7 @@ public override void SetSinger(USinger singer) { shouldBackupOldFile = true; } } catch (Exception ex) { - Log.Error(ex, $"Failed to read version from '{file}'. Backing up and resetting to template..."); + Log.Error(ex, $"Failed to read version from '{globalFile}'. Backing up and resetting to template..."); shouldWriteTemplate = true; shouldBackupOldFile = true; } @@ -315,11 +333,11 @@ public override void SetSinger(USinger singer) { shouldWriteTemplate = true; } - if (shouldBackupOldFile && File.Exists(file)) { + if (shouldBackupOldFile && File.Exists(globalFile)) { try { - string backupFile = Path.Combine(Path.GetDirectoryName(file), $"{Path.GetFileNameWithoutExtension(YamlFileName)}_backup{Path.GetExtension(YamlFileName)}"); + string backupFile = Path.Combine(Path.GetDirectoryName(globalFile), $"{Path.GetFileNameWithoutExtension(YamlFileName)}_backup{Path.GetExtension(YamlFileName)}"); if (File.Exists(backupFile)) File.Delete(backupFile); - File.Move(file, backupFile); + File.Move(globalFile, backupFile); Log.Information($"Old {YamlFileName} backed up to {backupFile}"); } catch (Exception e) { Log.Error(e, $"Failed to back up {YamlFileName}"); @@ -328,74 +346,130 @@ public override void SetSinger(USinger singer) { if (shouldWriteTemplate) { try { - File.WriteAllBytes(file, YamlTemplate); - Log.Information($"'{file}' created or updated to version {YamlVersion ?? "default"}"); + File.WriteAllBytes(globalFile, YamlTemplate); + Log.Information($"'{globalFile}' created or updated to version {YamlVersion ?? "default"}"); } catch (Exception e) { - Log.Error(e, $"Failed to write template to {file}"); + Log.Error(e, $"Failed to write template to {globalFile}"); } } + } - if (File.Exists(file)) { - try { - var data = Core.Yaml.DefaultDeserializer.Deserialize(File.ReadAllText(file)); - - if (backupVowels == null) backupVowels = GetVowels() ?? 
Array.Empty(); - if (backupConsonants == null) backupConsonants = GetConsonants() ?? Array.Empty(); - - var yamlVowels = data.symbols?.Where(s => s.type == "vowel" || s.type == "diphthong").Select(s => s.symbol).ToArray() ?? Array.Empty(); - vowels = backupVowels.Concat(yamlVowels).Distinct().ToArray(); + // add to parsing list (Global first, Singer second) + var filesToParse = new List(); + if (File.Exists(globalFile)) filesToParse.Add(globalFile); + if (!string.IsNullOrEmpty(singerFile) && File.Exists(singerFile)) filesToParse.Add(singerFile); + + // backups of hardcoded defaults exist + if (backupVowels == null) backupVowels = GetVowels() ?? Array.Empty(); + if (backupConsonants == null) backupConsonants = GetConsonants() ?? Array.Empty(); + if (backupDictionaryReplacements == null) backupDictionaryReplacements = new Dictionary(dictionaryReplacements); + if (backupDiphthongTails == null) backupDiphthongTails = new Dictionary(diphthongTails); + if (backupDiphthongSplits == null) backupDiphthongSplits = new Dictionary(diphthongSplits); + + // reset live arrays/lists back to defaults before stacking + vowels = backupVowels; + consonants = backupConsonants; + tails = "-,R".Split(','); + + fricative = Array.Empty(); + aspirate = Array.Empty(); + semivowel = Array.Empty(); + liquid = Array.Empty(); + nasal = Array.Empty(); + stop = Array.Empty(); + tap = Array.Empty(); + affricate = Array.Empty(); + + dictionaryReplacements.Clear(); + foreach (var kvp in backupDictionaryReplacements) dictionaryReplacements[kvp.Key] = kvp.Value; + + diphthongTails.Clear(); + foreach (var kvp in backupDiphthongTails) diphthongTails[kvp.Key] = kvp.Value; + + diphthongSplits.Clear(); + foreach (var kvp in backupDiphthongSplits) diphthongSplits[kvp.Key] = kvp.Value; + + mergingReplacements.Clear(); + splittingReplacements.Clear(); + yamlFallbacks.Clear(); + PhonemeOverrides.Clear(); + + // parse the files sequentially (Singer configs seamlessly overwrite global configs) + foreach (var file 
in filesToParse) { + try { + var data = Core.Yaml.DefaultDeserializer.Deserialize(File.ReadAllText(file)); + + // SYMBOLS + var yamlVowels = data.symbols?.Where(s => s.type == "vowel" || s.type == "diphthong").Select(s => s.symbol).ToArray() ?? Array.Empty(); + vowels = vowels.Concat(yamlVowels).Distinct().ToArray(); - tails = (tails ?? Array.Empty()).Concat(data.symbols?.Where(s => s.type == "tail").Select(s => s.symbol) ?? Array.Empty()).Distinct().ToArray(); - - fricative = data.symbols?.Where(s => s.type == "fricative").Select(s => s.symbol).Distinct().ToArray() ?? Array.Empty(); - aspirate = data.symbols?.Where(s => s.type == "aspirate").Select(s => s.symbol).Distinct().ToArray() ?? Array.Empty(); - semivowel = data.symbols?.Where(s => s.type == "semivowel").Select(s => s.symbol).Distinct().ToArray() ?? Array.Empty(); - liquid = data.symbols?.Where(s => s.type == "liquid").Select(s => s.symbol).Distinct().ToArray() ?? Array.Empty(); - nasal = data.symbols?.Where(s => s.type == "nasal").Select(s => s.symbol).Distinct().ToArray() ?? Array.Empty(); - stop = data.symbols?.Where(s => s.type == "stop").Select(s => s.symbol).Distinct().ToArray() ?? Array.Empty(); - tap = data.symbols?.Where(s => s.type == "tap").Select(s => s.symbol).Distinct().ToArray() ?? Array.Empty(); - affricate = data.symbols?.Where(s => s.type == "affricate").Select(s => s.symbol).Distinct().ToArray() ?? Array.Empty(); - - var yamlConsonants = fricative.Concat(aspirate).Concat(semivowel).Concat(liquid).Concat(nasal).Concat(stop).Concat(tap).Concat(affricate).ToArray(); - consonants = backupConsonants.Concat(yamlConsonants).Distinct().ToArray(); - - PhonemeOverrides = data.timings?.ToDictionary(t => t.symbol, t => t.value) ?? new Dictionary(); - if (backupDictionaryReplacements == null) { - backupDictionaryReplacements = new Dictionary(dictionaryReplacements); + tails = tails.Concat(data.symbols?.Where(s => s.type == "tail").Select(s => s.symbol) ?? 
Array.Empty()).Distinct().ToArray(); + if (data?.isglides != null) enableGlides = data.isglides.Value; + + fricative = fricative.Concat(data.symbols?.Where(s => s.type == "fricative").Select(s => s.symbol) ?? Array.Empty()).Distinct().ToArray(); + aspirate = aspirate.Concat(data.symbols?.Where(s => s.type == "aspirate").Select(s => s.symbol) ?? Array.Empty()).Distinct().ToArray(); + semivowel = semivowel.Concat(data.symbols?.Where(s => s.type == "semivowel").Select(s => s.symbol) ?? Array.Empty()).Distinct().ToArray(); + liquid = liquid.Concat(data.symbols?.Where(s => s.type == "liquid").Select(s => s.symbol) ?? Array.Empty()).Distinct().ToArray(); + nasal = nasal.Concat(data.symbols?.Where(s => s.type == "nasal").Select(s => s.symbol) ?? Array.Empty()).Distinct().ToArray(); + stop = stop.Concat(data.symbols?.Where(s => s.type == "stop").Select(s => s.symbol) ?? Array.Empty()).Distinct().ToArray(); + tap = tap.Concat(data.symbols?.Where(s => s.type == "tap").Select(s => s.symbol) ?? Array.Empty()).Distinct().ToArray(); + affricate = affricate.Concat(data.symbols?.Where(s => s.type == "affricate").Select(s => s.symbol) ?? Array.Empty()).Distinct().ToArray(); + + var yamlConsonants = fricative.Concat(aspirate).Concat(semivowel).Concat(liquid).Concat(nasal).Concat(stop).Concat(tap).Concat(affricate).ToArray(); + consonants = consonants.Concat(yamlConsonants).Distinct().ToArray(); + + // DIPHTHONG AUTO-TAIL DETECTION + var yamlDiphthongs = data.symbols?.Where(s => s.type == "diphthong").Select(s => s.symbol).Distinct().ToArray() ?? 
Array.Empty(); + var dynamicTails = consonants.OrderByDescending(c => c.Length).ToArray(); + + foreach (var d in yamlDiphthongs) { + if (!diphthongTails.ContainsKey(d) && !diphthongSplits.ContainsKey(d)) { + foreach (var tail in dynamicTails) { + if (d.EndsWith(tail) && d != tail) { + diphthongTails[d] = tail; + break; + } + } } - dictionaryReplacements.Clear(); - foreach (var kvp in backupDictionaryReplacements) { - dictionaryReplacements[kvp.Key] = kvp.Value; + } + + // OVERRIDES & DICTIONARIES (Singer keys overwrite global keys) + if (data?.timings != null) { + foreach (var t in data.timings) PhonemeOverrides[t.symbol] = t.value; + } + + if (data?.replacements != null) { + foreach (var replacement in data.replacements) { + string ruleScope = string.IsNullOrEmpty(replacement.where) ? "inside" : replacement.where.ToLowerInvariant(); + if (replacement.from is IEnumerable fromList) { + string[] fromArray = fromList.Select(item => item.ToString() ?? "null").ToArray(); + if (replacement.to is string toString) mergingReplacements.Add(new Replacement { from = fromArray, to = toString, where = ruleScope }); + else if (replacement.to is IEnumerable toList) splittingReplacements.Add(new Replacement { from = fromArray, to = toList.Select(item => item.ToString()).ToArray(), where = ruleScope }); + } else if (replacement.from is string fromString) { + if (replacement.to is string toString) dictionaryReplacements[fromString] = toString; + else if (replacement.to is IEnumerable toList) splittingReplacements.Add(new Replacement { from = fromString, to = toList.Select(item => item.ToString()).ToArray(), where = ruleScope }); + } } + } - mergingReplacements.Clear(); - splittingReplacements.Clear(); - - if (data?.replacements != null && data.replacements.Any()) { - foreach (var replacement in data.replacements) { - string ruleScope = string.IsNullOrEmpty(replacement.where) ? 
"inside" : replacement.where.ToLowerInvariant(); - if (replacement.from is IEnumerable fromList) { - string[] fromArray = fromList.Select(item => item.ToString()).ToArray(); - if (replacement.to is string toString) mergingReplacements.Add(new Replacement { from = fromArray, to = toString, where = ruleScope }); - else if (replacement.to is IEnumerable toList) splittingReplacements.Add(new Replacement { from = fromArray, to = toList.Select(item => item.ToString()).ToArray(), where = ruleScope }); - } else if (replacement.from is string fromString) { - if (replacement.to is string toString) dictionaryReplacements[fromString] = toString; - else if (replacement.to is IEnumerable toList) splittingReplacements.Add(new Replacement { from = fromString, to = toList.Select(item => item.ToString()).ToArray(), where = ruleScope }); - } + if (data?.fallbacks != null) { + foreach (var df in data.fallbacks) { + if (!string.IsNullOrEmpty(df.from) && !string.IsNullOrEmpty(df.to)) { + yamlFallbacks[df.from] = df.to; } } + } - if (data?.fallbacks != null) { - yamlFallbacks.Clear(); - foreach (var df in data.fallbacks) { - if (!string.IsNullOrEmpty(df.from) && !string.IsNullOrEmpty(df.to)) { - yamlFallbacks[df.from] = df.to; - } + if (data?.diphthongs != null) { + foreach (var d in data.diphthongs) { + if (!string.IsNullOrEmpty(d.from) && !string.IsNullOrEmpty(d.to)) { + diphthongTails[d.from] = d.to; } } - } catch (Exception ex) { - Log.Error($"Failed to parse {YamlFileName}: {ex.Message}"); } + + } catch (Exception ex) { + Log.Error($"Failed to parse {file}: {ex.Message}"); } } @@ -419,6 +493,20 @@ public override void SetSinger(USinger singer) { private readonly string[] wordSeparators = new[] { " ", "_" }; private readonly string[] wordSeparator = new[] { " " }; + /// + /// A tracker to identify which phonemes were marked as glides dynamically. 
+ /// + protected HashSet runtimeGlides = new HashSet(); + + /// + /// Flag a specific generated string as a glide during your ProcessSyllable / ProcessEnding loops. + /// + protected void glides(string alias) { + runtimeGlides.Add(alias); + } + + protected bool enableGlides = true; + /// /// Returns list of vowels /// @@ -488,7 +576,6 @@ string[] getSymbolsRaw(string lyrics) { foreach (var subword in note.lyric.Trim().ToLowerInvariant().Split(wordSeparators, StringSplitOptions.RemoveEmptyEntries)) { var subResult = dictionary.Query(subword); if (subResult == null) { - Log.Warning($"Subword '{subword}' from word '{note.lyric}' can't be found in the dictionary"); subResult = HandleWordNotFound(note); if (subResult == null) { return null; @@ -511,6 +598,16 @@ string[] getSymbolsRaw(string lyrics) { } } + /// + /// Defines whether a consonant (like a liquid or semi-vowel etc) should be placed ON the note (anchor) + /// instead of pushing backward. Will return true if dynamically flagged using glides() or TryAddPhoneme(). + /// + protected virtual bool IsGlide(string alias) { + return runtimeGlides.Contains(alias) && enableGlides; + } + + protected virtual bool NoGap => true; + /// /// Instead of changing symbols in cmudict itself for each reclist, /// you may leave it be and provide symbol replacements with this method. 
@@ -521,6 +618,8 @@ protected virtual Dictionary GetDictionaryPhonemesReplacement()
         }
         private string[] backupVowels = null;
         private string[] backupConsonants = null;
+        private Dictionary backupDiphthongTails = null;
+        private Dictionary backupDiphthongSplits = null;
         private Dictionary backupDictionaryReplacements = null;
 
         ///
@@ -745,6 +844,45 @@ protected double GetTransitionBasicLengthMsByConstant() {
             return TransitionBasicLengthMs * GetTempoNoteLengthFactor();
         }
 
+        protected virtual double GetTransitionMultiplier(string alias) {
+            if (alias != null && PhonemeOverrides != null && PhonemeOverrides.TryGetValue(alias, out double overrideRatio)) {
+                return overrideRatio; // per-alias ratio found in PhonemeOverrides
+            }
+            return 1.0; // neutral multiplier when the alias has no override
+        }
+
+        ///
+        /// Tone/attribute-aware overload; this base implementation ignores tone and attr and returns GetTransitionBasicLengthMs(alias).
+        ///
+        protected virtual double GetTransitionBasicLengthMs(string alias, int tone, PhonemeAttributes attr) {
+            return GetTransitionBasicLengthMs(alias);
+        }
+
+        ///
+        /// OTO HELPER: transition length in ms from the mapped oto's Preutter (minus Overlap when Overlap is negative); returns the constant length when alias is empty, no singer is set, or no oto is found.
+        ///
+        protected double GetTransitionBasicLengthMsByOto(string alias, int tone = 0, PhonemeAttributes attr = default) {
+            if (string.IsNullOrEmpty(alias) || singer == null) return GetTransitionBasicLengthMsByConstant();
+
+            string color = attr.voiceColor ?? string.Empty;
+            string alt = attr.alternate?.ToString() ?? string.Empty;
+            int shiftedTone = tone + attr.toneShift;
+
+            var validatedAlias = ValidateAliasIfNeeded(alias, shiftedTone);
+            var mappedAlias = MapPhoneme(validatedAlias, shiftedTone, color, alt, singer);
+
+            if (singer.TryGetMappedOto(mappedAlias, shiftedTone, out var oto)) {
+                // If overlap is negative, add that absolute duration to the preutterance
+                // to ensure the entire consonant timing is preserved.
+                if (oto.Overlap < 0) {
+                    return oto.Preutter - oto.Overlap;
+                }
+                return oto.Preutter;
+            }
+
+            return GetTransitionBasicLengthMsByConstant();
+        }
+
         ///
         /// a note length modifier, from 1 to 0.3.
Used to make transition notes shorter on high tempo
         ///
@@ -753,22 +891,68 @@ protected double GetTempoNoteLengthFactor() {
             return (300 - Math.Clamp(bpm, 90, 300)) / (300 - 90) / 3 + 0.33;
         }
 
+        protected virtual IG2p[] GetBaseG2ps() {
+            return Array.Empty();
+        }
+
         protected virtual IG2p LoadBaseDictionary() {
-            var dictionaryName = GetDictionaryName();
-            var filename = Path.Combine(DictionariesPath, dictionaryName);
-            var dictionaryText = File.ReadAllText(filename);
-            var builder = G2pDictionary.NewBuilder();
-            var vowels = GetVowels();
-            foreach (var vowel in vowels) {
-                builder.AddSymbol(vowel, true);
+            var sources = new List();
+
+            // YAML dictionaries: taken whenever a YamlFileName is declared
+            if (!string.IsNullOrEmpty(YamlFileName)) {
+                string path = Path.Combine(PluginDir, YamlFileName);
+
+                // Write the bundled template when the plugin-folder file is missing
+                if (YamlTemplate != null && !File.Exists(path)) {
+                    Directory.CreateDirectory(PluginDir);
+                    File.WriteAllBytes(path, YamlTemplate);
+                }
+
+                // Singer-folder dictionary is added first (Highest Priority)
+                if (singer != null && singer.Found && singer.Loaded) {
+                    string file = Path.Combine(singer.Location, YamlFileName);
+                    if (File.Exists(file)) {
+                        try {
+                            sources.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(file)).Build());
+                        } catch (Exception ex) {
+                            Log.Error(ex, $"Failed to load {file}");
+                        }
+                    }
+                }
+
+                // Plugin-folder dictionary is added second (Fallback Priority)
+                if (File.Exists(path)) {
+                    try {
+                        sources.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(path)).Build());
+                    } catch (Exception ex) {
+                        Log.Error(ex, $"Failed to load {path}");
+                    }
+                }
+            }
+            // Legacy text dictionary (the child supplies GetDictionaryName instead of a YAML file)
+            else {
+                var legacyName = GetDictionaryName();
+                if (!string.IsNullOrEmpty(legacyName)) {
+                    var legacyPath = Path.Combine(DictionariesPath, legacyName);
+                    if (File.Exists(legacyPath)) {
+                        var legacyText = File.ReadAllText(legacyPath);
+                        var legacyBuilder = G2pDictionary.NewBuilder();
+                        foreach (var v in GetVowels()) legacyBuilder.AddSymbol(v, true);
+                        foreach (var c in GetConsonants()) legacyBuilder.AddSymbol(c, false);
+                        legacyBuilder.AddEntry("a", new string[] { "a" });
+                        ParseDictionary(legacyText, legacyBuilder);
+                        sources.Add(legacyBuilder.Build());
+                    }
+                }
             }
-            var consonants = GetConsonants();
-            foreach (var consonant in consonants) {
-                builder.AddSymbol(consonant, false);
+
+            // Child-specific G2P models (e.g., ArpabetPlusG2p) are appended last
+            var extraG2ps = GetBaseG2ps();
+            if (extraG2ps != null && extraG2ps.Length > 0) {
+                sources.AddRange(extraG2ps);
             }
-            builder.AddEntry("a", new string[] { "a" });
-            ParseDictionary(dictionaryText, builder);
-            return builder.Build();
+
+            return new G2pFallbacks(sources.ToArray());
         }
 
         ///
@@ -811,6 +995,14 @@ protected bool IsShort(Ending ending) {
             return TickToMs(ending.duration) < GetTransitionBasicLengthMs() * 2;
         }
 
+        ///
+        /// Native API for child phonemizers to automatically apply expressions (vel, alt, clr, etc.)
+        /// This is called internally after all phonemes are generated and aligned, right before returning to the engine.
+        ///
+        protected virtual void CustomParameters(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevNeighbours, Phoneme[] phonemes) {
+            // Base implementation does nothing. Child classes override this to implement custom logic.
+        }
+
         ///
         /// Checks if mapped and validated alias exists in oto
         ///
@@ -838,6 +1030,20 @@ protected bool TryAddPhoneme(List sourcePhonemes, int tone, params strin
             return false;
         }
 
+        ///
+        /// Appends a phoneme and optionally marks it as a glide simultaneously.
+        ///
+        protected bool TryAddPhoneme(List sourcePhonemes, int tone, bool isGlide, params string[] targetPhonemes) {
+            foreach (var phoneme in targetPhonemes) {
+                if (HasOto(phoneme, tone)) {
+                    sourcePhonemes.Add(phoneme);
+                    if (isGlide) glides(phoneme);
+                    return true;
+                }
+            }
+            return false;
+        }
+
         ///
         /// if true, you can put phoneme as null so the previous alias will be extended
         ///
@@ -894,16 +1100,22 @@ protected bool AreTonesFromTheSameSubbank(int tone1, int tone2) {
         protected Dictionary yamlFallbacks = new Dictionary();
         protected List consExceptions = new List();
 
+        protected Dictionary diphthongTails = new Dictionary();
+        protected Dictionary diphthongSplits = new Dictionary();
+
         public class YAMLData {
             public string version { get; set; }
+            public bool? isglides { get; set; }
             public SymbolData[] symbols { get; set; } = Array.Empty();
             public Replacement[] replacements { get; set; } = Array.Empty();
             public Fallbacks[] fallbacks { get; set; } = Array.Empty();
             public Timings[] timings { get; set; } = Array.Empty();
+            public DiphthongData[] diphthongs { get; set; } = Array.Empty();
 
             public struct SymbolData { public string symbol { get; set; } public string type { get; set; } }
             public struct Fallbacks { public string from { get; set; } public string to { get; set; } }
             public struct Timings { public string symbol { get; set; } public double value { get; set; } }
+            public struct DiphthongData { public string from { get; set; } public string to { get; set; } }
         }
 
         public class Replacement {
@@ -914,7 +1126,7 @@ public class Replacement {
             public List FromList {
                 get {
                     if (from is string s) return new List { s };
-                    if (from is IEnumerable list) return list.Select(x => x.ToString()).ToList();
+                    if (from is IEnumerable list) return list.Select(x => x?.ToString() ?? "null").ToList(); // null items become the "null" marker instead of throwing
                     return new List();
                 }
             }
@@ -922,7 +1134,7 @@ public List FromList {
             public List ToList {
                 get {
                     if (to is string s) return new List { s };
-                    if (to is IEnumerable list) return list.Select(x => x.ToString()).ToList();
+                    if (to is IEnumerable list) return list.Select(x => x?.ToString() ?? "null").ToList(); // null items become the "null" marker instead of throwing
                     return new List();
                 }
             }
@@ -932,17 +1144,21 @@ public List ToList {
         protected List splittingReplacements = new List();
 
         protected virtual bool IsGroupKeyword(string rulePhoneme) {
-            string baseGroup = rulePhoneme.Split(new[] { '!', '=', '+' })[0];
+            // Trim parentheses so "(vowel)" evaluates identically to "vowel"
+            string cleanRule = rulePhoneme.Trim('(', ')');
+            string baseGroup = cleanRule.Split(new[] { '!', '=', '&' })[0];
             return new[] { "vowel", "vowels", "consonant", "consonants", "affricate", "fricative", "aspirate", "semivowel", "liquid", "nasal", "stop", "tap" }.Contains(baseGroup);
         }
 
         protected virtual bool IsGroupMatch(string rulePhoneme, string actualPhoneme) {
-            string baseGroup = rulePhoneme.Split(new[] { '!', '=', '+' })[0];
-            if (rulePhoneme.Contains("+")) {
-                string added = rulePhoneme.Substring(rulePhoneme.IndexOf('+') + 1).Split(new[] { '!', '=' })[0];
-                // If it matches another group name, or a literal letter, it passes
+            string cleanRule = rulePhoneme.Trim('(', ')');
+            string baseGroup = cleanRule.Split(new[] { '!', '=', '&' })[0];
+
+            // '&' (formerly '+') adds extra groups or literal phonemes; matching any of them passes immediately
+            if (cleanRule.Contains("&")) {
+                string added = cleanRule.Substring(cleanRule.IndexOf('&') + 1).Split(new[] { '!', '=' })[0];
                 foreach (string inc in added.Split(',')) {
                     if (IsGroupKeyword(inc) ? IsGroupMatch(inc, actualPhoneme) : inc == actualPhoneme) {
                         return true;
@@ -950,7 +1166,6 @@ protected virtual bool IsGroupMatch(string rulePhoneme, string actualPhoneme) {
                 }
             }
 
-            // BASE GROUP: If it wasn't an addition, it must belong to the base group.
             bool inBaseGroup = false;
             switch (baseGroup) {
                 case "vowel": case "vowels": inBaseGroup = GetVowels().Contains(actualPhoneme); break;
@@ -967,15 +1182,13 @@ protected virtual bool IsGroupMatch(string rulePhoneme, string actualPhoneme) {
 
             if (!inBaseGroup) return false;
 
-            // EXCLUSIONS (!): Reject if it's in the excluded list.
-            if (rulePhoneme.Contains("!")) {
-                string excluded = rulePhoneme.Substring(rulePhoneme.IndexOf('!') + 1).Split(new[] { '=', '+' })[0];
+            if (cleanRule.Contains("!")) {
+                string excluded = cleanRule.Substring(cleanRule.IndexOf('!') + 1).Split(new[] { '=', '&' })[0];
                 if (excluded.Split(',').Contains(actualPhoneme)) return false;
             }
 
-            // RESTRICTIONS (=): Reject if an equals list exists, and the phoneme isn't in it.
-            if (rulePhoneme.Contains("=")) {
-                string restricted = rulePhoneme.Substring(rulePhoneme.IndexOf('=') + 1).Split(new[] { '!', '+' })[0];
+            if (cleanRule.Contains("=")) {
+                string restricted = cleanRule.Substring(cleanRule.IndexOf('=') + 1).Split(new[] { '!', '&' })[0];
                 if (!restricted.Split(',').Contains(actualPhoneme)) return false;
             }
 
@@ -998,25 +1211,23 @@ protected virtual List ApplyReplacements(List inputPhonemes, boo
                 bool replaced = false;
 
                 foreach (var rule in validRules) {
-                    string[] fromArray = null;
-                    if (rule.from is IList fromList) {
-                        fromArray = fromList.Cast().Select(x => x?.ToString()).ToArray();
-                    } else if (rule.from is string[] strArr) {
-                        fromArray = strArr;
-                    }
-
-                    if (fromArray != null && fromArray.Length > 0 && idx + fromArray.Length <= inputPhonemes.Count) {
+                    List fromArray = rule.FromList;
+
+                    if (fromArray != null && fromArray.Count > 0 && idx + fromArray.Count <= inputPhonemes.Count) {
                         bool match = true;
-                        var captures = new Dictionary>();
+                        var captures = new Dictionary>();
 
-                        for (int j = 0; j < fromArray.Length; j++) {
+                        for (int j = 0; j < fromArray.Count; j++) {
                             string rulePh = fromArray[j];
                             string actualPh = inputPhonemes[idx + j];
 
-                            if (IsGroupKeyword(rulePh)) {
+                            string cleanRulePh = rulePh.Trim('(', ')');
+                            string baseRulePh = cleanRulePh.Split(new[] { '!', '=', '&' })[0];
+
+                            if (IsGroupKeyword(baseRulePh)) {
                                 if (IsGroupMatch(rulePh, actualPh)) {
-                                    if (!captures.ContainsKey(rulePh)) captures[rulePh] = new Queue();
-                                    captures[rulePh].Enqueue(actualPh);
+                                    if (!captures.ContainsKey(baseRulePh)) captures[baseRulePh] = new List();
+                                    captures[baseRulePh].Add(actualPh);
                                 } else {
                                     match = false; break;
                                 }
@@ -1026,56 +1237,112 @@ protected virtual List ApplyReplacements(List inputPhonemes, boo
                         }
 
                         if (match) {
-                            string[] toArray = null;
-                            if (rule.to is IList toList) {
-                                toArray = toList.Cast().Select(x => x?.ToString()).ToArray();
-                            } else if (rule.to is string[] strArr) {
-                                toArray = strArr;
-                            } else if (rule.to is string toStr) {
-                                toArray = new string[] { toStr };
-                            }
+                            List toArray = rule.ToList;
 
-                            if (toArray != null) {
+                            if (toArray != null && toArray.Count > 0) {
+                                var captureIndices = new Dictionary();
+
                                 foreach (string toPh in toArray) {
-                                    finalPhonemes.Add(IsGroupKeyword(toPh) && captures.ContainsKey(toPh) && captures[toPh].Count > 0 ? captures[toPh].Dequeue() : toPh);
+                                    // Split by + for concatenation
+                                    string[] parts = toPh.Split('+');
+                                    string[] cleanParts = new string[parts.Length];
+                                    string baseGroupTo = null;
+
+                                    for (int k = 0; k < parts.Length; k++) {
+                                        // Strip parenthesis to find the base group cleanly
+                                        string partNoParens = parts[k].Trim('(', ')');
+                                        int cutoff = partNoParens.IndexOfAny(new[] { '!', '=', '&' });
+                                        string potentialGroup = cutoff >= 0 ? partNoParens.Substring(0, cutoff) : partNoParens;
+
+                                        if (baseGroupTo == null && IsGroupKeyword(potentialGroup)) {
+                                            baseGroupTo = potentialGroup;
+                                            cleanParts[k] = potentialGroup; // Store just the base group name
+                                        } else {
+                                            cleanParts[k] = partNoParens; // Store literals
+                                        }
+                                    }
+
+                                    if (baseGroupTo != null && captures.ContainsKey(baseGroupTo) && captures[baseGroupTo].Count > 0) {
+                                        if (!captureIndices.ContainsKey(baseGroupTo)) captureIndices[baseGroupTo] = 0;
+                                        int cIdx = captureIndices[baseGroupTo];
+                                        if (cIdx >= captures[baseGroupTo].Count) cIdx = captures[baseGroupTo].Count - 1; // reuse the last capture once they run out
+
+                                        string capturedPhoneme = captures[baseGroupTo][cIdx];
+
+                                        string reconstructed = "";
+                                        for (int k = 0; k < cleanParts.Length; k++) {
+                                            if (cleanParts[k] == baseGroupTo) {
+                                                reconstructed += capturedPhoneme;
+                                            } else {
+                                                reconstructed += cleanParts[k];
+                                            }
+                                        }
+                                        finalPhonemes.Add(reconstructed);
+                                        captureIndices[baseGroupTo]++;
+                                    } else {
+                                        finalPhonemes.Add(string.Join("", cleanParts));
+                                    }
                                 }
                             }
 
-                            idx += fromArray.Length;
+                            idx += fromArray.Count;
                             replaced = true;
                             break;
                         }
                     }
                 }
 
+                // Fallback for single-phoneme splitting rules
                 if (!replaced && validSplits.Any()) {
                     string currentPhoneme = inputPhonemes[idx];
                     bool singleReplaced = false;
 
                     foreach (var rule in validSplits) {
-                        if (rule.from is IList || rule.from is string[]) continue;
+                        List fromArray = rule.FromList;
+                        if (fromArray == null || fromArray.Count != 1) continue;
 
-                        string rulePh = rule.from?.ToString();
-                        if (rulePh == null) continue;
+                        string rulePh = fromArray[0];
+                        string cleanRulePh = rulePh.Trim('(', ')');
+                        string baseRulePh = cleanRulePh.Split(new[] { '!', '=', '&' })[0];
 
-                        if (IsGroupKeyword(rulePh) ? IsGroupMatch(rulePh, currentPhoneme) : rulePh == currentPhoneme) {
+                        if (IsGroupKeyword(baseRulePh) ? IsGroupMatch(rulePh, currentPhoneme) : rulePh == currentPhoneme) {
 
-                            string[] toArray = null;
-                            if (rule.to is IList toList) {
-                                toArray = toList.Cast().Select(x => x?.ToString()).ToArray();
-                            } else if (rule.to is string[] strArr) {
-                                toArray = strArr;
-                            }
+                            List toArray = rule.ToList;
 
-                            if (toArray != null) {
+                            if (toArray != null && toArray.Count > 0) {
                                 foreach(string toPh in toArray) {
-                                    finalPhonemes.Add(toPh == rulePh ? currentPhoneme : toPh);
+                                    string[] parts = toPh.Split('+');
+                                    string[] cleanParts = new string[parts.Length];
+                                    string baseGroupTo = null;
+
+                                    for (int k = 0; k < parts.Length; k++) {
+                                        string partNoParens = parts[k].Trim('(', ')');
+                                        int cutoff = partNoParens.IndexOfAny(new[] { '!', '=', '&' });
+                                        string potentialGroup = cutoff >= 0 ? partNoParens.Substring(0, cutoff) : partNoParens;
+
+                                        if (baseGroupTo == null && IsGroupKeyword(potentialGroup)) {
+                                            baseGroupTo = potentialGroup;
+                                            cleanParts[k] = potentialGroup;
+                                        } else {
+                                            cleanParts[k] = partNoParens;
+                                        }
+                                    }
+
+                                    if (baseGroupTo != null) {
+                                        string reconstructed = "";
+                                        for (int k = 0; k < cleanParts.Length; k++) {
+                                            if (cleanParts[k] == baseGroupTo) {
+                                                reconstructed += currentPhoneme;
+                                            } else {
+                                                reconstructed += cleanParts[k];
+                                            }
+                                        }
+                                        finalPhonemes.Add(reconstructed);
+                                    } else {
+                                        finalPhonemes.Add(string.Join("", cleanParts));
+                                    }
                                 }
                                 singleReplaced = true;
                                 break;
-                            } else if (rule.to is string toStr) {
-                                finalPhonemes.Add(toStr == rulePh ? currentPhoneme : toStr);
-                                singleReplaced = true;
-                                break;
                             }
                         }
                     }
@@ -1096,11 +1363,12 @@ private Syllable ApplyBoundaryReplacements(Syllable syllable) {
             bool hasPrevV = !string.IsNullOrEmpty(syllable.prevV);
             bool hasV = !string.IsNullOrEmpty(syllable.v);
 
-            if (hasPrevV) currentPhonemes.Add(syllable.prevV);
+            currentPhonemes.Add(hasPrevV ? syllable.prevV : "null"); // "null" marks a missing previous vowel
+
             if (syllable.cc != null) currentPhonemes.AddRange(syllable.cc);
             if (hasV) currentPhonemes.Add(syllable.v);
 
-            bool isBoundary = hasPrevV && syllable.position == 0;
+            bool isBoundary = !hasPrevV || syllable.position == 0;
             List finalPhonemes = ApplyReplacements(currentPhonemes, isBoundary);
 
             string newPrevV = "";
@@ -1108,8 +1376,13 @@ private Syllable ApplyBoundaryReplacements(Syllable syllable) {
             List newCc = new List();
 
             if (finalPhonemes.Count > 0) {
-                if (hasPrevV) {
-                    newPrevV = finalPhonemes[0];
+                string firstPh = finalPhonemes[0];
+
+                if (firstPh == "null") {
+                    newPrevV = "";
+                    finalPhonemes.RemoveAt(0);
+                } else {
+                    newPrevV = firstPh;
                     finalPhonemes.RemoveAt(0);
                 }
                 if (hasV && finalPhonemes.Count > 0) {
@@ -1142,8 +1415,7 @@ private Ending ApplyBoundaryReplacements(Ending ending) {
             List currentPhonemes = new List();
             bool hasPrevV = !string.IsNullOrEmpty(ending.prevV);
-
-            if (hasPrevV) currentPhonemes.Add(ending.prevV);
+            currentPhonemes.Add(hasPrevV ? ending.prevV : "null"); // "null" marks a missing previous vowel
             if (ending.cc != null) currentPhonemes.AddRange(ending.cc);
 
             List finalPhonemes = ApplyReplacements(currentPhonemes, true);
@@ -1152,8 +1424,12 @@ private Ending ApplyBoundaryReplacements(Ending ending) {
             List newCc = new List();
 
             if (finalPhonemes.Count > 0) {
-                if (hasPrevV) {
-                    newPrevV = finalPhonemes[0];
+                string firstPh = finalPhonemes[0];
+                if (firstPh == "null") {
+                    newPrevV = "";
+                    finalPhonemes.RemoveAt(0);
+                } else {
+                    newPrevV = firstPh;
                     finalPhonemes.RemoveAt(0);
                 }
                 newCc.AddRange(finalPhonemes);
@@ -1254,33 +1530,111 @@ private List ExtractVowels(string[] symbols) {
             }
             return vowelIds;
         }
+
+        private Phoneme[] MakePhonemes(List phonemeSymbols, int containerLength, int position, bool isEnding, int tone = 0, PhonemeAttributes[] attributes = null, int globalStartIndex = 0) {
+            var phonemes = new Phoneme[phonemeSymbols.Count];
+
+            int[] trueLengths = new int[phonemeSymbols.Count];
+            for (int i = 1; i < phonemeSymbols.Count; i++) {
+                var prevPhonemeI = phonemeSymbols.Count - i;
+                var currentPhonemeI = phonemeSymbols.Count - i - 1;
+
+                var nextGlobalIndex = globalStartIndex + prevPhonemeI;
+                var nextPAttr = attributes?.FirstOrDefault(a => a.index == nextGlobalIndex) ?? default;
+
+                string nextAlias = phonemeSymbols[prevPhonemeI];
+                string currentAlias = phonemeSymbols[currentPhonemeI];
 
-        private Phoneme[] MakePhonemes(List phonemeSymbols, int containerLength, int position, bool isEnding) {
+                double baseLengthMs;
+                double stretch = nextPAttr.consonantStretchRatio ?? 1.0;
+
+                // Multiplier comes from the current alias, while the OTO length below comes from the alias that follows it
+                double overrideRatio = currentAlias != null ? GetTransitionMultiplier(currentAlias) : 1.0;
+
+                if (overrideRatio != 1.0) {
+                    // If there's a custom multiplier, use the Constant length to prevent giant envelopes
+                    baseLengthMs = GetTransitionBasicLengthMsByConstant();
+                    stretch *= overrideRatio;
+                } else {
+                    // Default behavior: use OTO preutterance
+                    baseLengthMs = GetTransitionBasicLengthMsByOto(nextAlias, tone, nextPAttr);
+                }
+
+                trueLengths[i] = MsToTick(baseLengthMs * stretch);
+            }
+
+            // Count the consecutive glide aliases sitting directly before the last phoneme (never for endings)
+            int anchorI = 0;
+            if (!isEnding) {
+                for (int i = 1; i < phonemeSymbols.Count; i++) {
+                    var phonemeI = phonemeSymbols.Count - i - 1;
+                    if (phonemeSymbols[phonemeI] != null && IsGlide(phonemeSymbols[phonemeI])) {
+                        anchorI = i;
+                    } else {
+                        break;
+                    }
+                }
+            }
 
-            var phonemes = new Phoneme[phonemeSymbols.Count];
             for (var i = 0; i < phonemeSymbols.Count; i++) {
                 var phonemeI = phonemeSymbols.Count - i - 1;
-                
+                var globalIndex = globalStartIndex + phonemeI;
                 var validatedAlias = phonemeSymbols[phonemeI];
+
                 if (validatedAlias != null) {
-                    phonemes[phonemeI].phoneme = validatedAlias;
-                    var transitionLengthTick = MsToTick(GetTransitionBasicLengthMs(phonemes[phonemeI].phoneme));
+                    phonemes[phonemeI] = new Phoneme {
+                        phoneme = validatedAlias,
+                        index = globalIndex
+                    };
+
                     if (i == 0) {
-                        if (!isEnding) {
-                            transitionLengthTick = 0;
+                        if (isEnding) {
+                            var pAttr = attributes?.FirstOrDefault(a => a.index == globalIndex) ?? default;
+                            double baseLengthMs;
+                            double stretch = pAttr.consonantStretchRatio ?? 1.0;
+
+                            double overrideRatio = GetTransitionMultiplier(validatedAlias); // validatedAlias is non-null in this branch
+
+                            if (overrideRatio != 1.0) {
+                                baseLengthMs = GetTransitionBasicLengthMsByConstant();
+                                stretch *= overrideRatio;
+                            } else {
+                                baseLengthMs = GetTransitionBasicLengthMsByOto(validatedAlias, tone, pAttr);
+                            }
+
+                            // NOTE(review): stretch is computed above but neither branch below applies it; a dead store that used it was removed here. Confirm the intent.
+
+                            if (NoGap) {
+                                // Snapped mode: Use a visible 50-tick anchor capped at 1/3 of the note
+                                int targetTicks = 50;
+                                int maxAllowed = containerLength / 3;
+                                phonemes[phonemeI].position = System.Math.Min(targetTicks, maxAllowed);
+                            } else {
+                                // Natural mode: Use the full Preutterance
+                                phonemes[phonemeI].position = MsToTick(baseLengthMs);
+                            }
                         } else {
-                            transitionLengthTick *= 2;
+                            int sum = 0;
+                            for (int k = 1; k <= anchorI; k++) {
+                                sum += trueLengths[k];
+                            }
+                            phonemes[phonemeI].position = -sum; // negated total length of the counted glides
                         }
+                    } else {
+                        // VC transitions keep their full stretched length
+                        phonemes[phonemeI].position = trueLengths[i];
                     }
-                    // yet it's actually a length; will became position in ScalePhonemes
-                    phonemes[phonemeI].position = transitionLengthTick;
                 } else {
-                    phonemes[phonemeI].phoneme = null;
-                    phonemes[phonemeI].position = 0;
+                    // Initialize empty slots properly to avoid null crashes
+                    phonemes[phonemeI] = new Phoneme {
+                        phoneme = null,
+                        position = 0,
+                        index = globalIndex
+                    };
                 }
             }
-            
-            return ScalePhonemes(phonemes, position, isEnding ? phonemeSymbols.Count : phonemeSymbols.Count - 1, containerLength);
+
+            return ScalePhonemes(phonemes, position, phonemeSymbols.Count - 1, containerLength);
         }
 
         private string ValidateAliasIfNeeded(string alias, int tone) {
@@ -1292,18 +1646,23 @@ private string ValidateAliasIfNeeded(string alias, int tone) {
 
         private Phoneme[] ScalePhonemes(Phoneme[] phonemes, int startPosition, int phonemesCount, int containerLengthTick = -1) {
             var offset = 0;
-            // reserved length for prev vowel, double length of a transition;
-            var containerSafeLengthTick = MsToTick(GetTransitionBasicLengthMsByConstant() * 2);
             var lengthModifier = 1.0;
+
             if (containerLengthTick > 0) {
                 var allTransitionsLengthTick = phonemes.Sum(n => n.position);
-                if (allTransitionsLengthTick + containerSafeLengthTick > containerLengthTick) {
-                    lengthModifier = (double)containerLengthTick / (allTransitionsLengthTick + containerSafeLengthTick);
+
+                // Instead of a fixed "Constant * 2", use a proportional limit.
+                // This allows transitions to occupy up to 80% of the note.
+                var maxAllowedConsonantTick = (int)(containerLengthTick * 0.8);
+
+                if (allTransitionsLengthTick > maxAllowedConsonantTick) {
+                    lengthModifier = (double)maxAllowedConsonantTick / allTransitionsLengthTick;
                 }
             }
 
             for (var i = phonemes.Length - 1; i >= 0; i--) {
-                var finalLengthTick = (int)(phonemes[i].position * lengthModifier) / 5 * 5;
+                if (phonemes[i].phoneme == null) continue;
+                var finalLengthTick = (int)(phonemes[i].position * lengthModifier);
                 phonemes[i].position = startPosition - finalLengthTick - offset;
                 offset += finalLengthTick;
             }
diff --git a/OpenUtau.Test/Plugins/DeVccvTest.cs b/OpenUtau.Test/Plugins/DeVccvTest.cs
index 44789c6cb..6e7adf00e 100644
--- a/OpenUtau.Test/Plugins/DeVccvTest.cs
+++ b/OpenUtau.Test/Plugins/DeVccvTest.cs
@@ -23,7 +23,7 @@ protected override Phonemizer CreatePhonemizer() {
         [InlineData("de_vccv",
             new string[] { "Mond", "+", "+", "+", "Licht", "+" },
             new string[] { "G3", "D3", "G3", "G3", "D3", "G3" },
-            new string[] { "- moG3", "onG3", "nt -G3", "t lG3", "lID3", "ICG3", "Ct -G3" })]
+            new string[] { "- moG3", "onG3", "nt -G3", "t lG3", "lID3", "ICD3", "Ct -G3" })]
         public void PhonemizeTest(string singerName, string[] lyrics, string[] tones, string[] aliases) {
             RunPhonemizeTest(singerName, lyrics, RepeatString(lyrics.Length, ""), tones, RepeatString(lyrics.Length, ""), aliases);
         }