Skip to content

Commit be98305

Browse files
authored
[runtime] rename vocab to g2p prosody vocab (#239)
1 parent b917a5b commit be98305

3 files changed

Lines changed: 15 additions & 16 deletions

File tree

runtime/core/bin/tts_main.cc

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,6 @@ DEFINE_string(vits_flags, "", "vits flags file");
3333
DEFINE_string(tagger, "", "tagger fst file");
3434
DEFINE_string(verbalizer, "", "verbalizer fst file");
3535

36-
// Tokenizer
37-
DEFINE_string(vocab, "", "tokenizer vocab file");
38-
3936
// G2P for English
4037
DEFINE_string(cmudict, "", "cmudict for english words");
4138
DEFINE_string(g2p_en_model, "", "english g2p fst model for oov");
@@ -46,6 +43,7 @@ DEFINE_string(char2pinyin, "", "chinese character to pinyin");
4643
DEFINE_string(pinyin2id, "", "pinyin to id");
4744
DEFINE_string(pinyin2phones, "", "pinyin to phones");
4845
DEFINE_string(g2p_prosody_model, "", "g2p prosody model file");
46+
DEFINE_string(g2p_prosody_vocab, "", "g2p prosody vocab file");
4947

5048
// VITS
5149
DEFINE_string(speaker2id, "", "speaker to id");
@@ -77,8 +75,8 @@ int main(int argc, char* argv[]) {
7775
: nullptr;
7876

7977
auto g2p_prosody = std::make_shared<wetts::G2pProsody>(
80-
FLAGS_g2p_prosody_model, FLAGS_vocab, FLAGS_char2pinyin, FLAGS_pinyin2id,
81-
FLAGS_pinyin2phones, g2p_en);
78+
FLAGS_g2p_prosody_model, FLAGS_g2p_prosody_vocab, FLAGS_char2pinyin,
79+
FLAGS_pinyin2id, FLAGS_pinyin2phones, g2p_en);
8280
auto model = std::make_shared<wetts::TtsModel>(
8381
FLAGS_vits_encoder_model, FLAGS_vits_decoder_model, FLAGS_speaker2id,
8482
FLAGS_phone2id, FLAGS_sampling_rate, tn, g2p_prosody, FLAGS_chunk_size,

runtime/core/frontend/g2p_prosody.cc

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,17 @@
3131
namespace wetts {
3232

3333
G2pProsody::G2pProsody(const std::string& g2p_prosody_model,
34-
const std::string& vocab,
34+
const std::string& g2p_prosody_vocab,
3535
const std::string& lexicon_file,
3636
const std::string& pinyin2id,
3737
const std::string& pinyin2phones,
3838
std::shared_ptr<G2pEn> g2p_en)
3939
: g2p_en_(std::move(g2p_en)), model_(g2p_prosody_model) {
40-
std::ifstream in(vocab);
40+
std::ifstream in(g2p_prosody_vocab);
4141
std::string line;
4242
int id = 0;
4343
while (getline(in, line)) {
44-
vocab_[line] = id;
44+
g2p_vocab_[line] = id;
4545
id++;
4646
}
4747
lexicon_ = std::make_shared<Lexicon>(lexicon_file);
@@ -61,7 +61,7 @@ void G2pProsody::Tokenize(const std::vector<std::string>& words,
6161
std::vector<int64_t>* token_ids,
6262
std::vector<int>* token_offsets) {
6363
token_ids->clear();
64-
token_ids->emplace_back(vocab_.at(CLS_));
64+
token_ids->emplace_back(g2p_vocab_.at(CLS_));
6565
token_offsets->clear();
6666
int offset = 1; // 0 is taken by CLS_
6767
for (const std::string& word : words) {
@@ -71,20 +71,20 @@ void G2pProsody::Tokenize(const std::vector<std::string>& words,
7171
std::vector<std::string> chars;
7272
SplitUTF8StringToChars(word, &chars);
7373
for (const std::string& ch : chars) {
74-
token_ids->emplace_back(vocab_.at(ch));
74+
token_ids->emplace_back(g2p_vocab_.at(ch));
7575
offset++;
7676
}
7777
} else if (word[0] < 128 && std::isalnum(word[0])) {
7878
// English or digit word: map the whole word to a single UNK token
79-
token_ids->emplace_back(vocab_.at(UNK_));
79+
token_ids->emplace_back(g2p_vocab_.at(UNK_));
8080
offset++;
8181
} else {
82-
std::string v = vocab_.find(word) != vocab_.end() ? word : UNK_;
83-
token_ids->emplace_back(vocab_.at(v));
82+
std::string v = g2p_vocab_.find(word) != g2p_vocab_.end() ? word : UNK_;
83+
token_ids->emplace_back(g2p_vocab_.at(v));
8484
offset++;
8585
}
8686
}
87-
token_ids->emplace_back(vocab_.at(SEP_));
87+
token_ids->emplace_back(g2p_vocab_.at(SEP_));
8888
}
8989

9090
void G2pProsody::Forward(const std::vector<std::string>& words,

runtime/core/frontend/g2p_prosody.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ namespace wetts {
3333
class G2pProsody {
3434
public:
3535
explicit G2pProsody(const std::string& g2p_prosody_model,
36-
const std::string& vocab, const std::string& lexicon_file,
36+
const std::string& g2p_prosody_vocab,
37+
const std::string& lexicon_file,
3738
const std::string& pinyin2id,
3839
const std::string& pinyin2phones,
3940
std::shared_ptr<G2pEn> g2p_en = nullptr);
@@ -52,7 +53,7 @@ class G2pProsody {
5253
const std::string SEP_ = "[SEP]";
5354
const std::string UNK_ = "[UNK]";
5455
OnnxModel model_;
55-
std::unordered_map<std::string, int> vocab_;
56+
std::unordered_map<std::string, int> g2p_vocab_;
5657
std::unordered_map<std::string, int> phones_;
5758
std::shared_ptr<G2pEn> g2p_en_;
5859
std::shared_ptr<Lexicon> lexicon_;

0 commit comments

Comments
 (0)