From 4de2fdbd3daa0c5cf4a95224d59ba962ff18cff9 Mon Sep 17 00:00:00 2001 From: Kevin Martin Jose Date: Sat, 7 Mar 2015 23:24:08 +0530 Subject: [PATCH 1/3] masking and inscript working --- api.h | 9 +++++++++ schemes/ml-inscript | 8 +++++++- symbol-table.c | 46 +++++++++++++++++++++++++++++++++++++++++++++ symbol-table.h | 3 +++ varnam.c | 25 ++++++++++++++++++++++++ varnamc | 6 ++++++ varnamruby.rb | 3 ++- vtypes.h | 1 + 8 files changed, 99 insertions(+), 2 deletions(-) diff --git a/api.h b/api.h index a18e9df..de85a14 100644 --- a/api.h +++ b/api.h @@ -325,6 +325,15 @@ varnam_get_all_tokens( varray **tokens ); +/* Copies all word breakers in the symbol table to list. +Word breakers are used in the libvarnam-ibus project to denote the end of a word. +Each scheme file can use a different set of characters as word breakers, as +specified in the scheme file. For an example, see schemes/ml-inscript. +*/ +int +varnam_word_breakers(varnam *handle, char *list, int max_count); + + /** * Enable logging. * diff --git a/schemes/ml-inscript b/schemes/ml-inscript index b4ae9c7..0b50191 100644 --- a/schemes/ml-inscript +++ b/schemes/ml-inscript @@ -16,7 +16,13 @@ infer_dead_consonants false $zwnj = "\u{200c}" $zwj = "\u{200d}" - +word_breakers "." => ".", + "," => ",", + "?" => "?", + "!" 
=> "!",
+              "(" => "(",
+              ")" => ")"
+
 vowels "D" => "അ",
        "E" => "ആ",
        "F" => "ഇ",
diff --git a/symbol-table.c b/symbol-table.c
index f748fed..9f4e16b 100644
--- a/symbol-table.c
+++ b/symbol-table.c
@@ -629,6 +629,52 @@ vst_add_metadata (varnam *handle, const char* key, const char* value)
     return VARNAM_SUCCESS;
 }
 
+/* Appends the pattern of every VARNAM_WORD_BREAKER symbol to list.
+ * Returns VARNAM_SUCCESS, or VARNAM_ERROR after recording the sqlite
+ * error message on the handle. */
+int
+vst_get_word_breakers(varnam *handle, strbuf *list)
+{
+    int rc;
+    int status = VARNAM_ERROR;
+    sqlite3 *db;
+    sqlite3_stmt *stmt = NULL;
+    const unsigned char *pattern;
+    const char *sql = "select pattern from symbols where type=?1";
+
+    db = handle->internal->db;
+
+    rc = sqlite3_prepare_v2(db, sql, -1, &stmt, NULL);
+    if(rc != SQLITE_OK)
+    {
+        set_last_error(handle, "Failed to prepare statement : %s", sqlite3_errmsg(db));
+        goto done;
+    }
+
+    rc = sqlite3_bind_int(stmt, 1, VARNAM_WORD_BREAKER);
+    if(rc != SQLITE_OK)
+    {
+        /* Read the message before finalize, which may reset it. */
+        set_last_error(handle, "Could not bind : %s", sqlite3_errmsg(db));
+        goto done;
+    }
+
+    while((rc = sqlite3_step(stmt)) == SQLITE_ROW)
+    {
+        pattern = sqlite3_column_text(stmt, 0);
+        if(pattern != NULL)  /* NULL pattern column: nothing to append */
+            strbuf_add(list, (const char*) pattern);
+    }
+
+    if(rc != SQLITE_DONE)
+        set_last_error(handle, "%s", sqlite3_errmsg(db));
+    else
+        status = VARNAM_SUCCESS;
+done:
+    sqlite3_finalize(stmt); /* no-op when stmt is NULL */
+    return status;
+}
+
 int
 vst_load_scheme_details(varnam *handle, vscheme_details *output)
 {
diff --git a/symbol-table.h b/symbol-table.h
index d1100d5..e8e9d7d 100644
--- a/symbol-table.h
+++ b/symbol-table.h
@@ -107,4 +107,7 @@ vst_stamp_version (varnam *handle);
 int
 vst_load_scheme_details(varnam *handle, vscheme_details *output);
 
+int
+vst_get_word_breakers(varnam *handle, strbuf *list);
+
 #endif
diff --git a/varnam.c b/varnam.c
index 1267733..e1d5ad3 100644
--- a/varnam.c
+++ b/varnam.c
@@ -524,6 +524,31 @@ varnam_get_all_scheme_details()
     return schemeDetails;
 }
 
+/*For use with ibus*/
+/*To Do : Document properly*/
+/*allocated - size already allocated to char *word_breakers*/
+int 
+varnam_word_breakers(varnam *handle, char *word_breakers, int allocated)
+{
+    /* word_breakers is a caller-owned buffer of `allocated` bytes; it is never
+     * reallocated because the caller would be left with a stale pointer. */
+    strbuf *list;
+
+    if(handle == NULL || word_breakers == NULL || allocated <= 0)
+        return VARNAM_ERROR;
+    list = get_pooled_string(handle);
+    if(vst_get_word_breakers(handle, list) != VARNAM_SUCCESS) {
+        set_last_error(handle, "Could not obtain word breakers");
+        return VARNAM_ERROR;
+    }
+    if((size_t) list->length >= (size_t) allocated) {
+        set_last_error(handle, "Buffer too small for the word breakers");
+        return VARNAM_ERROR;
+    }
+    strcpy(word_breakers, strbuf_to_s(list));
+    return VARNAM_SUCCESS;
+}
+
 int
 varnam_get_scheme_details(varnam *handle, vscheme_details **details)
 {
diff --git a/varnamc b/varnamc
index 09e8988..691f258 100755
--- a/varnamc
+++ b/varnamc
@@ -1377,5 +1377,11 @@ def exceptions_stem(hash, options={})
     end
   end
 end
+
+def word_breakers(options={}, hash)
+  _ensure_sanity(hash)
+  _create_token(hash, Varnam::VARNAM_WORD_BREAKER, options)
+end
+
 
 do_action
diff --git a/varnamruby.rb b/varnamruby.rb
index 3245c0d..f8ffbaf 100644
--- a/varnamruby.rb
+++ b/varnamruby.rb
@@ -83,7 +83,8 @@ module Varnam
   VARNAM_TOKEN_OTHER = 10
   VARNAM_TOKEN_NON_JOINER = 11
   VARNAM_TOKEN_JOINER = 12
-
+  VARNAM_WORD_BREAKER = 13
+
   VARNAM_MATCH_EXACT = 1
   VARNAM_MATCH_POSSIBILITY = 2
 
diff --git a/vtypes.h b/vtypes.h
index 5655fc9..a580981 100644
--- a/vtypes.h
+++ b/vtypes.h
@@ -38,6 +38,7 @@
 #define VARNAM_TOKEN_OTHER 10
 #define VARNAM_TOKEN_NON_JOINER 11
 #define VARNAM_TOKEN_JOINER 12
+#define VARNAM_WORD_BREAKER 13
 
 /* token flags */
 #define VARNAM_TOKEN_FLAGS_MORE_MATCHES_FOR_PATTERN (1 << 0)

From a431811af9641827554f69983a1f2309fa44215a Mon Sep 17 00:00:00 2001
From: Kevin Martin Jose
Date: Wed, 25 Mar 2015 21:48:52 +0530
Subject: [PATCH 2/3] Added word breakers to schemes/ml

---
 schemes/ml          | 7 +++++++
 schemes/ml-inscript | 5 +++++
 2 files changed, 12 insertions(+)

diff --git a/schemes/ml b/schemes/ml
index 6017f25..6e04715 100644
--- a/schemes/ml
+++ b/schemes/ml
@@ -17,6 +17,13 @@ virama "~" => "്"
 
 infer_dead_consonants true
 
+word_breakers "." 
=> ".", + "," => ",", + "?" => "?", + "!" => "!", + "(" => "(", + ")" => ")" + vowels "a" => "അ", [["a"], "aa", "A"] => ["ആ", "ാ"], "i" => ["ഇ", "ി"], diff --git a/schemes/ml-inscript b/schemes/ml-inscript index 0b50191..738e82e 100644 --- a/schemes/ml-inscript +++ b/schemes/ml-inscript @@ -16,6 +16,11 @@ infer_dead_consonants false $zwnj = "\u{200c}" $zwj = "\u{200d}" +#word_breakers are symbols that denote the end +#of the word the user is typing. When a word +#breaker is encountered, IBus commits the typed word +#and begins a new word. + word_breakers "." => ".", "," => ",", "?" => "?", From 318e6a778a2a6b8ec64d69b475bac9f9f4686949 Mon Sep 17 00:00:00 2001 From: Kevin Martin Jose Date: Wed, 15 Apr 2015 23:27:24 +0530 Subject: [PATCH 3/3] Hack to make inscript learn the chills --- schemes/ml-inscript | 21 +++++++++++++++++---- varnamc | 4 ++-- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/schemes/ml-inscript b/schemes/ml-inscript index 738e82e..82d0621 100644 --- a/schemes/ml-inscript +++ b/schemes/ml-inscript @@ -93,7 +93,18 @@ consonants "k" => "ക", "J" => "റ", "#" => "്ര", "&" => "ക്ഷ", - "=" => "ൃ" + "=" => "ൃ", + "ൻ" => "ൻ", + "ൺ" => "ൺ", + "ൽ" => "ൽ", + "ൾ" => "ൾ", + "ർ" => "ർ" +#The above chill maps are necessary due to a bug: +#inscript treats an atomic chill as a token. +#However, the token is not in the vst symbols table. +#This somehow makes varnam assign the type '10' (VARNAM_TOKEN_OTHER) to the chill. +#If a word contains tokens of type 10, it is not learned. 
+#So the absurd, nonsensical chill maps stay there for the time being. numbers "1" => "൧", "2" => "൨", @@ -108,8 +119,10 @@ numbers "1" => "൧", symbols "_" => "ഃ" -others "]" => $zwj, - "\\" => $zwnj +#non_joiner "\\" => $zwnj +joiner "]" => $zwj + + @@ -128,4 +141,4 @@ others "]" => $zwj, - \ No newline at end of file + diff --git a/varnamc b/varnamc index e5a2bde..bbad1bd 100755 --- a/varnamc +++ b/varnamc @@ -830,13 +830,13 @@ end def non_joiner(hash) _ensure_sanity(hash) - _create_token(hash, Varnam::VARNAM_TOKEN_NON_JOINER); + _create_token(hash, Varnam::VARNAM_TOKEN_NON_JOINER) $overridden_default_symbols.push Varnam::VARNAM_TOKEN_NON_JOINER end def joiner(hash) _ensure_sanity(hash) - _create_token(hash, Varnam::VARNAM_TOKEN_JOINER); + _create_token(hash, Varnam::VARNAM_TOKEN_JOINER) $overridden_default_symbols.push Varnam::VARNAM_TOKEN_JOINER end