1
0
Fork 0

Vietnamese optimizations

* Simplified the layout to reduce the number of suggestions per key and to be more consistent with the original Nokia way

* Added a migration to enforce the new layout
This commit is contained in:
NTT-2k5 2024-07-25 18:57:39 +07:00 committed by Dimo Karaivanov
parent ef32d5adaa
commit caac97c10a
7 changed files with 7364 additions and 7353 deletions

View file

@ -4,11 +4,11 @@ dictionaryFile: vi-utf8.csv
layout:
- [SPECIAL] # 0
- [PUNCTUATION] # 1
- [a, b, c, ă, â, á, ắ, ấ, à, ằ, ầ, ả, ẳ, ẩ, ã, ẵ, ẫ, ạ, ặ, ậ] # 2
- [d, e, f, đ, ê, é, ế, è, ề, ẻ, ể, ẽ, ễ, ẹ, ệ] # 3
- [g, h, i, í, ì, ỉ, ĩ, ị] # 4
- [j, k, l] # 5
- [m, n, o, ô, ơ, ó, ố, ớ, ò, ồ, ờ, ỏ, ổ, ở, õ, ỗ, ỡ, ọ, ộ, ợ] # 6
- [a, ă, â, b, c, ́] # 2
- [d, đ, e, ê, f, ̀] # 3
- [g, h, i, ̉] # 4
- [j, k, l, ̃] # 5
- [m, n, o, ô, ơ, ̣] # 6
- [p, q, r, s] # 7
- [t, u, v, ư, ú, ứ, ù, ừ, ủ, ử, ũ, ữ, ụ, ự] # 8
- [w, x, y, z, ý, ỳ, ỷ, ỹ, ỵ] # 9
- [t, u, ư, v] # 8
- [w, x, y, z] # 9

File diff suppressed because it is too large Load diff

View file

@ -21,6 +21,11 @@ class Migration {
" SET maxWordsPerSequence = " + SettingsStore.SUGGESTIONS_POSITIONS_LIMIT +
", fileHash = '0'",
832
),
new Migration(
// enforce the new Vietnamese layout
"DELETE FROM " + Tables.LANGUAGES_META + " WHERE langId = 481590",
952
)
};

View file

@ -13,11 +13,14 @@ import io.github.sspanak.tt9.languages.LanguageKind;
class LocaleWordsSorter {
private final Language language;
private final Pattern sortingPattern;
LocaleWordsSorter(@Nullable Language language) {
sortingPattern = LanguageKind.isIndic(language) ? Pattern.compile("\\p{L}\\p{M}+") : null;
this.language = language;
boolean isAlphabetWithModifiers = LanguageKind.isIndic(language) || LanguageKind.isVietnamese(language);
sortingPattern = isAlphabetWithModifiers ? Pattern.compile("\\p{L}\\p{M}+") : null;
}
@ -49,7 +52,9 @@ class LocaleWordsSorter {
}
boolean shouldSort(@Nullable Language language, @NonNull String stem, @NonNull String digitSequence) {
return LanguageKind.isIndic(language) && !stem.isEmpty() && stem.length() == digitSequence.length() - 1;
/**
 * Tells whether the word list for the current input should be re-sorted.
 * <p>
 * For Indic languages this is the case only when the stem is not empty and is exactly one
 * character shorter than the digit sequence. For Vietnamese it is always the case.
 *
 * @param stem          the current word stem
 * @param digitSequence the digit sequence typed so far
 * @return {@code true} when sorting should be applied, {@code false} otherwise
 */
boolean shouldSort(@NonNull String stem, @NonNull String digitSequence) {
	final boolean isIndicWithStemOneShorter =
		LanguageKind.isIndic(language)
		&& !stem.isEmpty()
		&& stem.length() == digitSequence.length() - 1;

	// Vietnamese is only consulted when the Indic condition did not already match.
	return isIndicWithStemOneShorter || LanguageKind.isVietnamese(language);
}
}

View file

@ -101,7 +101,7 @@ public class WordPredictions extends Predictions {
words.addAll(dbWords);
} else {
suggestStem();
dbWords = localeWordsSorter.shouldSort(language, stem, digitSequence) ? localeWordsSorter.sort(dbWords) : dbWords;
dbWords = localeWordsSorter.shouldSort(stem, digitSequence) ? localeWordsSorter.sort(dbWords) : dbWords;
dbWords = rearrangeByPairFrequency(dbWords);
suggestMissingWords(generatePossibleStemVariations(dbWords));
suggestMissingWords(dbWords.isEmpty() ? generateWordVariations(inputWord) : dbWords);

View file

@ -20,4 +20,5 @@ public class LanguageKind {
/**
 * Checks whether the language counts as Indic, i.e. whether {@code isGujarati()} or
 * {@code isHindi()} accepts it.
 *
 * @param language the language to test
 * @return {@code true} if either check matches, {@code false} otherwise
 */
public static boolean isIndic(Language language) {
	if (isGujarati(language)) {
		return true;
	}

	return isHindi(language);
}
/**
 * Checks whether the language ID equals the {@code KOREAN} constant.
 *
 * @param language the language to test; {@code null} is allowed
 * @return {@code true} if the language is non-null and its ID is {@code KOREAN}
 */
public static boolean isKorean(Language language) {
	if (language == null) {
		return false;
	}

	return language.getId() == KOREAN;
}
/**
 * Checks whether the language ID equals 54645, the ID this class treats as Ukrainian.
 *
 * @param language the language to test; {@code null} is allowed
 * @return {@code true} if the language is non-null and its ID is 54645
 */
public static boolean isUkrainian(Language language) {
	if (language == null) {
		return false;
	}

	return language.getId() == 54645;
}
/**
 * Checks whether the language ID equals 481590, the ID this class treats as Vietnamese.
 *
 * @param language the language to test; {@code null} is allowed
 * @return {@code true} if the language is non-null and its ID is 481590
 */
public static boolean isVietnamese(Language language) {
	if (language == null) {
		return false;
	}

	return language.getId() == 481590;
}
}