Vietnamese optimizations
* Simplified the layout to reduce the number of suggestions per key and to be more consistent with the original Nokia way
* Added a migration to enforce the new layout
This commit is contained in:
parent
ef32d5adaa
commit
caac97c10a
7 changed files with 7364 additions and 7353 deletions
|
|
@ -4,11 +4,11 @@ dictionaryFile: vi-utf8.csv
|
||||||
layout:
|
layout:
|
||||||
- [SPECIAL] # 0
|
- [SPECIAL] # 0
|
||||||
- [PUNCTUATION] # 1
|
- [PUNCTUATION] # 1
|
||||||
- [a, b, c, ă, â, á, ắ, ấ, à, ằ, ầ, ả, ẳ, ẩ, ã, ẵ, ẫ, ạ, ặ, ậ] # 2
|
- [a, ă, â, b, c, ́] # 2
|
||||||
- [d, e, f, đ, ê, é, ế, è, ề, ẻ, ể, ẽ, ễ, ẹ, ệ] # 3
|
- [d, đ, e, ê, f, ̀] # 3
|
||||||
- [g, h, i, í, ì, ỉ, ĩ, ị] # 4
|
- [g, h, i, ̉] # 4
|
||||||
- [j, k, l] # 5
|
- [j, k, l, ̃] # 5
|
||||||
- [m, n, o, ô, ơ, ó, ố, ớ, ò, ồ, ờ, ỏ, ổ, ở, õ, ỗ, ỡ, ọ, ộ, ợ] # 6
|
- [m, n, o, ô, ơ, ̣] # 6
|
||||||
- [p, q, r, s] # 7
|
- [p, q, r, s] # 7
|
||||||
- [t, u, v, ư, ú, ứ, ù, ừ, ủ, ử, ũ, ữ, ụ, ự] # 8
|
- [t, u, ư, v] # 8
|
||||||
- [w, x, y, z, ý, ỳ, ỷ, ỹ, ỵ] # 9
|
- [w, x, y, z] # 9
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -21,6 +21,11 @@ class Migration {
|
||||||
" SET maxWordsPerSequence = " + SettingsStore.SUGGESTIONS_POSITIONS_LIMIT +
|
" SET maxWordsPerSequence = " + SettingsStore.SUGGESTIONS_POSITIONS_LIMIT +
|
||||||
", fileHash = '0'",
|
", fileHash = '0'",
|
||||||
832
|
832
|
||||||
|
),
|
||||||
|
new Migration(
|
||||||
|
// enforce the new Vietnamese layout
|
||||||
|
"DELETE FROM " + Tables.LANGUAGES_META + " WHERE langId = 481590",
|
||||||
|
952
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,11 +13,14 @@ import io.github.sspanak.tt9.languages.LanguageKind;
|
||||||
|
|
||||||
|
|
||||||
class LocaleWordsSorter {
|
class LocaleWordsSorter {
|
||||||
|
private final Language language;
|
||||||
private final Pattern sortingPattern;
|
private final Pattern sortingPattern;
|
||||||
|
|
||||||
|
|
||||||
LocaleWordsSorter(@Nullable Language language) {
|
LocaleWordsSorter(@Nullable Language language) {
|
||||||
sortingPattern = LanguageKind.isIndic(language) ? Pattern.compile("\\p{L}\\p{M}+") : null;
|
this.language = language;
|
||||||
|
boolean isAlphabetWithModifiers = LanguageKind.isIndic(language) || LanguageKind.isVietnamese(language);
|
||||||
|
sortingPattern = isAlphabetWithModifiers ? Pattern.compile("\\p{L}\\p{M}+") : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -49,7 +52,9 @@ class LocaleWordsSorter {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
boolean shouldSort(@Nullable Language language, @NonNull String stem, @NonNull String digitSequence) {
|
boolean shouldSort(@NonNull String stem, @NonNull String digitSequence) {
|
||||||
return LanguageKind.isIndic(language) && !stem.isEmpty() && stem.length() == digitSequence.length() - 1;
|
return
|
||||||
|
(LanguageKind.isIndic(language) && !stem.isEmpty() && stem.length() == digitSequence.length() - 1)
|
||||||
|
|| LanguageKind.isVietnamese(language);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -101,7 +101,7 @@ public class WordPredictions extends Predictions {
|
||||||
words.addAll(dbWords);
|
words.addAll(dbWords);
|
||||||
} else {
|
} else {
|
||||||
suggestStem();
|
suggestStem();
|
||||||
dbWords = localeWordsSorter.shouldSort(language, stem, digitSequence) ? localeWordsSorter.sort(dbWords) : dbWords;
|
dbWords = localeWordsSorter.shouldSort(stem, digitSequence) ? localeWordsSorter.sort(dbWords) : dbWords;
|
||||||
dbWords = rearrangeByPairFrequency(dbWords);
|
dbWords = rearrangeByPairFrequency(dbWords);
|
||||||
suggestMissingWords(generatePossibleStemVariations(dbWords));
|
suggestMissingWords(generatePossibleStemVariations(dbWords));
|
||||||
suggestMissingWords(dbWords.isEmpty() ? generateWordVariations(inputWord) : dbWords);
|
suggestMissingWords(dbWords.isEmpty() ? generateWordVariations(inputWord) : dbWords);
|
||||||
|
|
|
||||||
|
|
@ -20,4 +20,5 @@ public class LanguageKind {
|
||||||
public static boolean isIndic(Language language) { return isGujarati(language) || isHindi(language); }
|
public static boolean isIndic(Language language) { return isGujarati(language) || isHindi(language); }
|
||||||
public static boolean isKorean(Language language) { return language != null && language.getId() == KOREAN; }
|
public static boolean isKorean(Language language) { return language != null && language.getId() == KOREAN; }
|
||||||
public static boolean isUkrainian(Language language) { return language != null && language.getId() == 54645; }
|
public static boolean isUkrainian(Language language) { return language != null && language.getId() == 54645; }
|
||||||
|
public static boolean isVietnamese(Language language) { return language != null && language.getId() == 481590; }
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue