Hindi
* Added Hindi language * Removed the hardcoded special characters from language validation. Now reading them from the .yml * improved method of hiding the letters on 0 and 1, when needed * virtual keypad adjustments * improved the single-letter validation during build time * improved Devanagari validation script * improved sorting when filters are on
This commit is contained in:
parent
622a954633
commit
f8e6668281
18 changed files with 1305176 additions and 103 deletions
15
app/languages/definitions/Hindi.yml
Normal file
15
app/languages/definitions/Hindi.yml
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
locale: hi-IN
|
||||
dictionaryFile: hi-utf8.csv
|
||||
abcString: कखग
|
||||
hasUpperCase: no
|
||||
layout:
|
||||
- [SPECIAL, ॐ] # 0 ==> [SPECIAL, Om]
|
||||
- [्, ़, ऽ, ः, PUNCTUATION_IN] # 1 ==> [halant (cancel vowel sign), nuqta (converts k -> q, ph -> f, etc), avagraha, visarga]
|
||||
- [अ, आ, ब, भ, च, छ, ा] # 2 ==> [a, ā, b, bh, c, ch, ā (combining)]
|
||||
- [द, ध, ड, ढ, ए, ऐ, फ, े, ै, ॆ, ॅ] # 3 ==> [d, dh, ḍ, ḍh, ē, ai, ph, ē (combining), ai (combining), e (combining), ê (ae-combining)];
|
||||
- [ग, घ, ह, इ, ई, ि, ी] # 4 ==> [g, gh, h, i, ī, i (combining), ī (combining)]
|
||||
- [ज, झ, क, ख, ल, ळ, ऌ, ॢ] # 5 ==> [j, jh, k, kh, l, ḷ, l̥, l̥ (combining)]
|
||||
- [म, ङ, ञ, ण, न, ओ, औ, ऑ, ं, ँ, ो, ौ, ॊ, ॉ] # 6 ==> [m, ṅ, ñ, ṇ, n, o, au, ŏ, ṁ (n-combining with consonant, e.g. "ng", "nd"), m̐ (m-combining with vowel, e.g. "am", "em"), ō (combining), au (combining), o (combining), ŏ (combining)]
|
||||
- [प, र, ऋ, स, श, ष, ृ] # 7 ==> [p, r, r̥ (ri), s, sh, rsh (like "norsk"), r̥ (ri-combining)]
|
||||
- [त, थ, ट, ठ, उ, ऊ, व, ु, ू] # 8 ==> [t, th, ṭ, ṭh, u, ū, v, u (combining), ū (combining)]
|
||||
- [य] # 9 ==> [y]
|
||||
1304915
app/languages/dictionaries/hi-utf8.csv
Normal file
1304915
app/languages/dictionaries/hi-utf8.csv
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -9,6 +9,7 @@ import io.github.sspanak.tt9.languages.Language;
|
|||
import io.github.sspanak.tt9.languages.NaturalLanguage;
|
||||
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
|
||||
import io.github.sspanak.tt9.util.Characters;
|
||||
import io.github.sspanak.tt9.util.TextTools;
|
||||
|
||||
class Mode123 extends ModePassthrough {
|
||||
@Override public int getId() { return MODE_123; }
|
||||
|
|
@ -49,8 +50,12 @@ class Mode123 extends ModePassthrough {
|
|||
* use the default list, but reorder it a bit for convenience.
|
||||
*/
|
||||
private void setDefaultSpecialCharacters() {
|
||||
KEY_CHARACTERS.add(applyNumericFieldCharacterOrder(settings.getOrderedKeyChars(language, 0)));
|
||||
KEY_CHARACTERS.add(applyNumericFieldCharacterOrder(settings.getOrderedKeyChars(language, 1)));
|
||||
KEY_CHARACTERS.add(
|
||||
TextTools.removeLettersFromList(applyNumericFieldCharacterOrder(settings.getOrderedKeyChars(language, 0)))
|
||||
);
|
||||
KEY_CHARACTERS.add(
|
||||
TextTools.removeLettersFromList(applyNumericFieldCharacterOrder(settings.getOrderedKeyChars(language, 1)))
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ import io.github.sspanak.tt9.languages.LanguageKind;
|
|||
import io.github.sspanak.tt9.languages.NaturalLanguage;
|
||||
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
|
||||
import io.github.sspanak.tt9.util.Characters;
|
||||
import io.github.sspanak.tt9.util.TextTools;
|
||||
|
||||
class ModeCheonjiin extends InputMode {
|
||||
// used when we want to display a different set of characters for a given key, for example
|
||||
|
|
@ -68,14 +69,23 @@ class ModeCheonjiin extends InputMode {
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* setCustomSpecialCharacters
|
||||
* Filter out the letters from the 0-key list and add "0", because there is no other way of
|
||||
* typing it.
|
||||
*/
|
||||
protected void setCustomSpecialCharacters() {
|
||||
// special
|
||||
KEY_CHARACTERS.add(TextTools.removeLettersFromList(applyPunctuationOrder(Characters.Special, 0)));
|
||||
if (settings.holdForPunctuationInKorean()) {
|
||||
ArrayList<String> specialChars = new ArrayList<>(applyPunctuationOrder(Characters.Special, 0));
|
||||
specialChars.add(0, "0");
|
||||
KEY_CHARACTERS.add(specialChars);
|
||||
KEY_CHARACTERS.get(0).add(0, "0");
|
||||
}
|
||||
}
|
||||
|
||||
// punctuation
|
||||
KEY_CHARACTERS.add(
|
||||
TextTools.removeLettersFromList(applyPunctuationOrder(Characters.PunctuationKorean, 1))
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
protected void setSpecialCharacterConstants() {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,59 @@
|
|||
package io.github.sspanak.tt9.ime.modes.predictions;
|
||||
|
||||
import androidx.annotation.NonNull;
|
||||
import androidx.annotation.Nullable;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import io.github.sspanak.tt9.languages.Language;
|
||||
import io.github.sspanak.tt9.languages.LanguageKind;
|
||||
|
||||
|
||||
class LocaleWordsSorter {
|
||||
private final Pattern sortingPattern;
|
||||
|
||||
|
||||
LocaleWordsSorter(@Nullable Language language) {
|
||||
if (LanguageKind.isHindi(language)) {
|
||||
sortingPattern = Pattern.compile("[\\u0904-\\u0939\\u0958-\\u0961][\\u0900-\\u0904\\u093A-\\u094F\\u0962\\u0963]+");
|
||||
} else {
|
||||
sortingPattern = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Reduces the length of a word using the sortingRegex. Usually meant to consider a
|
||||
* base letter + modifiers as a single character.
|
||||
*/
|
||||
private int reduceLength(String word) {
|
||||
Matcher matcher = sortingPattern.matcher(word);
|
||||
|
||||
int length = word.length();
|
||||
while (matcher.find()) {
|
||||
length -= matcher.end() - matcher.start() - 1;
|
||||
}
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
|
||||
ArrayList<String> sort(ArrayList<String> words) {
|
||||
if (sortingPattern == null || words == null) {
|
||||
return words;
|
||||
}
|
||||
|
||||
ArrayList<String> wordsCopy = new ArrayList<>(words);
|
||||
Collections.sort(wordsCopy, (a, b) -> reduceLength(a) - reduceLength(b));
|
||||
|
||||
return wordsCopy;
|
||||
}
|
||||
|
||||
|
||||
boolean shouldSort(@Nullable Language language, @NonNull String stem, @NonNull String digitSequence) {
|
||||
return LanguageKind.isIndic(language) && !stem.isEmpty() && stem.length() == digitSequence.length() - 1;
|
||||
}
|
||||
}
|
||||
|
|
@ -5,12 +5,14 @@ import java.util.ArrayList;
|
|||
import io.github.sspanak.tt9.db.DataStore;
|
||||
import io.github.sspanak.tt9.ime.helpers.TextField;
|
||||
import io.github.sspanak.tt9.languages.EmojiLanguage;
|
||||
import io.github.sspanak.tt9.languages.Language;
|
||||
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
|
||||
import io.github.sspanak.tt9.util.Characters;
|
||||
import io.github.sspanak.tt9.util.TextTools;
|
||||
|
||||
public class WordPredictions extends Predictions {
|
||||
private final TextField textField;
|
||||
private LocaleWordsSorter localeWordsSorter;
|
||||
|
||||
private String inputWord;
|
||||
private boolean isStemFuzzy;
|
||||
|
|
@ -21,11 +23,21 @@ public class WordPredictions extends Predictions {
|
|||
/**
 * Creates the predictor with an empty stem and no enforced top word.
 * The words sorter is created for a null language, which makes it a no-op until
 * a language is set.
 *
 * @param settings  passed on to the parent class
 * @param textField the text field this predictor keeps a reference to
 */
public WordPredictions(SettingsStore settings, TextField textField) {
  super(settings);

  this.textField = textField;
  this.localeWordsSorter = new LocaleWordsSorter(null);

  stem = "";
  lastEnforcedTopWord = "";
}
|
||||
|
||||
|
||||
@Override
|
||||
public Predictions setLanguage(Language language) {
|
||||
super.setLanguage(language);
|
||||
localeWordsSorter = new LocaleWordsSorter(language);
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
public WordPredictions setIsStemFuzzy(boolean yes) {
|
||||
this.isStemFuzzy = yes;
|
||||
return this;
|
||||
|
|
@ -89,6 +101,7 @@ public class WordPredictions extends Predictions {
|
|||
words.addAll(dbWords);
|
||||
} else {
|
||||
suggestStem();
|
||||
dbWords = localeWordsSorter.shouldSort(language, stem, digitSequence) ? localeWordsSorter.sort(dbWords) : dbWords;
|
||||
dbWords = rearrangeByPairFrequency(dbWords);
|
||||
suggestMissingWords(generatePossibleStemVariations(dbWords));
|
||||
suggestMissingWords(dbWords.isEmpty() ? generateWordVariations(inputWord) : dbWords);
|
||||
|
|
|
|||
|
|
@ -6,12 +6,13 @@ public class LanguageKind {
|
|||
public static final int KOREAN = 601579;
|
||||
|
||||
public static boolean isArabic(Language language) { return language != null && language.getId() == 502337; }
|
||||
public static boolean isBulgarian(Language language) { return language != null && language.getId() == 231650; }
|
||||
public static boolean isCyrillic(Language language) { return language != null && language.getKeyCharacters(2).contains("а"); }
|
||||
public static boolean isEnglish(Language language) { return language != null && language.getLocale().equals(Locale.ENGLISH); }
|
||||
public static boolean isFrench(Language language) { return language != null && language.getId() == 596550; }
|
||||
public static boolean isGreek(Language language) { return language != null && language.getId() == 597381; }
|
||||
public static boolean isHebrew(Language language) { return language != null && (language.getId() == 305450 || language.getId() == 403177); }
|
||||
public static boolean isHindi(Language language) { return language != null && language.getId() == 468264; }
|
||||
public static boolean isIndic(Language language) { return isHindi(language); }
|
||||
public static boolean isHinglish(Language language) { return language != null && language.getId() == 468421; }
|
||||
public static boolean isKorean(Language language) { return language != null && language.getId() == KOREAN; }
|
||||
public static boolean isLatinBased(Language language) { return language != null && language.getKeyCharacters(2).contains("a"); }
|
||||
|
|
|
|||
|
|
@ -4,7 +4,9 @@ import androidx.annotation.NonNull;
|
|||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import io.github.sspanak.tt9.languages.exceptions.InvalidLanguageCharactersException;
|
||||
import io.github.sspanak.tt9.util.Characters;
|
||||
|
|
@ -76,39 +78,24 @@ public class NaturalLanguage extends Language implements Comparable<NaturalLangu
|
|||
private ArrayList<String> generateSpecialChars(ArrayList<String> definitionChars) {
|
||||
final String SPECIAL_CHARS_PLACEHOLDER = "SPECIAL";
|
||||
final String PUNCTUATION_PLACEHOLDER = "PUNCTUATION";
|
||||
final String ARABIC_PUNCTUATION_STYLE = PUNCTUATION_PLACEHOLDER + "_AR";
|
||||
final String FRENCH_PUNCTUATION_STYLE = PUNCTUATION_PLACEHOLDER + "_FR";
|
||||
final String GERMAN_PUNCTUATION_STYLE = PUNCTUATION_PLACEHOLDER + "_DE";
|
||||
final String GREEK_PUNCTUATION_STYLE = PUNCTUATION_PLACEHOLDER + "_GR";
|
||||
final String KOREAN_PUNCTUATION_STYLE = PUNCTUATION_PLACEHOLDER + "_KR";
|
||||
|
||||
final Map<String, List<String>> specialChars = new HashMap<>();
|
||||
specialChars.put(SPECIAL_CHARS_PLACEHOLDER, Characters.Special);
|
||||
specialChars.put(PUNCTUATION_PLACEHOLDER, Characters.PunctuationEnglish);
|
||||
specialChars.put(PUNCTUATION_PLACEHOLDER + "_AR", Characters.PunctuationArabic);
|
||||
specialChars.put(PUNCTUATION_PLACEHOLDER + "_FR", Characters.PunctuationFrench);
|
||||
specialChars.put(PUNCTUATION_PLACEHOLDER + "_DE", Characters.PunctuationGerman);
|
||||
specialChars.put(PUNCTUATION_PLACEHOLDER + "_GR", Characters.PunctuationGreek);
|
||||
specialChars.put(PUNCTUATION_PLACEHOLDER + "_IN", Characters.PunctuationIndic);
|
||||
specialChars.put(PUNCTUATION_PLACEHOLDER + "_KR", Characters.PunctuationKorean);
|
||||
|
||||
ArrayList<String> keyChars = new ArrayList<>();
|
||||
for (String defChar : definitionChars) {
|
||||
switch (defChar) {
|
||||
case SPECIAL_CHARS_PLACEHOLDER:
|
||||
keyChars.addAll(Characters.Special);
|
||||
break;
|
||||
case PUNCTUATION_PLACEHOLDER:
|
||||
keyChars.addAll(Characters.PunctuationEnglish);
|
||||
break;
|
||||
case ARABIC_PUNCTUATION_STYLE:
|
||||
keyChars.addAll(Characters.PunctuationArabic);
|
||||
break;
|
||||
case FRENCH_PUNCTUATION_STYLE:
|
||||
keyChars.addAll(Characters.PunctuationFrench);
|
||||
break;
|
||||
case GERMAN_PUNCTUATION_STYLE:
|
||||
keyChars.addAll(Characters.PunctuationGerman);
|
||||
break;
|
||||
case GREEK_PUNCTUATION_STYLE:
|
||||
keyChars.addAll(Characters.PunctuationGreek);
|
||||
break;
|
||||
case KOREAN_PUNCTUATION_STYLE:
|
||||
keyChars.addAll(Characters.PunctuationKorean);
|
||||
break;
|
||||
default:
|
||||
keyChars.add(defChar);
|
||||
break;
|
||||
List<String> keySpecialChars = specialChars.containsKey(defChar) ? specialChars.get(defChar) : null;
|
||||
if (keySpecialChars != null) {
|
||||
keyChars.addAll(keySpecialChars);
|
||||
} else {
|
||||
keyChars.add(defChar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -88,10 +88,6 @@ class SettingsPunctuation extends SettingsInput {
|
|||
orderedChars = language.getKeyCharacters(number);
|
||||
}
|
||||
|
||||
if (number < 2) {
|
||||
orderedChars = removeLettersFromList(orderedChars);
|
||||
}
|
||||
|
||||
return orderedChars;
|
||||
}
|
||||
|
||||
|
|
@ -118,16 +114,4 @@ class SettingsPunctuation extends SettingsInput {
|
|||
|
||||
return charsList;
|
||||
}
|
||||
|
||||
|
||||
private ArrayList<String> removeLettersFromList(ArrayList<String> list) {
|
||||
ArrayList<String> cleanList = new ArrayList<>();
|
||||
for (String s : list) {
|
||||
if (!Character.isAlphabetic(s.codePointAt(0))) {
|
||||
cleanList.add(s);
|
||||
}
|
||||
}
|
||||
|
||||
return cleanList;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ public class SettingsStore extends SettingsUI {
|
|||
public final static int SOFT_KEY_DOUBLE_CLICK_DELAY = 500; // ms
|
||||
public final static int SOFT_KEY_REPEAT_DELAY = 40; // ms
|
||||
public final static int SOFT_KEY_TITLE_MAX_CHARS = 5;
|
||||
public final static int SOFT_KEY_TITLE_MAX_CHARS_INDIC = 3;
|
||||
public final static int SOFT_KEY_TITLE_SIZE = 18; // sp
|
||||
public final static float SOFT_KEY_COMPLEX_LABEL_TITLE_RELATIVE_SIZE = 0.55f;
|
||||
public final static float SOFT_KEY_COMPLEX_LABEL_ARABIC_TITLE_RELATIVE_SIZE = 0.72f;
|
||||
|
|
|
|||
|
|
@ -144,22 +144,17 @@ public class SoftKeyNumber extends SoftKey {
|
|||
}
|
||||
|
||||
ArrayList<String> chars = language.getKeyCharacters(number);
|
||||
boolean isBulgarian = LanguageKind.isBulgarian(language);
|
||||
boolean isGreek = LanguageKind.isGreek(language);
|
||||
boolean isLatinBased = LanguageKind.isLatinBased(language);
|
||||
boolean isUkrainian = LanguageKind.isUkrainian(language);
|
||||
boolean isUppercase = tt9.getTextCase() == InputMode.CASE_UPPER;
|
||||
final int maxChars = LanguageKind.isIndic(language) ? SettingsStore.SOFT_KEY_TITLE_MAX_CHARS_INDIC : SettingsStore.SOFT_KEY_TITLE_MAX_CHARS;
|
||||
|
||||
if (
|
||||
isBulgarian
|
||||
|| isGreek
|
||||
|| isLatinBased
|
||||
|| (isUkrainian && number == 2)
|
||||
|| chars.size() < SettingsStore.SOFT_KEY_TITLE_MAX_CHARS) {
|
||||
return getDefaultCharList(chars, language.getLocale(), isGreek, isLatinBased, isUppercase);
|
||||
} else {
|
||||
return abbreviateCharList(chars, language.getLocale(), isUppercase);
|
||||
String displayChars = getDefaultCharList(chars, language.getLocale(), isGreek, isLatinBased, isUppercase);
|
||||
if (displayChars.length() > maxChars) {
|
||||
displayChars = abbreviateCharList(displayChars, language.getLocale(), isUppercase);
|
||||
}
|
||||
|
||||
return displayChars;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -188,27 +183,37 @@ public class SoftKeyNumber extends SoftKey {
|
|||
* on one key. As suggested by the community, we could display them as "A-Z".
|
||||
* @see <a href="https://github.com/sspanak/tt9/issues/628">Issue #628</a>
|
||||
*/
|
||||
private String abbreviateCharList(ArrayList<String> chars, Locale locale, boolean isUppercase) {
|
||||
boolean containsCombiningChars = TextTools.isCombining(chars.get(0)) || TextTools.isCombining(chars.get(chars.size() - 1));
|
||||
private String abbreviateCharList(String chars, Locale locale, boolean isUppercase) {
|
||||
String firstLetter = chars.substring(0, 1);
|
||||
String lastLetter = chars.substring(chars.length() - 1);
|
||||
boolean containsCombiningChars = TextTools.isCombining(firstLetter) || TextTools.isCombining(lastLetter);
|
||||
|
||||
return
|
||||
(isUppercase ? chars.get(0).toUpperCase(locale) : chars.get(0))
|
||||
(isUppercase ? firstLetter.toUpperCase(locale) : firstLetter)
|
||||
+ (containsCombiningChars ? "– " : "–")
|
||||
+ (isUppercase ? chars.get(chars.size() - 1).toUpperCase(locale) : chars.get(chars.size() - 1));
|
||||
+ (isUppercase ? lastLetter.toUpperCase(locale) : lastLetter);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* As suggested by the community, there is no need to display the accented letters.
|
||||
* People are used to seeing just "ABC", "DEF", etc. In the case of Korean, the keypad looks too
|
||||
* cluttered, so we skip the double consonants, like on phones with a physical keypad.
|
||||
* Reduces the number of displayed characters by leaving the most descriptive ones. This prevents
|
||||
* the visual clutter on the keys.
|
||||
*/
|
||||
private boolean shouldSkipAccents(char currentLetter, boolean isGreek, boolean isLatinBased) {
|
||||
return
|
||||
currentLetter == 'ѝ'
|
||||
|| currentLetter == 'ґ'
|
||||
// Latin. As suggested by the community, there is no need to display the accented letters. People are
|
||||
// used to seeing just "ABC", "DEF", etc.
|
||||
(isLatinBased && currentLetter > 'z')
|
||||
// Cyrillic. Same as above.
|
||||
|| currentLetter == 'ѝ' || currentLetter == 'ґ'
|
||||
// Korean double consonants
|
||||
|| (currentLetter == 'ㄲ' || currentLetter == 'ㄸ' || currentLetter == 'ㅃ' || currentLetter == 'ㅆ' || currentLetter == 'ㅉ')
|
||||
|| (isLatinBased && currentLetter > 'z')
|
||||
// Greek diacritics and ending sigma
|
||||
|| currentLetter == 'ς'
|
||||
|| (isGreek && (currentLetter < 'α' || currentLetter > 'ω'));
|
||||
|| (isGreek && (currentLetter < 'α' || currentLetter > 'ω'))
|
||||
// Hindi matras
|
||||
|| (currentLetter >= 0x0900 && currentLetter <= 0x0903) || (currentLetter >= 0x093A && currentLetter <= 0x094F)
|
||||
|| (currentLetter >= 0x0951 && currentLetter <= 0x0957) || currentLetter == 0x0962 || currentLetter == 0x0963
|
||||
;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -125,15 +125,21 @@ public class SuggestionsBar {
|
|||
return "";
|
||||
}
|
||||
|
||||
if (suggestions.get(id).endsWith(STEM_SUFFIX)) {
|
||||
String suggestion = suggestions.get(id);
|
||||
|
||||
if (suggestion.endsWith(STEM_SUFFIX)) {
|
||||
return stem;
|
||||
} else if (suggestions.get(id).startsWith(STEM_VARIATION_PREFIX)) {
|
||||
return stem + suggestions.get(id).substring(STEM_VARIATION_PREFIX.length());
|
||||
} else if (suggestions.get(id).startsWith(STEM_PUNCTUATION_VARIATION_PREFIX)) {
|
||||
return stem + suggestions.get(id).substring(STEM_PUNCTUATION_VARIATION_PREFIX.length());
|
||||
} else if (suggestion.startsWith(STEM_VARIATION_PREFIX)) {
|
||||
return stem + suggestion.substring(STEM_VARIATION_PREFIX.length());
|
||||
} else if (suggestion.startsWith(STEM_PUNCTUATION_VARIATION_PREFIX)) {
|
||||
return stem + suggestion.substring(STEM_PUNCTUATION_VARIATION_PREFIX.length());
|
||||
}
|
||||
|
||||
return suggestions.get(id).equals(Characters.NEW_LINE) ? "\n" : suggestions.get(id);
|
||||
return switch (suggestion) {
|
||||
case Characters.ZWJ_GRAPHIC -> Characters.ZWJ;
|
||||
case Characters.ZWNJ_GRAPHIC -> Characters.ZWNJ;
|
||||
default -> suggestion.equals(Characters.NEW_LINE) ? "\n" : suggestion;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -195,14 +201,15 @@ public class SuggestionsBar {
|
|||
String trimmedSuggestion = suggestion.substring(stem.length());
|
||||
trimmedSuggestion = Character.isAlphabetic(trimmedSuggestion.charAt(0)) ? STEM_VARIATION_PREFIX + trimmedSuggestion : STEM_PUNCTUATION_VARIATION_PREFIX + trimmedSuggestion;
|
||||
suggestions.add(trimmedSuggestion);
|
||||
return;
|
||||
}
|
||||
// make the new line better readable
|
||||
else if (suggestion.equals("\n")) {
|
||||
suggestions.add(Characters.NEW_LINE);
|
||||
}
|
||||
// or add any other suggestion as is
|
||||
else {
|
||||
suggestions.add(suggestion);
|
||||
|
||||
// convert the unreadable special characters to their readable form or add the readable ones
|
||||
switch (suggestion) {
|
||||
case "\n" -> suggestions.add(Characters.NEW_LINE);
|
||||
case Characters.ZWJ -> suggestions.add(Characters.ZWJ_GRAPHIC);
|
||||
case Characters.ZWNJ -> suggestions.add(Characters.ZWNJ_GRAPHIC);
|
||||
default -> suggestions.add(suggestion);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -12,6 +12,10 @@ import io.github.sspanak.tt9.languages.LanguageKind;
|
|||
public class Characters {
|
||||
public static final String GR_QUESTION_MARK = ";";
|
||||
public static final String NEW_LINE = Build.VERSION.SDK_INT >= Build.VERSION_CODES.M && new Paint().hasGlyph("⏎") ? "⏎" : "\\n";
|
||||
public static final String ZWJ = "\u200D";
|
||||
public static final String ZWJ_GRAPHIC = "ZWJ";
|
||||
public static final String ZWNJ = "\u200C";
|
||||
public static final String ZWNJ_GRAPHIC = "ZWNJ";
|
||||
|
||||
final public static ArrayList<String> ArabicNumbers = new ArrayList<>(Arrays.asList(
|
||||
"٠", "١", "٢", "٣", "٤", "٥", "٦", "٧", "٨", "٩"
|
||||
|
|
@ -47,6 +51,10 @@ public class Characters {
|
|||
",", ".", "-", "«", "»", "(", ")", "&", "~", "`", "'", "\"", "·", ":", "!", GR_QUESTION_MARK
|
||||
));
|
||||
|
||||
final public static ArrayList<String> PunctuationIndic = new ArrayList<>(Arrays.asList(
|
||||
",", ".", "-", ZWJ, ZWNJ, "(", ")", "।", "॰", "॥", "&", "~", "`", ";", ":", "'", "\"", "!", "?"
|
||||
));
|
||||
|
||||
final public static ArrayList<String> PunctuationKorean = new ArrayList<>(Arrays.asList(
|
||||
",", ".", "~", "1", "(", ")", "&", "-", "`", ";", ":", "'", "\"", "!", "?"
|
||||
));
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package io.github.sspanak.tt9.util;
|
||||
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
|
@ -88,4 +89,16 @@ public class TextTools {
|
|||
|
||||
return sdf.format(new Date(timestamp));
|
||||
}
|
||||
|
||||
|
||||
public static ArrayList<String> removeLettersFromList(ArrayList<String> list) {
|
||||
ArrayList<String> cleanList = new ArrayList<>();
|
||||
for (String ch : list) {
|
||||
if (!Character.isAlphabetic(ch.codePointAt(0))) {
|
||||
cleanList.add(ch);
|
||||
}
|
||||
}
|
||||
|
||||
return cleanList;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ ext.validateLanguageFiles = { definitionsDir, dictionariesDir, validationDir ->
|
|||
|
||||
|
||||
ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
|
||||
String alphabet = ""
|
||||
String alphabet = ''
|
||||
int layoutKey = 0
|
||||
HashMap<String, String> sounds = new HashMap<>()
|
||||
HashMap<String, String> layoutSounds = new HashMap<>()
|
||||
|
|
@ -58,10 +58,6 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
|
|||
String localeString = ""
|
||||
String dictionaryFileName = ""
|
||||
|
||||
alphabet = languageFile.name.contains("Catalan") ? '·' : alphabet
|
||||
alphabet = languageFile.name.contains("Hebrew") || languageFile.name.contains("Yiddish") ? '"' : alphabet
|
||||
alphabet = languageFile.name.contains("Korean") ? ':' : alphabet
|
||||
|
||||
for (String line : languageFile.readLines()) {
|
||||
if (
|
||||
line.matches("^[a-zA-Z].*")
|
||||
|
|
@ -110,19 +106,15 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
|
|||
|
||||
// alphabet string
|
||||
def lineCharacters = extractAlphabetCharsFromLine(line)
|
||||
lineCharacters = lineCharacters.isEmpty() ? extractAlphabetExtraCharsFromLine(languageFile.name, line) : lineCharacters
|
||||
|
||||
alphabet += lineCharacters
|
||||
|
||||
// sounds, single letters
|
||||
// sounds, single letters or special characters that are treated as letters
|
||||
if (lineCharacters) {
|
||||
lineCharacters.each { letter ->
|
||||
layoutSounds.put(letter, layoutKey.toString())
|
||||
}
|
||||
} else if (line.contains("PUNCTUATION")) {
|
||||
layoutSounds.put("-", layoutKey.toString())
|
||||
layoutSounds.put(".", layoutKey.toString())
|
||||
layoutSounds.put("'", layoutKey.toString())
|
||||
layoutSounds.put('"', layoutKey.toString())
|
||||
layoutSounds.put('·', layoutKey.toString())
|
||||
}
|
||||
|
||||
if (isLayoutLine(line)) {
|
||||
|
|
@ -178,7 +170,8 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
|
|||
|
||||
// this cannot be static, because DictionaryTools will not be visible
|
||||
def validateDictionary(File dictionaryFile, String alphabet, HashMap<String, String> sounds, boolean isAlphabeticLanguage, Locale locale, int maxErrors, String csvDelimiter, int maxWordFrequency) {
|
||||
final VALID_CHARS = alphabet.toUpperCase(locale) == alphabet ? "^[${alphabet}\\-\\.']+\$" : "^[${alphabet}${alphabet.toUpperCase(locale)}\\-\\.']+\$"
|
||||
String regexSafeAlphabet = alphabet.replaceAll("([\\[\\]\\-\\.])", "")
|
||||
final VALID_CHARS = alphabet.toUpperCase(locale) == alphabet ? "^[${regexSafeAlphabet}\\.\\-]+\$" : "^[${regexSafeAlphabet}${regexSafeAlphabet.toUpperCase(locale)}\\.\\-]+\$"
|
||||
|
||||
int errorCount = 0
|
||||
String errorMsg = ''
|
||||
|
|
@ -238,6 +231,30 @@ def validateDictionary(File dictionaryFile, String alphabet, HashMap<String, Str
|
|||
|
||||
//////////////////// PARSING ////////////////////
|
||||
|
||||
/**
 * Returns the non-letter characters that a layout line containing a PUNCTUATION placeholder
 * adds to the alphabet: always "'-.", plus a language-specific extra.
 *   - Korean: nothing extra.
 *   - Hebrew and Yiddish: a double quote.
 *   - Any other language: every character listed next to the placeholder on that line.
 * Returns an empty string when the language name is null, or when the line is not a layout
 * line or has no PUNCTUATION placeholder.
 */
static def extractAlphabetExtraCharsFromLine(String languageName, String line) {
	final String DEFAULT = "'-."

	boolean isPunctuationLayoutLine = languageName != null && line.contains('PUNCTUATION') && isLayoutLine(line)
	if (!isPunctuationLayoutLine) {
		return ''
	}

	if (languageName.contains('Korean')) {
		return DEFAULT
	}

	if (languageName.contains("Hebrew") || languageName.contains("Yiddish")) {
		return DEFAULT + '"'
	}

	// drop everything from the closing bracket on (including the trailing comment)
	String listContents = line.replaceFirst('\\].*', '')
	// drop the leading "- [" of the YAML list
	listContents = listContents.replaceFirst('^\\s+- \\[', '')
	// drop the placeholder itself, together with any suffix such as "_IN"
	listContents = listContents.replaceFirst("PUNCTUATION[^,\\s]*", '')
	// only the characters remain, once the separators are gone
	String extraChars = listContents.replace(',', '').replace(' ', '')

	return DEFAULT + extraChars
}
|
||||
|
||||
|
||||
static def extractAlphabetCharsFromLine(String line) {
|
||||
if (line.contains('PUNCTUATION') || line.contains('SPECIAL') || !isLayoutLine(line)) {
|
||||
return ''
|
||||
|
|
@ -298,7 +315,7 @@ static def validateWord(String word, String validCharacters, boolean isAlphabeti
|
|||
errors += "${errorMsgPrefix}. Found a garbage word: '${word}' on line ${lineNumber}.\n"
|
||||
}
|
||||
|
||||
if (isAlphabeticLanguage && word.matches("^(.|\\p{L}\\p{M}?)\$")) {
|
||||
if (isAlphabeticLanguage && word.trim().length() == 1) {
|
||||
errorCount++
|
||||
errors += "${errorMsgPrefix}. Found a single letter: '${word}' on line ${lineNumber}. Only uppercase single letters are allowed. The rest of the alphabet will be added automatically.\n"
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue