1
0
Fork 0
* Added Hindi language

	* Removed the hardcoded special characters from language validation. Now reading them from the .yml

	* improved method of hiding the letters on 0 and 1, when needed

	* virtual keypad adjustments

	* improved the single-letter validation during build time

	* improved Devanagari validation script

	* improved sorting when filters are on
This commit is contained in:
sspanak 2024-12-01 18:47:41 +02:00 committed by Dimo Karaivanov
parent 622a954633
commit f8e6668281
18 changed files with 1305176 additions and 103 deletions

View file

@ -0,0 +1,15 @@
locale: hi-IN
dictionaryFile: hi-utf8.csv
abcString: कखग
hasUpperCase: no
layout:
- [SPECIAL, ॐ] # 0 ==> [SPECIAL, Om]
- [्, ़, ऽ, ः, PUNCTUATION_IN] # 1 ==> [halant (cancel vowel sign), nuqta (converts k -> q, ph -> f, etc), avagraha, visarga]
- [अ, आ, ब, भ, च, छ, ा] # 2 ==> [a, ā, b, bh, c, ch, ā (combining)]
- [द, ध, ड, ढ, ए, ऐ, फ, े, ै, ॆ, ॅ] # 3 ==> [d, dh, ḍ, ḍh, ē, ai, ph, ē (combining), ai (combining), e (combining), ê (ae-combining)]
- [ग, घ, ह, इ, ई, ि, ी] # 4 ==> [g, gh, h, i, ī, i (combining), ī (combining)]
- [ज, झ, क, ख, ल, ळ, ऌ, ॢ] # 5 ==> [j, jh, k, kh, l, ḷ, l̥, l̥ (combining)]
- [म, ङ, ञ, ण, न, ओ, औ, ऑ, ं, ँ, ो, ौ, ॊ, ॉ] # 6 ==> [m, ṅ, ñ, ṇ, n, o, au, ŏ, ṁ (n-combining with consonant, e.g. "ng", "nd"), m̐ (m-combining with vowel, e.g. "am", "em"), ō (combining), au (combining), o (combining), ŏ (combining)]
- [प, र, ऋ, स, श, ष, ृ] # 7 ==> [p, r, r̥ (ri), s, sh, rsh (like "norsk"), r̥ (ri-combining)]
- [त, थ, ट, ठ, उ, ऊ, व, ु, ू] # 8 ==> [t, th, ṭ, ṭh, u, ū, v, u (combining), ū (combining)]
- [य] # 9 ==> [y]

File diff suppressed because it is too large Load diff

View file

@ -9,6 +9,7 @@ import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.languages.NaturalLanguage;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
import io.github.sspanak.tt9.util.Characters;
import io.github.sspanak.tt9.util.TextTools;
class Mode123 extends ModePassthrough {
@Override public int getId() { return MODE_123; }
@ -49,8 +50,12 @@ class Mode123 extends ModePassthrough {
* use the default list, but reorder it a bit for convenience.
*/
private void setDefaultSpecialCharacters() {
KEY_CHARACTERS.add(applyNumericFieldCharacterOrder(settings.getOrderedKeyChars(language, 0)));
KEY_CHARACTERS.add(applyNumericFieldCharacterOrder(settings.getOrderedKeyChars(language, 1)));
KEY_CHARACTERS.add(
TextTools.removeLettersFromList(applyNumericFieldCharacterOrder(settings.getOrderedKeyChars(language, 0)))
);
KEY_CHARACTERS.add(
TextTools.removeLettersFromList(applyNumericFieldCharacterOrder(settings.getOrderedKeyChars(language, 1)))
);
}

View file

@ -18,6 +18,7 @@ import io.github.sspanak.tt9.languages.LanguageKind;
import io.github.sspanak.tt9.languages.NaturalLanguage;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
import io.github.sspanak.tt9.util.Characters;
import io.github.sspanak.tt9.util.TextTools;
class ModeCheonjiin extends InputMode {
// used when we want do display a different set of characters for a given key, for example
@ -68,14 +69,23 @@ class ModeCheonjiin extends InputMode {
}
/**
* setCustomSpecialCharacters
* Filter out the letters from the 0-key list and add "0", because there is no other way of
* typing it.
*/
protected void setCustomSpecialCharacters() {
// special
KEY_CHARACTERS.add(TextTools.removeLettersFromList(applyPunctuationOrder(Characters.Special, 0)));
if (settings.holdForPunctuationInKorean()) {
ArrayList<String> specialChars = new ArrayList<>(applyPunctuationOrder(Characters.Special, 0));
specialChars.add(0, "0");
KEY_CHARACTERS.add(specialChars);
KEY_CHARACTERS.get(0).add(0, "0");
}
}
// punctuation
KEY_CHARACTERS.add(
TextTools.removeLettersFromList(applyPunctuationOrder(Characters.PunctuationKorean, 1))
);
}
protected void setSpecialCharacterConstants() {

View file

@ -0,0 +1,59 @@
package io.github.sspanak.tt9.ime.modes.predictions;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.languages.LanguageKind;
/**
 * Re-orders word suggestions by their "visual" length for languages where a base letter and the
 * signs attached to it should count as one character. For every other language the sorter is a
 * no-op: {@link #sort(ArrayList)} hands back its argument untouched.
 */
class LocaleWordsSorter {
	/**
	 * A Devanagari base letter followed by one or more dependent signs. Compiled once, because a
	 * new sorter is created on every language change.
	 */
	private static final Pattern HINDI_CLUSTER_PATTERN = Pattern.compile("[\\u0904-\\u0939\\u0958-\\u0961][\\u0900-\\u0904\\u093A-\\u094F\\u0962\\u0963]+");

	/** The cluster pattern of the current language, or {@code null} when no special sorting applies. */
	private final Pattern sortingPattern;


	/**
	 * @param language the language whose words will be sorted; only Hindi enables sorting,
	 *                 {@code null} or any other language disables it
	 */
	LocaleWordsSorter(@Nullable Language language) {
		sortingPattern = LanguageKind.isHindi(language) ? HINDI_CLUSTER_PATTERN : null;
	}


	/**
	 * Reduces the length of a word using the sortingPattern. Every match (a base letter + its
	 * modifiers) is counted as a single character. Without a pattern, the plain length is returned.
	 */
	private int reduceLength(String word) {
		if (sortingPattern == null) {
			return word.length();
		}

		int length = word.length();
		Matcher matcher = sortingPattern.matcher(word);
		while (matcher.find()) {
			// a match spanning k chars stands for one visual character, so drop the other k - 1
			length -= matcher.end() - matcher.start() - 1;
		}

		return length;
	}


	/**
	 * Returns a copy of the list ordered by ascending reduced length. The sort is stable, so words
	 * of equal reduced length keep their relative order.
	 *
	 * @param words the words to sort; never modified
	 * @return the sorted copy, or {@code words} itself when it is {@code null} or when the current
	 *         language needs no special sorting
	 */
	ArrayList<String> sort(ArrayList<String> words) {
		if (sortingPattern == null || words == null) {
			return words;
		}

		ArrayList<String> wordsCopy = new ArrayList<>(words);
		// Integer.compare() instead of subtraction: same ordering, no overflow pitfalls
		Collections.sort(wordsCopy, (a, b) -> Integer.compare(reduceLength(a), reduceLength(b)));
		return wordsCopy;
	}


	/**
	 * Tells whether the suggestions should be passed through {@link #sort(ArrayList)}: the language
	 * must be Indic, and the stem must be non-empty and exactly one character shorter than the
	 * digit sequence.
	 */
	boolean shouldSort(@Nullable Language language, @NonNull String stem, @NonNull String digitSequence) {
		return LanguageKind.isIndic(language) && !stem.isEmpty() && stem.length() == digitSequence.length() - 1;
	}
}

View file

@ -5,12 +5,14 @@ import java.util.ArrayList;
import io.github.sspanak.tt9.db.DataStore;
import io.github.sspanak.tt9.ime.helpers.TextField;
import io.github.sspanak.tt9.languages.EmojiLanguage;
import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
import io.github.sspanak.tt9.util.Characters;
import io.github.sspanak.tt9.util.TextTools;
public class WordPredictions extends Predictions {
private final TextField textField;
private LocaleWordsSorter localeWordsSorter;
private String inputWord;
private boolean isStemFuzzy;
@ -21,11 +23,21 @@ public class WordPredictions extends Predictions {
/**
 * Creates the predictions provider with an empty stem, no enforced top word, and a words sorter
 * that is not bound to any language yet.
 *
 * @param settings  passed on to the parent class
 * @param textField the text field this instance keeps a reference to
 */
public WordPredictions(SettingsStore settings, TextField textField) {
	super(settings);

	this.textField = textField;
	localeWordsSorter = new LocaleWordsSorter(null);

	stem = "";
	lastEnforcedTopWord = "";
}
/**
 * Changes the language in the parent class, then replaces the words sorter with one created
 * for the new language.
 *
 * @return this instance, to allow chaining
 */
@Override
public Predictions setLanguage(Language language) {
	super.setLanguage(language);

	final LocaleWordsSorter sorterForLanguage = new LocaleWordsSorter(language);
	this.localeWordsSorter = sorterForLanguage;

	return this;
}
public WordPredictions setIsStemFuzzy(boolean yes) {
this.isStemFuzzy = yes;
return this;
@ -89,6 +101,7 @@ public class WordPredictions extends Predictions {
words.addAll(dbWords);
} else {
suggestStem();
dbWords = localeWordsSorter.shouldSort(language, stem, digitSequence) ? localeWordsSorter.sort(dbWords) : dbWords;
dbWords = rearrangeByPairFrequency(dbWords);
suggestMissingWords(generatePossibleStemVariations(dbWords));
suggestMissingWords(dbWords.isEmpty() ? generateWordVariations(inputWord) : dbWords);

View file

@ -6,12 +6,13 @@ public class LanguageKind {
public static final int KOREAN = 601579;
// Language classification helpers. Every check returns false for a null language.
// Most of them compare the numeric language id against a hardcoded value.
public static boolean isArabic(Language language) { return language != null && language.getId() == 502337; }
public static boolean isBulgarian(Language language) { return language != null && language.getId() == 231650; }
// Script check: true when the characters on key 2 include the Cyrillic letter "а".
public static boolean isCyrillic(Language language) { return language != null && language.getKeyCharacters(2).contains("а"); }
// Compares the locale instead of the id.
public static boolean isEnglish(Language language) { return language != null && language.getLocale().equals(Locale.ENGLISH); }
public static boolean isFrench(Language language) { return language != null && language.getId() == 596550; }
public static boolean isGreek(Language language) { return language != null && language.getId() == 597381; }
// Accepts two different language ids. NOTE(review): presumably Hebrew and Yiddish — confirm.
public static boolean isHebrew(Language language) { return language != null && (language.getId() == 305450 || language.getId() == 403177); }
public static boolean isHindi(Language language) { return language != null && language.getId() == 468264; }
// Hindi is currently the only language treated as Indic; null-safety comes from isHindi().
public static boolean isIndic(Language language) { return isHindi(language); }
public static boolean isHinglish(Language language) { return language != null && language.getId() == 468421; }
public static boolean isKorean(Language language) { return language != null && language.getId() == KOREAN; }
// Script check: true when the characters on key 2 include the Latin letter "a".
public static boolean isLatinBased(Language language) { return language != null && language.getKeyCharacters(2).contains("a"); }

View file

@ -4,7 +4,9 @@ import androidx.annotation.NonNull;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import io.github.sspanak.tt9.languages.exceptions.InvalidLanguageCharactersException;
import io.github.sspanak.tt9.util.Characters;
@ -76,39 +78,24 @@ public class NaturalLanguage extends Language implements Comparable<NaturalLangu
private ArrayList<String> generateSpecialChars(ArrayList<String> definitionChars) {
final String SPECIAL_CHARS_PLACEHOLDER = "SPECIAL";
final String PUNCTUATION_PLACEHOLDER = "PUNCTUATION";
final String ARABIC_PUNCTUATION_STYLE = PUNCTUATION_PLACEHOLDER + "_AR";
final String FRENCH_PUNCTUATION_STYLE = PUNCTUATION_PLACEHOLDER + "_FR";
final String GERMAN_PUNCTUATION_STYLE = PUNCTUATION_PLACEHOLDER + "_DE";
final String GREEK_PUNCTUATION_STYLE = PUNCTUATION_PLACEHOLDER + "_GR";
final String KOREAN_PUNCTUATION_STYLE = PUNCTUATION_PLACEHOLDER + "_KR";
final Map<String, List<String>> specialChars = new HashMap<>();
specialChars.put(SPECIAL_CHARS_PLACEHOLDER, Characters.Special);
specialChars.put(PUNCTUATION_PLACEHOLDER, Characters.PunctuationEnglish);
specialChars.put(PUNCTUATION_PLACEHOLDER + "_AR", Characters.PunctuationArabic);
specialChars.put(PUNCTUATION_PLACEHOLDER + "_FR", Characters.PunctuationFrench);
specialChars.put(PUNCTUATION_PLACEHOLDER + "_DE", Characters.PunctuationGerman);
specialChars.put(PUNCTUATION_PLACEHOLDER + "_GR", Characters.PunctuationGreek);
specialChars.put(PUNCTUATION_PLACEHOLDER + "_IN", Characters.PunctuationIndic);
specialChars.put(PUNCTUATION_PLACEHOLDER + "_KR", Characters.PunctuationKorean);
ArrayList<String> keyChars = new ArrayList<>();
for (String defChar : definitionChars) {
switch (defChar) {
case SPECIAL_CHARS_PLACEHOLDER:
keyChars.addAll(Characters.Special);
break;
case PUNCTUATION_PLACEHOLDER:
keyChars.addAll(Characters.PunctuationEnglish);
break;
case ARABIC_PUNCTUATION_STYLE:
keyChars.addAll(Characters.PunctuationArabic);
break;
case FRENCH_PUNCTUATION_STYLE:
keyChars.addAll(Characters.PunctuationFrench);
break;
case GERMAN_PUNCTUATION_STYLE:
keyChars.addAll(Characters.PunctuationGerman);
break;
case GREEK_PUNCTUATION_STYLE:
keyChars.addAll(Characters.PunctuationGreek);
break;
case KOREAN_PUNCTUATION_STYLE:
keyChars.addAll(Characters.PunctuationKorean);
break;
default:
keyChars.add(defChar);
break;
List<String> keySpecialChars = specialChars.containsKey(defChar) ? specialChars.get(defChar) : null;
if (keySpecialChars != null) {
keyChars.addAll(keySpecialChars);
} else {
keyChars.add(defChar);
}
}

View file

@ -88,10 +88,6 @@ class SettingsPunctuation extends SettingsInput {
orderedChars = language.getKeyCharacters(number);
}
if (number < 2) {
orderedChars = removeLettersFromList(orderedChars);
}
return orderedChars;
}
@ -118,16 +114,4 @@ class SettingsPunctuation extends SettingsInput {
return charsList;
}
private ArrayList<String> removeLettersFromList(ArrayList<String> list) {
ArrayList<String> cleanList = new ArrayList<>();
for (String s : list) {
if (!Character.isAlphabetic(s.codePointAt(0))) {
cleanList.add(s);
}
}
return cleanList;
}
}

View file

@ -25,6 +25,7 @@ public class SettingsStore extends SettingsUI {
public final static int SOFT_KEY_DOUBLE_CLICK_DELAY = 500; // ms
public final static int SOFT_KEY_REPEAT_DELAY = 40; // ms
public final static int SOFT_KEY_TITLE_MAX_CHARS = 5;
public final static int SOFT_KEY_TITLE_MAX_CHARS_INDIC = 3;
public final static int SOFT_KEY_TITLE_SIZE = 18; // sp
public final static float SOFT_KEY_COMPLEX_LABEL_TITLE_RELATIVE_SIZE = 0.55f;
public final static float SOFT_KEY_COMPLEX_LABEL_ARABIC_TITLE_RELATIVE_SIZE = 0.72f;

View file

@ -144,22 +144,17 @@ public class SoftKeyNumber extends SoftKey {
}
ArrayList<String> chars = language.getKeyCharacters(number);
boolean isBulgarian = LanguageKind.isBulgarian(language);
boolean isGreek = LanguageKind.isGreek(language);
boolean isLatinBased = LanguageKind.isLatinBased(language);
boolean isUkrainian = LanguageKind.isUkrainian(language);
boolean isUppercase = tt9.getTextCase() == InputMode.CASE_UPPER;
final int maxChars = LanguageKind.isIndic(language) ? SettingsStore.SOFT_KEY_TITLE_MAX_CHARS_INDIC : SettingsStore.SOFT_KEY_TITLE_MAX_CHARS;
if (
isBulgarian
|| isGreek
|| isLatinBased
|| (isUkrainian && number == 2)
|| chars.size() < SettingsStore.SOFT_KEY_TITLE_MAX_CHARS) {
return getDefaultCharList(chars, language.getLocale(), isGreek, isLatinBased, isUppercase);
} else {
return abbreviateCharList(chars, language.getLocale(), isUppercase);
String displayChars = getDefaultCharList(chars, language.getLocale(), isGreek, isLatinBased, isUppercase);
if (displayChars.length() > maxChars) {
displayChars = abbreviateCharList(displayChars, language.getLocale(), isUppercase);
}
return displayChars;
}
@ -188,27 +183,37 @@ public class SoftKeyNumber extends SoftKey {
* on one key. As suggested by the community, we could display them as "A-Z".
* @see <a href="https://github.com/sspanak/tt9/issues/628">Issue #628</a>
*/
private String abbreviateCharList(ArrayList<String> chars, Locale locale, boolean isUppercase) {
boolean containsCombiningChars = TextTools.isCombining(chars.get(0)) || TextTools.isCombining(chars.get(chars.size() - 1));
private String abbreviateCharList(String chars, Locale locale, boolean isUppercase) {
String firstLetter = chars.substring(0, 1);
String lastLetter = chars.substring(chars.length() - 1);
boolean containsCombiningChars = TextTools.isCombining(firstLetter) || TextTools.isCombining(lastLetter);
return
(isUppercase ? chars.get(0).toUpperCase(locale) : chars.get(0))
(isUppercase ? firstLetter.toUpperCase(locale) : firstLetter)
+ (containsCombiningChars ? " " : "")
+ (isUppercase ? chars.get(chars.size() - 1).toUpperCase(locale) : chars.get(chars.size() - 1));
+ (isUppercase ? lastLetter.toUpperCase(locale) : lastLetter);
}
/**
* As suggested by the community, there is no need to display the accented letters.
* People are used to seeing just "ABC", "DEF", etc. In the case of Korean, the keypad looks too
* cluttered, so we skip the double consonants, like on phones with a physical keypad.
* Reduces the number of displayed characters by leaving the most descriptive ones. This prevents
* the visual clutter on the keys.
*/
private boolean shouldSkipAccents(char currentLetter, boolean isGreek, boolean isLatinBased) {
return
currentLetter == 'ѝ'
|| currentLetter == 'ґ'
// Latin. As suggested by the community, there is no need to display the accented letters. People are
// used to seeing just "ABC", "DEF", etc.
(isLatinBased && currentLetter > 'z')
// Cyrillic. Same as above.
|| currentLetter == 'ѝ' || currentLetter == 'ґ'
// Korean double consonants
|| (currentLetter == 'ㄲ' || currentLetter == 'ㄸ' || currentLetter == 'ㅃ' || currentLetter == 'ㅆ' || currentLetter == 'ㅉ')
|| (isLatinBased && currentLetter > 'z')
// Greek diacritics and ending sigma
|| currentLetter == 'ς'
|| (isGreek && (currentLetter < 'α' || currentLetter > 'ω'));
|| (isGreek && (currentLetter < 'α' || currentLetter > 'ω'))
// Hindi matras
|| (currentLetter >= 0x0900 && currentLetter <= 0x0903) || (currentLetter >= 0x093A && currentLetter <= 0x094F)
|| (currentLetter >= 0x0951 && currentLetter <= 0x0957) || currentLetter == 0x0962 || currentLetter == 0x0963
;
}
}

View file

@ -125,15 +125,21 @@ public class SuggestionsBar {
return "";
}
if (suggestions.get(id).endsWith(STEM_SUFFIX)) {
String suggestion = suggestions.get(id);
if (suggestion.endsWith(STEM_SUFFIX)) {
return stem;
} else if (suggestions.get(id).startsWith(STEM_VARIATION_PREFIX)) {
return stem + suggestions.get(id).substring(STEM_VARIATION_PREFIX.length());
} else if (suggestions.get(id).startsWith(STEM_PUNCTUATION_VARIATION_PREFIX)) {
return stem + suggestions.get(id).substring(STEM_PUNCTUATION_VARIATION_PREFIX.length());
} else if (suggestion.startsWith(STEM_VARIATION_PREFIX)) {
return stem + suggestion.substring(STEM_VARIATION_PREFIX.length());
} else if (suggestion.startsWith(STEM_PUNCTUATION_VARIATION_PREFIX)) {
return stem + suggestion.substring(STEM_PUNCTUATION_VARIATION_PREFIX.length());
}
return suggestions.get(id).equals(Characters.NEW_LINE) ? "\n" : suggestions.get(id);
return switch (suggestion) {
case Characters.ZWJ_GRAPHIC -> Characters.ZWJ;
case Characters.ZWNJ_GRAPHIC -> Characters.ZWNJ;
default -> suggestion.equals(Characters.NEW_LINE) ? "\n" : suggestion;
};
}
@ -195,14 +201,15 @@ public class SuggestionsBar {
String trimmedSuggestion = suggestion.substring(stem.length());
trimmedSuggestion = Character.isAlphabetic(trimmedSuggestion.charAt(0)) ? STEM_VARIATION_PREFIX + trimmedSuggestion : STEM_PUNCTUATION_VARIATION_PREFIX + trimmedSuggestion;
suggestions.add(trimmedSuggestion);
return;
}
// make the new line better readable
else if (suggestion.equals("\n")) {
suggestions.add(Characters.NEW_LINE);
}
// or add any other suggestion as is
else {
suggestions.add(suggestion);
// convert the unreadable special characters to their readable form or add the readable ones
switch (suggestion) {
case "\n" -> suggestions.add(Characters.NEW_LINE);
case Characters.ZWJ -> suggestions.add(Characters.ZWJ_GRAPHIC);
case Characters.ZWNJ -> suggestions.add(Characters.ZWNJ_GRAPHIC);
default -> suggestions.add(suggestion);
}
}

View file

@ -12,6 +12,10 @@ import io.github.sspanak.tt9.languages.LanguageKind;
public class Characters {
public static final String GR_QUESTION_MARK = ";";
public static final String NEW_LINE = Build.VERSION.SDK_INT >= Build.VERSION_CODES.M && new Paint().hasGlyph("") ? "" : "\\n";
public static final String ZWJ = "\u200D";
public static final String ZWJ_GRAPHIC = "ZWJ";
public static final String ZWNJ = "\u200C";
public static final String ZWNJ_GRAPHIC = "ZWNJ";
final public static ArrayList<String> ArabicNumbers = new ArrayList<>(Arrays.asList(
"٠", "١", "٢", "٣", "٤", "٥", "٦", "٧", "٨", "٩"
@ -47,6 +51,10 @@ public class Characters {
",", ".", "-", "«", "»", "(", ")", "&", "~", "`", "'", "\"", "·", ":", "!", GR_QUESTION_MARK
));
final public static ArrayList<String> PunctuationIndic = new ArrayList<>(Arrays.asList(
",", ".", "-", ZWJ, ZWNJ, "(", ")", "", "", "", "&", "~", "`", ";", ":", "'", "\"", "!", "?"
));
final public static ArrayList<String> PunctuationKorean = new ArrayList<>(Arrays.asList(
",", ".", "~", "1", "(", ")", "&", "-", "`", ";", ":", "'", "\"", "!", "?"
));

View file

@ -1,6 +1,7 @@
package io.github.sspanak.tt9.util;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;
@ -88,4 +89,16 @@ public class TextTools {
return sdf.format(new Date(timestamp));
}
/**
 * Copies the given list, leaving out every entry that starts with a letter, as determined by
 * {@link Character#isAlphabetic(int)} on the first code point. The input list is never modified.
 * {@code null} and empty entries are left out as well, because they have no first code point
 * to inspect.
 *
 * @param list the characters to filter; may be {@code null}
 * @return a new list containing only the entries that do not start with a letter; empty when
 *         {@code list} is {@code null}
 */
public static ArrayList<String> removeLettersFromList(ArrayList<String> list) {
	ArrayList<String> cleanList = new ArrayList<>();
	if (list == null) {
		return cleanList;
	}

	for (String ch : list) {
		// codePointAt(0) throws on an empty string, so such entries must be skipped beforehand
		if (ch == null || ch.isEmpty()) {
			continue;
		}

		if (!Character.isAlphabetic(ch.codePointAt(0))) {
			cleanList.add(ch);
		}
	}

	return cleanList;
}
}

View file

@ -43,7 +43,7 @@ ext.validateLanguageFiles = { definitionsDir, dictionariesDir, validationDir ->
ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
String alphabet = ""
String alphabet = ''
int layoutKey = 0
HashMap<String, String> sounds = new HashMap<>()
HashMap<String, String> layoutSounds = new HashMap<>()
@ -58,10 +58,6 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
String localeString = ""
String dictionaryFileName = ""
alphabet = languageFile.name.contains("Catalan") ? '·' : alphabet
alphabet = languageFile.name.contains("Hebrew") || languageFile.name.contains("Yiddish") ? '"' : alphabet
alphabet = languageFile.name.contains("Korean") ? '' : alphabet
for (String line : languageFile.readLines()) {
if (
line.matches("^[a-zA-Z].*")
@ -110,19 +106,15 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
// alphabet string
def lineCharacters = extractAlphabetCharsFromLine(line)
lineCharacters = lineCharacters.isEmpty() ? extractAlphabetExtraCharsFromLine(languageFile.name, line) : lineCharacters
alphabet += lineCharacters
// sounds, single letters
// sounds, single letters or special characters that are treated as letters
if (lineCharacters) {
lineCharacters.each { letter ->
layoutSounds.put(letter, layoutKey.toString())
}
} else if (line.contains("PUNCTUATION")) {
layoutSounds.put("-", layoutKey.toString())
layoutSounds.put(".", layoutKey.toString())
layoutSounds.put("'", layoutKey.toString())
layoutSounds.put('"', layoutKey.toString())
layoutSounds.put('·', layoutKey.toString())
}
if (isLayoutLine(line)) {
@ -178,7 +170,8 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
// this cannot be static, because DictionaryTools will not be visible
def validateDictionary(File dictionaryFile, String alphabet, HashMap<String, String> sounds, boolean isAlphabeticLanguage, Locale locale, int maxErrors, String csvDelimiter, int maxWordFrequency) {
final VALID_CHARS = alphabet.toUpperCase(locale) == alphabet ? "^[${alphabet}\\-\\.']+\$" : "^[${alphabet}${alphabet.toUpperCase(locale)}\\-\\.']+\$"
String regexSafeAlphabet = alphabet.replaceAll("([\\[\\]\\-\\.])", "")
final VALID_CHARS = alphabet.toUpperCase(locale) == alphabet ? "^[${regexSafeAlphabet}\\.\\-]+\$" : "^[${regexSafeAlphabet}${regexSafeAlphabet.toUpperCase(locale)}\\.\\-]+\$"
int errorCount = 0
String errorMsg = ''
@ -238,6 +231,30 @@ def validateDictionary(File dictionaryFile, String alphabet, HashMap<String, Str
//////////////////// PARSING ////////////////////
/**
 * Returns the non-letter characters of a layout line containing a PUNCTUATION placeholder, as a
 * single string. The result always starts with the common set: apostrophe, dash and dot.
 * For Korean, nothing else is added. For Hebrew and Yiddish, only the double quote is added.
 * For any other language, the characters listed explicitly on the line are appended.
 * Returns an empty string when the language name is null, or when the line is not a layout line
 * with a PUNCTUATION placeholder.
 */
static def extractAlphabetExtraCharsFromLine(String languageName, String line) {
	final String COMMON_CHARS = "'-."

	boolean isPunctuationKeyLine = languageName != null && line.contains('PUNCTUATION') && isLayoutLine(line)
	if (!isPunctuationKeyLine) {
		return ''
	}

	if (languageName.contains('Korean')) {
		return COMMON_CHARS
	}

	if (languageName.contains("Hebrew") || languageName.contains("Yiddish")) {
		return COMMON_CHARS + '"'
	}

	// drop the trailing comment, the list opening, the placeholder and the separators,
	// so that only the explicitly listed characters remain
	String explicitChars = line.replaceFirst('\\].*', '')
	explicitChars = explicitChars.replaceFirst('^\\s+- \\[', '')
	explicitChars = explicitChars.replaceFirst("PUNCTUATION[^,\\s]*", '')
	explicitChars = explicitChars.replace(',', '').replace(' ', '')

	return COMMON_CHARS + explicitChars
}
static def extractAlphabetCharsFromLine(String line) {
if (line.contains('PUNCTUATION') || line.contains('SPECIAL') || !isLayoutLine(line)) {
return ''
@ -298,7 +315,7 @@ static def validateWord(String word, String validCharacters, boolean isAlphabeti
errors += "${errorMsgPrefix}. Found a garbage word: '${word}' on line ${lineNumber}.\n"
}
if (isAlphabeticLanguage && word.matches("^(.|\\p{L}\\p{M}?)\$")) {
if (isAlphabeticLanguage && word.trim().length() == 1) {
errorCount++
errors += "${errorMsgPrefix}. Found a single letter: '${word}' on line ${lineNumber}. Only uppercase single letters are allowed. The rest of the alphabet will be added automatically.\n"
}