1
0
Fork 0

Language improvements

* fixed some English words always appearing in lowercase when they should be capitalized (I, Friday, December, etc.)

* fixed Bulgarian words that should have been capitalized, but were not

* fixed mixed case suggestions priority not updating

* optimized dictionary loading (it's more than 2x faster now)

* fixed the last words of dictionaries not being loaded
This commit is contained in:
sspanak 2022-12-15 11:27:01 +02:00
parent 8888485f70
commit 759317dce1
11 changed files with 25342 additions and 23472 deletions

View file

@ -81,6 +81,15 @@ public class DictionaryDb {
}
/**
 * Checks, on the calling thread, whether the given word is stored for the given language.
 *
 * @param language the language whose id is used for the lookup; may be null
 * @param word the exact word to look for; may be null or empty
 * @return false when either argument is null or the word is empty; otherwise true
 *         when the DAO reports at least one matching row
 */
public static boolean doesWordExistSync(Language language, String word) {
    boolean hasUsableArguments = language != null && word != null && !word.isEmpty();
    if (!hasUsableArguments) {
        return false;
    }

    int matchingRows = getInstance().wordsDao().doesWordExist(language.getId(), word);
    return matchingRows > 0;
}
public static void truncateWords(Handler handler) {
new Thread() {
@Override
@ -155,7 +164,13 @@ public class DictionaryDb {
@Override
public void run() {
try {
int affectedRows = getInstance().wordsDao().incrementFrequency(language.getId(), word.toLowerCase(language.getLocale()), sequence);
int affectedRows = getInstance().wordsDao().incrementFrequency(language.getId(), word, sequence);
if (affectedRows == 0) {
// If the user has changed the case manually, so there would be no matching word.
// In this case, try again with the lowercase equivalent.
affectedRows = getInstance().wordsDao().incrementFrequency(language.getId(), word.toLowerCase(language.getLocale()), sequence);
}
Logger.d("incrementWordFrequency", "Affected rows: " + affectedRows);
} catch (Exception e) {
Logger.e(

View file

@ -108,19 +108,19 @@ public class DictionaryLoader {
DictionaryDb.runInTransaction(() -> {
try {
long start = System.currentTimeMillis();
importLetters(language);
Logger.i(
logTag,
"Loaded letters for '" + language.getName() + "' language in: " + (System.currentTimeMillis() - start) + " ms"
);
start = System.currentTimeMillis();
importWords(language);
Logger.i(
logTag,
"Dictionary: '" + language.getDictionaryFile() + "'" +
" processing time: " + (System.currentTimeMillis() - start) + " ms"
);
start = System.currentTimeMillis();
importLetters(language);
Logger.i(
logTag,
"Loaded letters for '" + language.getName() + "' language in: " + (System.currentTimeMillis() - start) + " ms"
);
} catch (DictionaryImportAbortedException e) {
stop();
@ -167,6 +167,10 @@ public class DictionaryLoader {
continue;
}
if (DictionaryDb.doesWordExistSync(language, langChar.toUpperCase(language.getLocale()))) {
continue;
}
Word word = new Word();
word.langId = language.getId();
word.frequency = 0;
@ -210,7 +214,7 @@ public class DictionaryLoader {
throw new DictionaryImportException(dictionaryFile, word, line);
}
if (line % settings.getDictionaryImportWordChunkSize() == 0) {
if (line % settings.getDictionaryImportWordChunkSize() == 0 || line == totalWords - 1) {
DictionaryDb.insertWordsSync(dbWords);
dbWords.clear();
}

View file

@ -12,6 +12,9 @@ interface WordsDao {
// Counts the rows of the "words" table. A negative langId matches every row
// (the ":langId < 0" branch), otherwise only rows whose "lang" equals langId are counted.
@Query("SELECT COUNT(id) FROM words WHERE :langId < 0 OR lang = :langId")
int count(int langId);
// Despite the boolean-sounding name, this returns a row count: the number of rows in
// "words" whose "lang" equals langId and whose "word" equals the given word.
// Callers that need a yes/no answer should compare the result against 0.
@Query("SELECT COUNT(id) FROM words WHERE lang = :langId AND word = :word")
int doesWordExist(int langId, String word);
@Query(
"SELECT * " +
"FROM words " +

View file

@ -3,6 +3,7 @@ package io.github.sspanak.tt9.languages;
import androidx.annotation.NonNull;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Locale;
@ -15,6 +16,7 @@ public class Language {
protected int abcLowerCaseIcon;
protected int abcUpperCaseIcon;
protected ArrayList<ArrayList<String>> characterMap = new ArrayList<>();
// Reverse lookup of the key layout: the first character of each key character string -> the digit
// (0-9, stored as a String) of the key it belongs to. Filled by generateReverseCharacterMap().
private final HashMap<Character, String> reverseCharacterMap = new HashMap<>();
// settings
protected boolean isPunctuationPartOfWords; // see the getter for more info
@ -65,12 +67,26 @@ public class Language {
/************* utility *************/
/**
 * Rebuilds the character -> digit lookup table from scratch.
 *
 * For every key 0 through 9, the first character of each string returned by
 * getKeyCharacters() is mapped to that key's digit, stored as a String.
 * If the same character appears on more than one key, the highest digit wins,
 * because later puts overwrite earlier ones.
 */
private void generateReverseCharacterMap() {
    reverseCharacterMap.clear();

    int digit = 0;
    while (digit <= 9) {
        String digitAsText = String.valueOf(digit);
        for (String keyChar : getKeyCharacters(digit)) {
            char firstChar = keyChar.charAt(0);
            reverseCharacterMap.put(firstChar, digitAsText);
        }
        digit++;
    }
}
/**
 * Returns the word with its first character in upper case and the remainder in lower case,
 * both converted using this object's locale.
 *
 * @param word the word to capitalize; may be null or empty
 * @return null for null input, the same empty string for empty input,
 *         otherwise the capitalized word
 */
public String capitalize(String word) {
    // An empty string has no first character; substring(0, 1) would throw
    // StringIndexOutOfBoundsException, so hand it back untouched.
    if (word == null || word.isEmpty()) {
        return word;
    }

    String head = word.substring(0, 1).toUpperCase(locale);
    String tail = word.substring(1).toLowerCase(locale);
    return head + tail;
}
/**
 * Tells whether a word carries its own letter case, i.e. it is neither entirely
 * lower case nor entirely upper case in this object's locale.
 *
 * A one-character word that equals its own upper-case form (for example "I") is also
 * reported as mixed case. Note this includes single characters that have no case at all.
 *
 * @param word the word to inspect; may be null
 * @return false for null; true for a single character equal to its upper-case form or
 *         for a word that differs from both its lower-case and upper-case forms
 */
public boolean isMixedCaseWord(String word) {
    if (word == null) {
        return false;
    }

    boolean isAllUpperCase = word.toUpperCase(locale).equals(word);
    if (word.length() == 1 && isAllUpperCase) {
        return true;
    }

    boolean isAllLowerCase = word.toLowerCase(locale).equals(word);
    return !isAllLowerCase && !isAllUpperCase;
}
public ArrayList<String> getKeyCharacters(int key) {
@ -90,16 +106,17 @@ public class Language {
StringBuilder sequence = new StringBuilder();
String lowerCaseWord = word.toLowerCase(locale);
for (int i = 0; i < lowerCaseWord.length(); i++) {
for (int key = 0; key <= 9; key++) {
if (getKeyCharacters(key).contains(Character.toString(lowerCaseWord.charAt(i)))) {
sequence.append(key);
}
}
if (reverseCharacterMap.isEmpty()) {
generateReverseCharacterMap();
}
if (word.length() != sequence.length()) {
throw new InvalidLanguageCharactersException(this, "Failed generating digit sequence for word: '" + word);
for (int i = 0; i < lowerCaseWord.length(); i++) {
char letter = lowerCaseWord.charAt(i);
if (!reverseCharacterMap.containsKey(letter)) {
throw new InvalidLanguageCharactersException(this, "Failed generating digit sequence for word: '" + word);
}
sequence.append(reverseCharacterMap.get(letter));
}
return sequence.toString();