Language improvements
* fixed some English words always appearing in lowercase when they should be capitalized (I, Friday, December, etc.) * fixed Bulgarian words that should have been capitalized, but were not * fixed mixed-case suggestions priority not updating * optimized dictionary loading (it's more than 2x faster now) * fixed the last words of dictionaries not being loaded
This commit is contained in:
parent
8888485f70
commit
759317dce1
11 changed files with 25342 additions and 23472 deletions
|
|
@ -81,6 +81,15 @@ public class DictionaryDb {
|
|||
}
|
||||
|
||||
|
||||
public static boolean doesWordExistSync(Language language, String word) {
|
||||
if (language == null || word == null || word.equals("")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return getInstance().wordsDao().doesWordExist(language.getId(), word) > 0;
|
||||
}
|
||||
|
||||
|
||||
public static void truncateWords(Handler handler) {
|
||||
new Thread() {
|
||||
@Override
|
||||
|
|
@ -155,7 +164,13 @@ public class DictionaryDb {
|
|||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
int affectedRows = getInstance().wordsDao().incrementFrequency(language.getId(), word.toLowerCase(language.getLocale()), sequence);
|
||||
int affectedRows = getInstance().wordsDao().incrementFrequency(language.getId(), word, sequence);
|
||||
if (affectedRows == 0) {
|
||||
// If the user has changed the case manually, so there would be no matching word.
|
||||
// In this case, try again with the lowercase equivalent.
|
||||
affectedRows = getInstance().wordsDao().incrementFrequency(language.getId(), word.toLowerCase(language.getLocale()), sequence);
|
||||
}
|
||||
|
||||
Logger.d("incrementWordFrequency", "Affected rows: " + affectedRows);
|
||||
} catch (Exception e) {
|
||||
Logger.e(
|
||||
|
|
|
|||
|
|
@ -108,19 +108,19 @@ public class DictionaryLoader {
|
|||
DictionaryDb.runInTransaction(() -> {
|
||||
try {
|
||||
long start = System.currentTimeMillis();
|
||||
importLetters(language);
|
||||
Logger.i(
|
||||
logTag,
|
||||
"Loaded letters for '" + language.getName() + "' language in: " + (System.currentTimeMillis() - start) + " ms"
|
||||
);
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
importWords(language);
|
||||
Logger.i(
|
||||
logTag,
|
||||
"Dictionary: '" + language.getDictionaryFile() + "'" +
|
||||
" processing time: " + (System.currentTimeMillis() - start) + " ms"
|
||||
);
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
importLetters(language);
|
||||
Logger.i(
|
||||
logTag,
|
||||
"Loaded letters for '" + language.getName() + "' language in: " + (System.currentTimeMillis() - start) + " ms"
|
||||
);
|
||||
} catch (DictionaryImportAbortedException e) {
|
||||
stop();
|
||||
|
||||
|
|
@ -167,6 +167,10 @@ public class DictionaryLoader {
|
|||
continue;
|
||||
}
|
||||
|
||||
if (DictionaryDb.doesWordExistSync(language, langChar.toUpperCase(language.getLocale()))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Word word = new Word();
|
||||
word.langId = language.getId();
|
||||
word.frequency = 0;
|
||||
|
|
@ -210,7 +214,7 @@ public class DictionaryLoader {
|
|||
throw new DictionaryImportException(dictionaryFile, word, line);
|
||||
}
|
||||
|
||||
if (line % settings.getDictionaryImportWordChunkSize() == 0) {
|
||||
if (line % settings.getDictionaryImportWordChunkSize() == 0 || line == totalWords - 1) {
|
||||
DictionaryDb.insertWordsSync(dbWords);
|
||||
dbWords.clear();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,9 @@ interface WordsDao {
|
|||
/**
 * Counts the entries of the "words" table.
 *
 * @param langId a negative value matches every row; otherwise only rows whose "lang"
 *               column equals this value are counted
 * @return the number of matching rows
 */
@Query("SELECT COUNT(id) FROM words WHERE :langId < 0 OR lang = :langId")
|
||||
int count(int langId);
|
||||
|
||||
/**
 * Counts the rows of the "words" table whose "lang" column equals langId and whose
 * "word" column equals the given word.
 *
 * @param langId the language id to match
 * @param word the word to match
 * @return the number of matching rows; 0 means there is no such row
 */
@Query("SELECT COUNT(id) FROM words WHERE lang = :langId AND word = :word")
|
||||
int doesWordExist(int langId, String word);
|
||||
|
||||
@Query(
|
||||
"SELECT * " +
|
||||
"FROM words " +
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package io.github.sspanak.tt9.languages;
|
|||
import androidx.annotation.NonNull;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Locale;
|
||||
|
||||
|
||||
|
|
@ -15,6 +16,7 @@ public class Language {
|
|||
protected int abcLowerCaseIcon;
|
||||
protected int abcUpperCaseIcon;
|
||||
protected ArrayList<ArrayList<String>> characterMap = new ArrayList<>();
|
||||
private final HashMap<Character, String> reverseCharacterMap = new HashMap<>();
|
||||
|
||||
// settings
|
||||
protected boolean isPunctuationPartOfWords; // see the getter for more info
|
||||
|
|
@ -65,12 +67,26 @@ public class Language {
|
|||
|
||||
/************* utility *************/
|
||||
|
||||
private void generateReverseCharacterMap() {
|
||||
reverseCharacterMap.clear();
|
||||
for (int digit = 0; digit <= 9; digit++) {
|
||||
for (String keyChar : getKeyCharacters(digit)) {
|
||||
reverseCharacterMap.put(keyChar.charAt(0), String.valueOf(digit));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public String capitalize(String word) {
|
||||
return word != null ? word.substring(0, 1).toUpperCase(locale) + word.substring(1).toLowerCase(locale) : null;
|
||||
}
|
||||
|
||||
public boolean isMixedCaseWord(String word) {
|
||||
return word != null && !word.toLowerCase(locale).equals(word) && !word.toUpperCase(locale).equals(word);
|
||||
return word != null
|
||||
&& (
|
||||
(word.length() == 1 && word.toUpperCase(locale).equals(word))
|
||||
|| (!word.toLowerCase(locale).equals(word) && !word.toUpperCase(locale).equals(word))
|
||||
);
|
||||
}
|
||||
|
||||
public ArrayList<String> getKeyCharacters(int key) {
|
||||
|
|
@ -90,16 +106,17 @@ public class Language {
|
|||
StringBuilder sequence = new StringBuilder();
|
||||
String lowerCaseWord = word.toLowerCase(locale);
|
||||
|
||||
for (int i = 0; i < lowerCaseWord.length(); i++) {
|
||||
for (int key = 0; key <= 9; key++) {
|
||||
if (getKeyCharacters(key).contains(Character.toString(lowerCaseWord.charAt(i)))) {
|
||||
sequence.append(key);
|
||||
}
|
||||
}
|
||||
if (reverseCharacterMap.isEmpty()) {
|
||||
generateReverseCharacterMap();
|
||||
}
|
||||
|
||||
if (word.length() != sequence.length()) {
|
||||
throw new InvalidLanguageCharactersException(this, "Failed generating digit sequence for word: '" + word);
|
||||
for (int i = 0; i < lowerCaseWord.length(); i++) {
|
||||
char letter = lowerCaseWord.charAt(i);
|
||||
if (!reverseCharacterMap.containsKey(letter)) {
|
||||
throw new InvalidLanguageCharactersException(this, "Failed generating digit sequence for word: '" + word);
|
||||
}
|
||||
|
||||
sequence.append(reverseCharacterMap.get(letter));
|
||||
}
|
||||
|
||||
return sequence.toString();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue