Fixed word frequency issues causing wrong suggestions order (#164)
* All suggestions are now ordered by length first, then by frequency.
* Word frequency is now normalized to 255 instead of 5, so the normalization actually makes sense.
* Only the languages that have reached the maximum frequency are normalized, not all of them.
* All words of a language are normalized at once, instead of only the one that has reached the limit.
* Normalization now happens on startup, instead of using a database trigger.
* Fixed the word frequency not being updated when a punctuation mark is appended at the end, for example: 'try,'.
* Switched the positions of ';' and ':'.
* Updated the documentation.
This commit is contained in:
parent
cfe81462e0
commit
f6c51d9304
8 changed files with 78 additions and 27 deletions
|
|
@ -18,28 +18,16 @@ import io.github.sspanak.tt9.Logger;
|
|||
import io.github.sspanak.tt9.ime.TraditionalT9;
|
||||
import io.github.sspanak.tt9.languages.InvalidLanguageException;
|
||||
import io.github.sspanak.tt9.languages.Language;
|
||||
import io.github.sspanak.tt9.preferences.SettingsStore;
|
||||
|
||||
public class DictionaryDb {
|
||||
private static T9RoomDb dbInstance;
|
||||
|
||||
private static final RoomDatabase.Callback TRIGGER_CALLBACK = new RoomDatabase.Callback() {
|
||||
@Override
|
||||
public void onCreate(@NonNull SupportSQLiteDatabase db) {
|
||||
super.onCreate(db);
|
||||
db.execSQL(
|
||||
"CREATE TRIGGER IF NOT EXISTS normalize_freq " +
|
||||
" AFTER UPDATE ON words " +
|
||||
" WHEN NEW.freq > 50000 " +
|
||||
" BEGIN" +
|
||||
" UPDATE words SET freq = freq / 10000 " +
|
||||
" WHERE seq = NEW.seq; " +
|
||||
"END;"
|
||||
);
|
||||
}
|
||||
|
||||
private static final RoomDatabase.Callback DROP_NORMALIZATION_TRIGGER = new RoomDatabase.Callback() {
|
||||
@Override
|
||||
public void onOpen(@NonNull SupportSQLiteDatabase db) {
|
||||
super.onOpen(db);
|
||||
db.execSQL("DROP TRIGGER IF EXISTS normalize_freq");
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -48,8 +36,8 @@ public class DictionaryDb {
|
|||
if (dbInstance == null) {
|
||||
context = context == null ? TraditionalT9.getMainContext() : context;
|
||||
dbInstance = Room.databaseBuilder(context, T9RoomDb.class, "t9dict.db")
|
||||
.addCallback(TRIGGER_CALLBACK)
|
||||
.build();
|
||||
.addCallback(DROP_NORMALIZATION_TRIGGER) // @todo: Remove trigger dropping after December 2023. Assuming everyone would have upgraded by then.
|
||||
.build();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -65,6 +53,34 @@ public class DictionaryDb {
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* normalizeWordFrequencies
|
||||
* Normalizes the word frequencies for all languages that have reached the maximum, as defined in
|
||||
* the settings.
|
||||
*
|
||||
* This query will finish immediately, if there is nothing to do. It's safe to run it often.
|
||||
*
|
||||
*/
|
||||
public static void normalizeWordFrequencies(SettingsStore settings) {
|
||||
new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
long time = System.currentTimeMillis();
|
||||
|
||||
int affectedRows = dbInstance.wordsDao().normalizeFrequencies(
|
||||
settings.getWordFrequencyNormalizationDivider(),
|
||||
settings.getWordFrequencyMax()
|
||||
);
|
||||
|
||||
Logger.d(
|
||||
"db.normalizeWordFrequencies",
|
||||
"Normalized " + affectedRows + " words in: " + (System.currentTimeMillis() - time) + " ms"
|
||||
);
|
||||
}
|
||||
}.start();
|
||||
}
|
||||
|
||||
|
||||
public static void runInTransaction(Runnable r) {
|
||||
getInstance().runInTransaction(r);
|
||||
}
|
||||
|
|
@ -165,10 +181,25 @@ public class DictionaryDb {
|
|||
public void run() {
|
||||
try {
|
||||
int affectedRows = getInstance().wordsDao().incrementFrequency(language.getId(), word, sequence);
|
||||
|
||||
// In case the user has changed the text case, there would be no match.
|
||||
// Try again with the lowercase equivalent.
|
||||
String lowercaseWord = "";
|
||||
if (affectedRows == 0) {
|
||||
// If the user has changed the case manually, so there would be no matching word.
|
||||
// In this case, try again with the lowercase equivalent.
|
||||
affectedRows = getInstance().wordsDao().incrementFrequency(language.getId(), word.toLowerCase(language.getLocale()), sequence);
|
||||
lowercaseWord = word.toLowerCase(language.getLocale());
|
||||
affectedRows = getInstance().wordsDao().incrementFrequency(language.getId(), lowercaseWord, sequence);
|
||||
|
||||
Logger.d("incrementWordFrequency", "Attempting to increment frequency for lowercase variant: " + lowercaseWord);
|
||||
}
|
||||
|
||||
// Some languages permit appending the punctuation to the end of the words, like so: "try,".
|
||||
// But there are no such words in the dictionary, so try without the punctuation mark.
|
||||
if (affectedRows == 0 && language.isPunctuationPartOfWords() && sequence.endsWith("1")) {
|
||||
String truncatedWord = lowercaseWord.substring(0, word.length() - 1);
|
||||
String truncatedSequence = sequence.substring(0, sequence.length() - 1);
|
||||
affectedRows = getInstance().wordsDao().incrementFrequency(language.getId(), truncatedWord, truncatedSequence);
|
||||
|
||||
Logger.d("incrementWordFrequency", "Attempting to increment frequency with stripped punctuation: " + truncatedWord);
|
||||
}
|
||||
|
||||
Logger.d("incrementWordFrequency", "Affected rows: " + affectedRows);
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ interface WordsDao {
|
|||
"lang = :langId " +
|
||||
"AND seq > :sequence AND seq <= :sequence || '99' " +
|
||||
"AND (:word IS NULL OR word LIKE :word || '%') " +
|
||||
"ORDER BY freq DESC, LENGTH(seq) ASC, seq ASC " +
|
||||
"ORDER BY LENGTH(seq) ASC, freq DESC, seq ASC " +
|
||||
"LIMIT :limit"
|
||||
)
|
||||
List<Word> getFuzzy(int langId, int limit, String sequence, String word);
|
||||
|
|
@ -51,4 +51,16 @@ interface WordsDao {
|
|||
"WHERE lang = :langId AND word = :word AND seq = :sequence"
|
||||
)
|
||||
int incrementFrequency(int langId, String word, String sequence);
|
||||
|
||||
@Query(
|
||||
"UPDATE words " +
|
||||
"SET freq = freq / :normalizationDivider " +
|
||||
"WHERE lang IN ( " +
|
||||
"SELECT lang " +
|
||||
"FROM words " +
|
||||
"WHERE freq >= :maxFrequency " +
|
||||
"GROUP BY lang" +
|
||||
")"
|
||||
)
|
||||
int normalizeFrequencies(int normalizationDivider, int maxFrequency);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -68,6 +68,7 @@ public class TraditionalT9 extends KeyPadHandler {
|
|||
self = this;
|
||||
|
||||
DictionaryDb.init(this);
|
||||
DictionaryDb.normalizeWordFrequencies(settings);
|
||||
|
||||
if (softKeyHandler == null) {
|
||||
softKeyHandler = new SoftKeyHandler(this);
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import java.util.Arrays;
|
|||
|
||||
public class Punctuation {
|
||||
final public static ArrayList<String> Main = new ArrayList<>(Arrays.asList(
|
||||
",", ".", "-", "(", ")", "[", "]", "&", "~", "`", "'", ":", ";", "\"", "!", "?"
|
||||
",", ".", "-", "(", ")", "[", "]", "&", "~", "`", "'", ";", ":", "\"", "!", "?"
|
||||
));
|
||||
|
||||
final public static ArrayList<String> Secondary = new ArrayList<>(Arrays.asList(
|
||||
|
|
|
|||
|
|
@ -23,11 +23,12 @@ public class PreferencesActivity extends AppCompatActivity implements Preference
|
|||
|
||||
@Override
|
||||
protected void onCreate(Bundle savedInstanceState) {
|
||||
DictionaryDb.init(this);
|
||||
|
||||
settings = new SettingsStore(this);
|
||||
applyTheme();
|
||||
|
||||
DictionaryDb.init(this);
|
||||
DictionaryDb.normalizeWordFrequencies(settings);
|
||||
|
||||
super.onCreate(savedInstanceState);
|
||||
validateFunctionKeys();
|
||||
buildScreen();
|
||||
|
|
|
|||
|
|
@ -227,6 +227,9 @@ public class SettingsStore {
|
|||
/**
 * @return the soft key initial delay, in milliseconds
 */
public int getSoftKeyInitialDelay() {
	return 250;
}
|
||||
/**
 * @return the soft key repeat delay, in milliseconds
 */
public int getSoftKeyRepeatDelay() {
	return 40;
}
|
||||
|
||||
/**
 * @return the word frequency threshold passed to the frequency normalization query as the
 *         maximum
 */
public int getWordFrequencyMax() {
	return 25500;
}
|
||||
/**
 * The divider applied to word frequencies during normalization.
 * normalized frequency = getWordFrequencyMax() / getWordFrequencyNormalizationDivider()
 *
 * @return the normalization divider
 */
public int getWordFrequencyNormalizationDivider() {
	return 100;
}
|
||||
|
||||
|
||||
/************* add word, last word *************/
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue