1
0
Fork 0

Fixed word frequency issues causing wrong suggestions order (#164)

* all suggestions are now ordered by length, then by frequency

* word frequency is normalized to 255, instead of to 5; normalization now makes sense

* only maxed out languages are normalized, not all

* all words are normalized at once, instead of only the one that has reached the limit

* normalization now happens on start up, instead of using a trigger

* fixed word frequency not updating when a punctuation mark is appended at the end, for example: 'try,'

* switched the positions of ; and :

* updated documentation
This commit is contained in:
Dimo Karaivanov 2023-01-31 18:14:01 +02:00 committed by GitHub
parent cfe81462e0
commit f6c51d9304
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 78 additions and 27 deletions

View file

@ -18,28 +18,16 @@ import io.github.sspanak.tt9.Logger;
import io.github.sspanak.tt9.ime.TraditionalT9;
import io.github.sspanak.tt9.languages.InvalidLanguageException;
import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.preferences.SettingsStore;
public class DictionaryDb {
private static T9RoomDb dbInstance;
private static final RoomDatabase.Callback TRIGGER_CALLBACK = new RoomDatabase.Callback() {
@Override
public void onCreate(@NonNull SupportSQLiteDatabase db) {
super.onCreate(db);
db.execSQL(
"CREATE TRIGGER IF NOT EXISTS normalize_freq " +
" AFTER UPDATE ON words " +
" WHEN NEW.freq > 50000 " +
" BEGIN" +
" UPDATE words SET freq = freq / 10000 " +
" WHERE seq = NEW.seq; " +
"END;"
);
}
/**
 * Database callback that cleans up the legacy "normalize_freq" trigger.
 *
 * When the database is opened, it runs "DROP TRIGGER IF EXISTS normalize_freq", so the statement
 * is harmless when the trigger does not exist. The trigger is assumed to be left over from
 * databases created by earlier versions of the app.
 */
private static final RoomDatabase.Callback DROP_NORMALIZATION_TRIGGER = new RoomDatabase.Callback() {
@Override
public void onOpen(@NonNull SupportSQLiteDatabase db) {
super.onOpen(db);
// "IF EXISTS" makes this a no-op once the trigger is gone.
db.execSQL("DROP TRIGGER IF EXISTS normalize_freq");
}
};
@ -48,8 +36,8 @@ public class DictionaryDb {
if (dbInstance == null) {
context = context == null ? TraditionalT9.getMainContext() : context;
dbInstance = Room.databaseBuilder(context, T9RoomDb.class, "t9dict.db")
.addCallback(TRIGGER_CALLBACK)
.build();
.addCallback(DROP_NORMALIZATION_TRIGGER) // @todo: Remove trigger dropping after December 2023. Assuming everyone would have upgraded by then.
.build();
}
}
@ -65,6 +53,34 @@ public class DictionaryDb {
}
/**
 * normalizeWordFrequencies
 * Normalizes the word frequencies for all languages that have reached the maximum, as defined in
 * the settings. The work is done on a newly started thread, so this method does not wait for the
 * query to complete.
 *
 * This query will finish immediately, if there is nothing to do. It's safe to run it often.
 *
 * @param settings supplies the maximum allowed frequency and the divider applied to the
 *                 frequencies of the affected languages
 */
public static void normalizeWordFrequencies(SettingsStore settings) {
	new Thread() {
		@Override
		public void run() {
			long time = System.currentTimeMillis();

			// Obtain the database through getInstance(), like the other methods of this class,
			// instead of reading the "dbInstance" field directly. Dereferencing the raw field
			// throws a NullPointerException on this thread when it has not been assigned yet.
			int affectedRows = getInstance().wordsDao().normalizeFrequencies(
				settings.getWordFrequencyNormalizationDivider(),
				settings.getWordFrequencyMax()
			);

			Logger.d(
				"db.normalizeWordFrequencies",
				"Normalized " + affectedRows + " words in: " + (System.currentTimeMillis() - time) + " ms"
			);
		}
	}.start();
}
/**
 * Executes the given task inside a database transaction, by handing it over to the shared
 * database instance.
 *
 * @param r the work to run within the transaction
 */
public static void runInTransaction(Runnable r) {
	final T9RoomDb database = getInstance();
	database.runInTransaction(r);
}
@ -165,10 +181,25 @@ public class DictionaryDb {
public void run() {
try {
int affectedRows = getInstance().wordsDao().incrementFrequency(language.getId(), word, sequence);
// In case the user has changed the text case, there would be no match.
// Try again with the lowercase equivalent.
String lowercaseWord = "";
if (affectedRows == 0) {
// If the user has changed the case manually, so there would be no matching word.
// In this case, try again with the lowercase equivalent.
affectedRows = getInstance().wordsDao().incrementFrequency(language.getId(), word.toLowerCase(language.getLocale()), sequence);
lowercaseWord = word.toLowerCase(language.getLocale());
affectedRows = getInstance().wordsDao().incrementFrequency(language.getId(), lowercaseWord, sequence);
Logger.d("incrementWordFrequency", "Attempting to increment frequency for lowercase variant: " + lowercaseWord);
}
// Some languages permit appending the punctuation to the end of the words, like so: "try,".
// But there are no such words in the dictionary, so try without the punctuation mark.
if (affectedRows == 0 && language.isPunctuationPartOfWords() && sequence.endsWith("1")) {
String truncatedWord = lowercaseWord.substring(0, word.length() - 1);
String truncatedSequence = sequence.substring(0, sequence.length() - 1);
affectedRows = getInstance().wordsDao().incrementFrequency(language.getId(), truncatedWord, truncatedSequence);
Logger.d("incrementWordFrequency", "Attempting to increment frequency with stripped punctuation: " + truncatedWord);
}
Logger.d("incrementWordFrequency", "Affected rows: " + affectedRows);

View file

@ -34,7 +34,7 @@ interface WordsDao {
"lang = :langId " +
"AND seq > :sequence AND seq <= :sequence || '99' " +
"AND (:word IS NULL OR word LIKE :word || '%') " +
"ORDER BY freq DESC, LENGTH(seq) ASC, seq ASC " +
"ORDER BY LENGTH(seq) ASC, freq DESC, seq ASC " +
"LIMIT :limit"
)
List<Word> getFuzzy(int langId, int limit, String sequence, String word);
@ -51,4 +51,16 @@ interface WordsDao {
"WHERE lang = :langId AND word = :word AND seq = :sequence"
)
int incrementFrequency(int langId, String word, String sequence);
/**
 * Divides the frequency of every word belonging to a language that contains at least one word
 * with a frequency greater than or equal to "maxFrequency". Languages without such a word are
 * not touched, because they are not selected by the subquery.
 *
 * @param normalizationDivider the value every affected "freq" is divided by
 * @param maxFrequency         the frequency at which a language qualifies for normalization
 * @return presumably the number of updated rows; the caller logs it as the count of normalized
 *         words (confirm against the Room documentation for UPDATE queries)
 */
@Query(
"UPDATE words " +
"SET freq = freq / :normalizationDivider " +
"WHERE lang IN ( " +
"SELECT lang " +
"FROM words " +
"WHERE freq >= :maxFrequency " +
"GROUP BY lang" +
")"
)
int normalizeFrequencies(int normalizationDivider, int maxFrequency);
}

View file

@ -68,6 +68,7 @@ public class TraditionalT9 extends KeyPadHandler {
self = this;
DictionaryDb.init(this);
DictionaryDb.normalizeWordFrequencies(settings);
if (softKeyHandler == null) {
softKeyHandler = new SoftKeyHandler(this);

View file

@ -8,7 +8,7 @@ import java.util.Arrays;
public class Punctuation {
final public static ArrayList<String> Main = new ArrayList<>(Arrays.asList(
",", ".", "-", "(", ")", "[", "]", "&", "~", "`", "'", ":", ";", "\"", "!", "?"
",", ".", "-", "(", ")", "[", "]", "&", "~", "`", "'", ";", ":", "\"", "!", "?"
));
final public static ArrayList<String> Secondary = new ArrayList<>(Arrays.asList(

View file

@ -23,11 +23,12 @@ public class PreferencesActivity extends AppCompatActivity implements Preference
@Override
protected void onCreate(Bundle savedInstanceState) {
DictionaryDb.init(this);
settings = new SettingsStore(this);
applyTheme();
DictionaryDb.init(this);
DictionaryDb.normalizeWordFrequencies(settings);
super.onCreate(savedInstanceState);
validateFunctionKeys();
buildScreen();

View file

@ -227,6 +227,9 @@ public class SettingsStore {
public int getSoftKeyInitialDelay() { return 250; /* ms */ }
public int getSoftKeyRepeatDelay() { return 40; /* ms */ }
/**
 * The frequency limit handed to the word frequency normalization query. A language is
 * normalized once any of its words reaches this value.
 *
 * @return the maximum word frequency
 */
public int getWordFrequencyMax() {
	return 25500;
}
/**
 * The divider applied to word frequencies during normalization.
 *
 * normalized frequency = getWordFrequencyMax() / getWordFrequencyNormalizationDivider()
 *
 * @return the normalization divider
 */
public int getWordFrequencyNormalizationDivider() {
	return 100;
}
/************* add word, last word *************/