1
0
Fork 0

Removed the hard limit of 20 displayed words. All exact digit-sequence matches are now displayed, plus fuzzy matches as long as the total stays below 20. This brings back words that were previously missing in Hindi, Gujarati, and possibly other languages.

This commit is contained in:
sspanak 2025-03-29 16:52:12 +02:00 committed by Dimo Karaivanov
parent 02af8561e2
commit 23a4a3530f
11 changed files with 51 additions and 77 deletions

View file

@ -6,30 +6,44 @@ import androidx.annotation.NonNull;
public class WordPositionsStringBuilder {
public int size = 0;
private int maxFuzzy = Integer.MAX_VALUE;
private int size = 0;
private final StringBuilder positions = new StringBuilder();
/**
 * Reads the cursor to its end and appends every row to the position list as a range.
 * Column 0 is passed on as the range start and column 1 as the range end. In the
 * three-argument call, column 2 is compared with 1 to produce the "exact match" flag.
 *
 * NOTE(review): this listing carries both a two-argument and a three-argument append()
 * call per row; presumably only one of them is live - confirm against the real file.
 *
 * @param cursor the rows to read; advanced with moveToNext() until exhausted, not closed here
 * @return this builder, so calls can be chained
 */
public WordPositionsStringBuilder appendFromDbRanges(Cursor cursor) {
while (cursor.moveToNext()) {
append(cursor.getInt(0), cursor.getInt(1));
append(cursor.getInt(0), cursor.getInt(1), cursor.getInt(2) == 1);
}
return this;
}
/**
 * Appends the positions of one range to the comma-separated list and counts each written
 * position in "size".
 *
 * In the three-argument form, "maxFuzzy" caps non-exact ranges only: a non-exact range is
 * skipped when "size" has already reached the cap, and is cut short as soon as the cap is
 * reached while it is being written. Exact ranges are never skipped or cut short.
 *
 * NOTE(review): two signature lines and two loop headers appear in this listing; presumably
 * the two-argument signature and the unconditional loop are superseded - confirm.
 *
 * @param start first position of the range; always written once the guard below is passed
 * @param end last position of the range (inclusive)
 * @param isExact whether the range is exempt from the "maxFuzzy" cap
 */
private void append(int start, int end) {
private void append(int start, int end, boolean isExact) {
// Cap already reached: drop non-exact ranges entirely.
if (size >= maxFuzzy && !isExact) {
return;
}
// Separator goes between entries only, never before the first one.
if (size > 0) {
positions.append(",");
}
positions.append(start);
size++;
for (int position = start + 1; position <= end; position++) {
// Same cap as above, re-checked per position so a non-exact range stops mid-way.
for (int position = start + 1; position <= end && (size < maxFuzzy || isExact); position++) {
positions.append(",").append(position);
size++;
}
}
/**
 * @return the current value of the position counter kept by this builder
 */
public int getSize() {
	return this.size;
}
/**
 * Stores a new value for the "maxFuzzy" cap of this builder.
 *
 * @param maxSize the new cap value
 * @return this builder, so calls can be chained
 */
public WordPositionsStringBuilder setMaxFuzzy(int maxSize) {
	maxFuzzy = maxSize;
	return this;
}
@NonNull
@Override
public String toString() {

View file

@ -42,11 +42,10 @@ public class InsertOps {
}
/**
 * Executes a "REPLACE INTO" statement on the languages meta table for one language,
 * binding the language id as parameter 1 and the file hash as parameter 2.
 * The statement is obtained from CompiledQueryCache rather than compiled here.
 *
 * NOTE(review): this listing interleaves a four-argument variant (which also binds
 * maxWordsPerSequence as parameter 3) with a three-argument variant that has no third
 * placeholder; presumably only the three-argument form is live - confirm.
 *
 * @param db the database to write to
 * @param langId the language id stored in the "langId" column
 * @param fileHash the value stored in the "fileHash" column
 */
public static void replaceLanguageMeta(@NonNull SQLiteDatabase db, int langId, String fileHash, int maxWordsPerSequence) {
SQLiteStatement query = CompiledQueryCache.get(db, "REPLACE INTO " + Tables.LANGUAGES_META + " (langId, fileHash, maxWordsPerSequence) VALUES (?, ?, ?)");
public static void replaceLanguageMeta(@NonNull SQLiteDatabase db, int langId, String fileHash) {
SQLiteStatement query = CompiledQueryCache.get(db, "REPLACE INTO " + Tables.LANGUAGES_META + " (langId, fileHash) VALUES (?, ?)");
query.bindLong(1, langId);
query.bindString(2, fileHash);
query.bindLong(3, maxWordsPerSequence);
query.execute();
}

View file

@ -1,7 +1,5 @@
package io.github.sspanak.tt9.db.sqlite;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
class Migration {
static final Migration[] LIST = {
new Migration(
@ -13,19 +11,15 @@ class Migration {
new Migration(
"ALTER TABLE " + Tables.LANGUAGES_META + " ADD COLUMN positionsToNormalize TEXT NULL"
),
new Migration(
"ALTER TABLE " + Tables.LANGUAGES_META + " ADD COLUMN maxWordsPerSequence INTEGER NOT NULL DEFAULT -1"
),
new Migration(
"UPDATE " + Tables.LANGUAGES_META +
" SET maxWordsPerSequence = " + SettingsStore.SUGGESTIONS_POSITIONS_LIMIT +
", fileHash = '0'",
832
),
new Migration(
// enforce the new Vietnamese layout
"DELETE FROM " + Tables.LANGUAGES_META + " WHERE langId = 481590",
952
),
new Migration(
// DROP COLUMN is supported in SQLite 3.35.0 which comes with API 34+, so...
"ALTER TABLE " + Tables.LANGUAGES_META + " RENAME COLUMN maxWordsPerSequence TO _delete_me_1",
1009
)
};

View file

@ -11,7 +11,6 @@ import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import java.util.ArrayList;
import java.util.HashMap;
import io.github.sspanak.tt9.db.entities.NormalizationList;
import io.github.sspanak.tt9.db.entities.WordList;
@ -25,7 +24,6 @@ import io.github.sspanak.tt9.util.Logger;
public class ReadOps {
private final String LOG_TAG = "ReadOperations";
private final HashMap<Language, Integer> maxWordsPerSequence = new HashMap<>();
/**
@ -124,13 +122,13 @@ public class ReadOps {
@NonNull
public WordList getWords(@NonNull SQLiteDatabase db, @Nullable CancellationSignal cancel, @NonNull Language language, @NonNull String positions, String filter, int maximumWords, boolean fullOutput) {
public WordList getWords(@NonNull SQLiteDatabase db, @Nullable CancellationSignal cancel, @NonNull Language language, @NonNull String positions, String filter, boolean fullOutput) {
if (positions.isEmpty()) {
Logger.d(LOG_TAG, "No word positions. Not searching words.");
return new WordList();
}
String wordsQuery = getWordsQuery(language, positions, filter, maximumWords, fullOutput);
String wordsQuery = getWordsQuery(language, positions, filter, fullOutput);
if (wordsQuery.isEmpty() || (cancel != null && cancel.isCanceled())) {
return new WordList();
}
@ -153,7 +151,7 @@ public class ReadOps {
}
public String getSimilarWordPositions(@NonNull SQLiteDatabase db, @NonNull CancellationSignal cancel, @NonNull Language language, @NonNull String sequence, boolean onlyExactSequenceMatches, String wordFilter, int minPositions) {
public String getSimilarWordPositions(@NonNull SQLiteDatabase db, @NonNull CancellationSignal cancel, @NonNull Language language, @NonNull String sequence, boolean onlyExactSequenceMatches, String wordFilter, int minPositions, int maxPositions) {
int generations;
if (onlyExactSequenceMatches) {
@ -166,17 +164,17 @@ public class ReadOps {
};
}
return getWordPositions(db, cancel, language, sequence, generations, minPositions, wordFilter);
return getWordPositions(db, cancel, language, sequence, generations, minPositions, maxPositions, wordFilter);
}
@NonNull
public String getWordPositions(@NonNull SQLiteDatabase db, @Nullable CancellationSignal cancel, @NonNull Language language, @NonNull String sequence, int generations, int minPositions, String wordFilter) {
public String getWordPositions(@NonNull SQLiteDatabase db, @Nullable CancellationSignal cancel, @NonNull Language language, @NonNull String sequence, int generations, int minPositions, int maxPositions, String wordFilter) {
if ((sequence.length() == 1 && !language.isTranscribed()) || (cancel != null && cancel.isCanceled())) {
return sequence;
}
WordPositionsStringBuilder positions = new WordPositionsStringBuilder();
WordPositionsStringBuilder positions = new WordPositionsStringBuilder().setMaxFuzzy(maxPositions);
String cachedFactoryPositions = SlowQueryStats.getCachedIfSlow(SlowQueryStats.generateKey(language, sequence, wordFilter, minPositions));
if (cachedFactoryPositions != null) {
@ -191,8 +189,8 @@ public class ReadOps {
return sequence;
}
if (positions.size < minPositions && generations < Integer.MAX_VALUE) {
Logger.d(LOG_TAG, "Not enough positions: " + positions.size + " < " + minPositions + ". Searching for more.");
if (positions.getSize() < minPositions && generations < Integer.MAX_VALUE) {
Logger.d(LOG_TAG, "Not enough positions: " + positions.getSize() + " < " + minPositions + ". Searching for more.");
try (Cursor cursor = db.rawQuery(getFactoryWordPositionsQuery(language, sequence, Integer.MAX_VALUE), null, cancel)) {
positions.appendFromDbRanges(cursor);
} catch (OperationCanceledException ignored) {
@ -217,7 +215,7 @@ public class ReadOps {
private String getPositionsQuery(@NonNull Language language, @NonNull String sequence, int generations) {
return
"SELECT `start`, `end` FROM ( " +
"SELECT `start`, `end`, `exact` FROM ( " +
getFactoryWordPositionsQuery(language, sequence, generations) +
") UNION " +
getCustomWordPositionsQuery(language, sequence, generations);
@ -230,8 +228,8 @@ public class ReadOps {
*/
@NonNull
private String getFactoryWordPositionsQuery(@NonNull Language language, @NonNull String sequence, int generations) {
StringBuilder sql = new StringBuilder("SELECT `start`, `end` FROM ")
.append(Tables.getWordPositions(language.getId()))
StringBuilder sql = new StringBuilder("SELECT `start`, `end`, LENGTH(`sequence`) = ").append(sequence.length()).append(" AS `exact`")
.append(" FROM ").append(Tables.getWordPositions(language.getId()))
.append(" WHERE ");
if (generations >= 0 && generations < 10) {
@ -252,7 +250,7 @@ public class ReadOps {
.append(sequence)
.append("' OR sequence BETWEEN '").append(sequence).append("1' AND '").append(sequence).append(rangeEnd).append("'");
sql.append(" ORDER BY `start` ");
sql.append(" LIMIT ").append(maxWordsPerSequence.get(language));
sql.append(" LIMIT ").append(SettingsStore.SUGGESTIONS_MAX);
}
String positionsSql = sql.toString();
@ -267,7 +265,8 @@ public class ReadOps {
*/
@NonNull
private String getCustomWordPositionsQuery(@NonNull Language language, @NonNull String sequence, int generations) {
String sql = "SELECT -id as `start`, -id as `end` FROM " + Tables.CUSTOM_WORDS +
String sql = "SELECT -id as `start`, -id as `end`, LENGTH(`sequence`) = " + sequence.length() + " as `exact` " +
" FROM " + Tables.CUSTOM_WORDS +
" WHERE langId = " + language.getId() +
" AND (sequence = " + sequence;
@ -282,7 +281,7 @@ public class ReadOps {
}
@NonNull private String getWordsQuery(@NonNull Language language, @NonNull String positions, @NonNull String filter, int maxWords, boolean fullOutput) {
@NonNull private String getWordsQuery(@NonNull Language language, @NonNull String positions, @NonNull String filter, boolean fullOutput) {
StringBuilder sql = new StringBuilder();
sql
.append("SELECT word");
@ -299,12 +298,6 @@ public class ReadOps {
sql.append(" ORDER BY LENGTH(word), frequency DESC");
if (maxWords < 0 && maxWordsPerSequence.containsKey(language)) {
Integer limit = maxWordsPerSequence.get(language);
maxWords = limit != null ? limit : SettingsStore.SUGGESTIONS_POSITIONS_LIMIT;
}
sql.append(" LIMIT ").append(maxWords);
String wordsSql = sql.toString();
Logger.v(LOG_TAG, "Words SQL: " + wordsSql);
return wordsSql;
@ -339,20 +332,4 @@ public class ReadOps {
return pairs;
}
/**
 * Loads the "maxWordsPerSequence" value of a language from the languages meta table and
 * keeps it in the in-memory map, so the database is queried once per language at most.
 * Does nothing when the language already has an entry in the map.
 *
 * A stored value that is zero or negative is replaced with
 * SettingsStore.SUGGESTIONS_POSITIONS_LIMIT (described in the original comment as the
 * balanced performance limit).
 *
 * @param db the database to read from
 * @param language the language whose limit is looked up; also the key of the map entry
 */
public void cacheLongPositionsIfMissing(@NonNull SQLiteDatabase db, @NonNull Language language) {
if (maxWordsPerSequence.containsKey(language)) {
return;
}
// The language id is an int from language.getId(), concatenated directly into the SQL text.
String sql = "SELECT maxWordsPerSequence FROM " + Tables.LANGUAGES_META + " WHERE langId = " + language.getId();
// NOTE(review): the third argument is presumably the value returned when the query yields nothing - confirm.
int maxWords = (int) CompiledQueryCache.simpleQueryForLong(db, sql, SettingsStore.SUGGESTIONS_POSITIONS_LIMIT);
maxWords = maxWords > 0 ? maxWords : SettingsStore.SUGGESTIONS_POSITIONS_LIMIT;
maxWordsPerSequence.put(language, maxWords);
}
}

View file

@ -115,8 +115,7 @@ public class Tables {
return "CREATE TABLE IF NOT EXISTS " + LANGUAGES_META + " (" +
"langId INTEGER UNIQUE NOT NULL, " +
"positionsToNormalize TEXT NULL," +
"fileHash TEXT NOT NULL DEFAULT 0, " +
"maxWordsPerSequence INTEGER NOT NULL DEFAULT -1 " +
"fileHash TEXT NOT NULL DEFAULT 0 " +
")";
}
}

View file

@ -263,7 +263,6 @@ public class DictionaryLoader {
WordBatch batch = new WordBatch(language, SettingsStore.DICTIONARY_IMPORT_BATCH_SIZE + 1);
float progressRatio = (maxProgress - minProgress) / wordFile.getWords();
int wordCount = 0;
int maxWordsPerSequence = 0;
positionShift = positionShift == 0 ? 1 : positionShift;
@ -279,7 +278,6 @@ public class DictionaryLoader {
ArrayList<String> words = wordFile.getNextWords(digitSequence);
batch.add(words, digitSequence, wordCount + positionShift);
wordCount += words.size();
maxWordsPerSequence = Math.max(maxWordsPerSequence, words.size());
if (batch.getWords().size() > SettingsStore.DICTIONARY_IMPORT_BATCH_SIZE) {
saveWordBatch(batch);
@ -294,7 +292,7 @@ public class DictionaryLoader {
}
saveWordBatch(batch);
InsertOps.replaceLanguageMeta(sqlite.getDb(), language.getId(), wordFile.getHash(), maxWordsPerSequence);
InsertOps.replaceLanguageMeta(sqlite.getDb(), language.getId(), wordFile.getHash());
}

View file

@ -77,23 +77,19 @@ public class WordStore extends BaseSyncStore {
return new ArrayList<>();
}
Timer.start("cache_long_positions");
readOps.cacheLongPositionsIfMissing(sqlite.getDb(), language);
long longPositionsTime = Timer.stop("cache_long_positions");
final int minWords = Math.max(minimumWords, 0);
final int maxWords = maximumWords >= 0 ? Math.max(maximumWords, minWords) : maximumWords;
final String filter = wordFilter == null ? "" : wordFilter;
Timer.start("get_positions");
String positions = readOps.getSimilarWordPositions(sqlite.getDb(), cancel, language, sequence, onlyExactSequence, filter, minWords);
String positions = readOps.getSimilarWordPositions(sqlite.getDb(), cancel, language, sequence, onlyExactSequence, filter, minWords, maxWords);
long positionsTime = Timer.stop("get_positions");
Timer.start("get_words");
ArrayList<String> words = readOps.getWords(sqlite.getDb(), cancel, language, positions, filter, maxWords, false).toStringList();
ArrayList<String> words = readOps.getWords(sqlite.getDb(), cancel, language, positions, filter, false).toStringList();
long wordsTime = Timer.stop("get_words");
printLoadingSummary(sequence, words, longPositionsTime, positionsTime, wordsTime);
printLoadingSummary(sequence, words, positionsTime, wordsTime);
if (!cancel.isCanceled()) { // do not cache empty results from aborted queries
SlowQueryStats.add(language, sequence, wordFilter, minWords, (int) (positionsTime + wordsTime), positions);
}
@ -179,8 +175,8 @@ public class WordStore extends BaseSyncStore {
try {
Timer.start(LOG_TAG);
String topWordPositions = readOps.getWordPositions(sqlite.getDb(), null, language, sequence, 0, 0, "");
WordList topWords = readOps.getWords(sqlite.getDb(), null, language, topWordPositions, "", 9999, true);
String topWordPositions = readOps.getWordPositions(sqlite.getDb(), null, language, sequence, 0, 0, Integer.MAX_VALUE, "");
WordList topWords = readOps.getWords(sqlite.getDb(), null, language, topWordPositions, "", true);
if (topWords.isEmpty()) {
throw new Exception("No such word");
}
@ -245,7 +241,7 @@ public class WordStore extends BaseSyncStore {
}
private void printLoadingSummary(String sequence, ArrayList<String> words, long longPositionsTime, long positionIndexTime, long wordsTime) {
private void printLoadingSummary(String sequence, ArrayList<String> words, long positionIndexTime, long wordsTime) {
if (!Logger.isDebugLevel()) {
return;
}
@ -255,7 +251,6 @@ public class WordStore extends BaseSyncStore {
.append("\nWord Count: ").append(words.size())
.append(".\nTime: ").append(positionIndexTime + wordsTime)
.append(" ms (positions: ").append(positionIndexTime)
.append(" ms, long positions: ").append(longPositionsTime)
.append(" ms, words: ").append(wordsTime).append(" ms).");
if (words.isEmpty()) {

View file

@ -21,7 +21,6 @@ public class IdeogramPredictions extends WordPredictions {
/**
 * Creates the predictions object through the parent constructor, then overrides two
 * inherited settings: "minWords" becomes 1 and "onlyExactMatches" is switched on.
 *
 * NOTE(review): the "maxWords = -1" assignment below appears in this listing, but an
 * assignment like this would not compile against a final "maxWords" field; presumably
 * the line is no longer part of the file - confirm.
 *
 * @param settings passed through to the parent constructor
 * @param textField passed through to the parent constructor
 */
public IdeogramPredictions(SettingsStore settings, TextField textField) {
super(settings, textField);
minWords = 1;
maxWords = -1;
onlyExactMatches = true;
}

View file

@ -16,7 +16,7 @@ abstract public class Predictions {
@NonNull protected String digitSequence = "";
@NonNull protected Language language = new NullLanguage();
protected int minWords = SettingsStore.SUGGESTIONS_MIN;
protected int maxWords = SettingsStore.SUGGESTIONS_MAX;
protected final int maxWords = SettingsStore.SUGGESTIONS_MAX;
protected boolean onlyExactMatches = false;
@NonNull protected String stem = "";

View file

@ -81,8 +81,8 @@ public class WordPredictions extends Predictions {
digitSequence.substring(1),
onlyExactMatches,
stem.length() > 1 ? stem.substring(1) : "",
SettingsStore.SUGGESTIONS_MIN,
SettingsStore.SUGGESTIONS_MAX
minWords,
maxWords
);
}

View file

@ -33,7 +33,6 @@ public class SettingsStore extends SettingsHotkeys {
public final static float SOFT_KEY_V_SHAPE_RATIO_OUTER = (float) Math.pow(SOFT_KEY_V_SHAPE_RATIO_INNER, 2);
public final static int SUGGESTIONS_MAX = 20;
public final static int SUGGESTIONS_MIN = 8;
public final static int SUGGESTIONS_POSITIONS_LIMIT = 100;
public final static int SUGGESTIONS_RENDER_DEBOUNCE_TIME = 25; // ms
public final static int SUGGESTIONS_RENDER_CLEAR_DEBOUNCE_TIME = 60; // ms
public final static int SUGGESTIONS_SELECT_ANIMATION_DURATION = 66;