1
0
Fork 0

Removed the hard limit of 20 displayed words. All exact digit-sequence matches are now displayed, and fuzzy matches are added only while the total is below 20. This makes previously missing words available in Hindi, Gujarati, and possibly other languages.

This commit is contained in:
sspanak 2025-03-29 16:52:12 +02:00 committed by Dimo Karaivanov
parent 02af8561e2
commit 23a4a3530f
11 changed files with 51 additions and 77 deletions

View file

@ -6,30 +6,44 @@ import androidx.annotation.NonNull;
public class WordPositionsStringBuilder { public class WordPositionsStringBuilder {
public int size = 0; private int maxFuzzy = Integer.MAX_VALUE;
private int size = 0;
private final StringBuilder positions = new StringBuilder(); private final StringBuilder positions = new StringBuilder();
public WordPositionsStringBuilder appendFromDbRanges(Cursor cursor) { public WordPositionsStringBuilder appendFromDbRanges(Cursor cursor) {
while (cursor.moveToNext()) { while (cursor.moveToNext()) {
append(cursor.getInt(0), cursor.getInt(1)); append(cursor.getInt(0), cursor.getInt(1), cursor.getInt(2) == 1);
} }
return this; return this;
} }
private void append(int start, int end) { private void append(int start, int end, boolean isExact) {
if (size >= maxFuzzy && !isExact) {
return;
}
if (size > 0) { if (size > 0) {
positions.append(","); positions.append(",");
} }
positions.append(start); positions.append(start);
size++; size++;
for (int position = start + 1; position <= end; position++) { for (int position = start + 1; position <= end && (size < maxFuzzy || isExact); position++) {
positions.append(",").append(position); positions.append(",").append(position);
size++; size++;
} }
} }
/**
 * Returns how many individual word positions have been appended so far.
 *
 * @return the running count of appended positions
 */
public int getSize() {
	return this.size;
}
/**
 * Sets the cap on the total number of positions after which non-exact (fuzzy) ranges
 * are no longer appended. Exact matches are never affected by this cap.
 *
 * A negative value is treated as "no limit". Without this normalization, a negative cap
 * would make the "size >= maxFuzzy" check always true and silently drop every fuzzy match.
 *
 * @param maxSize the maximum total number of positions while fuzzy ranges are still accepted;
 *                negative means unlimited
 * @return this builder, to allow call chaining
 */
public WordPositionsStringBuilder setMaxFuzzy(int maxSize) {
	this.maxFuzzy = maxSize < 0 ? Integer.MAX_VALUE : maxSize;
	return this;
}
@NonNull @NonNull
@Override @Override
public String toString() { public String toString() {

View file

@ -42,11 +42,10 @@ public class InsertOps {
} }
public static void replaceLanguageMeta(@NonNull SQLiteDatabase db, int langId, String fileHash, int maxWordsPerSequence) { public static void replaceLanguageMeta(@NonNull SQLiteDatabase db, int langId, String fileHash) {
SQLiteStatement query = CompiledQueryCache.get(db, "REPLACE INTO " + Tables.LANGUAGES_META + " (langId, fileHash, maxWordsPerSequence) VALUES (?, ?, ?)"); SQLiteStatement query = CompiledQueryCache.get(db, "REPLACE INTO " + Tables.LANGUAGES_META + " (langId, fileHash) VALUES (?, ?)");
query.bindLong(1, langId); query.bindLong(1, langId);
query.bindString(2, fileHash); query.bindString(2, fileHash);
query.bindLong(3, maxWordsPerSequence);
query.execute(); query.execute();
} }

View file

@ -1,7 +1,5 @@
package io.github.sspanak.tt9.db.sqlite; package io.github.sspanak.tt9.db.sqlite;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
class Migration { class Migration {
static final Migration[] LIST = { static final Migration[] LIST = {
new Migration( new Migration(
@ -13,19 +11,15 @@ class Migration {
new Migration( new Migration(
"ALTER TABLE " + Tables.LANGUAGES_META + " ADD COLUMN positionsToNormalize TEXT NULL" "ALTER TABLE " + Tables.LANGUAGES_META + " ADD COLUMN positionsToNormalize TEXT NULL"
), ),
new Migration(
"ALTER TABLE " + Tables.LANGUAGES_META + " ADD COLUMN maxWordsPerSequence INTEGER NOT NULL DEFAULT -1"
),
new Migration(
"UPDATE " + Tables.LANGUAGES_META +
" SET maxWordsPerSequence = " + SettingsStore.SUGGESTIONS_POSITIONS_LIMIT +
", fileHash = '0'",
832
),
new Migration( new Migration(
// enforce the new Vietnamese layout // enforce the new Vietnamese layout
"DELETE FROM " + Tables.LANGUAGES_META + " WHERE langId = 481590", "DELETE FROM " + Tables.LANGUAGES_META + " WHERE langId = 481590",
952 952
),
new Migration(
// DROP COLUMN is only supported since SQLite 3.35.0, which comes with API 34+, so the obsolete column is renamed instead of being dropped.
"ALTER TABLE " + Tables.LANGUAGES_META + " RENAME COLUMN maxWordsPerSequence TO _delete_me_1",
1009
) )
}; };

View file

@ -11,7 +11,6 @@ import androidx.annotation.NonNull;
import androidx.annotation.Nullable; import androidx.annotation.Nullable;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import io.github.sspanak.tt9.db.entities.NormalizationList; import io.github.sspanak.tt9.db.entities.NormalizationList;
import io.github.sspanak.tt9.db.entities.WordList; import io.github.sspanak.tt9.db.entities.WordList;
@ -25,7 +24,6 @@ import io.github.sspanak.tt9.util.Logger;
public class ReadOps { public class ReadOps {
private final String LOG_TAG = "ReadOperations"; private final String LOG_TAG = "ReadOperations";
private final HashMap<Language, Integer> maxWordsPerSequence = new HashMap<>();
/** /**
@ -124,13 +122,13 @@ public class ReadOps {
@NonNull @NonNull
public WordList getWords(@NonNull SQLiteDatabase db, @Nullable CancellationSignal cancel, @NonNull Language language, @NonNull String positions, String filter, int maximumWords, boolean fullOutput) { public WordList getWords(@NonNull SQLiteDatabase db, @Nullable CancellationSignal cancel, @NonNull Language language, @NonNull String positions, String filter, boolean fullOutput) {
if (positions.isEmpty()) { if (positions.isEmpty()) {
Logger.d(LOG_TAG, "No word positions. Not searching words."); Logger.d(LOG_TAG, "No word positions. Not searching words.");
return new WordList(); return new WordList();
} }
String wordsQuery = getWordsQuery(language, positions, filter, maximumWords, fullOutput); String wordsQuery = getWordsQuery(language, positions, filter, fullOutput);
if (wordsQuery.isEmpty() || (cancel != null && cancel.isCanceled())) { if (wordsQuery.isEmpty() || (cancel != null && cancel.isCanceled())) {
return new WordList(); return new WordList();
} }
@ -153,7 +151,7 @@ public class ReadOps {
} }
public String getSimilarWordPositions(@NonNull SQLiteDatabase db, @NonNull CancellationSignal cancel, @NonNull Language language, @NonNull String sequence, boolean onlyExactSequenceMatches, String wordFilter, int minPositions) { public String getSimilarWordPositions(@NonNull SQLiteDatabase db, @NonNull CancellationSignal cancel, @NonNull Language language, @NonNull String sequence, boolean onlyExactSequenceMatches, String wordFilter, int minPositions, int maxPositions) {
int generations; int generations;
if (onlyExactSequenceMatches) { if (onlyExactSequenceMatches) {
@ -166,17 +164,17 @@ public class ReadOps {
}; };
} }
return getWordPositions(db, cancel, language, sequence, generations, minPositions, wordFilter); return getWordPositions(db, cancel, language, sequence, generations, minPositions, maxPositions, wordFilter);
} }
@NonNull @NonNull
public String getWordPositions(@NonNull SQLiteDatabase db, @Nullable CancellationSignal cancel, @NonNull Language language, @NonNull String sequence, int generations, int minPositions, String wordFilter) { public String getWordPositions(@NonNull SQLiteDatabase db, @Nullable CancellationSignal cancel, @NonNull Language language, @NonNull String sequence, int generations, int minPositions, int maxPositions, String wordFilter) {
if ((sequence.length() == 1 && !language.isTranscribed()) || (cancel != null && cancel.isCanceled())) { if ((sequence.length() == 1 && !language.isTranscribed()) || (cancel != null && cancel.isCanceled())) {
return sequence; return sequence;
} }
WordPositionsStringBuilder positions = new WordPositionsStringBuilder(); WordPositionsStringBuilder positions = new WordPositionsStringBuilder().setMaxFuzzy(maxPositions);
String cachedFactoryPositions = SlowQueryStats.getCachedIfSlow(SlowQueryStats.generateKey(language, sequence, wordFilter, minPositions)); String cachedFactoryPositions = SlowQueryStats.getCachedIfSlow(SlowQueryStats.generateKey(language, sequence, wordFilter, minPositions));
if (cachedFactoryPositions != null) { if (cachedFactoryPositions != null) {
@ -191,8 +189,8 @@ public class ReadOps {
return sequence; return sequence;
} }
if (positions.size < minPositions && generations < Integer.MAX_VALUE) { if (positions.getSize() < minPositions && generations < Integer.MAX_VALUE) {
Logger.d(LOG_TAG, "Not enough positions: " + positions.size + " < " + minPositions + ". Searching for more."); Logger.d(LOG_TAG, "Not enough positions: " + positions.getSize() + " < " + minPositions + ". Searching for more.");
try (Cursor cursor = db.rawQuery(getFactoryWordPositionsQuery(language, sequence, Integer.MAX_VALUE), null, cancel)) { try (Cursor cursor = db.rawQuery(getFactoryWordPositionsQuery(language, sequence, Integer.MAX_VALUE), null, cancel)) {
positions.appendFromDbRanges(cursor); positions.appendFromDbRanges(cursor);
} catch (OperationCanceledException ignored) { } catch (OperationCanceledException ignored) {
@ -217,7 +215,7 @@ public class ReadOps {
private String getPositionsQuery(@NonNull Language language, @NonNull String sequence, int generations) { private String getPositionsQuery(@NonNull Language language, @NonNull String sequence, int generations) {
return return
"SELECT `start`, `end` FROM ( " + "SELECT `start`, `end`, `exact` FROM ( " +
getFactoryWordPositionsQuery(language, sequence, generations) + getFactoryWordPositionsQuery(language, sequence, generations) +
") UNION " + ") UNION " +
getCustomWordPositionsQuery(language, sequence, generations); getCustomWordPositionsQuery(language, sequence, generations);
@ -230,8 +228,8 @@ public class ReadOps {
*/ */
@NonNull @NonNull
private String getFactoryWordPositionsQuery(@NonNull Language language, @NonNull String sequence, int generations) { private String getFactoryWordPositionsQuery(@NonNull Language language, @NonNull String sequence, int generations) {
StringBuilder sql = new StringBuilder("SELECT `start`, `end` FROM ") StringBuilder sql = new StringBuilder("SELECT `start`, `end`, LENGTH(`sequence`) = ").append(sequence.length()).append(" AS `exact`")
.append(Tables.getWordPositions(language.getId())) .append(" FROM ").append(Tables.getWordPositions(language.getId()))
.append(" WHERE "); .append(" WHERE ");
if (generations >= 0 && generations < 10) { if (generations >= 0 && generations < 10) {
@ -252,7 +250,7 @@ public class ReadOps {
.append(sequence) .append(sequence)
.append("' OR sequence BETWEEN '").append(sequence).append("1' AND '").append(sequence).append(rangeEnd).append("'"); .append("' OR sequence BETWEEN '").append(sequence).append("1' AND '").append(sequence).append(rangeEnd).append("'");
sql.append(" ORDER BY `start` "); sql.append(" ORDER BY `start` ");
sql.append(" LIMIT ").append(maxWordsPerSequence.get(language)); sql.append(" LIMIT ").append(SettingsStore.SUGGESTIONS_MAX);
} }
String positionsSql = sql.toString(); String positionsSql = sql.toString();
@ -267,7 +265,8 @@ public class ReadOps {
*/ */
@NonNull @NonNull
private String getCustomWordPositionsQuery(@NonNull Language language, @NonNull String sequence, int generations) { private String getCustomWordPositionsQuery(@NonNull Language language, @NonNull String sequence, int generations) {
String sql = "SELECT -id as `start`, -id as `end` FROM " + Tables.CUSTOM_WORDS + String sql = "SELECT -id as `start`, -id as `end`, LENGTH(`sequence`) = " + sequence.length() + " as `exact` " +
" FROM " + Tables.CUSTOM_WORDS +
" WHERE langId = " + language.getId() + " WHERE langId = " + language.getId() +
" AND (sequence = " + sequence; " AND (sequence = " + sequence;
@ -282,7 +281,7 @@ public class ReadOps {
} }
@NonNull private String getWordsQuery(@NonNull Language language, @NonNull String positions, @NonNull String filter, int maxWords, boolean fullOutput) { @NonNull private String getWordsQuery(@NonNull Language language, @NonNull String positions, @NonNull String filter, boolean fullOutput) {
StringBuilder sql = new StringBuilder(); StringBuilder sql = new StringBuilder();
sql sql
.append("SELECT word"); .append("SELECT word");
@ -299,12 +298,6 @@ public class ReadOps {
sql.append(" ORDER BY LENGTH(word), frequency DESC"); sql.append(" ORDER BY LENGTH(word), frequency DESC");
if (maxWords < 0 && maxWordsPerSequence.containsKey(language)) {
Integer limit = maxWordsPerSequence.get(language);
maxWords = limit != null ? limit : SettingsStore.SUGGESTIONS_POSITIONS_LIMIT;
}
sql.append(" LIMIT ").append(maxWords);
String wordsSql = sql.toString(); String wordsSql = sql.toString();
Logger.v(LOG_TAG, "Words SQL: " + wordsSql); Logger.v(LOG_TAG, "Words SQL: " + wordsSql);
return wordsSql; return wordsSql;
@ -339,20 +332,4 @@ public class ReadOps {
return pairs; return pairs;
} }
/**
* Caches the languages with more than 100 words per a sequence (the balanced performance limit).
*/
public void cacheLongPositionsIfMissing(@NonNull SQLiteDatabase db, @NonNull Language language) {
if (maxWordsPerSequence.containsKey(language)) {
return;
}
String sql = "SELECT maxWordsPerSequence FROM " + Tables.LANGUAGES_META + " WHERE langId = " + language.getId();
int maxWords = (int) CompiledQueryCache.simpleQueryForLong(db, sql, SettingsStore.SUGGESTIONS_POSITIONS_LIMIT);
maxWords = maxWords > 0 ? maxWords : SettingsStore.SUGGESTIONS_POSITIONS_LIMIT;
maxWordsPerSequence.put(language, maxWords);
}
} }

View file

@ -115,8 +115,7 @@ public class Tables {
return "CREATE TABLE IF NOT EXISTS " + LANGUAGES_META + " (" + return "CREATE TABLE IF NOT EXISTS " + LANGUAGES_META + " (" +
"langId INTEGER UNIQUE NOT NULL, " + "langId INTEGER UNIQUE NOT NULL, " +
"positionsToNormalize TEXT NULL," + "positionsToNormalize TEXT NULL," +
"fileHash TEXT NOT NULL DEFAULT 0, " + "fileHash TEXT NOT NULL DEFAULT 0 " +
"maxWordsPerSequence INTEGER NOT NULL DEFAULT -1 " +
")"; ")";
} }
} }

View file

@ -263,7 +263,6 @@ public class DictionaryLoader {
WordBatch batch = new WordBatch(language, SettingsStore.DICTIONARY_IMPORT_BATCH_SIZE + 1); WordBatch batch = new WordBatch(language, SettingsStore.DICTIONARY_IMPORT_BATCH_SIZE + 1);
float progressRatio = (maxProgress - minProgress) / wordFile.getWords(); float progressRatio = (maxProgress - minProgress) / wordFile.getWords();
int wordCount = 0; int wordCount = 0;
int maxWordsPerSequence = 0;
positionShift = positionShift == 0 ? 1 : positionShift; positionShift = positionShift == 0 ? 1 : positionShift;
@ -279,7 +278,6 @@ public class DictionaryLoader {
ArrayList<String> words = wordFile.getNextWords(digitSequence); ArrayList<String> words = wordFile.getNextWords(digitSequence);
batch.add(words, digitSequence, wordCount + positionShift); batch.add(words, digitSequence, wordCount + positionShift);
wordCount += words.size(); wordCount += words.size();
maxWordsPerSequence = Math.max(maxWordsPerSequence, words.size());
if (batch.getWords().size() > SettingsStore.DICTIONARY_IMPORT_BATCH_SIZE) { if (batch.getWords().size() > SettingsStore.DICTIONARY_IMPORT_BATCH_SIZE) {
saveWordBatch(batch); saveWordBatch(batch);
@ -294,7 +292,7 @@ public class DictionaryLoader {
} }
saveWordBatch(batch); saveWordBatch(batch);
InsertOps.replaceLanguageMeta(sqlite.getDb(), language.getId(), wordFile.getHash(), maxWordsPerSequence); InsertOps.replaceLanguageMeta(sqlite.getDb(), language.getId(), wordFile.getHash());
} }

View file

@ -77,23 +77,19 @@ public class WordStore extends BaseSyncStore {
return new ArrayList<>(); return new ArrayList<>();
} }
Timer.start("cache_long_positions");
readOps.cacheLongPositionsIfMissing(sqlite.getDb(), language);
long longPositionsTime = Timer.stop("cache_long_positions");
final int minWords = Math.max(minimumWords, 0); final int minWords = Math.max(minimumWords, 0);
final int maxWords = maximumWords >= 0 ? Math.max(maximumWords, minWords) : maximumWords; final int maxWords = maximumWords >= 0 ? Math.max(maximumWords, minWords) : maximumWords;
final String filter = wordFilter == null ? "" : wordFilter; final String filter = wordFilter == null ? "" : wordFilter;
Timer.start("get_positions"); Timer.start("get_positions");
String positions = readOps.getSimilarWordPositions(sqlite.getDb(), cancel, language, sequence, onlyExactSequence, filter, minWords); String positions = readOps.getSimilarWordPositions(sqlite.getDb(), cancel, language, sequence, onlyExactSequence, filter, minWords, maxWords);
long positionsTime = Timer.stop("get_positions"); long positionsTime = Timer.stop("get_positions");
Timer.start("get_words"); Timer.start("get_words");
ArrayList<String> words = readOps.getWords(sqlite.getDb(), cancel, language, positions, filter, maxWords, false).toStringList(); ArrayList<String> words = readOps.getWords(sqlite.getDb(), cancel, language, positions, filter, false).toStringList();
long wordsTime = Timer.stop("get_words"); long wordsTime = Timer.stop("get_words");
printLoadingSummary(sequence, words, longPositionsTime, positionsTime, wordsTime); printLoadingSummary(sequence, words, positionsTime, wordsTime);
if (!cancel.isCanceled()) { // do not cache empty results from aborted queries if (!cancel.isCanceled()) { // do not cache empty results from aborted queries
SlowQueryStats.add(language, sequence, wordFilter, minWords, (int) (positionsTime + wordsTime), positions); SlowQueryStats.add(language, sequence, wordFilter, minWords, (int) (positionsTime + wordsTime), positions);
} }
@ -179,8 +175,8 @@ public class WordStore extends BaseSyncStore {
try { try {
Timer.start(LOG_TAG); Timer.start(LOG_TAG);
String topWordPositions = readOps.getWordPositions(sqlite.getDb(), null, language, sequence, 0, 0, ""); String topWordPositions = readOps.getWordPositions(sqlite.getDb(), null, language, sequence, 0, 0, Integer.MAX_VALUE, "");
WordList topWords = readOps.getWords(sqlite.getDb(), null, language, topWordPositions, "", 9999, true); WordList topWords = readOps.getWords(sqlite.getDb(), null, language, topWordPositions, "", true);
if (topWords.isEmpty()) { if (topWords.isEmpty()) {
throw new Exception("No such word"); throw new Exception("No such word");
} }
@ -245,7 +241,7 @@ public class WordStore extends BaseSyncStore {
} }
private void printLoadingSummary(String sequence, ArrayList<String> words, long longPositionsTime, long positionIndexTime, long wordsTime) { private void printLoadingSummary(String sequence, ArrayList<String> words, long positionIndexTime, long wordsTime) {
if (!Logger.isDebugLevel()) { if (!Logger.isDebugLevel()) {
return; return;
} }
@ -255,7 +251,6 @@ public class WordStore extends BaseSyncStore {
.append("\nWord Count: ").append(words.size()) .append("\nWord Count: ").append(words.size())
.append(".\nTime: ").append(positionIndexTime + wordsTime) .append(".\nTime: ").append(positionIndexTime + wordsTime)
.append(" ms (positions: ").append(positionIndexTime) .append(" ms (positions: ").append(positionIndexTime)
.append(" ms, long positions: ").append(longPositionsTime)
.append(" ms, words: ").append(wordsTime).append(" ms)."); .append(" ms, words: ").append(wordsTime).append(" ms).");
if (words.isEmpty()) { if (words.isEmpty()) {

View file

@ -21,7 +21,6 @@ public class IdeogramPredictions extends WordPredictions {
public IdeogramPredictions(SettingsStore settings, TextField textField) { public IdeogramPredictions(SettingsStore settings, TextField textField) {
super(settings, textField); super(settings, textField);
minWords = 1; minWords = 1;
maxWords = -1;
onlyExactMatches = true; onlyExactMatches = true;
} }

View file

@ -16,7 +16,7 @@ abstract public class Predictions {
@NonNull protected String digitSequence = ""; @NonNull protected String digitSequence = "";
@NonNull protected Language language = new NullLanguage(); @NonNull protected Language language = new NullLanguage();
protected int minWords = SettingsStore.SUGGESTIONS_MIN; protected int minWords = SettingsStore.SUGGESTIONS_MIN;
protected int maxWords = SettingsStore.SUGGESTIONS_MAX; protected final int maxWords = SettingsStore.SUGGESTIONS_MAX;
protected boolean onlyExactMatches = false; protected boolean onlyExactMatches = false;
@NonNull protected String stem = ""; @NonNull protected String stem = "";

View file

@ -81,8 +81,8 @@ public class WordPredictions extends Predictions {
digitSequence.substring(1), digitSequence.substring(1),
onlyExactMatches, onlyExactMatches,
stem.length() > 1 ? stem.substring(1) : "", stem.length() > 1 ? stem.substring(1) : "",
SettingsStore.SUGGESTIONS_MIN, minWords,
SettingsStore.SUGGESTIONS_MAX maxWords
); );
} }

View file

@ -33,7 +33,6 @@ public class SettingsStore extends SettingsHotkeys {
public final static float SOFT_KEY_V_SHAPE_RATIO_OUTER = (float) Math.pow(SOFT_KEY_V_SHAPE_RATIO_INNER, 2); public final static float SOFT_KEY_V_SHAPE_RATIO_OUTER = (float) Math.pow(SOFT_KEY_V_SHAPE_RATIO_INNER, 2);
public final static int SUGGESTIONS_MAX = 20; public final static int SUGGESTIONS_MAX = 20;
public final static int SUGGESTIONS_MIN = 8; public final static int SUGGESTIONS_MIN = 8;
public final static int SUGGESTIONS_POSITIONS_LIMIT = 100;
public final static int SUGGESTIONS_RENDER_DEBOUNCE_TIME = 25; // ms public final static int SUGGESTIONS_RENDER_DEBOUNCE_TIME = 25; // ms
public final static int SUGGESTIONS_RENDER_CLEAR_DEBOUNCE_TIME = 60; // ms public final static int SUGGESTIONS_RENDER_CLEAR_DEBOUNCE_TIME = 60; // ms
public final static int SUGGESTIONS_SELECT_ANIMATION_DURATION = 66; public final static int SUGGESTIONS_SELECT_ANIMATION_DURATION = 66;