
optimized the long positions caching

sspanak 2024-12-21 17:28:55 +02:00 committed by Dimo Karaivanov
parent 0e5013f320
commit 2898db0702
6 changed files with 21 additions and 68 deletions

LongPositionsCache.java

@@ -1,44 +0,0 @@
-package io.github.sspanak.tt9.db.entities;
-
-import androidx.annotation.NonNull;
-
-import java.util.HashMap;
-
-import io.github.sspanak.tt9.languages.Language;
-import io.github.sspanak.tt9.preferences.settings.SettingsStore;
-
-public class LongPositionsCache {
-	private final HashMap<Integer, HashMap<String, Integer>> positions = new HashMap<>();
-
-	public boolean contains(@NonNull Language language) {
-		return positions.containsKey(language.getId());
-	}
-
-	public void put(@NonNull Language language, @NonNull String sequence, int wordCount) {
-		if (wordCount < SettingsStore.SUGGESTIONS_POSITIONS_LIMIT && !contains(language)) {
-			positions.put(language.getId(), null);
-			return;
-		}
-
-		HashMap<String, Integer> words = positions.get(language.getId());
-		if (words == null) {
-			words = new HashMap<>();
-			positions.put(language.getId(), words);
-		}
-
-		words.put(sequence, wordCount);
-	}
-
-	public int get(@NonNull Language language, @NonNull String sequence) {
-		if (!contains(language)) {
-			return SettingsStore.SUGGESTIONS_POSITIONS_LIMIT;
-		}
-
-		HashMap<String, Integer> words = positions.get(language.getId());
-		if (words == null) {
-			return SettingsStore.SUGGESTIONS_POSITIONS_LIMIT;
-		}
-
-		Integer wordCount = words.get(sequence);
-		return wordCount == null ? SettingsStore.SUGGESTIONS_POSITIONS_LIMIT : wordCount;
-	}
-}
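
The deleted class kept a nested map of language ID to (sequence to word count) and had to be refilled by scanning the word-positions table on every app start. The rest of the commit replaces it with a single worst-case integer per language, persisted in LANGUAGES_META at import time. A minimal sketch of the two cache shapes, with both field declarations taken from this diff:

	// Before: one LIMIT per (language, sequence) pair, rebuilt at runtime
	private final HashMap<Integer, HashMap<String, Integer>> positions = new HashMap<>();

	// After: one worst-case LIMIT per language, loaded once from LANGUAGES_META
	private final HashMap<Language, Integer> maxWordsPerSequence = new HashMap<>();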

InsertOps.java

@@ -42,10 +42,11 @@ public class InsertOps {
 	}
 
-	public static void replaceLanguageMeta(@NonNull SQLiteDatabase db, int langId, String fileHash) {
-		SQLiteStatement query = CompiledQueryCache.get(db, "REPLACE INTO " + Tables.LANGUAGES_META + " (langId, fileHash) VALUES (?, ?)");
+	public static void replaceLanguageMeta(@NonNull SQLiteDatabase db, int langId, String fileHash, int maxWordsPerSequence) {
+		SQLiteStatement query = CompiledQueryCache.get(db, "REPLACE INTO " + Tables.LANGUAGES_META + " (langId, fileHash, maxWordsPerSequence) VALUES (?, ?, ?)");
 		query.bindLong(1, langId);
 		query.bindString(2, fileHash);
+		query.bindLong(3, maxWordsPerSequence);
 		query.execute();
 	}
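
REPLACE INTO behaves as an upsert here: because langId is declared UNIQUE in the LANGUAGES_META schema (see Tables.java below), re-importing a dictionary overwrites the language's metadata row instead of adding a second one. A hedged usage sketch, with the hash and limit values invented for illustration:

	// Values are made up; in the real flow they come from the word file during import.
	InsertOps.replaceLanguageMeta(db, language.getId(), "3f2ab9...", 137);
	// Runs: REPLACE INTO <LANGUAGES_META> (langId, fileHash, maxWordsPerSequence) VALUES (?, ?, ?)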

Migration.java

@@ -13,6 +13,10 @@ class Migration {
 		new Migration(
 			"ALTER TABLE " + Tables.LANGUAGES_META + " ADD COLUMN positionsToNormalize TEXT NULL",
 			true
 		),
+		new Migration(
+			"ALTER TABLE " + Tables.LANGUAGES_META + " ADD COLUMN maxWordsPerSequence INTEGER NOT NULL DEFAULT -1",
+			true
+		)
 	};
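
The DEFAULT -1 matters for upgrades: languages imported before this commit have no measured value, and ReadOps below treats anything non-positive as unknown. A sketch of that sentinel handling, mirroring the fallback logic in ReadOps:

	// -1 is what the migration default yields for dictionaries imported before this commit
	int stored = -1;
	int limit = stored > 0 ? stored : SettingsStore.SUGGESTIONS_POSITIONS_LIMIT; // fall back to the balanced limit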

ReadOps.java

@@ -11,8 +11,8 @@ import androidx.annotation.NonNull;
 import androidx.annotation.Nullable;
 
 import java.util.ArrayList;
+import java.util.HashMap;
 
-import io.github.sspanak.tt9.db.entities.LongPositionsCache;
 import io.github.sspanak.tt9.db.entities.NormalizationList;
 import io.github.sspanak.tt9.db.entities.WordList;
 import io.github.sspanak.tt9.db.entities.WordPositionsStringBuilder;
@@ -26,7 +26,7 @@ import io.github.sspanak.tt9.util.Logger;
 public class ReadOps {
 	private final String LOG_TAG = "ReadOperations";
 
-	private final LongPositionsCache longPositionsCache = new LongPositionsCache();
+	private final HashMap<Language, Integer> maxWordsPerSequence = new HashMap<>();
 
 	/**
@@ -256,7 +256,7 @@ public class ReadOps {
 			.append(sequence)
 			.append("' OR sequence BETWEEN '").append(sequence).append("1' AND '").append(sequence).append(rangeEnd).append("'");
 		sql.append(" ORDER BY `start` ");
-		sql.append(" LIMIT ").append(longPositionsCache.get(language, sequence));
+		sql.append(" LIMIT ").append(maxWordsPerSequence.get(language));
 	}
 
 	String positionsSql = sql.toString();
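
The trade-off in this hunk: the LIMIT is now the worst case recorded for the whole language rather than a per-sequence value, so a query for a short sequence may permit a few more rows than strictly necessary, but the cache no longer has to be rebuilt by scanning the positions table. A hedged illustration of the final query; the table name and the numbers are invented:

	// Assuming sequence = "227", rangeEnd = "9" and a recorded per-language maximum of 137:
	//   SELECT ... FROM word_positions_1
	//   WHERE sequence = '227' OR sequence BETWEEN '2271' AND '2279'
	//   ORDER BY `start` LIMIT 137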
@@ -342,28 +342,17 @@ public class ReadOps {
 	/**
-	 * Returns the sequences that result in more words than the standard performance-balanced limit of 100.
+	 * Caches the languages with more than 100 words per a sequence (the balanced performance limit).
 	 */
 	public void cacheLongPositionsIfMissing(@NonNull SQLiteDatabase db, @NonNull Language language) {
-		if (longPositionsCache.contains(language)) {
+		if (maxWordsPerSequence.containsKey(language)) {
 			return;
 		}
 
-		String[] select = new String[]{"sequence", "`end` - `start`"};
-		String table = Tables.getWordPositions(language.getId());
-		String where = "LENGTH(sequence) > " + SettingsStore.SUGGESTIONS_POSITIONS_LIMIT;
+		String sql = "SELECT maxWordsPerSequence FROM " + Tables.LANGUAGES_META + " WHERE langId = " + language.getId();
+		int maxWords = (int) CompiledQueryCache.simpleQueryForLong(db, sql, SettingsStore.SUGGESTIONS_POSITIONS_LIMIT);
+		maxWords = maxWords > 0 ? maxWords : SettingsStore.SUGGESTIONS_POSITIONS_LIMIT;
 
-		boolean hasResults = false;
-		try (Cursor cursor = db.query(table, select, where, null, null, null, null)) {
-			while (cursor.moveToNext()) {
-				hasResults = true;
-				longPositionsCache.put(language, cursor.getString(0), cursor.getInt(1));
-			}
-		}
-
-		if (!hasResults) {
-			longPositionsCache.put(language, "", 0);
-		}
+		maxWordsPerSequence.put(language, maxWords);
 	}
 }
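
The first lookup per language now costs a single SELECT against LANGUAGES_META (served through CompiledQueryCache), and every later call is answered from the in-memory map. A hedged sketch of the call pattern, assuming a ReadOps instance named readOps:

	// First call for a language: one small SELECT against LANGUAGES_META
	readOps.cacheLongPositionsIfMissing(db, language);
	// Any later call for the same language: answered from the HashMap, no SQL at all
	readOps.cacheLongPositionsIfMissing(db, language);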

Tables.java

@@ -115,7 +115,8 @@ public class Tables {
 		return "CREATE TABLE IF NOT EXISTS " + LANGUAGES_META + " (" +
 			"langId INTEGER UNIQUE NOT NULL, " +
 			"positionsToNormalize TEXT NULL," +
-			"fileHash TEXT NOT NULL DEFAULT 0 " +
+			"fileHash TEXT NOT NULL DEFAULT 0, " +
+			"maxWordsPerSequence INTEGER NOT NULL DEFAULT -1 " +
 		")";
 	}
 }
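
Fresh installs get the column from this CREATE TABLE, while existing databases get it from the migration above, so both paths converge on the same schema. A hedged rendering of the SQL the concatenation produces, with "languages_meta" standing in for the actual LANGUAGES_META constant:

	// CREATE TABLE IF NOT EXISTS languages_meta (
	//     langId INTEGER UNIQUE NOT NULL,
	//     positionsToNormalize TEXT NULL,
	//     fileHash TEXT NOT NULL DEFAULT 0,
	//     maxWordsPerSequence INTEGER NOT NULL DEFAULT -1
	// )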

DictionaryLoader.java

@@ -263,6 +263,7 @@ public class DictionaryLoader {
 		WordBatch batch = new WordBatch(language, SettingsStore.DICTIONARY_IMPORT_BATCH_SIZE + 1);
 		float progressRatio = (maxProgress - minProgress) / wordFile.getWords();
 		int wordCount = 0;
+		int maxWordCount = 0;
 
 		try (BufferedReader ignored = wordFile.getReader()) {
 			while (wordFile.notEOF()) {
@@ -276,6 +277,7 @@ public class DictionaryLoader {
 				ArrayList<String> words = wordFile.getNextWords(digitSequence);
 				batch.add(words, digitSequence, wordCount + positionShift);
 				wordCount += words.size();
+				maxWordCount = Math.max(maxWordCount, wordCount);
 
 				if (batch.getWords().size() > SettingsStore.DICTIONARY_IMPORT_BATCH_SIZE) {
 					saveWordBatch(batch);
@@ -290,7 +292,7 @@ public class DictionaryLoader {
 		}
 
 		saveWordBatch(batch);
-		InsertOps.replaceLanguageMeta(sqlite.getDb(), language.getId(), wordFile.getHash());
+		InsertOps.replaceLanguageMeta(sqlite.getDb(), language.getId(), wordFile.getHash(), maxWordCount);
 	}
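
The net effect of the commit: the worst-case words-per-sequence figure is computed once, while the word file is being read anyway, and persisted with the rest of the language metadata, instead of being rediscovered at runtime. A minimal, self-contained sketch of the accounting, assuming wordCount restarts for each digit sequence (the reset happens outside the visible hunks):

	import java.util.LinkedHashMap;
	import java.util.List;
	import java.util.Map;

	// Self-contained sketch of the worst-case tracking; a tiny in-memory map
	// stands in for the streamed word file of the real DictionaryLoader.
	public class MaxWordsSketch {
		public static void main(String[] args) {
			Map<String, List<String>> wordFile = new LinkedHashMap<>();
			wordFile.put("43", List.of("he", "if", "id"));
			wordFile.put("4663", List.of("good", "home", "gone", "hood"));

			int maxWordCount = 0;
			for (List<String> words : wordFile.values()) {
				int wordCount = words.size(); // restarts for every sequence
				maxWordCount = Math.max(maxWordCount, wordCount);
			}

			// Prints 4; this is the value persisted as maxWordsPerSequence
			// and later used as the per-language query LIMIT.
			System.out.println(maxWordCount);
		}
	}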