1
0
Fork 0

optimized the long positions caching

This commit is contained in:
sspanak 2024-12-21 17:28:55 +02:00 committed by Dimo Karaivanov
parent 0e5013f320
commit 2898db0702
6 changed files with 21 additions and 68 deletions

View file

@ -1,44 +0,0 @@
package io.github.sspanak.tt9.db.entities;
import androidx.annotation.NonNull;
import java.util.HashMap;
import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
/**
 * In-memory cache of word counts, grouped by language ID and keyed by digit sequence.
 *
 * <p>The outer map is keyed by {@code language.getId()}. An inner map that is {@code null}
 * marks a language as "already seen" with no counts stored for it, which is why
 * {@link #contains(Language)} tests for the key rather than for a non-null value.
 */
public class LongPositionsCache {
	// language ID -> (sequence -> word count); a null inner map means "seen, nothing stored"
	private final HashMap<Integer, HashMap<String, Integer>> positions = new HashMap<>();


	/**
	 * @return {@code true} when the language has been registered through
	 * {@link #put(Language, String, int)}, even if no counts were stored for it.
	 */
	public boolean contains(@NonNull Language language) {
		return positions.containsKey(language.getId());
	}


	/**
	 * Records the word count of a sequence for the given language.
	 *
	 * <p>When the language is not registered yet and {@code wordCount} is below
	 * {@code SettingsStore.SUGGESTIONS_POSITIONS_LIMIT}, the language is only marked as seen
	 * and the count is discarded. In every other case the count is stored, so once a language
	 * is registered, counts below the limit are stored as well.
	 *
	 * @param language  the language the sequence belongs to
	 * @param sequence  the digit sequence
	 * @param wordCount the number of words for that sequence
	 */
	public void put(@NonNull Language language, @NonNull String sequence, int wordCount) {
		boolean belowLimit = wordCount < SettingsStore.SUGGESTIONS_POSITIONS_LIMIT;
		if (belowLimit && !contains(language)) {
			// register the language without allocating a map for it
			positions.put(language.getId(), null);
			return;
		}

		final int languageId = language.getId();
		HashMap<String, Integer> counts = positions.get(languageId);
		if (counts == null) {
			counts = new HashMap<>();
			positions.put(languageId, counts);
		}

		counts.put(sequence, wordCount);
	}


	/**
	 * Looks up the stored word count for a sequence.
	 *
	 * @param language the language the sequence belongs to
	 * @param sequence the digit sequence
	 * @return the stored count, or {@code SettingsStore.SUGGESTIONS_POSITIONS_LIMIT} when the
	 * language is unknown, has no counts, or has no entry for this sequence
	 */
	public int get(@NonNull Language language, @NonNull String sequence) {
		HashMap<String, Integer> counts = contains(language) ? positions.get(language.getId()) : null;
		Integer stored = counts != null ? counts.get(sequence) : null;

		if (stored != null) {
			return stored;
		}

		return SettingsStore.SUGGESTIONS_POSITIONS_LIMIT;
	}
}

View file

@ -42,10 +42,11 @@ public class InsertOps {
} }
public static void replaceLanguageMeta(@NonNull SQLiteDatabase db, int langId, String fileHash) { public static void replaceLanguageMeta(@NonNull SQLiteDatabase db, int langId, String fileHash, int maxWordsPerSequence) {
SQLiteStatement query = CompiledQueryCache.get(db, "REPLACE INTO " + Tables.LANGUAGES_META + " (langId, fileHash) VALUES (?, ?)"); SQLiteStatement query = CompiledQueryCache.get(db, "REPLACE INTO " + Tables.LANGUAGES_META + " (langId, fileHash, maxWordsPerSequence) VALUES (?, ?, ?)");
query.bindLong(1, langId); query.bindLong(1, langId);
query.bindString(2, fileHash); query.bindString(2, fileHash);
query.bindLong(3, maxWordsPerSequence);
query.execute(); query.execute();
} }

View file

@ -13,6 +13,10 @@ class Migration {
new Migration( new Migration(
"ALTER TABLE " + Tables.LANGUAGES_META + " ADD COLUMN positionsToNormalize TEXT NULL", "ALTER TABLE " + Tables.LANGUAGES_META + " ADD COLUMN positionsToNormalize TEXT NULL",
true true
),
new Migration(
"ALTER TABLE " + Tables.LANGUAGES_META + " ADD COLUMN maxWordsPerSequence INTEGER NOT NULL DEFAULT -1",
true
) )
}; };

View file

@ -11,8 +11,8 @@ import androidx.annotation.NonNull;
import androidx.annotation.Nullable; import androidx.annotation.Nullable;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import io.github.sspanak.tt9.db.entities.LongPositionsCache;
import io.github.sspanak.tt9.db.entities.NormalizationList; import io.github.sspanak.tt9.db.entities.NormalizationList;
import io.github.sspanak.tt9.db.entities.WordList; import io.github.sspanak.tt9.db.entities.WordList;
import io.github.sspanak.tt9.db.entities.WordPositionsStringBuilder; import io.github.sspanak.tt9.db.entities.WordPositionsStringBuilder;
@ -26,7 +26,7 @@ import io.github.sspanak.tt9.util.Logger;
public class ReadOps { public class ReadOps {
private final String LOG_TAG = "ReadOperations"; private final String LOG_TAG = "ReadOperations";
private final LongPositionsCache longPositionsCache = new LongPositionsCache(); private final HashMap<Language, Integer> maxWordsPerSequence = new HashMap<>();
/** /**
@ -256,7 +256,7 @@ public class ReadOps {
.append(sequence) .append(sequence)
.append("' OR sequence BETWEEN '").append(sequence).append("1' AND '").append(sequence).append(rangeEnd).append("'"); .append("' OR sequence BETWEEN '").append(sequence).append("1' AND '").append(sequence).append(rangeEnd).append("'");
sql.append(" ORDER BY `start` "); sql.append(" ORDER BY `start` ");
sql.append(" LIMIT ").append(longPositionsCache.get(language, sequence)); sql.append(" LIMIT ").append(maxWordsPerSequence.get(language));
} }
String positionsSql = sql.toString(); String positionsSql = sql.toString();
@ -342,28 +342,17 @@ public class ReadOps {
/** /**
* Returns the sequences that result in more words than the standard performance-balanced limit of 100. * Caches the languages with more than 100 words per sequence (the balanced performance limit).
*/ */
public void cacheLongPositionsIfMissing(@NonNull SQLiteDatabase db, @NonNull Language language) { public void cacheLongPositionsIfMissing(@NonNull SQLiteDatabase db, @NonNull Language language) {
if (longPositionsCache.contains(language)) { if (maxWordsPerSequence.containsKey(language)) {
return; return;
} }
String[] select = new String[]{"sequence", "`end` - `start`"}; String sql = "SELECT maxWordsPerSequence FROM " + Tables.LANGUAGES_META + " WHERE langId = " + language.getId();
String table = Tables.getWordPositions(language.getId()); int maxWords = (int) CompiledQueryCache.simpleQueryForLong(db, sql, SettingsStore.SUGGESTIONS_POSITIONS_LIMIT);
String where = "LENGTH(sequence) > " + SettingsStore.SUGGESTIONS_POSITIONS_LIMIT; maxWords = maxWords > 0 ? maxWords : SettingsStore.SUGGESTIONS_POSITIONS_LIMIT;
boolean hasResults = false; maxWordsPerSequence.put(language, maxWords);
try (Cursor cursor = db.query(table, select, where, null, null, null, null)) {
while (cursor.moveToNext()) {
hasResults = true;
longPositionsCache.put(language, cursor.getString(0), cursor.getInt(1));
}
}
if (!hasResults) {
longPositionsCache.put(language, "", 0);
}
} }
} }

View file

@ -115,7 +115,8 @@ public class Tables {
return "CREATE TABLE IF NOT EXISTS " + LANGUAGES_META + " (" + return "CREATE TABLE IF NOT EXISTS " + LANGUAGES_META + " (" +
"langId INTEGER UNIQUE NOT NULL, " + "langId INTEGER UNIQUE NOT NULL, " +
"positionsToNormalize TEXT NULL," + "positionsToNormalize TEXT NULL," +
"fileHash TEXT NOT NULL DEFAULT 0 " + "fileHash TEXT NOT NULL DEFAULT 0, " +
"maxWordsPerSequence INTEGER NOT NULL DEFAULT -1 " +
")"; ")";
} }
} }

View file

@ -263,6 +263,7 @@ public class DictionaryLoader {
WordBatch batch = new WordBatch(language, SettingsStore.DICTIONARY_IMPORT_BATCH_SIZE + 1); WordBatch batch = new WordBatch(language, SettingsStore.DICTIONARY_IMPORT_BATCH_SIZE + 1);
float progressRatio = (maxProgress - minProgress) / wordFile.getWords(); float progressRatio = (maxProgress - minProgress) / wordFile.getWords();
int wordCount = 0; int wordCount = 0;
int maxWordCount = 0;
try (BufferedReader ignored = wordFile.getReader()) { try (BufferedReader ignored = wordFile.getReader()) {
while (wordFile.notEOF()) { while (wordFile.notEOF()) {
@ -276,6 +277,7 @@ public class DictionaryLoader {
ArrayList<String> words = wordFile.getNextWords(digitSequence); ArrayList<String> words = wordFile.getNextWords(digitSequence);
batch.add(words, digitSequence, wordCount + positionShift); batch.add(words, digitSequence, wordCount + positionShift);
wordCount += words.size(); wordCount += words.size();
maxWordCount = Math.max(maxWordCount, wordCount);
if (batch.getWords().size() > SettingsStore.DICTIONARY_IMPORT_BATCH_SIZE) { if (batch.getWords().size() > SettingsStore.DICTIONARY_IMPORT_BATCH_SIZE) {
saveWordBatch(batch); saveWordBatch(batch);
@ -290,7 +292,7 @@ public class DictionaryLoader {
} }
saveWordBatch(batch); saveWordBatch(batch);
InsertOps.replaceLanguageMeta(sqlite.getDb(), language.getId(), wordFile.getHash()); InsertOps.replaceLanguageMeta(sqlite.getDb(), language.getId(), wordFile.getHash(), maxWordCount);
} }