1
0
Fork 0

simplified normalization algorithm: now only the sequence over the top is normalized, not the entire language

This commit is contained in:
sspanak 2024-05-17 20:29:29 +03:00 committed by Dimo Karaivanov
parent f98754cf2d
commit 17b8a30f44
8 changed files with 81 additions and 24 deletions

View file

@ -6,6 +6,7 @@ import androidx.annotation.NonNull;
import java.util.ArrayList;
import io.github.sspanak.tt9.db.entities.NormalizationList;
import io.github.sspanak.tt9.db.entities.Word;
import io.github.sspanak.tt9.db.entities.WordList;
import io.github.sspanak.tt9.db.sqlite.DeleteOps;
@ -224,7 +225,7 @@ public class WordStore {
}
if (newTopFrequency > SettingsStore.WORD_FREQUENCY_MAX) {
scheduleNormalization(language);
scheduleNormalization(language, topWordPositions);
}
Logger.d(LOG_TAG, "Changed frequency of '" + word + "' to: " + newTopFrequency + ". Time: " + Timer.stop(LOG_TAG) + " ms");
@ -243,11 +244,11 @@ public class WordStore {
try {
sqlite.beginTransaction();
int nextLangId = readOps.getNextInNormalizationQueue(sqlite.getDb());
UpdateOps.normalize(sqlite.getDb(), nextLangId);
NormalizationList normalizationList = readOps.getNextInNormalizationQueue(sqlite.getDb());
UpdateOps.normalize(sqlite.getDb(), normalizationList);
sqlite.finishTransaction();
String message = nextLangId > 0 ? "Normalized language: " + nextLangId : "No languages to normalize";
String message = normalizationList.langId > 0 ? "Normalized language: " + normalizationList.langId + ", positions: " + normalizationList.positions : "No languages to normalize";
Logger.d(LOG_TAG, message + ". Time: " + Timer.stop(LOG_TAG) + " ms");
} catch (Exception e) {
sqlite.failTransaction();
@ -256,9 +257,9 @@ public class WordStore {
}
public void scheduleNormalization(Language language) {
if (language != null && !(language instanceof NullLanguage) && checkOrNotify()) {
UpdateOps.scheduleNormalization(sqlite.getDb(), language);
public void scheduleNormalization(Language language, String positions) {
if (language != null && !(language instanceof NullLanguage) && positions != null && !positions.isEmpty() && checkOrNotify()) {
UpdateOps.scheduleNormalization(sqlite.getDb(), language, positions);
}
}

View file

@ -0,0 +1,35 @@
package io.github.sspanak.tt9.db.entities;
import java.util.regex.Pattern;
/**
 * Parsed form of a raw normalization-queue entry.
 *
 * <p>The raw value is expected to look like {@code "<langId>,<pos1>,<pos2>,..."}: a language ID,
 * followed by a comma-separated list of one or more numeric positions. When the raw value is
 * {@code null} or malformed, the instance keeps its defaults ({@code langId == -1},
 * {@code positions == null}), which callers can use to detect "nothing to normalize".
 */
public class NormalizationList {
	/**
	 * One or more unsigned integers separated by single commas; no leading, trailing or doubled
	 * commas. The list must be strictly well-formed, because it is later concatenated into a
	 * SQL {@code IN (...)} clause, where an empty element would be a syntax error.
	 */
	private final static Pattern validPositions = Pattern.compile("\\d+(?:,\\d+)*");

	/** Language ID, or -1 when the raw response was missing or invalid. */
	public int langId = -1;

	/** Comma-separated positions, or {@code null} when the raw response was missing or invalid. */
	public String positions = null;

	/**
	 * @param rawNormalizationResponse the raw {@code "<langId>,<positions>"} string; may be
	 *                                 {@code null}
	 */
	public NormalizationList(String rawNormalizationResponse) {
		if (rawNormalizationResponse == null) {
			return;
		}

		// Split only on the first comma: the head is the language ID, the tail is the whole list.
		String[] parts = rawNormalizationResponse.split(",", 2);
		if (parts.length != 2 || !validPositions.matcher(parts[1]).matches()) {
			return;
		}

		try {
			// Parse first, so that a bad ID leaves both fields at their defaults.
			langId = Integer.parseInt(parts[0]);
			positions = parts[1];
		} catch (NumberFormatException ignored) {
			// Non-numeric or out-of-range language ID: treat the whole response as invalid.
		}
	}
}

View file

@ -60,4 +60,12 @@ class CompiledQueryCache {
/**
 * Static convenience wrapper: obtains the cache instance for {@code db} via
 * {@code getInstance(db)} and forwards {@code sql} and {@code defaultValue} to its
 * {@code simpleQueryForLong(sql, defaultValue)} method.
 *
 * @param db           database passed to {@code getInstance}
 * @param sql          query text forwarded to the instance method
 * @param defaultValue fallback forwarded to the instance method
 *                     (presumably returned when the query yields no row; confirm in the instance method)
 * @return whatever the instance-level {@code simpleQueryForLong} returns
 */
static long simpleQueryForLong(SQLiteDatabase db, String sql, long defaultValue) {
return getInstance(db).simpleQueryForLong(sql, defaultValue);
}
/**
 * Executes {@code sql} through the statement returned by {@code get(db, sql)} and returns its
 * single string result.
 *
 * @param db           database passed to {@code get}
 * @param sql          query text passed to {@code get}
 * @param defaultValue value to fall back to when {@link SQLiteDoneException} is thrown
 * @return the query result, or {@code defaultValue} if {@link SQLiteDoneException} was thrown
 */
static String simpleQueryForString(SQLiteDatabase db, String sql, String defaultValue) {
	String result = defaultValue;
	try {
		result = get(db, sql).simpleQueryForString();
	} catch (SQLiteDoneException ignored) {
		// Nothing to read; "result" still holds the caller-supplied default.
	}
	return result;
}
}

View file

@ -5,6 +5,14 @@ class Migration {
new Migration(
"ALTER TABLE " + Tables.LANGUAGES_META + " ADD COLUMN fileHash TEXT NOT NULL DEFAULT 0",
true
),
new Migration(
"ALTER TABLE " + Tables.LANGUAGES_META + " RENAME COLUMN normalizationPending TO _delete_me_0",
true
),
new Migration(
"ALTER TABLE " + Tables.LANGUAGES_META + " ADD COLUMN positionsToNormalize TEXT NULL",
true
)
};

View file

@ -9,6 +9,7 @@ import androidx.annotation.NonNull;
import java.util.ArrayList;
import io.github.sspanak.tt9.db.entities.NormalizationList;
import io.github.sspanak.tt9.util.Logger;
import io.github.sspanak.tt9.db.SlowQueryStats;
import io.github.sspanak.tt9.db.entities.WordList;
@ -274,11 +275,13 @@ public class ReadOps {
}
public int getNextInNormalizationQueue(@NonNull SQLiteDatabase db) {
return (int) CompiledQueryCache.simpleQueryForLong(
public NormalizationList getNextInNormalizationQueue(@NonNull SQLiteDatabase db) {
String res = CompiledQueryCache.simpleQueryForString(
db,
"SELECT langId FROM " + Tables.LANGUAGES_META + " WHERE normalizationPending = 1 LIMIT 1",
-1
"SELECT langId || ',' || positionsToNormalize FROM " + Tables.LANGUAGES_META + " WHERE positionsToNormalize IS NOT NULL LIMIT 1",
null
);
return new NormalizationList(res);
}
}

View file

@ -103,7 +103,7 @@ public class Tables {
private static String createLanguagesMeta() {
return "CREATE TABLE IF NOT EXISTS " + LANGUAGES_META + " (" +
"langId INTEGER UNIQUE NOT NULL, " +
"normalizationPending INT2 NOT NULL DEFAULT 0," +
"positionsToNormalize TEXT NULL," +
"fileHash TEXT NOT NULL DEFAULT 0 " +
")";
}

View file

@ -5,6 +5,7 @@ import android.database.sqlite.SQLiteStatement;
import androidx.annotation.NonNull;
import io.github.sspanak.tt9.db.entities.NormalizationList;
import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
import io.github.sspanak.tt9.util.Logger;
@ -37,25 +38,26 @@ public class UpdateOps {
}
public static void normalize(@NonNull SQLiteDatabase db, int langId) {
if (langId <= 0) {
public static void normalize(@NonNull SQLiteDatabase db, NormalizationList normalizationList) {
if (normalizationList.langId <= 0 || normalizationList.positions == null || normalizationList.positions.isEmpty()) {
return;
}
SQLiteStatement query = CompiledQueryCache.get(db, "UPDATE " + Tables.getWords(langId) + " SET frequency = frequency / ?");
query.bindLong(1, SettingsStore.WORD_FREQUENCY_NORMALIZATION_DIVIDER);
query.execute();
db.execSQL(
"UPDATE " + Tables.getWords(normalizationList.langId) +
" SET frequency = frequency / " + SettingsStore.WORD_FREQUENCY_NORMALIZATION_DIVIDER +
" WHERE position IN (" + normalizationList.positions + ")"
);
query = CompiledQueryCache.get(db, "UPDATE " + Tables.LANGUAGES_META + " SET normalizationPending = ? WHERE langId = ?");
query.bindLong(1, 0);
query.bindLong(2, langId);
SQLiteStatement query = CompiledQueryCache.get(db, "UPDATE " + Tables.LANGUAGES_META + " SET positionsToNormalize = NULL WHERE langId = ?");
query.bindLong(1, normalizationList.langId);
query.execute();
}
public static void scheduleNormalization(@NonNull SQLiteDatabase db, @NonNull Language language) {
SQLiteStatement query = CompiledQueryCache.get(db, "UPDATE " + Tables.LANGUAGES_META + " SET normalizationPending = ? WHERE langId = ?");
query.bindLong(1, 1);
public static void scheduleNormalization(@NonNull SQLiteDatabase db, @NonNull Language language, @NonNull String positions) {
SQLiteStatement query = CompiledQueryCache.get(db, "UPDATE " + Tables.LANGUAGES_META + " SET positionsToNormalize = ? WHERE langId = ?");
query.bindString(1, positions);
query.bindLong(2, language.getId());
query.execute();
}

View file

@ -22,7 +22,7 @@ public class SettingsStore extends SettingsUI {
public final static int SUGGESTIONS_TRANSLATE_ANIMATION_DURATION = 0;
public final static int WORD_FREQUENCY_MAX = 25500;
public final static int WORD_FREQUENCY_NORMALIZATION_DIVIDER = 100; // normalized frequency = WORD_FREQUENCY_MAX / WORD_FREQUENCY_NORMALIZATION_DIVIDER
public final static int WORD_NORMALIZATION_DELAY = 120000; // ms
public final static int WORD_NORMALIZATION_DELAY = 15000; // ms
/************* hacks *************/
public final static int PREFERENCES_CLICK_DEBOUNCE_TIME = 250; // ms