simplified normalization algorithm: now only the sequence over the top is normalized, not the entire language
This commit is contained in:
parent
f98754cf2d
commit
17b8a30f44
8 changed files with 81 additions and 24 deletions
|
|
@ -6,6 +6,7 @@ import androidx.annotation.NonNull;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
import io.github.sspanak.tt9.db.entities.NormalizationList;
|
||||||
import io.github.sspanak.tt9.db.entities.Word;
|
import io.github.sspanak.tt9.db.entities.Word;
|
||||||
import io.github.sspanak.tt9.db.entities.WordList;
|
import io.github.sspanak.tt9.db.entities.WordList;
|
||||||
import io.github.sspanak.tt9.db.sqlite.DeleteOps;
|
import io.github.sspanak.tt9.db.sqlite.DeleteOps;
|
||||||
|
|
@ -224,7 +225,7 @@ public class WordStore {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (newTopFrequency > SettingsStore.WORD_FREQUENCY_MAX) {
|
if (newTopFrequency > SettingsStore.WORD_FREQUENCY_MAX) {
|
||||||
scheduleNormalization(language);
|
scheduleNormalization(language, topWordPositions);
|
||||||
}
|
}
|
||||||
|
|
||||||
Logger.d(LOG_TAG, "Changed frequency of '" + word + "' to: " + newTopFrequency + ". Time: " + Timer.stop(LOG_TAG) + " ms");
|
Logger.d(LOG_TAG, "Changed frequency of '" + word + "' to: " + newTopFrequency + ". Time: " + Timer.stop(LOG_TAG) + " ms");
|
||||||
|
|
@ -243,11 +244,11 @@ public class WordStore {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
sqlite.beginTransaction();
|
sqlite.beginTransaction();
|
||||||
int nextLangId = readOps.getNextInNormalizationQueue(sqlite.getDb());
|
NormalizationList normalizationList = readOps.getNextInNormalizationQueue(sqlite.getDb());
|
||||||
UpdateOps.normalize(sqlite.getDb(), nextLangId);
|
UpdateOps.normalize(sqlite.getDb(), normalizationList);
|
||||||
sqlite.finishTransaction();
|
sqlite.finishTransaction();
|
||||||
|
|
||||||
String message = nextLangId > 0 ? "Normalized language: " + nextLangId : "No languages to normalize";
|
String message = normalizationList.langId > 0 ? "Normalized language: " + normalizationList.langId + ", positions: " + normalizationList.positions : "No languages to normalize";
|
||||||
Logger.d(LOG_TAG, message + ". Time: " + Timer.stop(LOG_TAG) + " ms");
|
Logger.d(LOG_TAG, message + ". Time: " + Timer.stop(LOG_TAG) + " ms");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
sqlite.failTransaction();
|
sqlite.failTransaction();
|
||||||
|
|
@ -256,9 +257,9 @@ public class WordStore {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void scheduleNormalization(Language language) {
|
public void scheduleNormalization(Language language, String positions) {
|
||||||
if (language != null && !(language instanceof NullLanguage) && checkOrNotify()) {
|
if (language != null && !(language instanceof NullLanguage) && positions != null && !positions.isEmpty() && checkOrNotify()) {
|
||||||
UpdateOps.scheduleNormalization(sqlite.getDb(), language);
|
UpdateOps.scheduleNormalization(sqlite.getDb(), language, positions);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,35 @@
|
||||||
|
package io.github.sspanak.tt9.db.entities;
|
||||||
|
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
/**
 * Parsed form of one normalization-queue entry.
 *
 * The raw entry is a single string of the form {@code "<langId>,<pos>[,<pos>...]"}: the first
 * comma-separated field is the language id and the rest is a comma-separated list of positions.
 * When the raw string is {@code null} or malformed, the fields keep their defaults
 * ({@code langId == -1}, {@code positions == null}).
 */
public class NormalizationList {
	/**
	 * One or more unsigned integers separated by single commas; no leading, trailing or doubled
	 * commas. The positions string is meant to be embedded in a SQL {@code IN (...)} list, where
	 * an empty element such as in {@code "1,,2"} or {@code ",1"} would be a syntax error, so the
	 * pattern must reject those.
	 */
	private final static Pattern validPositions = Pattern.compile("\\d+(,\\d+)*");

	/** Language id parsed from the first field, or -1 when the input was null or invalid. */
	public int langId = -1;

	/** Comma-separated positions exactly as received, or null when the input was null or invalid. */
	public String positions = null;

	/**
	 * Parses a raw queue entry.
	 *
	 * @param rawNormalizationResponse a string like {@code "3,10,20,30"}; may be {@code null}
	 */
	public NormalizationList(String rawNormalizationResponse) {
		if (rawNormalizationResponse == null) {
			return;
		}

		// Limit of 2: everything after the first comma stays together as the positions list.
		String[] parts = rawNormalizationResponse.split(",", 2);
		if (!arePartsValid(parts)) {
			return;
		}

		try {
			// Parse first, assign afterwards, so that a failure leaves both fields at their defaults.
			int parsedLangId = Integer.parseInt(parts[0]);
			langId = parsedLangId;
			positions = parts[1];
		} catch (NumberFormatException ignored) {
			// Not a number in the language field: treat the entry as invalid and keep the defaults.
		}
	}

	/**
	 * Checks the shape of the split entry: exactly two parts, the second being a well-formed
	 * list of positions. The numeric check of the first part is done by the caller while parsing.
	 */
	private boolean arePartsValid(String[] parts) {
		return parts.length == 2 && validPositions.matcher(parts[1]).matches();
	}
}
|
||||||
|
|
@ -60,4 +60,12 @@ class CompiledQueryCache {
|
||||||
static long simpleQueryForLong(SQLiteDatabase db, String sql, long defaultValue) {
|
static long simpleQueryForLong(SQLiteDatabase db, String sql, long defaultValue) {
|
||||||
return getInstance(db).simpleQueryForLong(sql, defaultValue);
|
return getInstance(db).simpleQueryForLong(sql, defaultValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static String simpleQueryForString(SQLiteDatabase db, String sql, String defaultValue) {
|
||||||
|
try {
|
||||||
|
return get(db, sql).simpleQueryForString();
|
||||||
|
} catch (SQLiteDoneException e) {
|
||||||
|
return defaultValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,14 @@ class Migration {
|
||||||
new Migration(
|
new Migration(
|
||||||
"ALTER TABLE " + Tables.LANGUAGES_META + " ADD COLUMN fileHash TEXT NOT NULL DEFAULT 0",
|
"ALTER TABLE " + Tables.LANGUAGES_META + " ADD COLUMN fileHash TEXT NOT NULL DEFAULT 0",
|
||||||
true
|
true
|
||||||
|
),
|
||||||
|
new Migration(
|
||||||
|
"ALTER TABLE " + Tables.LANGUAGES_META + " RENAME COLUMN normalizationPending TO _delete_me_0",
|
||||||
|
true
|
||||||
|
),
|
||||||
|
new Migration(
|
||||||
|
"ALTER TABLE " + Tables.LANGUAGES_META + " ADD COLUMN positionsToNormalize TEXT NULL",
|
||||||
|
true
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ import androidx.annotation.NonNull;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
import io.github.sspanak.tt9.db.entities.NormalizationList;
|
||||||
import io.github.sspanak.tt9.util.Logger;
|
import io.github.sspanak.tt9.util.Logger;
|
||||||
import io.github.sspanak.tt9.db.SlowQueryStats;
|
import io.github.sspanak.tt9.db.SlowQueryStats;
|
||||||
import io.github.sspanak.tt9.db.entities.WordList;
|
import io.github.sspanak.tt9.db.entities.WordList;
|
||||||
|
|
@ -274,11 +275,13 @@ public class ReadOps {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public int getNextInNormalizationQueue(@NonNull SQLiteDatabase db) {
|
public NormalizationList getNextInNormalizationQueue(@NonNull SQLiteDatabase db) {
|
||||||
return (int) CompiledQueryCache.simpleQueryForLong(
|
String res = CompiledQueryCache.simpleQueryForString(
|
||||||
db,
|
db,
|
||||||
"SELECT langId FROM " + Tables.LANGUAGES_META + " WHERE normalizationPending = 1 LIMIT 1",
|
"SELECT langId || ',' || positionsToNormalize FROM " + Tables.LANGUAGES_META + " WHERE positionsToNormalize IS NOT NULL LIMIT 1",
|
||||||
-1
|
null
|
||||||
);
|
);
|
||||||
|
|
||||||
|
return new NormalizationList(res);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -103,7 +103,7 @@ public class Tables {
|
||||||
private static String createLanguagesMeta() {
|
private static String createLanguagesMeta() {
|
||||||
return "CREATE TABLE IF NOT EXISTS " + LANGUAGES_META + " (" +
|
return "CREATE TABLE IF NOT EXISTS " + LANGUAGES_META + " (" +
|
||||||
"langId INTEGER UNIQUE NOT NULL, " +
|
"langId INTEGER UNIQUE NOT NULL, " +
|
||||||
"normalizationPending INT2 NOT NULL DEFAULT 0," +
|
"positionsToNormalize TEXT NULL," +
|
||||||
"fileHash TEXT NOT NULL DEFAULT 0 " +
|
"fileHash TEXT NOT NULL DEFAULT 0 " +
|
||||||
")";
|
")";
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import android.database.sqlite.SQLiteStatement;
|
||||||
|
|
||||||
import androidx.annotation.NonNull;
|
import androidx.annotation.NonNull;
|
||||||
|
|
||||||
|
import io.github.sspanak.tt9.db.entities.NormalizationList;
|
||||||
import io.github.sspanak.tt9.languages.Language;
|
import io.github.sspanak.tt9.languages.Language;
|
||||||
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
|
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
|
||||||
import io.github.sspanak.tt9.util.Logger;
|
import io.github.sspanak.tt9.util.Logger;
|
||||||
|
|
@ -37,25 +38,26 @@ public class UpdateOps {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static void normalize(@NonNull SQLiteDatabase db, int langId) {
|
public static void normalize(@NonNull SQLiteDatabase db, NormalizationList normalizationList) {
|
||||||
if (langId <= 0) {
|
if (normalizationList.langId <= 0 || normalizationList.positions == null || normalizationList.positions.isEmpty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
SQLiteStatement query = CompiledQueryCache.get(db, "UPDATE " + Tables.getWords(langId) + " SET frequency = frequency / ?");
|
db.execSQL(
|
||||||
query.bindLong(1, SettingsStore.WORD_FREQUENCY_NORMALIZATION_DIVIDER);
|
"UPDATE " + Tables.getWords(normalizationList.langId) +
|
||||||
query.execute();
|
" SET frequency = frequency / " + SettingsStore.WORD_FREQUENCY_NORMALIZATION_DIVIDER +
|
||||||
|
" WHERE position IN (" + normalizationList.positions + ")"
|
||||||
|
);
|
||||||
|
|
||||||
query = CompiledQueryCache.get(db, "UPDATE " + Tables.LANGUAGES_META + " SET normalizationPending = ? WHERE langId = ?");
|
SQLiteStatement query = CompiledQueryCache.get(db, "UPDATE " + Tables.LANGUAGES_META + " SET positionsToNormalize = NULL WHERE langId = ?");
|
||||||
query.bindLong(1, 0);
|
query.bindLong(1, normalizationList.langId);
|
||||||
query.bindLong(2, langId);
|
|
||||||
query.execute();
|
query.execute();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static void scheduleNormalization(@NonNull SQLiteDatabase db, @NonNull Language language) {
|
public static void scheduleNormalization(@NonNull SQLiteDatabase db, @NonNull Language language, @NonNull String positions) {
|
||||||
SQLiteStatement query = CompiledQueryCache.get(db, "UPDATE " + Tables.LANGUAGES_META + " SET normalizationPending = ? WHERE langId = ?");
|
SQLiteStatement query = CompiledQueryCache.get(db, "UPDATE " + Tables.LANGUAGES_META + " SET positionsToNormalize = ? WHERE langId = ?");
|
||||||
query.bindLong(1, 1);
|
query.bindString(1, positions);
|
||||||
query.bindLong(2, language.getId());
|
query.bindLong(2, language.getId());
|
||||||
query.execute();
|
query.execute();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ public class SettingsStore extends SettingsUI {
|
||||||
public final static int SUGGESTIONS_TRANSLATE_ANIMATION_DURATION = 0;
|
public final static int SUGGESTIONS_TRANSLATE_ANIMATION_DURATION = 0;
|
||||||
public final static int WORD_FREQUENCY_MAX = 25500;
|
public final static int WORD_FREQUENCY_MAX = 25500;
|
||||||
public final static int WORD_FREQUENCY_NORMALIZATION_DIVIDER = 100; // normalized frequency = WORD_FREQUENCY_MAX / WORD_FREQUENCY_NORMALIZATION_DIVIDER
|
public final static int WORD_FREQUENCY_NORMALIZATION_DIVIDER = 100; // normalized frequency = WORD_FREQUENCY_MAX / WORD_FREQUENCY_NORMALIZATION_DIVIDER
|
||||||
public final static int WORD_NORMALIZATION_DELAY = 120000; // ms
|
public final static int WORD_NORMALIZATION_DELAY = 15000; // ms
|
||||||
|
|
||||||
/************* hacks *************/
|
/************* hacks *************/
|
||||||
public final static int PREFERENCES_CLICK_DEBOUNCE_TIME = 250; // ms
|
public final static int PREFERENCES_CLICK_DEBOUNCE_TIME = 250; // ms
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue