1
0
Fork 0
* added Japanese (Hiragana, Katakana, Kanji)

* improved dictionary validation: it is now possible to have the same ideogram with two different transcriptions

* fixed frequency updating not working sometimes (in Chinese too)
This commit is contained in:
Dimo Karaivanov 2025-04-12 11:59:13 +03:00 committed by GitHub
parent efa1fb4d79
commit 0ec912f9c9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
33 changed files with 1603029 additions and 89 deletions

View file

@ -0,0 +1,49 @@
locale: ja-JP
dictionaryFile: ja-romaji.csv
name: 日本語 / ローマ字
hasABC: no
hasSpaceBetweenWords: no
hasUpperCase: no
layout:
- [SPECIAL] # 0
- [PUNCTUATION_ZH] # 1
- [a, b, c] # 2
- [d, e, f] # 3
- [g, h, i] # 4
- [j, k, l] # 5
- [m, n, o] # 6
- [p, q, r, s] # 7
- [t, u, v] # 8
- [w, x, y, z] # 9
filterBySounds: yes
sounds:
- [Qh,0] # Hiragana prefix. For identifying and loading only the specific characters in Hiragana mode.
- [Qk,1] # Katakana prefix. The prefixed ones will appear only in Katakana mode.
# All others will appear in Predictive mode
- [Ql,1] # ー
- [A,2]
- [B,2]
- [C,2]
- [D,3]
- [E,3]
- [F,3]
- [G,4]
- [H,4]
- [I,4]
- [J,5]
- [K,5]
- [L,5]
- [M,6]
- [N,6]
- [O,6]
- [P,7]
- [Q,7]
- [R,7]
- [S,7]
- [T,8]
- [U,8]
- [V,8]
- [W,9]
- [X,9]
- [Y,9]
- [Z,9]

File diff suppressed because it is too large Load diff

View file

@ -88,7 +88,7 @@ public class DataStore {
}
public static void getWords(ConsumerCompat<ArrayList<String>> dataHandler, Language language, String sequence, boolean onlyExactSequence, String filter, int minWords, int maxWords) {
public static void getWords(ConsumerCompat<ArrayList<String>> dataHandler, Language language, String sequence, boolean onlyExactSequence, String filter, boolean orderByLength, int minWords, int maxWords) {
if (getWordsTask != null && !getWordsTask.isDone()) {
dataHandler.accept(new ArrayList<>());
getWordsCancellationSignal.cancel();
@ -96,14 +96,14 @@ public class DataStore {
}
getWordsCancellationSignal = new CancellationSignal();
getWordsTask = executor.submit(() -> getWordsSync(dataHandler, language, sequence, onlyExactSequence, filter, minWords, maxWords));
getWordsTask = executor.submit(() -> getWordsSync(dataHandler, language, sequence, onlyExactSequence, filter, orderByLength, minWords, maxWords));
executor.submit(DataStore::setGetWordsTimeout);
}
private static void getWordsSync(ConsumerCompat<ArrayList<String>> dataHandler, Language language, String sequence, boolean onlyExactSequence, String filter, int minWords, int maxWords) {
private static void getWordsSync(ConsumerCompat<ArrayList<String>> dataHandler, Language language, String sequence, boolean onlyExactSequence, String filter, boolean orderByLength, int minWords, int maxWords) {
try {
ArrayList<String> data = words.getMany(getWordsCancellationSignal, language, sequence, onlyExactSequence, filter, minWords, maxWords);
ArrayList<String> data = words.getMany(getWordsCancellationSignal, language, sequence, onlyExactSequence, filter, orderByLength, minWords, maxWords);
asyncReturn.post(() -> dataHandler.accept(data));
} catch (Exception e) {
Logger.e(LOG_TAG, "Error fetching words: " + e.getMessage());

View file

@ -122,13 +122,13 @@ public class ReadOps {
@NonNull
public WordList getWords(@NonNull SQLiteDatabase db, @Nullable CancellationSignal cancel, @NonNull Language language, @NonNull String positions, String filter, boolean fullOutput) {
public WordList getWords(@NonNull SQLiteDatabase db, @Nullable CancellationSignal cancel, @NonNull Language language, @NonNull String positions, String filter, boolean orderByLength, boolean fullOutput) {
if (positions.isEmpty()) {
Logger.d(LOG_TAG, "No word positions. Not searching words.");
return new WordList();
}
String wordsQuery = getWordsQuery(language, positions, filter, fullOutput);
String wordsQuery = getWordsQuery(language, positions, filter, orderByLength, fullOutput);
if (wordsQuery.isEmpty() || (cancel != null && cancel.isCanceled())) {
return new WordList();
}
@ -281,7 +281,7 @@ public class ReadOps {
}
@NonNull private String getWordsQuery(@NonNull Language language, @NonNull String positions, @NonNull String filter, boolean fullOutput) {
@NonNull private String getWordsQuery(@NonNull Language language, @NonNull String positions, @NonNull String filter, boolean orderByLength, boolean fullOutput) {
StringBuilder sql = new StringBuilder();
sql
.append("SELECT word");
@ -296,7 +296,11 @@ public class ReadOps {
sql.append(" AND word LIKE '").append(filter.replaceAll("'", "''")).append("%'");
}
sql.append(" ORDER BY LENGTH(word), frequency DESC");
sql.append(" ORDER BY ");
if (orderByLength) {
sql.append("LENGTH(word), ");
}
sql.append("frequency DESC");
String wordsSql = sql.toString();
Logger.v(LOG_TAG, "Words SQL: " + wordsSql);

View file

@ -17,6 +17,7 @@ public class UpdateOps {
public static boolean changeFrequency(@NonNull SQLiteDatabase db, @NonNull Language language, Text wordFilter, int position, int frequency) {
boolean isFilterOn = wordFilter != null && !wordFilter.isEmpty();
String sql = "UPDATE " + Tables.getWords(language.getId()) + " SET frequency = ? WHERE position = ?";
if (wordFilter != null && !wordFilter.isEmpty()) {
@ -26,13 +27,17 @@ public class UpdateOps {
SQLiteStatement query = CompiledQueryCache.get(db, sql);
query.bindLong(1, frequency);
query.bindLong(2, position);
if (wordFilter != null && !wordFilter.isEmpty()) {
if (isFilterOn) {
query.bindString(3, wordFilter.capitalize());
query.bindString(4, wordFilter.toLowerCase());
query.bindString(5, wordFilter.toUpperCase());
}
Logger.v(LOG_TAG, "Change frequency SQL: " + query + "; (" + frequency + ", " + position + ", " + wordFilter + ")");
if (!isFilterOn) {
Logger.v(LOG_TAG, "Change frequency SQL: " + sql + "; (" + frequency + ", " + position + ")");
} else {
Logger.v(LOG_TAG, "Change frequency SQL: " + sql + "; (" + frequency + ", " + position + ", '" + wordFilter + "')");
}
return query.executeUpdateDelete() > 0;
}

View file

@ -62,7 +62,7 @@ public class WordStore extends BaseSyncStore {
* and other similar. When "onlyExactSequence" is TRUE, the word list is constrained only to
* the words with length equal to the digit sequence length (exact matches).
*/
public ArrayList<String> getMany(@NonNull CancellationSignal cancel, Language language, String sequence, boolean onlyExactSequence, String wordFilter, int minimumWords, int maximumWords) {
public ArrayList<String> getMany(@NonNull CancellationSignal cancel, Language language, String sequence, boolean onlyExactSequence, String wordFilter, boolean orderByLength, int minimumWords, int maximumWords) {
if (!checkOrNotify()) {
return new ArrayList<>();
}
@ -86,7 +86,7 @@ public class WordStore extends BaseSyncStore {
long positionsTime = Timer.stop("get_positions");
Timer.start("get_words");
ArrayList<String> words = readOps.getWords(sqlite.getDb(), cancel, language, positions, filter, false).toStringList();
ArrayList<String> words = readOps.getWords(sqlite.getDb(), cancel, language, positions, filter, orderByLength, false).toStringList();
long wordsTime = Timer.stop("get_words");
printLoadingSummary(sequence, words, positionsTime, wordsTime);
@ -176,7 +176,7 @@ public class WordStore extends BaseSyncStore {
Timer.start(LOG_TAG);
String topWordPositions = readOps.getWordPositions(sqlite.getDb(), null, language, sequence, 0, 0, Integer.MAX_VALUE, "");
WordList topWords = readOps.getWords(sqlite.getDb(), null, language, topWordPositions, "", true);
WordList topWords = readOps.getWords(sqlite.getDb(), null, language, topWordPositions, "", false, true);
if (topWords.isEmpty()) {
throw new Exception("No such word");
}

View file

@ -131,7 +131,11 @@ abstract public class MainViewHandler extends HotkeyHandler {
@NonNull
public String getInputModeName() {
if (InputModeKind.isPredictive(mInputMode)) {
if (InputModeKind.isHiragana(mInputMode)) {
return "";
} else if (InputModeKind.isKatakana(mInputMode)) {
return "";
} else if (InputModeKind.isPredictive(mInputMode)) {
return "T9";
} else if (InputModeKind.isNumeric(mInputMode)){
return "123";

View file

@ -21,6 +21,7 @@ import io.github.sspanak.tt9.ime.modes.InputMode;
import io.github.sspanak.tt9.ime.modes.InputModeKind;
import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.languages.LanguageCollection;
import io.github.sspanak.tt9.languages.LanguageKind;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
import io.github.sspanak.tt9.ui.UI;
import io.github.sspanak.tt9.util.Text;
@ -293,7 +294,9 @@ public abstract class TypingHandler extends KeyPadHandler {
}
allowedInputModes = new ArrayList<>(inputType.determineInputModes(getApplicationContext()));
if (!mLanguage.hasABC()) {
if (LanguageKind.isJapanese(mLanguage)) {
determineJapaneseInputModes();
} else if (!mLanguage.hasABC()) {
allowedInputModes.remove((Integer) InputMode.MODE_ABC);
} else if (!settings.getPredictiveMode()) {
allowedInputModes.remove((Integer) InputMode.MODE_PREDICTIVE);
@ -303,6 +306,19 @@ public abstract class TypingHandler extends KeyPadHandler {
}
/**
 * Since Japanese is unique with its 3 alphabets, we need to setup the input modes separately.
 *
 * Hiragana and Katakana take the place of the ABC mode, so they are offered only when the
 * current field allowed ABC in the first place. Fields restricted to digits or to passthrough
 * keep their restrictions; otherwise it would become possible to type kana in them.
 * Predictive mode is removed when it is disabled in the settings.
 */
private void determineJapaneseInputModes() {
    // List.remove(Object) returns true only if ABC was present, i.e. this is a text field.
    if (allowedInputModes.remove((Integer) InputMode.MODE_ABC)) {
        allowedInputModes.add(InputMode.MODE_HIRAGANA);
        allowedInputModes.add(InputMode.MODE_KATAKANA);
    }

    if (!settings.getPredictiveMode()) {
        allowedInputModes.remove((Integer) InputMode.MODE_PREDICTIVE);
    }
}
/**
* determineInputMode
* Same as determineInputModeId(), but returns an actual InputMode.

View file

@ -42,14 +42,18 @@ public class InputModeValidator {
if (allowedModes.contains(oldModeId)) {
newModeId = oldModeId;
} else if ((oldModeId == InputMode.MODE_HIRAGANA || oldModeId == InputMode.MODE_KATAKANA) && allowedModes.contains(InputMode.MODE_PREDICTIVE)) {
newModeId = InputMode.MODE_PREDICTIVE;
} else if (allowedModes.contains(InputMode.MODE_ABC)) {
newModeId = InputMode.MODE_ABC;
} else if (allowedModes.contains(InputMode.MODE_HIRAGANA)) {
newModeId = InputMode.MODE_HIRAGANA;
} else if (!allowedModes.isEmpty()) {
newModeId = allowedModes.get(0);
}
if (newModeId != oldModeId) {
Logger.d("validateMode", "Invalid input mode: " + oldModeId + " Enforcing: " + newModeId);
Logger.d("validateMode", "Invalid input mode: " + oldModeId + " Enforcing: " + newModeId + " from " + allowedModes);
}
return newModeId;

View file

@ -5,6 +5,8 @@ import android.text.InputType;
import android.view.inputmethod.EditorInfo;
import android.view.inputmethod.InputConnection;
import androidx.annotation.NonNull;
import java.util.HashSet;
import java.util.Set;
@ -134,7 +136,7 @@ abstract public class StandardInputType {
*
* @return Set<InputMode.MODE_PASSTHROUGH | InputMode.MODE_ABC | InputMode.MODE_123 | InputMode.MODE_PREDICTIVE>
*/
public Set<Integer> determineInputModes(Context context) {
public Set<Integer> determineInputModes(@NonNull Context context) {
Set<Integer> allowedModes = new HashSet<>();
if (field == null) {

View file

@ -20,6 +20,8 @@ abstract public class InputMode {
public static final int MODE_ABC = 1;
public static final int MODE_123 = 2;
public static final int MODE_PASSTHROUGH = 4;
public static final int MODE_HIRAGANA = 5;
public static final int MODE_KATAKANA = 6;
// text case
public static final int CASE_UNDEFINED = -1;
@ -52,8 +54,16 @@ abstract public class InputMode {
switch (mode) {
case MODE_PREDICTIVE:
if (LanguageKind.isChinese(language)) return new ModePinyin(settings, language, inputType, textField);
if (LanguageKind.isJapanese(language)) return new ModeKanji(settings, language, inputType, textField);
if (LanguageKind.isKorean(language)) return new ModeCheonjiin(settings, inputType, textField);
if (language != null && language.isTranscribed()) return new ModeIdeograms(settings, language, inputType, textField);
return new ModeWords(settings, language, inputType, textField);
case MODE_HIRAGANA:
if (LanguageKind.isJapanese(language)) return new ModeHiragana(settings, language, inputType, textField);
return new ModeABC(settings, language, inputType);
case MODE_KATAKANA:
if (LanguageKind.isJapanese(language)) return new ModeKatakana(settings, language, inputType, textField);
return new ModeABC(settings, language, inputType);
case MODE_ABC:
return new ModeABC(settings, language, inputType);
case MODE_PASSTHROUGH:

View file

@ -9,16 +9,28 @@ public class InputModeKind {
return mode != null && mode.getId() == InputMode.MODE_123;
}
public static boolean isNumeric(InputMode mode) {
return isPassthrough(mode) || is123(mode);
}
public static boolean isABC(InputMode mode) {
return mode != null && mode.getId() == InputMode.MODE_ABC;
}
/**
 * Tells whether the given mode is the Hiragana mode. A null mode is never Hiragana.
 */
public static boolean isHiragana(InputMode mode) {
    if (mode == null) {
        return false;
    }

    return mode.getId() == InputMode.MODE_HIRAGANA;
}
/**
 * Tells whether the given mode is the Katakana mode. A null mode is never Katakana.
 */
public static boolean isKatakana(InputMode mode) {
    if (mode == null) {
        return false;
    }

    return mode.getId() == InputMode.MODE_KATAKANA;
}
public static boolean isNumeric(InputMode mode) {
return isPassthrough(mode) || is123(mode);
}
public static boolean isPredictive(InputMode mode) {
return mode != null && mode.getId() == InputMode.MODE_PREDICTIVE;
return mode != null && (
mode.getId() == InputMode.MODE_PREDICTIVE ||
mode.getId() == InputMode.MODE_HIRAGANA ||
mode.getId() == InputMode.MODE_KATAKANA
);
}
public static boolean isCheonjiin(InputMode mode) {

View file

@ -23,7 +23,7 @@ import io.github.sspanak.tt9.util.chars.Characters;
class ModeCheonjiin extends InputMode {
// used when we want do display a different set of characters for a given key, for example
// in email fields
private final ArrayList<ArrayList<String>> KEY_CHARACTERS = new ArrayList<>();
protected final ArrayList<ArrayList<String>> KEY_CHARACTERS = new ArrayList<>();
// special chars and emojis
private static String SPECIAL_CHAR_SEQUENCE_PREFIX;

View file

@ -0,0 +1,25 @@
package io.github.sspanak.tt9.ime.modes;
import io.github.sspanak.tt9.hacks.InputType;
import io.github.sspanak.tt9.ime.helpers.TextField;
import io.github.sspanak.tt9.ime.modes.predictions.KanaPredictions;
import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
/**
 * Japanese typing mode limited to Hiragana. It inherits everything from ModeKanji, but
 * identifies itself as MODE_HIRAGANA and loads its suggestions through KanaPredictions
 * with the Katakana flag turned off.
 */
public class ModeHiragana extends ModeKanji {
    protected ModeHiragana(SettingsStore settings, Language lang, InputType inputType, TextField textField) {
        super(settings, lang, inputType, textField);
        // Overwrite the name assigned by the ModeKanji constructor.
        NAME = "ひらがな";
    }

    @Override
    public int getId() {
        return MODE_HIRAGANA;
    }

    /**
     * Creates the Hiragana-only predictions and subscribes this mode for their updates.
     */
    @Override
    protected void initPredictions() {
        final KanaPredictions hiragana = new KanaPredictions(settings, textField, false);
        hiragana.setWordsChangedHandler(this::onPredictions);
        predictions = hiragana;
    }
}

View file

@ -12,27 +12,23 @@ import io.github.sspanak.tt9.preferences.settings.SettingsStore;
import io.github.sspanak.tt9.util.Logger;
import io.github.sspanak.tt9.util.Text;
import io.github.sspanak.tt9.util.TextTools;
import io.github.sspanak.tt9.util.chars.Characters;
public class ModeIdeograms extends ModeWords {
private static final String LOG_TAG = ModeIdeograms.class.getSimpleName();
protected String NAME;
private boolean isFiltering = false;
protected ModeIdeograms(SettingsStore settings, Language lang, InputType inputType, TextField textField) {
super(settings, lang, inputType, textField);
NAME = super.toString();
}
@Override public void determineNextWordTextCase() {}
@Override protected String adjustSuggestionTextCase(String word, int newTextCase) { return word; }
@Override
protected void initPredictions() {
predictions = new IdeogramPredictions(settings, textField);
predictions.setWordsChangedHandler(this::onPredictions);
}
@Override public void determineNextWordTextCase() {}
@Override
@ -53,6 +49,24 @@ public class ModeIdeograms extends ModeWords {
}
/**
 * Adds the special characters to the key character list, then replaces the regular space
 * in the first list with the ideographic space, if a regular space is there at all.
 */
@Override
protected void setCustomSpecialCharacters() {
    KEY_CHARACTERS.add(applyPunctuationOrder(Characters.Special, 0));

    final int spaceIndex = KEY_CHARACTERS.get(0).indexOf(" ");
    if (spaceIndex < 0) {
        return;
    }

    KEY_CHARACTERS.get(0).set(spaceIndex, Characters.IDEOGRAPHIC_SPACE);
}
/******************************* LOAD SUGGESTIONS *********************************/
@Override
protected void initPredictions() {
predictions = new IdeogramPredictions(settings, textField);
predictions.setWordsChangedHandler(this::onPredictions);
}
@Override
protected void onPredictions() {
if (language.hasTranscriptionsEmbedded()) {
@ -72,6 +86,7 @@ public class ModeIdeograms extends ModeWords {
super.onPredictions();
}
/******************************* ACCEPT WORDS *********************************/
@Override
public void onAcceptSuggestion(@NonNull String currentWord, boolean preserveWords) {
@ -89,9 +104,7 @@ public class ModeIdeograms extends ModeWords {
}
try {
String latinWord = ((IdeogramPredictions) predictions).getTranscription(currentWord);
String digits = language.getDigitSequenceForWord(latinWord);
((IdeogramPredictions) predictions).onAcceptTranscription(currentWord, latinWord, digits);
((IdeogramPredictions) predictions).onAcceptIdeogram(currentWord);
} catch (Exception e) {
Logger.e(LOG_TAG, "Failed incrementing priority of word: '" + currentWord + "'. " + e.getMessage());
}
@ -106,12 +119,6 @@ public class ModeIdeograms extends ModeWords {
}
@Override public void onCursorMove(@NonNull String word) {
isFiltering = false;
super.onCursorMove(word);
}
@Override
public boolean shouldAcceptPreviousSuggestion(String s) {
return
@ -174,6 +181,19 @@ public class ModeIdeograms extends ModeWords {
return isFiltering;
}
/********************************* FILTERING *********************************/
@Override
public boolean clearWordStem() {
if (!supportsFiltering()) {
return false;
}
isFiltering = false;
stem = "";
return true;
}
@Override
public boolean setWordStem(String newStem, boolean fromScrolling) {
@ -191,18 +211,6 @@ public class ModeIdeograms extends ModeWords {
}
@Override
public boolean clearWordStem() {
if (!supportsFiltering()) {
return false;
}
isFiltering = false;
stem = "";
return true;
}
@Override
public boolean supportsFiltering() {
return language.hasTranscriptionsEmbedded();
@ -213,4 +221,18 @@ public class ModeIdeograms extends ModeWords {
public boolean isStemFilterFuzzy() {
return isFiltering;
}
@Override public void onCursorMove(@NonNull String word) {
isFiltering = false;
super.onCursorMove(word);
}
/********************************* NAME *********************************/
@NonNull
@Override
public String toString() {
return NAME;
}
}

View file

@ -0,0 +1,40 @@
package io.github.sspanak.tt9.ime.modes;
import androidx.annotation.Nullable;
import io.github.sspanak.tt9.hacks.InputType;
import io.github.sspanak.tt9.ime.helpers.TextField;
import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.languages.LanguageKind;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
/**
 * The Japanese typing mode built on top of ModePinyin. It accepts only Japanese as its
 * language and shows the language name without the romaji suffix.
 */
public class ModeKanji extends ModePinyin {
    protected ModeKanji(SettingsStore settings, Language lang, InputType inputType, TextField textField) {
        super(settings, lang, inputType, textField);

        // Keep only the native part of the language name.
        final String fullName = language.getName();
        NAME = fullName.replace(" / ローマ字", "");
    }


    /**
     * Switches to the new language only if it is Japanese.
     *
     * @return true if the language was set, false if it was rejected.
     */
    @Override
    public boolean changeLanguage(@Nullable Language newLanguage) {
        if (!LanguageKind.isJapanese(newLanguage)) {
            return false;
        }

        setLanguage(newLanguage);
        return true;
    }


    /**
     * Never accepts when nothing has been typed yet. Also does not accept when the sequence
     * extended with the next key would end with the punctuation sequence while the predictions
     * still report database words. All other cases are decided by the parent mode.
     */
    @Override
    public boolean shouldAcceptPreviousSuggestion(int nextKey, boolean hold) {
        if (digitSequence.isEmpty()) {
            return false;
        }

        // Simulate the sequence as it would look after the next key press.
        final char nextDigit = (char) (nextKey + '0');
        final String nextSequence = digitSequence + nextDigit;

        final boolean keepTyping = nextSequence.endsWith(PUNCTUATION_SEQUENCE) && !predictions.noDbWords();
        if (keepTyping) {
            return false;
        }

        return super.shouldAcceptPreviousSuggestion(nextKey, hold);
    }
}

View file

@ -0,0 +1,25 @@
package io.github.sspanak.tt9.ime.modes;
import io.github.sspanak.tt9.hacks.InputType;
import io.github.sspanak.tt9.ime.helpers.TextField;
import io.github.sspanak.tt9.ime.modes.predictions.KanaPredictions;
import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
/**
 * Japanese typing mode limited to Katakana. It is the same as ModeHiragana, except that it
 * identifies itself as MODE_KATAKANA and creates its KanaPredictions with the Katakana flag
 * turned on.
 */
public class ModeKatakana extends ModeHiragana {
    protected ModeKatakana(SettingsStore settings, Language lang, InputType inputType, TextField textField) {
        super(settings, lang, inputType, textField);
        // Overwrite the name assigned by the ModeHiragana constructor.
        NAME = "カタカナ";
    }

    @Override
    public int getId() {
        return MODE_KATAKANA;
    }

    /**
     * Creates the Katakana-only predictions and subscribes this mode for their updates.
     */
    @Override
    protected void initPredictions() {
        final KanaPredictions katakana = new KanaPredictions(settings, textField, true);
        katakana.setWordsChangedHandler(this::onPredictions);
        predictions = katakana;
    }
}

View file

@ -20,7 +20,23 @@ public class ModePinyin extends ModeIdeograms {
@Override
public boolean changeLanguage(@Nullable Language newLanguage) {
return LanguageKind.isChinese(newLanguage) && super.changeLanguage(newLanguage);
if (LanguageKind.isChinese(newLanguage)) {
setLanguage(newLanguage);
return true;
}
return false;
}
@Override
protected String getPreferredChar() {
final String preferredChar = settings.getDoubleZeroChar();
return switch (preferredChar) {
case "." -> Characters.ZH_FULL_STOP;
case "," -> Characters.ZH_COMMA_LIST;
default -> preferredChar;
};
}
@ -52,15 +68,4 @@ public class ModePinyin extends ModeIdeograms {
return super.shouldAcceptPreviousSuggestion(nextKey, hold);
}
@Override
protected String getPreferredChar() {
final String preferredChar = settings.getDoubleZeroChar();
return switch (preferredChar) {
case "." -> Characters.ZH_FULL_STOP;
case "," -> Characters.ZH_COMMA_LIST;
default -> preferredChar;
};
}
}

View file

@ -9,6 +9,7 @@ import java.util.HashSet;
import io.github.sspanak.tt9.db.DataStore;
import io.github.sspanak.tt9.ime.helpers.TextField;
import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.languages.exceptions.InvalidLanguageCharactersException;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
import io.github.sspanak.tt9.util.TextTools;
@ -16,6 +17,7 @@ public class IdeogramPredictions extends WordPredictions {
private boolean isTranscriptionFilterAllowed = false;
private String lastTypedWord = "";
@NonNull protected ArrayList<String> transcriptions = new ArrayList<>();
@NonNull protected ArrayList<String> lastTranscriptions = new ArrayList<>();
public IdeogramPredictions(SettingsStore settings, TextField textField) {
@ -34,13 +36,16 @@ public class IdeogramPredictions extends WordPredictions {
@Override
public void load() {
transcriptions.clear();
if (digitSequence.isEmpty()) {
transcriptions.clear();
}
super.load();
}
@Override
protected void onDbWords(ArrayList<String> dbWords, boolean isRetryAllowed) {
lastTranscriptions = new ArrayList<>(transcriptions); // backup in case of auto-accept, so that we can still find previous transcriptions
transcriptions = onlyExactMatches ? reduceFuzzyMatches(dbWords, SettingsStore.SUGGESTIONS_MAX) : dbWords;
words = new ArrayList<>(transcriptions);
areThereDbWords = !words.isEmpty();
@ -48,7 +53,9 @@ public class IdeogramPredictions extends WordPredictions {
}
public void onAcceptTranscription(String word, String transcription, String sequence) {
public void onAcceptIdeogram(String word) throws InvalidLanguageCharactersException {
String transcription = getTranscription(word);
String sequence = language.getDigitSequenceForWord(transcription);
super.onAccept(transcription + word, sequence);
}
@ -56,13 +63,24 @@ public class IdeogramPredictions extends WordPredictions {
@Override
@NonNull
protected String getPenultimateWord(@NonNull String currentWord) {
int currentWordLength = currentWord.length();
int lastWordLength = lastTypedWord.length();
int requiredTextLength = currentWordLength + lastWordLength;
String text = textField.getStringBeforeCursor(requiredTextLength);
// Logger.d("LOG_TAG", "====+> previous string: " + text);
final int lastWordLength = lastTypedWord.length();
if (lastWordLength == 0) {
return "";
}
return lastWordLength < text.length() ? text.substring(0, lastWordLength) : "";
final int currentWordLength = currentWord.length();
final int requiredTextLength = currentWordLength + lastWordLength;
String text = textField.getStringBeforeCursor(requiredTextLength);
final int textLength = text.length();
if (textLength == 0) {
return "";
}
if (text.endsWith(currentWord) && textLength > currentWordLength) {
text = text.substring(0, textLength - currentWordLength);
}
return text.contains(lastTypedWord) ? lastTypedWord : "";
}
@ -149,11 +167,7 @@ public class IdeogramPredictions extends WordPredictions {
HashSet<String> uniqueTranscriptions = new HashSet<>();
for (int i = 0; i < transcriptions.size(); i++) {
String transcription = transcriptions.get(i);
int firstNative = TextTools.lastIndexOfLatin(transcription) + 1;
uniqueTranscriptions.add(
firstNative < 1 || firstNative >= transcription.length() ? transcription : transcription.substring(0, firstNative)
);
uniqueTranscriptions.add(stripNativeWord(transcriptions.get((i))));
}
words.clear();
@ -162,6 +176,15 @@ public class IdeogramPredictions extends WordPredictions {
}
/**
 * Cuts a single database transcription right after the index reported by
 * TextTools.lastIndexOfLatin(), dropping whatever follows it. The transcription is returned
 * unchanged when that cut position is below 1 or is not inside the string.
 * This is the per-item worker for stripNativeWords().
 */
protected String stripNativeWord(@NonNull String dbTranscription) {
    final int nativeStart = TextTools.lastIndexOfLatin(dbTranscription) + 1;

    if (nativeStart < 1 || nativeStart >= dbTranscription.length()) {
        return dbTranscription;
    }

    return dbTranscription.substring(0, nativeStart);
}
/**
* Removes the Latin transcriptions from native words. Directly modifies the words list, but the
* original is preserved in this.transcriptions.
@ -179,11 +202,19 @@ public class IdeogramPredictions extends WordPredictions {
/**
* Similar to "stripNativeWords()", but finds and returns the transcription of the given word.
* Returns an empty string if the word is not in the current suggestion list.
* In case of an auto-accept, the `transcriptions` would be empty, so we check the `lastTranscriptions`.
* If no transcription is found, an empty string is returned.
*/
@NonNull
public String getTranscription(@NonNull String word) {
for (String w : transcriptions) {
String transcription = getTranscription(word, transcriptions);
return transcription.isEmpty() ? getTranscription(word, lastTranscriptions) : transcription;
}
@NonNull
private String getTranscription(@NonNull String word, @NonNull ArrayList<String> transcriptionList) {
for (String w : transcriptionList) {
if (w.endsWith(word)) {
return w.replace(word, "");
}

View file

@ -0,0 +1,54 @@
package io.github.sspanak.tt9.ime.modes.predictions;
import androidx.annotation.NonNull;
import io.github.sspanak.tt9.ime.helpers.TextField;
import io.github.sspanak.tt9.languages.exceptions.InvalidLanguageCharactersException;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
/**
 * Predictions for the Hiragana and Katakana modes. Every digit sequence and every stem filter
 * is prefixed with an alphabet-specific marker before being handed to the parent class, and
 * the stem marker is cut off again from the transcriptions coming back.
 */
public class KanaPredictions extends IdeogramPredictions {
    /** Digit put in front of every non-empty digit sequence: '1' for Katakana, '0' for Hiragana. */
    private final char sequencePrefix;
    /** Marker put in front of every stem: "Qk" for Katakana, "Qh" for Hiragana. */
    @NonNull private final String stemPrefix;
    private final int stemPrefixLength;


    public KanaPredictions(SettingsStore settings, TextField textField, boolean isKatakana) {
        super(settings, textField);

        if (isKatakana) {
            sequencePrefix = '1';
            stemPrefix = "Qk";
        } else {
            sequencePrefix = '0';
            stemPrefix = "Qh";
        }
        stemPrefixLength = stemPrefix.length();

        // Kana with different character lengths can share one digit sequence (for example,
        // "じゃ" is typed with "52"). They must be treated as equivalent, so ordering by
        // length is turned off to avoid listing them in an incorrect order.
        orderWordsByLength = false;
    }


    /**
     * Sets the sequence as usual, then puts the alphabet digit in front of it, unless the
     * resulting sequence is empty.
     */
    @Override
    public Predictions setDigitSequence(@NonNull String newSequence) {
        super.setDigitSequence(newSequence);
        if (!digitSequence.isEmpty()) {
            digitSequence = sequencePrefix + digitSequence;
        }

        return this;
    }


    /**
     * Sets the stem with the alphabet marker in front of it.
     */
    @Override
    public WordPredictions setStem(String stem) {
        final String prefixedStem = stemPrefix + stem;
        return super.setStem(prefixedStem);
    }


    /**
     * Strips the native word like the parent does, then also removes the alphabet marker.
     */
    @Override
    protected String stripNativeWord(@NonNull String dbTranscription) {
        final String withoutNative = super.stripNativeWord(dbTranscription);
        return stripStemPrefix(withoutNative);
    }


    /**
     * Drops as many leading characters as the stem marker is long. Strings that are not longer
     * than the marker are returned as they are.
     */
    @NonNull
    private String stripStemPrefix(@NonNull String transcription) {
        if (transcription.length() <= stemPrefixLength) {
            return transcription;
        }

        return transcription.substring(stemPrefixLength);
    }


    /**
     * Accepts the word together with its transcription. The digit sequence is computed from
     * the transcription without the stem marker, and gets the alphabet digit in front.
     */
    @Override
    public void onAcceptIdeogram(String word) throws InvalidLanguageCharactersException {
        final String transcription = getTranscription(word);
        final String unprefixed = stripStemPrefix(transcription);
        final String sequence = sequencePrefix + language.getDigitSequenceForWord(unprefixed);

        super.onAccept(transcription + word, sequence);
    }
}

View file

@ -15,6 +15,7 @@ abstract public class Predictions {
// settings
@NonNull protected String digitSequence = "";
@NonNull protected Language language = new NullLanguage();
protected boolean orderWordsByLength = true;
protected int minWords = SettingsStore.SUGGESTIONS_MIN;
protected final int maxWords = SettingsStore.SUGGESTIONS_MAX;
protected boolean onlyExactMatches = false;
@ -111,6 +112,7 @@ abstract public class Predictions {
digitSequence,
onlyExactMatches,
stem,
orderWordsByLength,
minWords,
maxWords
);

View file

@ -81,6 +81,7 @@ public class WordPredictions extends Predictions {
digitSequence.substring(1),
onlyExactMatches,
stem.length() > 1 ? stem.substring(1) : "",
orderWordsByLength,
minWords,
maxWords
);

View file

@ -5,6 +5,7 @@ import java.util.Locale;
public class LanguageKind {
public static final int KOREAN = 601579;
public static boolean isCJK(Language language) { return isChinese(language) || isJapanese(language) || isKorean(language); }
public static boolean isCyrillic(Language language) { return language != null && language.getKeyCharacters(2).contains("а"); }
public static boolean isLatinBased(Language language) { return language != null && language.getKeyCharacters(2).contains("a"); }
public static boolean isRTL(Language language) { return isArabic(language) || isFarsi(language) || isHebrew(language); }
@ -14,6 +15,7 @@ public class LanguageKind {
public static boolean isChinese(Language language) { return language != null && language.getId() == 462106; }
public static boolean isFarsi(Language language) { return language != null && language.getId() == 599078; }
public static boolean isFrench(Language language) { return language != null && language.getId() == 596550; }
public static boolean isJapanese(Language language) { return language != null && language.getId() == 534570; }
public static boolean isGreek(Language language) { return language != null && language.getId() == 597381; }
public static boolean isGujarati(Language language) { return language != null && language.getId() == 468647; }
public static boolean isHebrew(Language language) { return language != null && (language.getId() == 305450 || language.getId() == 403177); }

View file

@ -10,8 +10,10 @@ abstract class TranscribedLanguage extends Language implements Comparable<Transc
return false;
}
return (LanguageKind.isKorean(this) && TextTools.isHangul(word))
|| (LanguageKind.isChinese(this) && TextTools.isChinese(word));
return
(LanguageKind.isKorean(this) && TextTools.isHangul(word))
|| (LanguageKind.isChinese(this) && TextTools.isChinese(word))
|| (LanguageKind.isJapanese(this) && TextTools.isJapanese(word));
}

View file

@ -14,7 +14,9 @@ class Validators {
private static final ArrayList<Integer> validInputModes = new ArrayList<>(Arrays.asList(
InputMode.MODE_123,
InputMode.MODE_PREDICTIVE,
InputMode.MODE_ABC
InputMode.MODE_ABC,
InputMode.MODE_HIRAGANA,
InputMode.MODE_KATAKANA
));
private static final ArrayList<Integer> validTextCases = new ArrayList<>(Arrays.asList(

View file

@ -60,7 +60,9 @@ public class SoftKeyPunctuation extends SoftKey {
if (tt9.isInputModePhone()) return "*";
if (tt9.isInputModeNumeric()) return ",";
if (LanguageKind.isChinese(tt9.getLanguage())) return Characters.ZH_EXCLAMATION_MARK;
if (LanguageKind.isChinese(tt9.getLanguage()) || LanguageKind.isJapanese(tt9.getLanguage())) {
return Characters.ZH_EXCLAMATION_MARK;
}
return "!";
}
@ -72,7 +74,9 @@ public class SoftKeyPunctuation extends SoftKey {
if (LanguageKind.isArabic(tt9.getLanguage())) return Characters.AR_QUESTION_MARK;
if (LanguageKind.isGreek(tt9.getLanguage())) return Characters.GR_QUESTION_MARK;
if (LanguageKind.isChinese(tt9.getLanguage())) return Characters.ZH_QUESTION_MARK;
if (LanguageKind.isChinese(tt9.getLanguage()) || LanguageKind.isJapanese(tt9.getLanguage())) {
return Characters.ZH_QUESTION_MARK;
}
return "?";
}

View file

@ -245,7 +245,7 @@ public class Text extends TextTools {
return "";
}
if (!LanguageKind.isChinese(language)) {
if (!LanguageKind.isCJK(language)) {
return text.substring(start, end);
}

View file

@ -15,6 +15,7 @@ public class TextTools {
// Matches an input made up entirely of one or more mark characters (\p{M}).
private static final Pattern COMBINING_STRING = Pattern.compile("^\\p{M}+$");
// Matches when the input begins with a \p{Punct} character.
private static final Pattern NEXT_IS_PUNCTUATION = Pattern.compile("^\\p{Punct}");
// Matches a run of one or more Han-script characters.
private static final Pattern IS_CHINESE = Pattern.compile("\\p{script=Han}+");
// Matches a run of one or more Hiragana characters, or of Katakana characters, or of Han characters.
// Han is shared with IS_CHINESE, so this pattern alone cannot tell Japanese Kanji from Chinese text.
private static final Pattern IS_JAPANESE = Pattern.compile("\\p{script=Hiragana}+|\\p{script=Katakana}+|\\p{script=Han}+");
// Matches a run of one or more characters from the hard-coded code point ranges listed in the class below.
private static final Pattern IS_HANGUL = Pattern.compile("[\u1100-\u11FF\u302E-\u302F\u3131-\u318F\u3200-\u321F\u3260-\u327E\uA960-\uA97F\uAC00-\uD7FB\uFFA0-\uFFDF]+");
// Matches a word boundary (\b) located at the end of the input.
private static final Pattern NEXT_TO_WORD = Pattern.compile("\\b$");
// Matches when the input ends with a letter (\p{L}) or a mark (\p{M}).
private static final Pattern PREVIOUS_IS_LETTER = Pattern.compile("[\\p{L}\\p{M}]$");
@ -56,6 +57,11 @@ public class TextTools {
}
/**
 * Checks whether the string contains text matched by {@code IS_JAPANESE}.
 * The pattern is searched for anywhere in the string, so a partial match is enough.
 *
 * @param str the text to test; may be {@code null}
 * @return {@code false} for {@code null}; otherwise whether {@code IS_JAPANESE}
 *         is found anywhere in {@code str}
 */
public static boolean isJapanese(String str) {
	if (str == null) {
		return false;
	}

	return IS_JAPANESE.matcher(str).find();
}
public static int indexOfIgnoreCase(List<String> list, String str) {
for (int i = 0, size = list != null && str != null ? list.size() : 0; i < size; i++) {
if (list.get(i).equalsIgnoreCase(str)) {

View file

@ -7,6 +7,7 @@ import io.github.sspanak.tt9.languages.Language;
public class Characters extends Emoji {
public static final String COMBINING_ZERO_BASE = "";
public static final String IDEOGRAPHIC_SPACE = " ";
final public static ArrayList<String> Currency = new ArrayList<>(Arrays.asList(
"$", "", "", "¢", "¤", "", "¥", "£"

View file

@ -48,7 +48,7 @@ class Punctuation {
));
final public static ArrayList<String> PunctuationChinese = new ArrayList<>(Arrays.asList(
"", ZH_COMMA_LIST, ZH_FULL_STOP, "", "", "", "", ".", "", "", "", "", "", "", "", "", "", "'", "", "", "", "", ZH_EXCLAMATION_MARK, ZH_QUESTION_MARK
"", ZH_COMMA_LIST, ZH_FULL_STOP, "", "", "", "", "", ".", "", "", "", "", "", "", "", "", "", "'", "", "", "", "", ZH_EXCLAMATION_MARK, ZH_QUESTION_MARK
));
final public static ArrayList<String> PunctuationEnglish = new ArrayList<>(Arrays.asList(

View file

@ -243,12 +243,13 @@ def validateDictionary(File dictionaryFile, String alphabet, HashMap<String, Str
errorMsg += wordErrors
}
if (uniqueWords.contains(word)) {
final uniqueWord = transcription + word
if (uniqueWords.contains(uniqueWord)) {
lineHasErrors = true
errorCount++
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Found duplicate word: '${word}' on line ${lineNumber}. Remove all duplicates.\n"
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Found duplicate word: '${word}${!transcription.isEmpty() ? ' [' + transcription + ']' : ''}' on line ${lineNumber}. Remove all duplicates.\n"
} else {
uniqueWords.add(word)
uniqueWords.add(uniqueWord)
}
if (lineHasErrors) {
@ -295,6 +296,10 @@ static def extractAlphabetExtraCharsFromLine(String languageName, String line) {
allChars += '\u200C'
}
if (line.contains("PUNCTUATION") && languageName.contains("Japanese")) {
allChars += ''
}
return DEFAULT + allChars
}

View file

@ -0,0 +1,25 @@
Japanese wordlists by: EDICT Project
Source: https://www.edrdg.org
Dictionaries used: JMDICT, ENAMDICT
Version: 2025-04-01
License: https://www.edrdg.org/edrdg/licence.html (Creative Commons Attribution-ShareAlike Licence V4.0)
Verb conjugations generated using: Japanese Verb Conjugator V2
Source: https://pypi.org/project/japanese-verb-conjugator-v2/
Version: 2025-01-13
Verb conjugations converted to Hiragana using: WanaKana-py
Source: https://github.com/Starwort/wanakana-py
Version: fa43884 (2019-07-13)
Japanese frequency list by: Wortschatz Leipzig @ Uni Leipzig
Source: https://wortschatz.uni-leipzig.de/en/download/
Version: 2025-04-04
License: CC-BY
Reference:
> D. Goldhahn, T. Eckart & U. Quasthoff: Building Large Monolingual Dictionaries at the Leipzig Corpora Collection: From 100 to 200 Languages.
> In: Proceedings of the 8th International Language Resources and Evaluation (LREC'12), 2012
> http://www.lrec-conf.org/proceedings/lrec2012/pdf/327_Paper.pdf
Additional remarks:
Hiragana and Katakana for the respective modes were added manually. All words converted to Romaji manually.

BIN
downloads/ja-romaji.zip Normal file

Binary file not shown.