1
0
Fork 0

Chinese Pinyin

This commit is contained in:
sspanak 2025-03-07 12:50:50 +02:00 committed by Dimo Karaivanov
parent 51cd39fe27
commit c57877ce9a
46 changed files with 497243 additions and 151 deletions

View file

@ -8,13 +8,13 @@ ext.convertDictionaries = { definitionsInputDir, dictionariesInputDir, dictionar
int errorCount = 0
def errorStream = fileTree(dir: definitionsInputDir).getFiles().parallelStream().map { definition ->
def (_, sounds, noSyllables, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageDefintion(definition, dictionariesInputDir)
def (_, sounds, prependSoundsToWords, noSyllables, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageDefintion(definition, dictionariesInputDir)
errorCount += langFileErrorCount
if (!langFileErrorMsg.isEmpty()) {
return langFileErrorMsg
}
def (conversionErrorCount, conversionErrorMessages) = convertDictionary(definition, dictionaryFile, dictionariesOutputDir, dictionariesMetaDir, DICTIONARY_OUTPUT_EXTENSION, sounds, noSyllables, locale, MAX_ERRORS, CSV_DELIMITER)
def (conversionErrorCount, conversionErrorMessages) = convertDictionary(definition, dictionaryFile, dictionariesOutputDir, dictionariesMetaDir, DICTIONARY_OUTPUT_EXTENSION, sounds, prependSoundsToWords, noSyllables, locale, MAX_ERRORS, CSV_DELIMITER)
errorCount += conversionErrorCount
if (!conversionErrorMessages.isEmpty()) {
return conversionErrorMessages
@ -31,12 +31,12 @@ ext.convertDictionaries = { definitionsInputDir, dictionariesInputDir, dictionar
// this cannot be static, because DictionaryTools will not be visible
def convertDictionary(File definition, File csvDictionary, String dictionariesOutputDir, String dictionariesMetaDir, String outputDictionaryExtension, HashMap<String, String> sounds, boolean noSyllables, Locale locale, int maxErrors, String csvDelimiter) {
def convertDictionary(File definition, File csvDictionary, String dictionariesOutputDir, String dictionariesMetaDir, String outputDictionaryExtension, HashMap<String, String> sounds, boolean prependSoundsToWords, boolean noSyllables, Locale locale, int maxErrors, String csvDelimiter) {
if (isDictionaryUpToDate(definition, csvDictionary, dictionariesMetaDir)) {
return [0, ""]
}
final LATIN_ONLY_WORD = "^[A-Za-z]+\$"
int errorCount = 0
String errorMsg = ''
@ -63,7 +63,7 @@ def convertDictionary(File definition, File csvDictionary, String dictionariesOu
outputDictionary.put(digitSequence, new ArrayList<>())
}
// prefix the frequency to sort the words later
outputDictionary.get(digitSequence).add("${String.format('%03d', frequency)}${word}")
outputDictionary.get(digitSequence).add("${String.format('%03d', frequency)}${prependSoundsToWords && !(word =~ LATIN_ONLY_WORD) ? transcription : ''}${word}")
wordCount++
}
}

View file

@ -0,0 +1,45 @@
locale: zh-CN
dictionaryFile: zh-pinyin.csv
name: 中文 / 简体
hasABC: no
hasSpaceBetweenWords: no
hasUpperCase: no
layout:
- [SPECIAL] # 0
- [PUNCTUATION_ZH] # 1
- [a, b, c] # 2
- [d, e, f] # 3
- [g, h, i] # 4
- [j, k, l] # 5
- [m, n, o] # 6
- [p, q, r, s] # 7
- [t, u, v] # 8
- [w, x, y, z] # 9
filterBySounds: yes
sounds:
- [A,2]
- [B,2]
- [C,2]
- [D,3]
- [E,3]
- [F,3]
- [G,4]
- [H,4]
- [I,4]
- [J,5]
- [K,5]
- [L,5]
- [M,6]
- [N,6]
- [O,6]
- [P,7]
- [Q,7]
- [R,7]
- [S,7]
- [T,8]
- [U,8]
- [V,8]
- [W,9]
- [X,9]
- [Y,9]
- [Z,9]

View file

@ -1,6 +1,7 @@
locale: ko-KR
currency:
dictionaryFile: ko-utf8.csv
hasABC: no
hasUpperCase: no
layout: # only used for the virtual key labels
- [ㅇ,ㅁ,SPECIAL] # 0

File diff suppressed because it is too large Load diff

View file

@ -88,7 +88,7 @@ public class DataStore {
}
public static void getWords(ConsumerCompat<ArrayList<String>> dataHandler, Language language, String sequence, String filter, int minWords, int maxWords) {
public static void getWords(ConsumerCompat<ArrayList<String>> dataHandler, Language language, String sequence, boolean onlyExactSequence, String filter, int minWords, int maxWords) {
if (getWordsTask != null && !getWordsTask.isDone()) {
dataHandler.accept(new ArrayList<>());
getWordsCancellationSignal.cancel();
@ -96,14 +96,14 @@ public class DataStore {
}
getWordsCancellationSignal = new CancellationSignal();
getWordsTask = executor.submit(() -> getWordsSync(dataHandler, language, sequence, filter, minWords, maxWords));
getWordsTask = executor.submit(() -> getWordsSync(dataHandler, language, sequence, onlyExactSequence, filter, minWords, maxWords));
executor.submit(DataStore::setGetWordsTimeout);
}
private static void getWordsSync(ConsumerCompat<ArrayList<String>> dataHandler, Language language, String sequence, String filter, int minWords, int maxWords) {
private static void getWordsSync(ConsumerCompat<ArrayList<String>> dataHandler, Language language, String sequence, boolean onlyExactSequence, String filter, int minWords, int maxWords) {
try {
ArrayList<String> data = words.getMany(getWordsCancellationSignal, language, sequence, filter, minWords, maxWords);
ArrayList<String> data = words.getMany(getWordsCancellationSignal, language, sequence, onlyExactSequence, filter, minWords, maxWords);
asyncReturn.post(() -> dataHandler.accept(data));
} catch (Exception e) {
Logger.e(LOG_TAG, "Error fetching words: " + e.getMessage());

View file

@ -134,7 +134,7 @@ public class CustomWordsImporter extends AbstractFileProcessor {
return false;
}
if (customWord.language == null || customWord.language.isSyllabary() || readOps.exists(sqlite.getDb(), customWord.language, customWord.word)) {
if (customWord.language == null || customWord.language.isTranscribed() || readOps.exists(sqlite.getDb(), customWord.language, customWord.word)) {
ignoredWords++;
} else {
InsertOps.insertCustomWord(sqlite.getDb(), customWord.language, customWord.sequence, customWord.word);

View file

@ -42,7 +42,7 @@ public class WordFile extends AssetFile {
public WordFile(@NonNull Context context, Language language, AssetManager assets) {
super(assets, language != null ? language.getDictionaryFile() : "");
this.context = context;
hasSyllables = language != null && language.isSyllabary();
hasSyllables = language != null && language.isTranscribed();
lastCharCode = 0;
reader = null;

View file

@ -18,7 +18,6 @@ import io.github.sspanak.tt9.db.entities.WordList;
import io.github.sspanak.tt9.db.entities.WordPositionsStringBuilder;
import io.github.sspanak.tt9.db.wordPairs.WordPair;
import io.github.sspanak.tt9.db.words.SlowQueryStats;
import io.github.sspanak.tt9.db.words.WordStore;
import io.github.sspanak.tt9.languages.EmojiLanguage;
import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
@ -131,9 +130,6 @@ public class ReadOps {
return new WordList();
}
// EXACT_MATCHES concerns only the positions query
filter = filter.equals(WordStore.FILTER_EXACT_MATCHES_ONLY) ? "" : filter;
String wordsQuery = getWordsQuery(language, positions, filter, maximumWords, fullOutput);
if (wordsQuery.isEmpty() || (cancel != null && cancel.isCanceled())) {
return new WordList();
@ -157,10 +153,10 @@ public class ReadOps {
}
public String getSimilarWordPositions(@NonNull SQLiteDatabase db, @NonNull CancellationSignal cancel, @NonNull Language language, @NonNull String sequence, String wordFilter, int minPositions) {
public String getSimilarWordPositions(@NonNull SQLiteDatabase db, @NonNull CancellationSignal cancel, @NonNull Language language, @NonNull String sequence, boolean onlyExactSequenceMatches, String wordFilter, int minPositions) {
int generations;
if (wordFilter.equals(WordStore.FILTER_EXACT_MATCHES_ONLY)) {
if (onlyExactSequenceMatches) {
generations = 0;
} else {
generations = switch (sequence.length()) {
@ -176,7 +172,7 @@ public class ReadOps {
@NonNull
public String getWordPositions(@NonNull SQLiteDatabase db, @Nullable CancellationSignal cancel, @NonNull Language language, @NonNull String sequence, int generations, int minPositions, String wordFilter) {
if (sequence.length() == 1 || (cancel != null && cancel.isCanceled())) {
if ((sequence.length() == 1 && !language.isTranscribed()) || (cancel != null && cancel.isCanceled())) {
return sequence;
}
@ -301,9 +297,13 @@ public class ReadOps {
sql.append(" AND word LIKE '").append(filter.replaceAll("'", "''")).append("%'");
}
sql
.append(" ORDER BY LENGTH(word), frequency DESC")
.append(" LIMIT ").append(maxWords);
sql.append(" ORDER BY LENGTH(word), frequency DESC");
if (maxWords < 0 && maxWordsPerSequence.containsKey(language)) {
Integer limit = maxWordsPerSequence.get(language);
maxWords = limit != null ? limit : SettingsStore.SUGGESTIONS_POSITIONS_LIMIT;
}
sql.append(" LIMIT ").append(maxWords);
String wordsSql = sql.toString();
Logger.v(LOG_TAG, "Words SQL: " + wordsSql);

View file

@ -24,13 +24,16 @@ public class WordPair {
boolean isInvalid() {
Text w1 = new Text(word1);
Text w2 = new Text(word2);
return
language == null
|| word1.isEmpty() || word2.isEmpty()
|| (word1.length() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH && word2.length() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH)
|| word1.equals(word2)
|| sequence2 == null || word2.length() != sequence2.length() || !(new Text(sequence2).isNumeric())
|| !(new Text(word1).isAlphabetic()) || !(new Text(word2).isAlphabetic());
|| sequence2 == null || !(new Text(sequence2).isNumeric())
|| (w1.codePointLength() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH && w2.codePointLength() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH)
|| !w1.isAlphabetic() || !w2.isAlphabetic();
}

View file

@ -137,11 +137,6 @@ public class WordPairStore extends BaseSyncStore {
int totalPairs = 0;
for (Language language : languages) {
if (language.isSyllabary()) {
Logger.d(LOG_TAG, "Not loading word pairs for syllabary language: " + language.getId());
continue;
}
HashMap<WordPair, WordPair> wordPairs = pairs.get(language.getId());
if (wordPairs == null) {
wordPairs = new HashMap<>();

View file

@ -236,7 +236,7 @@ public class DictionaryLoader {
private int importLetters(Language language) throws InvalidLanguageCharactersException {
if (language.isSyllabary()) {
if (language.isTranscribed()) {
return 0;
}

View file

@ -26,7 +26,6 @@ import io.github.sspanak.tt9.util.Timer;
public class WordStore extends BaseSyncStore {
public static final String FILTER_EXACT_MATCHES_ONLY = "__exact__";
private final String LOG_TAG = "sqlite.WordStore";
private final ReadOps readOps;
@ -60,10 +59,10 @@ public class WordStore extends BaseSyncStore {
/**
* Loads words matching and similar to a given digit sequence
* For example: "7655" -> "roll" (exact match), but also: "rolled", "roller", "rolling", ...
* and other similar. When "wordFilter" is set to FILTER_EXACT_MATCHES_ONLY, the word list is
* constrained only to the words with length equal to the digit sequence length (exact matches).
* and other similar. When "onlyExactSequence" is TRUE, the word list is constrained only to
* the words with length equal to the digit sequence length (exact matches).
*/
public ArrayList<String> getMany(@NonNull CancellationSignal cancel, Language language, String sequence, String wordFilter, int minimumWords, int maximumWords) {
public ArrayList<String> getMany(@NonNull CancellationSignal cancel, Language language, String sequence, boolean onlyExactSequence, String wordFilter, int minimumWords, int maximumWords) {
if (!checkOrNotify()) {
return new ArrayList<>();
}
@ -83,11 +82,11 @@ public class WordStore extends BaseSyncStore {
long longPositionsTime = Timer.stop("cache_long_positions");
final int minWords = Math.max(minimumWords, 0);
final int maxWords = Math.max(maximumWords, minWords);
final int maxWords = maximumWords >= 0 ? Math.max(maximumWords, minWords) : maximumWords;
final String filter = wordFilter == null ? "" : wordFilter;
Timer.start("get_positions");
String positions = readOps.getSimilarWordPositions(sqlite.getDb(), cancel, language, sequence, filter, minWords);
String positions = readOps.getSimilarWordPositions(sqlite.getDb(), cancel, language, sequence, onlyExactSequence, filter, minWords);
long positionsTime = Timer.stop("get_positions");
Timer.start("get_words");

View file

@ -88,7 +88,7 @@ abstract public class CommandHandler extends TextEditingHandler {
return;
}
if (mLanguage.isSyllabary()) {
if (mLanguage.isTranscribed()) {
UI.toastShortSingle(this, R.string.function_add_word_not_available);
return;
}

View file

@ -36,7 +36,12 @@ public abstract class HotkeyHandler extends CommandHandler {
suggestionOps.cancelDelayedAccept();
if (!suggestionOps.isEmpty()) {
onAcceptSuggestionManually(suggestionOps.acceptCurrent(), KeyEvent.KEYCODE_ENTER);
if (mInputMode.shouldReplacePreviousSuggestion()) {
mInputMode.onReplaceSuggestion(suggestionOps.getCurrent());
} else {
onAcceptSuggestionManually(suggestionOps.acceptCurrent(), KeyEvent.KEYCODE_ENTER);
}
return true;
}
@ -192,7 +197,7 @@ public abstract class HotkeyHandler extends CommandHandler {
public boolean onKeyFilterClear(boolean validateOnly) {
if (suggestionOps.isEmpty() || mLanguage.isSyllabary()) {
if (suggestionOps.isEmpty() || !mInputMode.supportsFiltering()) {
return false;
}
@ -208,12 +213,13 @@ public abstract class HotkeyHandler extends CommandHandler {
// References:
// - https://github.com/sspanak/tt9/issues/698#issuecomment-2600441061
// - https://github.com/sspanak/tt9/issues/418
boolean isFilteringOn = mInputMode.isStemFilterFuzzy() || (mInputMode.getSequenceLength() != mInputMode.getWordStem().length());
int stemLength = mInputMode.getWordStem().length();
boolean isFilteringOn = mInputMode.isStemFilterFuzzy() || (stemLength > 0 && mInputMode.getSequenceLength() != stemLength);
if (mInputMode.clearWordStem() && isFilteringOn) {
mInputMode
.setOnSuggestionsUpdated(this::handleSuggestions)
.loadSuggestions(suggestionOps.getCurrent(mInputMode.getSequenceLength()));
.loadSuggestions(suggestionOps.getCurrent(mLanguage, mInputMode.getSequenceLength()));
return true;
}
@ -229,7 +235,7 @@ public abstract class HotkeyHandler extends CommandHandler {
return false;
}
if (mLanguage.isSyllabary()) {
if (!mInputMode.supportsFiltering()) {
UI.toastShortSingle(this, R.string.function_filter_suggestions_not_available);
return true; // prevent the default key action to acknowledge we have processed the event
}
@ -244,7 +250,7 @@ public abstract class HotkeyHandler extends CommandHandler {
if (repeat && !suggestionOps.get(1).isEmpty()) {
filter = suggestionOps.get(1);
} else {
filter = suggestionOps.getCurrent(mInputMode.getSequenceLength());
filter = suggestionOps.getCurrent(mLanguage, mInputMode.getSequenceLength());
}
if (filter.isEmpty()) {
@ -289,10 +295,11 @@ public abstract class HotkeyHandler extends CommandHandler {
detectRTL();
// for languages that do not have ABC or Predictive, make sure we remain in valid state
if (!mInputMode.changeLanguage(mLanguage)) {
if (mInputMode.changeLanguage(mLanguage)) {
mInputMode.clearWordStem();
} else {
mInputMode = InputMode.getInstance(settings, mLanguage, inputType, textField, determineInputModeId());
}
mInputMode.clearWordStem();
getSuggestions(null);
statusBar.setText(mInputMode);

View file

@ -64,11 +64,21 @@ abstract public class MainViewHandler extends HotkeyHandler {
}
public boolean isAddingWordsSupported() {
return mLanguage == null || !mLanguage.isTranscribed();
}
public boolean isDragResizeOn() {
return dragResize;
}
public boolean isFilteringSupported() {
return mInputMode.supportsFiltering();
}
public boolean isInputLimited() {
return inputType.isLimited();
}
@ -114,13 +124,8 @@ abstract public class MainViewHandler extends HotkeyHandler {
}
public boolean notLanguageSyllabary() {
return mLanguage == null || !mLanguage.isSyllabary();
}
public String getABCString() {
return mLanguage == null || mLanguage.isSyllabary() ? "ABC" : mLanguage.getAbcString().toUpperCase(mLanguage.getLocale());
return mLanguage == null ? "ABC" : mLanguage.getAbcString().toUpperCase(mLanguage.getLocale());
}

View file

@ -21,7 +21,6 @@ import io.github.sspanak.tt9.ime.modes.InputMode;
import io.github.sspanak.tt9.ime.modes.InputModeKind;
import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.languages.LanguageCollection;
import io.github.sspanak.tt9.languages.LanguageKind;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
import io.github.sspanak.tt9.ui.UI;
import io.github.sspanak.tt9.util.Text;
@ -294,7 +293,7 @@ public abstract class TypingHandler extends KeyPadHandler {
}
allowedInputModes = new ArrayList<>(inputType.determineInputModes(getApplicationContext()));
if (LanguageKind.isKorean(mLanguage)) {
if (!mLanguage.hasABC()) {
allowedInputModes.remove((Integer) InputMode.MODE_ABC);
} else if (!settings.getPredictiveMode()) {
allowedInputModes.remove((Integer) InputMode.MODE_PREDICTIVE);
@ -381,7 +380,7 @@ public abstract class TypingHandler extends KeyPadHandler {
// last key press makes up a compound word like: (it)'s, (I)'ve, l'(oiseau), or it is
// just the end of a sentence, like: "word." or "another?"
if (mInputMode.shouldAcceptPreviousSuggestion(suggestionOps.getCurrent())) {
String lastWord = suggestionOps.acceptPrevious(mInputMode.getSequenceLength());
String lastWord = suggestionOps.acceptPrevious(mLanguage, mInputMode.getSequenceLength());
onAcceptSuggestionAutomatically(lastWord);
}
@ -405,7 +404,7 @@ public abstract class TypingHandler extends KeyPadHandler {
// Otherwise, put the first suggestion in the text field,
// but cut it off to the length of the sequence (how many keys were pressed),
// for a more intuitive experience.
String trimmedWord = suggestionOps.getCurrent(mInputMode.getSequenceLength());
String trimmedWord = suggestionOps.getCurrent(mLanguage, mInputMode.getSequenceLength());
appHacks.setComposingTextWithHighlightedStem(trimmedWord, mInputMode);
forceShowWindow();

View file

@ -8,10 +8,12 @@ import androidx.annotation.Nullable;
import java.util.ArrayList;
import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
import io.github.sspanak.tt9.ui.main.ResizableMainView;
import io.github.sspanak.tt9.ui.tray.SuggestionsBar;
import io.github.sspanak.tt9.util.ConsumerCompat;
import io.github.sspanak.tt9.util.Text;
public class SuggestionOps {
@NonNull private final Handler delayedAcceptHandler;
@ -110,12 +112,12 @@ public class SuggestionOps {
}
public String acceptPrevious(int sequenceLength) {
public String acceptPrevious(Language language, int sequenceLength) {
if (sequenceLength <= 0) {
set(null);
}
String lastComposingText = getCurrent(sequenceLength - 1);
String lastComposingText = getCurrent(language, sequenceLength - 1);
commitCurrent(false);
return lastComposingText;
}
@ -143,17 +145,17 @@ public class SuggestionOps {
}
public String getCurrent(int maxLength) {
public String getCurrent(Language language, int maxLength) {
if (maxLength == 0 || isEmpty()) {
return "";
}
String text = getCurrent();
if (maxLength > 0 && !text.isEmpty() && text.length() > maxLength) {
text = text.substring(0, maxLength);
Text text = new Text(language, getCurrent());
if (maxLength > 0 && !text.isEmpty() && text.codePointLength() > maxLength) {
return text.substringCodePoints(0, maxLength);
}
return text;
return text.toString();
}

View file

@ -42,6 +42,7 @@ abstract public class InputMode {
protected InputMode(SettingsStore settings, InputType inputType) {
allowedTextCases.add(CASE_LOWER);
isEmailMode = inputType != null && inputType.isEmail() && !inputType.isDefectiveText();
this.settings = settings;
}
@ -50,7 +51,9 @@ abstract public class InputMode {
public static InputMode getInstance(SettingsStore settings, @Nullable Language language, InputType inputType, TextField textField, int mode) {
switch (mode) {
case MODE_PREDICTIVE:
return (LanguageKind.isKorean(language) ? new ModeCheonjiin(settings, inputType, textField) : new ModeWords(settings, language, inputType, textField));
if (LanguageKind.isChinese(language)) return new ModePinyin(settings, language, inputType, textField);
if (LanguageKind.isKorean(language)) return new ModeCheonjiin(settings, inputType, textField);
return new ModeWords(settings, language, inputType, textField);
case MODE_ABC:
return new ModeABC(settings, language, inputType);
case MODE_PASSTHROUGH:
@ -69,6 +72,7 @@ abstract public class InputMode {
// Suggestions
public void onAcceptSuggestion(@NonNull String word) { onAcceptSuggestion(word, false); }
public void onAcceptSuggestion(@NonNull String word, boolean preserveWordList) {}
public void onReplaceSuggestion(@NonNull String word) {}
/**
* loadSuggestions
@ -123,6 +127,7 @@ abstract public class InputMode {
// Interaction with the IME. Return "true" if it should perform the respective action.
public boolean shouldAcceptPreviousSuggestion(String unacceptedText) { return false; }
public boolean shouldAcceptPreviousSuggestion(int nextKey, boolean hold) { return false; }
public boolean shouldReplacePreviousSuggestion() { return false; }
public boolean shouldAddTrailingSpace(boolean isWordAcceptedManually, int nextKey) { return false; }
public boolean shouldAddPrecedingSpace() { return false; }
public boolean shouldDeletePrecedingSpace() { return false; }
@ -251,4 +256,5 @@ abstract public class InputMode {
public boolean isStemFilterFuzzy() { return false; }
public String getWordStem() { return ""; }
public boolean setWordStem(String stem, boolean exact) { return false; }
public boolean supportsFiltering() { return false; }
}

View file

@ -92,7 +92,7 @@ class ModeABC extends InputMode {
@Override
public boolean changeLanguage(@Nullable Language newLanguage) {
if (newLanguage != null && newLanguage.isSyllabary()) {
if (newLanguage != null && newLanguage.isTranscribed()) {
return false;
}

View file

@ -48,13 +48,15 @@ class ModeCheonjiin extends InputMode {
SPECIAL_CHAR_SEQUENCE_PREFIX = "11";
super.setLanguage(LanguageCollection.getLanguage(LanguageKind.KOREAN));
autoSpace = new AutoSpace(settings).setLanguage(language);
digitSequence = "";
allowedTextCases.add(CASE_LOWER);
this.inputType = inputType;
this.textField = textField;
initPredictions();
setLanguage(LanguageCollection.getLanguage(LanguageKind.KOREAN));
setSpecialCharacterConstants();
if (isEmailMode) {
@ -64,8 +66,6 @@ class ModeCheonjiin extends InputMode {
} else {
setCustomSpecialCharacters();
}
autoSpace = new AutoSpace(settings).setLanguage(language);
}

View file

@ -0,0 +1,210 @@
package io.github.sspanak.tt9.ime.modes;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import io.github.sspanak.tt9.hacks.InputType;
import io.github.sspanak.tt9.ime.helpers.TextField;
import io.github.sspanak.tt9.ime.modes.predictions.IdeogramPredictions;
import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.languages.LanguageKind;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
import io.github.sspanak.tt9.util.Logger;
import io.github.sspanak.tt9.util.Text;
import io.github.sspanak.tt9.util.TextTools;
public class ModeIdeograms extends ModeWords {
private static final String LOG_TAG = ModeIdeograms.class.getSimpleName();
private boolean isFiltering = false;
protected ModeIdeograms(SettingsStore settings, Language lang, InputType inputType, TextField textField) {
super(settings, lang, inputType, textField);
}
@Override public void determineNextWordTextCase() {}
@Override protected String adjustSuggestionTextCase(String word, int newTextCase) { return word; }
@Override
protected void initPredictions() {
predictions = new IdeogramPredictions(settings, textField);
predictions.setWordsChangedHandler(this::onPredictions);
}
@Override
public boolean changeLanguage(@Nullable Language newLanguage) {
if (newLanguage != null && !newLanguage.isTranscribed() || LanguageKind.isKorean(newLanguage)) {
return false;
}
setLanguage(newLanguage);
return true;
}
@Override
public void reset() {
super.reset();
isFiltering = false;
}
@Override
protected void onPredictions() {
if (language.hasTranscriptionsEmbedded()) {
if (isFiltering) {
((IdeogramPredictions) predictions).stripNativeWords();
} else {
((IdeogramPredictions) predictions).stripTranscriptions();
}
}
if (!isFiltering) {
// We can reorder by pairs only after stripping the transcriptions, if any.
// Otherwise, the input field words will not match with any pair.
((IdeogramPredictions) predictions).orderByPairs();
}
super.onPredictions();
}
@Override
public void onAcceptSuggestion(@NonNull String currentWord, boolean preserveWords) {
if (currentWord.isEmpty() || new Text(currentWord).isNumeric()) {
reset();
Logger.i(LOG_TAG, "Current word is empty or numeric. Nothing to accept.");
return;
}
if (isFiltering) {
isFiltering = false;
stem = currentWord;
loadSuggestions("");
return;
}
try {
String latinWord = ((IdeogramPredictions) predictions).getTranscription(currentWord);
String digits = language.getDigitSequenceForWord(latinWord);
((IdeogramPredictions) predictions).onAcceptTranscription(currentWord, latinWord, digits);
} catch (Exception e) {
Logger.e(LOG_TAG, "Failed incrementing priority of word: '" + currentWord + "'. " + e.getMessage());
}
int len = digitSequence.length();
if (preserveWords && len >= 2) {
digitSequence = digitSequence.substring(len - 1);
loadSuggestions("");
} else {
reset();
}
}
@Override
public boolean shouldAcceptPreviousSuggestion(String s) {
return
!digitSequence.isEmpty()
&& predictions.noDbWords()
&& !digitSequence.equals(EMOJI_SEQUENCE)
&& !digitSequence.equals(PUNCTUATION_SEQUENCE)
&& !digitSequence.equals(SPECIAL_CHAR_SEQUENCE);
}
@Override
public boolean shouldAcceptPreviousSuggestion(int nextKey, boolean hold) {
if (digitSequence.isEmpty()) {
return false;
}
if (super.shouldAcceptPreviousSuggestion(nextKey, hold)) {
return true;
}
String nextSequence = digitSequence + (char)(nextKey + '0');
return
TextTools.containsOtherThan1(nextSequence)
&& (
nextSequence.endsWith(EMOJI_SEQUENCE) || nextSequence.startsWith(EMOJI_SEQUENCE) ||
nextSequence.endsWith(PUNCTUATION_SEQUENCE) || nextSequence.startsWith(PUNCTUATION_SEQUENCE)
);
}
/**
* When we want to filter by a Latin transcription, we must have discarded it from the text field,
* then give it to this method. It will filter the suggestions and show only the ones that match
* the given Latin word.
*/
@Override
public void onReplaceSuggestion(@NonNull String word) {
if (word.isEmpty() || new Text(word).isNumeric()) {
reset();
Logger.i(LOG_TAG, "Can not replace an empty or numeric word.");
return;
}
isFiltering = false;
stem = word;
loadSuggestions("");
}
/**
* This should be called before accepting a word. It says whether we should discard the current
* word. Discarding it means we want to erase it from the text field and instead display a
* filtered list of suggestions that matches the word. If we don't discard it, usually we should
* accept it.
*/
@Override
public boolean shouldReplacePreviousSuggestion() {
return isFiltering;
}
@Override
public boolean setWordStem(String newStem, boolean fromScrolling) {
if (!supportsFiltering()) {
return false;
}
if (!fromScrolling) {
isFiltering = true;
} else if (isFiltering) {
stem = newStem;
}
return true;
}
@Override
public boolean clearWordStem() {
if (!supportsFiltering()) {
return false;
}
isFiltering = false;
stem = "";
return true;
}
@Override
public boolean supportsFiltering() {
return language.hasTranscriptionsEmbedded();
}
@Override
public boolean isStemFilterFuzzy() {
return isFiltering;
}
}

View file

@ -10,7 +10,6 @@ class ModePassthrough extends InputMode {
protected ModePassthrough(SettingsStore settings, InputType inputType) {
super(settings, inputType);
reset();
allowedTextCases.add(CASE_LOWER);
}
@Override public int getId() { return MODE_PASSTHROUGH; }

View file

@ -0,0 +1,57 @@
package io.github.sspanak.tt9.ime.modes;
import io.github.sspanak.tt9.hacks.InputType;
import io.github.sspanak.tt9.ime.helpers.TextField;
import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
import io.github.sspanak.tt9.util.chars.Characters;
public class ModePinyin extends ModeIdeograms {
boolean ignoreNextSpace = false;
protected ModePinyin(SettingsStore settings, Language lang, InputType inputType, TextField textField) {
super(settings, lang, inputType, textField);
}
@Override
protected void onNumberPress(int number) {
if (ignoreNextSpace && number == SPECIAL_CHAR_SEQUENCE.charAt(0) - '0') {
ignoreNextSpace = false;
return;
}
ignoreNextSpace = false;
super.onNumberPress(number);
}
@Override
protected void onNumberHold(int number) {
ignoreNextSpace = false;
super.onNumberHold(number);
}
@Override
public boolean shouldAcceptPreviousSuggestion(int nextKey, boolean hold) {
// In East Asian languages, 0-key must accept the current word, or type a space when there is no word.
if (!digitSequence.isEmpty() && !digitSequence.endsWith(SPECIAL_CHAR_SEQUENCE) && nextKey == SPECIAL_CHAR_SEQUENCE.charAt(0) - '0') {
ignoreNextSpace = true;
}
return super.shouldAcceptPreviousSuggestion(nextKey, hold);
}
@Override
protected String getPreferredChar() {
final String preferredChar = settings.getDoubleZeroChar();
return switch (preferredChar) {
case "." -> Characters.ZH_FULL_STOP;
case "," -> Characters.ZH_COMMA_LIST;
default -> preferredChar;
};
}
}

View file

@ -27,7 +27,7 @@ class ModeWords extends ModeCheonjiin {
// stem filter
private boolean isStemFuzzy = false;
private String stem = "";
protected String stem = "";
// text analysis tools
private final AutoTextCase autoTextCase;
@ -110,12 +110,18 @@ class ModeWords extends ModeCheonjiin {
@Override
public boolean changeLanguage(@Nullable Language newLanguage) {
if (newLanguage != null && newLanguage.isSyllabary()) {
if (newLanguage != null && newLanguage.isTranscribed()) {
return false;
}
super.setLanguage(newLanguage);
setLanguage(newLanguage);
return true;
}
@Override
protected void setLanguage(@Nullable Language newLanguage) {
super.setLanguage(newLanguage);
autoSpace.setLanguage(language);
allowedTextCases.clear();
@ -124,14 +130,11 @@ class ModeWords extends ModeCheonjiin {
allowedTextCases.add(CASE_CAPITALIZE);
allowedTextCases.add(CASE_UPPER);
}
return true;
}
@Override
public boolean recompose(String word) {
if (!language.hasSpaceBetweenWords() || language.isSyllabary()) {
if (!language.hasSpaceBetweenWords() || language.isTranscribed()) {
return false;
}
@ -258,6 +261,12 @@ class ModeWords extends ModeCheonjiin {
}
@Override
public boolean supportsFiltering() {
return true;
}
/**
* loadSuggestions
* Loads the possible list of suggestions for the current digitSequence. "currentWord" is used
@ -284,7 +293,7 @@ class ModeWords extends ModeCheonjiin {
private boolean loadPreferredChar() {
if (digitSequence.startsWith(NaturalLanguage.PREFERRED_CHAR_SEQUENCE)) {
suggestions.clear();
suggestions.add(settings.getDoubleZeroChar());
suggestions.add(getPreferredChar());
return true;
}
@ -292,6 +301,11 @@ class ModeWords extends ModeCheonjiin {
}
protected String getPreferredChar() {
return settings.getDoubleZeroChar();
}
/**
* onAcceptSuggestion
* Bring this word up in the suggestions list next time and if necessary preserves the suggestion list

View file

@ -39,7 +39,7 @@ public class AutoSpace {
public AutoSpace setLanguage(Language lang) {
language = language == null ? new NullLanguage() : lang;
isLanguageFrench = LanguageKind.isFrench(lang);
isLanguageWithAlphabet = !language.isSyllabary();
isLanguageWithAlphabet = !language.isTranscribed();
isLanguageWithSpaceBetweenWords = language.hasSpaceBetweenWords();
return this;
}

View file

@ -0,0 +1,200 @@
package io.github.sspanak.tt9.ime.modes.predictions;
import androidx.annotation.NonNull;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import io.github.sspanak.tt9.db.DataStore;
import io.github.sspanak.tt9.ime.helpers.TextField;
import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
import io.github.sspanak.tt9.util.TextTools;
public class IdeogramPredictions extends WordPredictions {
private boolean isTranscriptionFilterAllowed = false;
private String lastTypedWord = "";
@NonNull protected ArrayList<String> transcriptions = new ArrayList<>();
public IdeogramPredictions(SettingsStore settings, TextField textField) {
super(settings, textField);
minWords = 1;
maxWords = -1;
onlyExactMatches = true;
}
@Override
public Predictions setLanguage(@NonNull Language language) {
isTranscriptionFilterAllowed = language.hasTranscriptionsEmbedded();
return super.setLanguage(language);
}
@Override
public void load() {
transcriptions.clear();
super.load();
}
@Override
protected void onDbWords(ArrayList<String> dbWords, boolean isRetryAllowed) {
transcriptions = onlyExactMatches ? reduceFuzzyMatches(dbWords, SettingsStore.SUGGESTIONS_MAX) : dbWords;
words = new ArrayList<>(transcriptions);
areThereDbWords = !words.isEmpty();
onWordsChanged.run();
}
public void onAcceptTranscription(String word, String transcription, String sequence) {
super.onAccept(transcription + word, sequence);
}
@Override
@NonNull
protected String getWordBeforeCursor(@NonNull String currentWord) {
int currentWordLength = currentWord.length();
int lastWordLength = lastTypedWord.length();
int requiredTextLength = currentWordLength + lastWordLength;
String text = textField.getStringBeforeCursor(requiredTextLength);
// Logger.d("LOG_TAG", "====+> previous string: " + text);
return lastWordLength < text.length() ? text.substring(0, lastWordLength) : "";
}
/**
* Tries to do a best guess what is the previous word and pairs it with the incoming one. Guessing
* is because East Asian languages do not have spaces between words, so we try to match the
* last typed or just give up.
*/
@Override
protected void pairWithPreviousWord(@NonNull String word, @NonNull String sequence) {
if (language.hasSpaceBetweenWords()) {
super.pairWithPreviousWord(word, sequence);
return;
}
if (!settings.getPredictWordPairs() || sequence.length() != digitSequence.length()) {
// Logger.d("LOG_TAG", "====+> sequence length mismatch: " + sequence.length() + " != " + digitSequence.length());
return;
}
int latinEnd = TextTools.lastIndexOfLatin(word);
String nativeWord = latinEnd < 0 || latinEnd >= word.length() ? word : word.substring(latinEnd + 1);
if (lastTypedWord.isEmpty() || (!words.isEmpty() && nativeWord.equals(words.get(0)))) {
lastTypedWord = nativeWord;
// Logger.d("LOG_TAG", "====+> Will not pair the first word. native word: " + nativeWord + " first suggestion: " + words.get(0));
// if (lastTypedWord.isEmpty()) {
// Logger.d("LOG_TAG", "====+> No previous word to pair with: " + lastTypedWord);
// }
return;
}
String previousWord = getWordBeforeCursor(nativeWord);
if (previousWord.equals(lastTypedWord)) {
// Logger.d("LOG_TAG", "====+> Pairing words: " + previousWord + " + " + nativeWord);
DataStore.addWordPair(language, previousWord, nativeWord, sequence);
// } else {
// Logger.d("LOG_TAG", "===> Last word mismatch: " + previousWord + " != " + lastTypedWord + ". Not pairing.");
}
lastTypedWord = nativeWord;
}
/**
* Keeps all exact matches and the first n fuzzy matches. Unlike Latin- or Cyrillic-based languages,
* ideograms do not "start with" a sequence of characters, so fuzzy matches have little value.
* Just keep some of them, in case there are no exact matches.
*/
@NonNull
public ArrayList<String> reduceFuzzyMatches(ArrayList<String> allWords, int maxWords) {
if (allWords.isEmpty()) {
return allWords;
}
ArrayList<String> shortWords = new ArrayList<>();
final int MAX_LENGTH = Math.max(digitSequence.length() + 1, allWords.get(0).length());
for (int i = 0, longWords = 0, end = allWords.size(); i < end; i++) {
String word = allWords.get(i);
int trueLength = isTranscriptionFilterAllowed ? TextTools.lastIndexOfLatin(word) : word.length();
if (trueLength < MAX_LENGTH) {
shortWords.add(word);
}
if (trueLength >= MAX_LENGTH && longWords <= maxWords) {
longWords++;
shortWords.add(word);
}
}
return shortWords;
}
/**
* Removes the native words and keeps only the unique transcriptions. Directly modifies the words
* list, but the original is preserved in this.transcriptions.
* Example operation: [SHIWU食物, SHIWU事物, SHIWU事务, SHIZU十足] -> [SHIWU, SHIZU]
*/
public void stripNativeWords() {
HashSet<String> uniqueTranscriptions = new HashSet<>();
for (int i = 0; i < transcriptions.size(); i++) {
String transcription = transcriptions.get(i);
int firstNative = TextTools.lastIndexOfLatin(transcription) + 1;
uniqueTranscriptions.add(
firstNative < 1 || firstNative >= transcription.length() ? transcription : transcription.substring(0, firstNative)
);
}
words.clear();
words.addAll(uniqueTranscriptions);
Collections.sort(words);
}
/**
* Removes the Latin transcriptions from native words. Directly modifies the words list, but the
* original is preserved in this.transcriptions.
* Example operation: [SHIWU食物, SHIZU十足] -> [食物, 十足]
*/
public void stripTranscriptions() {
words.clear();
for (int i = 0; i < transcriptions.size(); i++) {
String transcription = transcriptions.get(i);
int firstNative = TextTools.lastIndexOfLatin(transcription) + 1;
words.add(firstNative >= transcription.length() ? transcription : transcription.substring(firstNative));
}
}
/**
* Similar to "stripNativeWords()", but finds and returns the transcription of the given word.
* Returns an empty string if the word is not in the current suggestion list.
*/
@NonNull
public String getTranscription(@NonNull String word) {
for (String w : transcriptions) {
if (w.endsWith(word)) {
return w.replace(word, "");
}
}
return "";
}
public void orderByPairs() {
words = rearrangeByPairFrequency(words);
}
}

View file

@ -5,7 +5,6 @@ import androidx.annotation.NonNull;
import java.util.ArrayList;
import io.github.sspanak.tt9.db.DataStore;
import io.github.sspanak.tt9.db.words.WordStore;
import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.languages.NullLanguage;
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
@ -41,7 +40,7 @@ abstract public class Predictions {
}
public Predictions setLanguage(Language language) {
public Predictions setLanguage(@NonNull Language language) {
this.language = language;
return this;
}
@ -110,7 +109,8 @@ abstract public class Predictions {
(dbWords) -> onDbWords(dbWords, isRetryAllowed()),
language,
digitSequence,
onlyExactMatches ? WordStore.FILTER_EXACT_MATCHES_ONLY : stem,
onlyExactMatches,
stem,
minWords,
maxWords
);

View file

@ -1,5 +1,7 @@
package io.github.sspanak.tt9.ime.modes.predictions;
import androidx.annotation.NonNull;
import java.util.ArrayList;
import io.github.sspanak.tt9.db.DataStore;
@ -11,7 +13,7 @@ import io.github.sspanak.tt9.util.TextTools;
import io.github.sspanak.tt9.util.chars.Characters;
public class WordPredictions extends Predictions {
private final TextField textField;
protected final TextField textField;
private LocaleWordsSorter localeWordsSorter;
private String inputWord;
@ -30,7 +32,7 @@ public class WordPredictions extends Predictions {
@Override
public Predictions setLanguage(Language language) {
public Predictions setLanguage(@NonNull Language language) {
super.setLanguage(language);
localeWordsSorter = new LocaleWordsSorter(language);
@ -67,6 +69,7 @@ public class WordPredictions extends Predictions {
},
language,
digitSequence.substring(1),
onlyExactMatches,
stem.length() > 1 ? stem.substring(1) : "",
SettingsStore.SUGGESTIONS_MIN,
SettingsStore.SUGGESTIONS_MAX
@ -262,14 +265,16 @@ public class WordPredictions extends Predictions {
return;
}
// Second condition note: If the accepted word is longer than the sequence, it is some different word,
// not a textonym of the fist suggestion. We don't need to store it.
if (settings.getPredictWordPairs() && word.length() == digitSequence.length()) {
DataStore.addWordPair(language, textField.getWordBeforeCursor(language, 1, true), word, sequence);
}
pairWithPreviousWord(word, sequence);
makeTopWord(word, sequence);
}
// Update the priority only if the user has selected the word, not when we have enforced it
// because it is in a popular word pair.
/**
* Update the priority only if the user has selected the word, not when we have enforced it
* because it is in a popular word pair.
*/
protected void makeTopWord(String word, String sequence) {
if (!word.equals(lastEnforcedTopWord)) {
DataStore.makeTopWord(language, word, sequence);
}
@ -283,7 +288,7 @@ public class WordPredictions extends Predictions {
* "onAccept()", we have remembered the "am" comes after "I" and "an" comes after "am", we will
* not suggest the textonyms "am" or "an" twice (depending on which has the highest frequency).
*/
private ArrayList<String> rearrangeByPairFrequency(ArrayList<String> words) {
protected ArrayList<String> rearrangeByPairFrequency(ArrayList<String> words) {
lastEnforcedTopWord = "";
if (!settings.getPredictWordPairs() || words.size() < 2) {
@ -291,7 +296,7 @@ public class WordPredictions extends Predictions {
}
ArrayList<String> rearrangedWords = new ArrayList<>();
String penultimateWord = textField.getWordBeforeCursor(language, 1, true);
String penultimateWord = getWordBeforeCursor(words.get(0));
String pairWord = DataStore.getWord2(language, penultimateWord, digitSequence);
int morePopularIndex = TextTools.indexOfIgnoreCase(words, pairWord);
@ -310,4 +315,26 @@ public class WordPredictions extends Predictions {
return rearrangedWords;
}
/**
* Pairs the given word and its digit sequence to the last word in the text field.
* Second condition note: If the accepted word is longer than the sequence, it is some different word,
* not a textonym of the fist suggestion. We don't need to store it.
*/
protected void pairWithPreviousWord(@NonNull String word, @NonNull String sequence) {
if (settings.getPredictWordPairs() && sequence.length() == digitSequence.length()) {
DataStore.addWordPair(language, getWordBeforeCursor(word), word, sequence);
}
}
/**
* Returns the last word in the text field. The way of finding it depends on the language, so
* we have a separate method for that.
*/
@NonNull
protected String getWordBeforeCursor(@NonNull String currentWord) {
return textField.getWordBeforeCursor(language, 1, true);
}
}

View file

@ -13,11 +13,13 @@ abstract public class Language {
protected String code;
protected String currency;
protected String dictionaryFile;
protected Locale locale = Locale.ROOT;
protected String name;
protected boolean hasABC = true;
protected boolean hasSpaceBetweenWords = true;
protected boolean hasUpperCase = true;
protected boolean isSyllabary = false;
protected boolean hasTranscriptionsEmbedded = false;
protected boolean isTranscribed = false;
protected Locale locale = Locale.ROOT;
protected String name;
public int getId() {
@ -63,6 +65,10 @@ abstract public class Language {
return name;
}
final public boolean hasABC() {
return hasABC;
}
final public boolean hasSpaceBetweenWords() {
return hasSpaceBetweenWords;
}
@ -71,8 +77,12 @@ abstract public class Language {
return hasUpperCase;
}
final public boolean isSyllabary() {
return isSyllabary;
final public boolean hasTranscriptionsEmbedded() {
return hasTranscriptionsEmbedded;
}
final public boolean isTranscribed() {
return isTranscribed;
}
@NonNull

View file

@ -24,9 +24,11 @@ public class LanguageDefinition {
public String abcString = "";
public String currency = "";
public String dictionaryFile = "";
public boolean filterBySounds = false;
public boolean hasABC = true;
public boolean hasSpaceBetweenWords = true;
public boolean hasUpperCase = true;
public boolean isSyllabary = false;
public boolean isTranscribed = false;
public final ArrayList<ArrayList<String>> layout = new ArrayList<>();
public String locale = "";
public String name = "";
@ -134,6 +136,12 @@ public class LanguageDefinition {
case "dictionaryFile":
dictionaryFile = value.replaceFirst("\\.\\w+$", "." + BuildConfig.DICTIONARY_EXTENSION);
return;
case "filterBySounds":
filterBySounds = parseYamlBoolean(value);
return;
case "hasABC":
hasABC = parseYamlBoolean(value);
return;
case "hasSpaceBetweenWords":
hasSpaceBetweenWords = parseYamlBoolean(value);
return;
@ -141,7 +149,7 @@ public class LanguageDefinition {
hasUpperCase = parseYamlBoolean(value);
return;
case "sounds":
isSyllabary = true;
isTranscribed = true;
return;
case "locale":
locale = value;

View file

@ -11,6 +11,7 @@ public class LanguageKind {
public static boolean isArabic(Language language) { return language != null && language.getId() == 502337; }
public static boolean isEnglish(Language language) { return language != null && language.getLocale().equals(Locale.ENGLISH); }
public static boolean isChinese(Language language) { return language != null && language.getId() == 462106; }
public static boolean isFarsi(Language language) { return language != null && language.getId() == 599078; }
public static boolean isFrench(Language language) { return language != null && language.getId() == 596550; }
public static boolean isGreek(Language language) { return language != null && language.getId() == 597381; }

View file

@ -10,11 +10,10 @@ import java.util.Map;
import io.github.sspanak.tt9.languages.exceptions.InvalidLanguageCharactersException;
import io.github.sspanak.tt9.util.Text;
import io.github.sspanak.tt9.util.TextTools;
import io.github.sspanak.tt9.util.chars.Characters;
public class NaturalLanguage extends Language implements Comparable<NaturalLanguage> {
public class NaturalLanguage extends TranscribedLanguage implements Comparable<NaturalLanguage> {
final public static String SPECIAL_CHAR_KEY = "0";
final public static String PUNCTUATION_KEY = "1";
final public static String PREFERRED_CHAR_SEQUENCE = "00";
@ -34,9 +33,11 @@ public class NaturalLanguage extends Language implements Comparable<NaturalLangu
lang.abcString = definition.abcString.isEmpty() ? null : definition.abcString;
lang.currency = definition.currency;
lang.dictionaryFile = definition.getDictionaryFile();
lang.hasABC = definition.hasABC;
lang.hasSpaceBetweenWords = definition.hasSpaceBetweenWords;
lang.hasUpperCase = definition.hasUpperCase;
lang.isSyllabary = definition.isSyllabary;
lang.hasTranscriptionsEmbedded = definition.filterBySounds;
lang.isTranscribed = definition.isTranscribed;
lang.name = definition.name.isEmpty() ? lang.name : definition.name;
lang.numerals = definition.numerals;
lang.setLocale(definition);
@ -86,6 +87,7 @@ public class NaturalLanguage extends Language implements Comparable<NaturalLangu
specialChars.put(SPECIAL_CHARS_PLACEHOLDER, Characters.Special);
specialChars.put(PUNCTUATION_PLACEHOLDER, Characters.PunctuationEnglish);
specialChars.put(PUNCTUATION_PLACEHOLDER + "_AR", Characters.PunctuationArabic);
specialChars.put(PUNCTUATION_PLACEHOLDER + "_ZH", Characters.PunctuationChinese);
specialChars.put(PUNCTUATION_PLACEHOLDER + "_FA", Characters.PunctuationFarsi);
specialChars.put(PUNCTUATION_PLACEHOLDER + "_FR", Characters.PunctuationFrench);
specialChars.put(PUNCTUATION_PLACEHOLDER + "_DE", Characters.PunctuationGerman);
@ -257,7 +259,7 @@ public class NaturalLanguage extends Language implements Comparable<NaturalLangu
if (
word == null
|| word.isEmpty()
|| (isSyllabary && LanguageKind.isKorean(this) && TextTools.isHangul(word))
|| (super.isValidWord(word))
|| (word.length() == 1 && Character.isDigit(word.charAt(0)))
) {
return true;

View file

@ -0,0 +1,16 @@
package io.github.sspanak.tt9.languages;
import io.github.sspanak.tt9.util.TextTools;
abstract class TranscribedLanguage extends Language {
@Override
public boolean isValidWord(String word) {
if (!isTranscribed) {
return false;
}
return (LanguageKind.isKorean(this) && TextTools.isHangul(word))
|| (LanguageKind.isChinese(this) && TextTools.isChinese(word));
}
}

View file

@ -21,8 +21,8 @@ class ItemSelectZeroKeyCharacter extends ItemDropDown {
public ItemSelectZeroKeyCharacter populate() {
LinkedHashMap<String, String> items = new LinkedHashMap<>();
items.put(".", context.getString(R.string.char_dot));
items.put(",", context.getString(R.string.char_comma));
items.put(".", ".");
items.put(",", ",");
items.put("\\n", context.getString(R.string.char_newline)); // SharedPreferences return a corrupted string when using the real "\n"... :(
items.put(" ", context.getString(R.string.char_space));

View file

@ -28,7 +28,7 @@ public class SoftKeyAddWord extends BaseSoftKeyWithIcons {
@Override
public void render() {
if (tt9 != null) {
setEnabled(!tt9.isVoiceInputActive() && tt9.notLanguageSyllabary() && !tt9.isTextEditingActive());
setEnabled(!tt9.isVoiceInputActive() && tt9.isAddingWordsSupported() && !tt9.isTextEditingActive());
}
super.render();
}

View file

@ -60,7 +60,7 @@ public class SoftKeyFilter extends BaseSoftKeyWithIcons {
&& !tt9.isVoiceInputActive()
&& (
LanguageKind.isKorean(tt9.getLanguage())
|| (tt9.notLanguageSyllabary() && !tt9.isTextEditingActive())
|| (tt9.isFilteringSupported() && !tt9.isTextEditingActive())
)
);
}

View file

@ -36,7 +36,12 @@ public class SoftKeyPunctuation extends SoftKey {
@Override
protected String getTitle() {
String keyChar = getKeyChar();
return "*".equals(keyChar) ? "" : keyChar;
return switch (keyChar) {
case "*" -> "";
case Characters.ZH_QUESTION_MARK -> "?";
case Characters.ZH_EXCLAMATION_MARK -> "!";
default -> keyChar;
};
}
protected String getKeyChar() {
@ -55,6 +60,8 @@ public class SoftKeyPunctuation extends SoftKey {
if (tt9.isInputModePhone()) return "*";
if (tt9.isInputModeNumeric()) return ",";
if (LanguageKind.isChinese(tt9.getLanguage())) return Characters.ZH_EXCLAMATION_MARK;
return "!";
}
@ -63,8 +70,9 @@ public class SoftKeyPunctuation extends SoftKey {
if (tt9.isInputModePhone()) return "#";
if (tt9.isInputModeNumeric()) return ".";
if (LanguageKind.isArabic(tt9.getLanguage())) return "؟";
if (LanguageKind.isArabic(tt9.getLanguage())) return Characters.AR_QUESTION_MARK;
if (LanguageKind.isGreek(tt9.getLanguage())) return Characters.GR_QUESTION_MARK;
if (LanguageKind.isChinese(tt9.getLanguage())) return Characters.ZH_QUESTION_MARK;
return "?";
}

View file

@ -206,7 +206,7 @@ public class SuggestionsBar {
return;
}
stem = containsGenerated ? newSuggestions.get(0).substring(0, newSuggestions.get(0).length() - 1) : "";
stem = containsGenerated && newSuggestions.get(0).length() > 1 ? newSuggestions.get(0).substring(0, newSuggestions.get(0).length() - 1) : "";
// Do not modify single letter + punctuation, such as "j'" or "l'". They look better as they are.
stem = (stem.length() == 1 && newSuggestions.get(0).length() == 2 && !Character.isAlphabetic(newSuggestions.get(0).charAt(1))) ? "" : stem;

View file

@ -7,6 +7,7 @@ import java.util.Locale;
import io.github.sspanak.tt9.ime.modes.InputMode;
import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.languages.LanguageKind;
import io.github.sspanak.tt9.util.chars.Characters;
public class Text extends TextTools {
@ -172,11 +173,22 @@ public class Text extends TextTools {
}
/**
* Returns the number of regular 8-bit chars
*/
public int length() {
return text == null ? 0 : text.length();
}
/**
* Returns the number of UTF-16 chars
*/
public int codePointLength() {
return text == null ? 0 : text.codePointCount(0, text.length());
}
public int lastBoundaryIndex() {
if (text == null || text.length() < 2) {
return -1;
@ -224,6 +236,28 @@ public class Text extends TextTools {
}
/**
* A safe substring method that works with code points (UTF-16 chars), instead of 8-bit chars.
* Useful for languages with complex characters, like Chinese.
*/
public String substringCodePoints(int start, int end) {
if (text == null) {
return "";
}
if (!LanguageKind.isChinese(language)) {
return text.substring(start, end);
}
StringBuilder output = new StringBuilder();
for (int i = Math.max(start, 0), finish = Math.min(text.length(), end); i < finish; i++) {
output.append(text.charAt(i));
}
return output.toString();
}
public String subStringEndingWord(boolean keepApostrophe, boolean keepQuote) {
if (text == null) {
return "";

View file

@ -11,22 +11,23 @@ import java.util.regex.Pattern;
import io.github.sspanak.tt9.util.chars.Characters;
public class TextTools {
private static final Pattern containsOtherThan1 = Pattern.compile("[02-9]");
private static final Pattern combiningString = Pattern.compile("^\\p{M}+$");
private static final Pattern nextIsPunctuation = Pattern.compile("^\\p{Punct}");
private static final Pattern isHangul = Pattern.compile("[\u1100-\u11FF\u302E-\u302F\u3131-\u318F\u3200-\u321F\u3260-\u327E\uA960-\uA97F\uAC00-\uD7FB\uFFA0-\uFFDF]+");
private static final Pattern nextToWord = Pattern.compile("\\b$");
private static final Pattern previousIsLetter = Pattern.compile("[\\p{L}\\p{M}]$");
private static final Pattern startOfSentence = Pattern.compile("(?<!\\.)(^|[.?!؟¿¡])\\s+$");
private static final Pattern CONTAINS_OTHER_THAN_1 = Pattern.compile("[02-9]");
private static final Pattern COMBINING_STRING = Pattern.compile("^\\p{M}+$");
private static final Pattern NEXT_IS_PUNCTUATION = Pattern.compile("^\\p{Punct}");
private static final Pattern IS_CHINESE = Pattern.compile("\\p{script=Han}+");
private static final Pattern IS_HANGUL = Pattern.compile("[\u1100-\u11FF\u302E-\u302F\u3131-\u318F\u3200-\u321F\u3260-\u327E\uA960-\uA97F\uAC00-\uD7FB\uFFA0-\uFFDF]+");
private static final Pattern NEXT_TO_WORD = Pattern.compile("\\b$");
private static final Pattern PREVIOUS_IS_LETTER = Pattern.compile("[\\p{L}\\p{M}]$");
private static final Pattern START_OF_SENTENCE = Pattern.compile("(?<!\\.)(^|[.?!؟¿¡])\\s+$");
public static boolean containsOtherThan1(String str) {
return str != null && containsOtherThan1.matcher(str).find();
return str != null && CONTAINS_OTHER_THAN_1.matcher(str).find();
}
public static boolean isCombining(String str) {
return str != null && combiningString.matcher(str).find();
return str != null && COMBINING_STRING.matcher(str).find();
}
@ -45,8 +46,13 @@ public class TextTools {
}
public static boolean isChinese(String str) {
return str != null && IS_CHINESE.matcher(str).find();
}
public static boolean isHangul(String str) {
return str != null && isHangul.matcher(str).find();
return str != null && IS_HANGUL.matcher(str).find();
}
@ -61,23 +67,35 @@ public class TextTools {
}
public static int lastIndexOfLatin(String str) {
for (int i = str != null ? str.length() - 1 : -1; i >= 0; i--) {
char ch = str.charAt(i);
if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {
return i;
}
}
return -1;
}
public static boolean isStartOfSentence(String str) {
return str != null && startOfSentence.matcher(str).find();
return str != null && START_OF_SENTENCE.matcher(str).find();
}
public static boolean isNextToWord(String str) {
return str != null && nextToWord.matcher(str).find();
return str != null && NEXT_TO_WORD.matcher(str).find();
}
public static boolean nextIsPunctuation(String str) {
return str != null && !str.isEmpty() && nextIsPunctuation.matcher(str).find();
return str != null && !str.isEmpty() && NEXT_IS_PUNCTUATION.matcher(str).find();
}
public static boolean previousIsLetter(String str) {
return str != null && previousIsLetter.matcher(str).find();
return str != null && PREVIOUS_IS_LETTER.matcher(str).find();
}

View file

@ -10,7 +10,13 @@ import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.languages.LanguageKind;
class Punctuation {
public static final String AR_QUESTION_MARK = "؟";
public static final String GR_QUESTION_MARK = ";";
public static final String ZH_COMMA_LIST = "";
public static final String ZH_FULL_STOP = "";
public static final String ZH_QUESTION_MARK = "";
public static final String ZH_EXCLAMATION_MARK = "";
public static final String NEW_LINE = DeviceInfo.AT_LEAST_ANDROID_6 && new Paint().hasGlyph("") ? "" : "\\n";
public static final String ZWJ = "\u200D";
public static final String ZWJ_GRAPHIC = "ZWJ";
@ -22,7 +28,7 @@ class Punctuation {
));
final public static ArrayList<Character> CombiningPunctuationFarsi = new ArrayList<>(Arrays.asList(
'،', ZWNJ.charAt(0), '-', '\'', ':', '؛', '!', ', '.'
'،', ZWNJ.charAt(0), '-', '\'', ':', AR_QUESTION_MARK.charAt(0), '!', ', '.'
));
final private static ArrayList<Character> CombiningPunctuationGujarati = new ArrayList<>(Arrays.asList(
@ -38,7 +44,11 @@ class Punctuation {
));
final public static ArrayList<String> PunctuationArabic = new ArrayList<>(Arrays.asList(
"،", ".", "-", "(", ")", "&", "~", "`", "'", "\"", "؛", ":", "!", "؟"
"،", ".", "-", "(", ")", "&", "~", "`", "'", "\"", "؛", ":", "!", AR_QUESTION_MARK
));
final public static ArrayList<String> PunctuationChinese = new ArrayList<>(Arrays.asList(
"", ZH_COMMA_LIST, ZH_FULL_STOP, "", "", "", "", ".", "", "", "", "", "", "", "", "", "", "'", "", "", "", "", ZH_EXCLAMATION_MARK, ZH_QUESTION_MARK
));
final public static ArrayList<String> PunctuationEnglish = new ArrayList<>(Arrays.asList(

View file

@ -224,8 +224,6 @@
<string name="key_yellow">Yellow Button</string>
<string name="key_blue">Blue Button</string>
<string name="char_comma" translatable="false">,</string>
<string name="char_dot" translatable="false">.</string>
<string name="char_newline">New Line</string>
<string name="char_space">Space</string>

View file

@ -9,7 +9,7 @@ ext.validateLanguageFiles = { definitionsDir, dictionariesDir, validationDir ->
return "Too many errors! Skipping: ${definition}\n"
}
def (alphabet, sounds, isAlphabeticLanguage, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageDefintion(definition, dictionariesDir)
def (alphabet, sounds, _, isAlphabeticLanguage, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageDefintion(definition, dictionariesDir)
def languageHash = DictionaryTools.getLanguageHash(definition, dictionaryFile)
def validationFile = new File("${validationDir}/${definition.name.replace(".yml", "")}.txt")
@ -52,8 +52,11 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
int errorCount = 0
String errorMsg = ""
String abcString = ""
boolean hasABC = true
boolean hasLayout = false
boolean hasSounds = false
boolean filterBySounds = false
boolean areNumeralsValid = true
String localeString = ""
String dictionaryFileName = ""
@ -64,6 +67,8 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
&& !rawLine.startsWith("abcString")
&& !rawLine.startsWith("currency")
&& !rawLine.startsWith("dictionaryFile")
&& !rawLine.startsWith("filterBySounds")
&& !rawLine.startsWith("hasABC")
&& !rawLine.startsWith("hasSpaceBetweenWords")
&& !rawLine.startsWith("hasUpperCase")
&& !rawLine.startsWith("layout")
@ -71,6 +76,7 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
&& !rawLine.startsWith("name")
&& !rawLine.startsWith("numerals")
&& !rawLine.startsWith("sounds")
) {
def parts = rawLine.split(":")
def property = parts.length > 0 ? parts[0] : rawLine
@ -81,14 +87,29 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
String line = rawLine.replaceFirst("#[\\s\\S]+\$", "")
if (
(line.startsWith("hasUpperCase") || line.startsWith("hasSpaceBetweenWords"))
&& !line.endsWith("yes") && !line.endsWith("no")
) {
def property = line.replaceAll(":.*\$", "")
def invalidVal = line.replace("hasUpperCase:", "").trim()
errorCount++
errorMsg += "Language '${languageFile.name}' is invalid. Unrecognized '${property}' value: '${invalidVal}'. Only 'yes' and 'no' are allowed.\n"
def booleanProperties = ["hasUpperCase", "hasSpaceBetweenWords", "filterBySounds", "hasABC"]
for (String property : booleanProperties) {
String booleanError = validateBooleanProperty(line, property, languageFile.name)
if (booleanError) {
errorCount++
errorMsg += booleanError
}
}
if (line.startsWith("abcString")) {
abcString = line.replace("abcString:", "").trim()
}
if (line.startsWith("dictionaryFile")) {
dictionaryFileName = line.replace("dictionaryFile:", "").trim()
}
if (line.startsWith("filterBySounds")) {
filterBySounds = line.endsWith("yes")
}
if (line.startsWith("hasABC")) {
hasABC = line.endsWith("yes")
}
if (line.startsWith("numerals")) {
@ -107,10 +128,6 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
localeString = line.replace("locale:", "").trim()
}
if (line.startsWith("dictionaryFile")) {
dictionaryFileName = line.replace("dictionaryFile:", "").trim()
}
// alphabet string
def lineCharacters = extractAlphabetCharsFromLine(line)
lineCharacters = lineCharacters.isEmpty() ? extractAlphabetExtraCharsFromLine(languageFile.name, line) : lineCharacters
@ -135,6 +152,11 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
}
}
if (!hasABC && !abcString.isEmpty()) {
errorCount++
errorMsg += "Language '${languageFile.name}' is invalid. hasABC must be 'true' when abcString is provided.\n"
}
if (!hasLayout) {
errorCount++
errorMsg += "Language '${languageFile.name}' is invalid. Missing 'layout' property.\n"
@ -150,6 +172,11 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
errorMsg += "Language '${languageFile.name}' is invalid. 'sounds' property must contain series of phonetic transcriptions per digit sequence in the format: ' - [Yae,1221]' and so on.\n"
}
if (filterBySounds && !hasSounds) {
errorCount++
errorMsg += "Language '${languageFile.name}' is invalid. 'filterBySounds' property can only be used with 'sounds' property.\n"
}
if (!localeString.matches("^[a-z]{2,3}(?:-[A-Z]{2})?\$")) {
errorCount++
def msg = localeString.isEmpty() ? "Missing 'locale' property." : "Unrecognized locale format: '${localeString}'"
@ -176,7 +203,7 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
}
}
return [alphabet, sounds, !hasSounds, locale, dictionaryFile, errorCount, errorMsg]
return [alphabet, sounds, filterBySounds, !hasSounds, locale, dictionaryFile, errorCount, errorMsg]
}
@ -298,6 +325,16 @@ static def isLayoutLine(String line) {
//////////////////// VALIDATION ////////////////////
static def validateBooleanProperty(String line, String propertyName, String languageFileName) {
if (line.startsWith(propertyName) && !line.endsWith("yes") && !line.endsWith("no")) {
def property = line.replaceAll(":.*\$", "")
def invalidVal = line.replace("hasUpperCase:", "").trim()
return "Language '${languageFileName}' is invalid. Unrecognized '${property}' value: '${invalidVal}'. Only 'yes' and 'no' are allowed.\n"
}
return ''
}
static def validateNoWhitespace(String line, int lineNumber) {
if (line == "") {
return "There is no word on line ${lineNumber}. Remove all empty lines.\n"