Chinese Pinyin
This commit is contained in:
parent
51cd39fe27
commit
c57877ce9a
46 changed files with 497243 additions and 151 deletions
|
|
@ -8,13 +8,13 @@ ext.convertDictionaries = { definitionsInputDir, dictionariesInputDir, dictionar
|
|||
int errorCount = 0
|
||||
|
||||
def errorStream = fileTree(dir: definitionsInputDir).getFiles().parallelStream().map { definition ->
|
||||
def (_, sounds, noSyllables, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageDefintion(definition, dictionariesInputDir)
|
||||
def (_, sounds, prependSoundsToWords, noSyllables, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageDefintion(definition, dictionariesInputDir)
|
||||
errorCount += langFileErrorCount
|
||||
if (!langFileErrorMsg.isEmpty()) {
|
||||
return langFileErrorMsg
|
||||
}
|
||||
|
||||
def (conversionErrorCount, conversionErrorMessages) = convertDictionary(definition, dictionaryFile, dictionariesOutputDir, dictionariesMetaDir, DICTIONARY_OUTPUT_EXTENSION, sounds, noSyllables, locale, MAX_ERRORS, CSV_DELIMITER)
|
||||
def (conversionErrorCount, conversionErrorMessages) = convertDictionary(definition, dictionaryFile, dictionariesOutputDir, dictionariesMetaDir, DICTIONARY_OUTPUT_EXTENSION, sounds, prependSoundsToWords, noSyllables, locale, MAX_ERRORS, CSV_DELIMITER)
|
||||
errorCount += conversionErrorCount
|
||||
if (!conversionErrorMessages.isEmpty()) {
|
||||
return conversionErrorMessages
|
||||
|
|
@ -31,12 +31,12 @@ ext.convertDictionaries = { definitionsInputDir, dictionariesInputDir, dictionar
|
|||
|
||||
|
||||
// this cannot be static, because DictionaryTools will not be visible
|
||||
def convertDictionary(File definition, File csvDictionary, String dictionariesOutputDir, String dictionariesMetaDir, String outputDictionaryExtension, HashMap<String, String> sounds, boolean noSyllables, Locale locale, int maxErrors, String csvDelimiter) {
|
||||
def convertDictionary(File definition, File csvDictionary, String dictionariesOutputDir, String dictionariesMetaDir, String outputDictionaryExtension, HashMap<String, String> sounds, boolean prependSoundsToWords, boolean noSyllables, Locale locale, int maxErrors, String csvDelimiter) {
|
||||
if (isDictionaryUpToDate(definition, csvDictionary, dictionariesMetaDir)) {
|
||||
return [0, ""]
|
||||
}
|
||||
|
||||
|
||||
final LATIN_ONLY_WORD = "^[A-Za-z]+\$"
|
||||
int errorCount = 0
|
||||
String errorMsg = ''
|
||||
|
||||
|
|
@ -63,7 +63,7 @@ def convertDictionary(File definition, File csvDictionary, String dictionariesOu
|
|||
outputDictionary.put(digitSequence, new ArrayList<>())
|
||||
}
|
||||
// prefix the frequency to sort the words later
|
||||
outputDictionary.get(digitSequence).add("${String.format('%03d', frequency)}${word}")
|
||||
outputDictionary.get(digitSequence).add("${String.format('%03d', frequency)}${prependSoundsToWords && !(word =~ LATIN_ONLY_WORD) ? transcription : ''}${word}")
|
||||
wordCount++
|
||||
}
|
||||
}
|
||||
|
|
|
|||
45
app/languages/definitions/ChinesePinyin.yml
Normal file
45
app/languages/definitions/ChinesePinyin.yml
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
locale: zh-CN
|
||||
dictionaryFile: zh-pinyin.csv
|
||||
name: 中文 / 简体
|
||||
hasABC: no
|
||||
hasSpaceBetweenWords: no
|
||||
hasUpperCase: no
|
||||
layout:
|
||||
- [SPECIAL] # 0
|
||||
- [PUNCTUATION_ZH] # 1
|
||||
- [a, b, c] # 2
|
||||
- [d, e, f] # 3
|
||||
- [g, h, i] # 4
|
||||
- [j, k, l] # 5
|
||||
- [m, n, o] # 6
|
||||
- [p, q, r, s] # 7
|
||||
- [t, u, v] # 8
|
||||
- [w, x, y, z] # 9
|
||||
filterBySounds: yes
|
||||
sounds:
|
||||
- [A,2]
|
||||
- [B,2]
|
||||
- [C,2]
|
||||
- [D,3]
|
||||
- [E,3]
|
||||
- [F,3]
|
||||
- [G,4]
|
||||
- [H,4]
|
||||
- [I,4]
|
||||
- [J,5]
|
||||
- [K,5]
|
||||
- [L,5]
|
||||
- [M,6]
|
||||
- [N,6]
|
||||
- [O,6]
|
||||
- [P,7]
|
||||
- [Q,7]
|
||||
- [R,7]
|
||||
- [S,7]
|
||||
- [T,8]
|
||||
- [U,8]
|
||||
- [V,8]
|
||||
- [W,9]
|
||||
- [X,9]
|
||||
- [Y,9]
|
||||
- [Z,9]
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
locale: ko-KR
|
||||
currency: ₩
|
||||
dictionaryFile: ko-utf8.csv
|
||||
hasABC: no
|
||||
hasUpperCase: no
|
||||
layout: # only used for the virtual key labels
|
||||
- [ㅇ,ㅁ,SPECIAL] # 0
|
||||
|
|
|
|||
496366
app/languages/dictionaries/zh-pinyin.csv
Normal file
496366
app/languages/dictionaries/zh-pinyin.csv
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -88,7 +88,7 @@ public class DataStore {
|
|||
}
|
||||
|
||||
|
||||
public static void getWords(ConsumerCompat<ArrayList<String>> dataHandler, Language language, String sequence, String filter, int minWords, int maxWords) {
|
||||
public static void getWords(ConsumerCompat<ArrayList<String>> dataHandler, Language language, String sequence, boolean onlyExactSequence, String filter, int minWords, int maxWords) {
|
||||
if (getWordsTask != null && !getWordsTask.isDone()) {
|
||||
dataHandler.accept(new ArrayList<>());
|
||||
getWordsCancellationSignal.cancel();
|
||||
|
|
@ -96,14 +96,14 @@ public class DataStore {
|
|||
}
|
||||
|
||||
getWordsCancellationSignal = new CancellationSignal();
|
||||
getWordsTask = executor.submit(() -> getWordsSync(dataHandler, language, sequence, filter, minWords, maxWords));
|
||||
getWordsTask = executor.submit(() -> getWordsSync(dataHandler, language, sequence, onlyExactSequence, filter, minWords, maxWords));
|
||||
executor.submit(DataStore::setGetWordsTimeout);
|
||||
}
|
||||
|
||||
|
||||
private static void getWordsSync(ConsumerCompat<ArrayList<String>> dataHandler, Language language, String sequence, String filter, int minWords, int maxWords) {
|
||||
private static void getWordsSync(ConsumerCompat<ArrayList<String>> dataHandler, Language language, String sequence, boolean onlyExactSequence, String filter, int minWords, int maxWords) {
|
||||
try {
|
||||
ArrayList<String> data = words.getMany(getWordsCancellationSignal, language, sequence, filter, minWords, maxWords);
|
||||
ArrayList<String> data = words.getMany(getWordsCancellationSignal, language, sequence, onlyExactSequence, filter, minWords, maxWords);
|
||||
asyncReturn.post(() -> dataHandler.accept(data));
|
||||
} catch (Exception e) {
|
||||
Logger.e(LOG_TAG, "Error fetching words: " + e.getMessage());
|
||||
|
|
|
|||
|
|
@ -134,7 +134,7 @@ public class CustomWordsImporter extends AbstractFileProcessor {
|
|||
return false;
|
||||
}
|
||||
|
||||
if (customWord.language == null || customWord.language.isSyllabary() || readOps.exists(sqlite.getDb(), customWord.language, customWord.word)) {
|
||||
if (customWord.language == null || customWord.language.isTranscribed() || readOps.exists(sqlite.getDb(), customWord.language, customWord.word)) {
|
||||
ignoredWords++;
|
||||
} else {
|
||||
InsertOps.insertCustomWord(sqlite.getDb(), customWord.language, customWord.sequence, customWord.word);
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ public class WordFile extends AssetFile {
|
|||
public WordFile(@NonNull Context context, Language language, AssetManager assets) {
|
||||
super(assets, language != null ? language.getDictionaryFile() : "");
|
||||
this.context = context;
|
||||
hasSyllables = language != null && language.isSyllabary();
|
||||
hasSyllables = language != null && language.isTranscribed();
|
||||
|
||||
lastCharCode = 0;
|
||||
reader = null;
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@ import io.github.sspanak.tt9.db.entities.WordList;
|
|||
import io.github.sspanak.tt9.db.entities.WordPositionsStringBuilder;
|
||||
import io.github.sspanak.tt9.db.wordPairs.WordPair;
|
||||
import io.github.sspanak.tt9.db.words.SlowQueryStats;
|
||||
import io.github.sspanak.tt9.db.words.WordStore;
|
||||
import io.github.sspanak.tt9.languages.EmojiLanguage;
|
||||
import io.github.sspanak.tt9.languages.Language;
|
||||
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
|
||||
|
|
@ -131,9 +130,6 @@ public class ReadOps {
|
|||
return new WordList();
|
||||
}
|
||||
|
||||
// EXACT_MATCHES concerns only the positions query
|
||||
filter = filter.equals(WordStore.FILTER_EXACT_MATCHES_ONLY) ? "" : filter;
|
||||
|
||||
String wordsQuery = getWordsQuery(language, positions, filter, maximumWords, fullOutput);
|
||||
if (wordsQuery.isEmpty() || (cancel != null && cancel.isCanceled())) {
|
||||
return new WordList();
|
||||
|
|
@ -157,10 +153,10 @@ public class ReadOps {
|
|||
}
|
||||
|
||||
|
||||
public String getSimilarWordPositions(@NonNull SQLiteDatabase db, @NonNull CancellationSignal cancel, @NonNull Language language, @NonNull String sequence, String wordFilter, int minPositions) {
|
||||
public String getSimilarWordPositions(@NonNull SQLiteDatabase db, @NonNull CancellationSignal cancel, @NonNull Language language, @NonNull String sequence, boolean onlyExactSequenceMatches, String wordFilter, int minPositions) {
|
||||
int generations;
|
||||
|
||||
if (wordFilter.equals(WordStore.FILTER_EXACT_MATCHES_ONLY)) {
|
||||
if (onlyExactSequenceMatches) {
|
||||
generations = 0;
|
||||
} else {
|
||||
generations = switch (sequence.length()) {
|
||||
|
|
@ -176,7 +172,7 @@ public class ReadOps {
|
|||
|
||||
@NonNull
|
||||
public String getWordPositions(@NonNull SQLiteDatabase db, @Nullable CancellationSignal cancel, @NonNull Language language, @NonNull String sequence, int generations, int minPositions, String wordFilter) {
|
||||
if (sequence.length() == 1 || (cancel != null && cancel.isCanceled())) {
|
||||
if ((sequence.length() == 1 && !language.isTranscribed()) || (cancel != null && cancel.isCanceled())) {
|
||||
return sequence;
|
||||
}
|
||||
|
||||
|
|
@ -301,9 +297,13 @@ public class ReadOps {
|
|||
sql.append(" AND word LIKE '").append(filter.replaceAll("'", "''")).append("%'");
|
||||
}
|
||||
|
||||
sql
|
||||
.append(" ORDER BY LENGTH(word), frequency DESC")
|
||||
.append(" LIMIT ").append(maxWords);
|
||||
sql.append(" ORDER BY LENGTH(word), frequency DESC");
|
||||
|
||||
if (maxWords < 0 && maxWordsPerSequence.containsKey(language)) {
|
||||
Integer limit = maxWordsPerSequence.get(language);
|
||||
maxWords = limit != null ? limit : SettingsStore.SUGGESTIONS_POSITIONS_LIMIT;
|
||||
}
|
||||
sql.append(" LIMIT ").append(maxWords);
|
||||
|
||||
String wordsSql = sql.toString();
|
||||
Logger.v(LOG_TAG, "Words SQL: " + wordsSql);
|
||||
|
|
|
|||
|
|
@ -24,13 +24,16 @@ public class WordPair {
|
|||
|
||||
|
||||
boolean isInvalid() {
|
||||
Text w1 = new Text(word1);
|
||||
Text w2 = new Text(word2);
|
||||
|
||||
return
|
||||
language == null
|
||||
|| word1.isEmpty() || word2.isEmpty()
|
||||
|| (word1.length() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH && word2.length() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH)
|
||||
|| word1.equals(word2)
|
||||
|| sequence2 == null || word2.length() != sequence2.length() || !(new Text(sequence2).isNumeric())
|
||||
|| !(new Text(word1).isAlphabetic()) || !(new Text(word2).isAlphabetic());
|
||||
|| sequence2 == null || !(new Text(sequence2).isNumeric())
|
||||
|| (w1.codePointLength() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH && w2.codePointLength() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH)
|
||||
|| !w1.isAlphabetic() || !w2.isAlphabetic();
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -137,11 +137,6 @@ public class WordPairStore extends BaseSyncStore {
|
|||
|
||||
int totalPairs = 0;
|
||||
for (Language language : languages) {
|
||||
if (language.isSyllabary()) {
|
||||
Logger.d(LOG_TAG, "Not loading word pairs for syllabary language: " + language.getId());
|
||||
continue;
|
||||
}
|
||||
|
||||
HashMap<WordPair, WordPair> wordPairs = pairs.get(language.getId());
|
||||
if (wordPairs == null) {
|
||||
wordPairs = new HashMap<>();
|
||||
|
|
|
|||
|
|
@ -236,7 +236,7 @@ public class DictionaryLoader {
|
|||
|
||||
|
||||
private int importLetters(Language language) throws InvalidLanguageCharactersException {
|
||||
if (language.isSyllabary()) {
|
||||
if (language.isTranscribed()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@ import io.github.sspanak.tt9.util.Timer;
|
|||
|
||||
|
||||
public class WordStore extends BaseSyncStore {
|
||||
public static final String FILTER_EXACT_MATCHES_ONLY = "__exact__";
|
||||
private final String LOG_TAG = "sqlite.WordStore";
|
||||
private final ReadOps readOps;
|
||||
|
||||
|
|
@ -60,10 +59,10 @@ public class WordStore extends BaseSyncStore {
|
|||
/**
|
||||
* Loads words matching and similar to a given digit sequence
|
||||
* For example: "7655" -> "roll" (exact match), but also: "rolled", "roller", "rolling", ...
|
||||
* and other similar. When "wordFilter" is set to FILTER_EXACT_MATCHES_ONLY, the word list is
|
||||
* constrained only to the words with length equal to the digit sequence length (exact matches).
|
||||
* and other similar. When "onlyExactSequence" is TRUE, the word list is constrained only to
|
||||
* the words with length equal to the digit sequence length (exact matches).
|
||||
*/
|
||||
public ArrayList<String> getMany(@NonNull CancellationSignal cancel, Language language, String sequence, String wordFilter, int minimumWords, int maximumWords) {
|
||||
public ArrayList<String> getMany(@NonNull CancellationSignal cancel, Language language, String sequence, boolean onlyExactSequence, String wordFilter, int minimumWords, int maximumWords) {
|
||||
if (!checkOrNotify()) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
|
|
@ -83,11 +82,11 @@ public class WordStore extends BaseSyncStore {
|
|||
long longPositionsTime = Timer.stop("cache_long_positions");
|
||||
|
||||
final int minWords = Math.max(minimumWords, 0);
|
||||
final int maxWords = Math.max(maximumWords, minWords);
|
||||
final int maxWords = maximumWords >= 0 ? Math.max(maximumWords, minWords) : maximumWords;
|
||||
final String filter = wordFilter == null ? "" : wordFilter;
|
||||
|
||||
Timer.start("get_positions");
|
||||
String positions = readOps.getSimilarWordPositions(sqlite.getDb(), cancel, language, sequence, filter, minWords);
|
||||
String positions = readOps.getSimilarWordPositions(sqlite.getDb(), cancel, language, sequence, onlyExactSequence, filter, minWords);
|
||||
long positionsTime = Timer.stop("get_positions");
|
||||
|
||||
Timer.start("get_words");
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@ abstract public class CommandHandler extends TextEditingHandler {
|
|||
return;
|
||||
}
|
||||
|
||||
if (mLanguage.isSyllabary()) {
|
||||
if (mLanguage.isTranscribed()) {
|
||||
UI.toastShortSingle(this, R.string.function_add_word_not_available);
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,7 +36,12 @@ public abstract class HotkeyHandler extends CommandHandler {
|
|||
suggestionOps.cancelDelayedAccept();
|
||||
|
||||
if (!suggestionOps.isEmpty()) {
|
||||
onAcceptSuggestionManually(suggestionOps.acceptCurrent(), KeyEvent.KEYCODE_ENTER);
|
||||
if (mInputMode.shouldReplacePreviousSuggestion()) {
|
||||
mInputMode.onReplaceSuggestion(suggestionOps.getCurrent());
|
||||
} else {
|
||||
onAcceptSuggestionManually(suggestionOps.acceptCurrent(), KeyEvent.KEYCODE_ENTER);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -192,7 +197,7 @@ public abstract class HotkeyHandler extends CommandHandler {
|
|||
|
||||
|
||||
public boolean onKeyFilterClear(boolean validateOnly) {
|
||||
if (suggestionOps.isEmpty() || mLanguage.isSyllabary()) {
|
||||
if (suggestionOps.isEmpty() || !mInputMode.supportsFiltering()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -208,12 +213,13 @@ public abstract class HotkeyHandler extends CommandHandler {
|
|||
// References:
|
||||
// - https://github.com/sspanak/tt9/issues/698#issuecomment-2600441061
|
||||
// - https://github.com/sspanak/tt9/issues/418
|
||||
boolean isFilteringOn = mInputMode.isStemFilterFuzzy() || (mInputMode.getSequenceLength() != mInputMode.getWordStem().length());
|
||||
int stemLength = mInputMode.getWordStem().length();
|
||||
boolean isFilteringOn = mInputMode.isStemFilterFuzzy() || (stemLength > 0 && mInputMode.getSequenceLength() != stemLength);
|
||||
|
||||
if (mInputMode.clearWordStem() && isFilteringOn) {
|
||||
mInputMode
|
||||
.setOnSuggestionsUpdated(this::handleSuggestions)
|
||||
.loadSuggestions(suggestionOps.getCurrent(mInputMode.getSequenceLength()));
|
||||
.loadSuggestions(suggestionOps.getCurrent(mLanguage, mInputMode.getSequenceLength()));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -229,7 +235,7 @@ public abstract class HotkeyHandler extends CommandHandler {
|
|||
return false;
|
||||
}
|
||||
|
||||
if (mLanguage.isSyllabary()) {
|
||||
if (!mInputMode.supportsFiltering()) {
|
||||
UI.toastShortSingle(this, R.string.function_filter_suggestions_not_available);
|
||||
return true; // prevent the default key action to acknowledge we have processed the event
|
||||
}
|
||||
|
|
@ -244,7 +250,7 @@ public abstract class HotkeyHandler extends CommandHandler {
|
|||
if (repeat && !suggestionOps.get(1).isEmpty()) {
|
||||
filter = suggestionOps.get(1);
|
||||
} else {
|
||||
filter = suggestionOps.getCurrent(mInputMode.getSequenceLength());
|
||||
filter = suggestionOps.getCurrent(mLanguage, mInputMode.getSequenceLength());
|
||||
}
|
||||
|
||||
if (filter.isEmpty()) {
|
||||
|
|
@ -289,10 +295,11 @@ public abstract class HotkeyHandler extends CommandHandler {
|
|||
detectRTL();
|
||||
|
||||
// for languages that do not have ABC or Predictive, make sure we remain in valid state
|
||||
if (!mInputMode.changeLanguage(mLanguage)) {
|
||||
if (mInputMode.changeLanguage(mLanguage)) {
|
||||
mInputMode.clearWordStem();
|
||||
} else {
|
||||
mInputMode = InputMode.getInstance(settings, mLanguage, inputType, textField, determineInputModeId());
|
||||
}
|
||||
mInputMode.clearWordStem();
|
||||
|
||||
getSuggestions(null);
|
||||
statusBar.setText(mInputMode);
|
||||
|
|
|
|||
|
|
@ -64,11 +64,21 @@ abstract public class MainViewHandler extends HotkeyHandler {
|
|||
}
|
||||
|
||||
|
||||
/**
 * Tells whether adding custom words is possible for the current language.
 *
 * @return true when no language is set, or when the current language is not transcribed.
 */
public boolean isAddingWordsSupported() {
|
||||
return mLanguage == null || !mLanguage.isTranscribed();
|
||||
}
|
||||
|
||||
|
||||
/**
 * @return the current value of the "dragResize" flag.
 */
public boolean isDragResizeOn() {
|
||||
return dragResize;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Tells whether suggestion filtering is available. The decision is delegated entirely to the
 * current input mode.
 *
 * @return whatever mInputMode.supportsFiltering() reports.
 */
public boolean isFilteringSupported() {
|
||||
return mInputMode.supportsFiltering();
|
||||
}
|
||||
|
||||
|
||||
/**
 * @return whatever inputType.isLimited() reports for the current input type.
 */
public boolean isInputLimited() {
|
||||
return inputType.isLimited();
|
||||
}
|
||||
|
|
@ -114,13 +124,8 @@ abstract public class MainViewHandler extends HotkeyHandler {
|
|||
}
|
||||
|
||||
|
||||
public boolean notLanguageSyllabary() {
|
||||
return mLanguage == null || !mLanguage.isSyllabary();
|
||||
}
|
||||
|
||||
|
||||
public String getABCString() {
|
||||
return mLanguage == null || mLanguage.isSyllabary() ? "ABC" : mLanguage.getAbcString().toUpperCase(mLanguage.getLocale());
|
||||
return mLanguage == null ? "ABC" : mLanguage.getAbcString().toUpperCase(mLanguage.getLocale());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -21,7 +21,6 @@ import io.github.sspanak.tt9.ime.modes.InputMode;
|
|||
import io.github.sspanak.tt9.ime.modes.InputModeKind;
|
||||
import io.github.sspanak.tt9.languages.Language;
|
||||
import io.github.sspanak.tt9.languages.LanguageCollection;
|
||||
import io.github.sspanak.tt9.languages.LanguageKind;
|
||||
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
|
||||
import io.github.sspanak.tt9.ui.UI;
|
||||
import io.github.sspanak.tt9.util.Text;
|
||||
|
|
@ -294,7 +293,7 @@ public abstract class TypingHandler extends KeyPadHandler {
|
|||
}
|
||||
|
||||
allowedInputModes = new ArrayList<>(inputType.determineInputModes(getApplicationContext()));
|
||||
if (LanguageKind.isKorean(mLanguage)) {
|
||||
if (!mLanguage.hasABC()) {
|
||||
allowedInputModes.remove((Integer) InputMode.MODE_ABC);
|
||||
} else if (!settings.getPredictiveMode()) {
|
||||
allowedInputModes.remove((Integer) InputMode.MODE_PREDICTIVE);
|
||||
|
|
@ -381,7 +380,7 @@ public abstract class TypingHandler extends KeyPadHandler {
|
|||
// last key press makes up a compound word like: (it)'s, (I)'ve, l'(oiseau), or it is
|
||||
// just the end of a sentence, like: "word." or "another?"
|
||||
if (mInputMode.shouldAcceptPreviousSuggestion(suggestionOps.getCurrent())) {
|
||||
String lastWord = suggestionOps.acceptPrevious(mInputMode.getSequenceLength());
|
||||
String lastWord = suggestionOps.acceptPrevious(mLanguage, mInputMode.getSequenceLength());
|
||||
onAcceptSuggestionAutomatically(lastWord);
|
||||
}
|
||||
|
||||
|
|
@ -405,7 +404,7 @@ public abstract class TypingHandler extends KeyPadHandler {
|
|||
// Otherwise, put the first suggestion in the text field,
|
||||
// but cut it off to the length of the sequence (how many keys were pressed),
|
||||
// for a more intuitive experience.
|
||||
String trimmedWord = suggestionOps.getCurrent(mInputMode.getSequenceLength());
|
||||
String trimmedWord = suggestionOps.getCurrent(mLanguage, mInputMode.getSequenceLength());
|
||||
appHacks.setComposingTextWithHighlightedStem(trimmedWord, mInputMode);
|
||||
|
||||
forceShowWindow();
|
||||
|
|
|
|||
|
|
@ -8,10 +8,12 @@ import androidx.annotation.Nullable;
|
|||
|
||||
import java.util.ArrayList;
|
||||
|
||||
import io.github.sspanak.tt9.languages.Language;
|
||||
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
|
||||
import io.github.sspanak.tt9.ui.main.ResizableMainView;
|
||||
import io.github.sspanak.tt9.ui.tray.SuggestionsBar;
|
||||
import io.github.sspanak.tt9.util.ConsumerCompat;
|
||||
import io.github.sspanak.tt9.util.Text;
|
||||
|
||||
public class SuggestionOps {
|
||||
@NonNull private final Handler delayedAcceptHandler;
|
||||
|
|
@ -110,12 +112,12 @@ public class SuggestionOps {
|
|||
}
|
||||
|
||||
|
||||
public String acceptPrevious(int sequenceLength) {
|
||||
public String acceptPrevious(Language language, int sequenceLength) {
|
||||
if (sequenceLength <= 0) {
|
||||
set(null);
|
||||
}
|
||||
|
||||
String lastComposingText = getCurrent(sequenceLength - 1);
|
||||
String lastComposingText = getCurrent(language, sequenceLength - 1);
|
||||
commitCurrent(false);
|
||||
return lastComposingText;
|
||||
}
|
||||
|
|
@ -143,17 +145,17 @@ public class SuggestionOps {
|
|||
}
|
||||
|
||||
|
||||
public String getCurrent(int maxLength) {
|
||||
public String getCurrent(Language language, int maxLength) {
|
||||
if (maxLength == 0 || isEmpty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
String text = getCurrent();
|
||||
if (maxLength > 0 && !text.isEmpty() && text.length() > maxLength) {
|
||||
text = text.substring(0, maxLength);
|
||||
Text text = new Text(language, getCurrent());
|
||||
if (maxLength > 0 && !text.isEmpty() && text.codePointLength() > maxLength) {
|
||||
return text.substringCodePoints(0, maxLength);
|
||||
}
|
||||
|
||||
return text;
|
||||
return text.toString();
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@ abstract public class InputMode {
|
|||
|
||||
|
||||
/**
 * Base constructor shared by all input modes.
 *
 * @param settings  the settings store kept in the "settings" field
 * @param inputType the type of the connected input field; may be null, in which case email mode is off
 */
protected InputMode(SettingsStore settings, InputType inputType) {
|
||||
// lowercase is allowed in every mode, so it is registered here instead of in each subclass
allowedTextCases.add(CASE_LOWER);
|
||||
// email mode requires an email field that is not reported as "defective text"
isEmailMode = inputType != null && inputType.isEmail() && !inputType.isDefectiveText();
|
||||
this.settings = settings;
|
||||
}
|
||||
|
|
@ -50,7 +51,9 @@ abstract public class InputMode {
|
|||
public static InputMode getInstance(SettingsStore settings, @Nullable Language language, InputType inputType, TextField textField, int mode) {
|
||||
switch (mode) {
|
||||
case MODE_PREDICTIVE:
|
||||
return (LanguageKind.isKorean(language) ? new ModeCheonjiin(settings, inputType, textField) : new ModeWords(settings, language, inputType, textField));
|
||||
if (LanguageKind.isChinese(language)) return new ModePinyin(settings, language, inputType, textField);
|
||||
if (LanguageKind.isKorean(language)) return new ModeCheonjiin(settings, inputType, textField);
|
||||
return new ModeWords(settings, language, inputType, textField);
|
||||
case MODE_ABC:
|
||||
return new ModeABC(settings, language, inputType);
|
||||
case MODE_PASSTHROUGH:
|
||||
|
|
@ -69,6 +72,7 @@ abstract public class InputMode {
|
|||
// Suggestions
|
||||
public void onAcceptSuggestion(@NonNull String word) { onAcceptSuggestion(word, false); }
|
||||
public void onAcceptSuggestion(@NonNull String word, boolean preserveWordList) {}
|
||||
public void onReplaceSuggestion(@NonNull String word) {}
|
||||
|
||||
/**
|
||||
* loadSuggestions
|
||||
|
|
@ -123,6 +127,7 @@ abstract public class InputMode {
|
|||
// Interaction with the IME. Return "true" if it should perform the respective action.
|
||||
public boolean shouldAcceptPreviousSuggestion(String unacceptedText) { return false; }
|
||||
public boolean shouldAcceptPreviousSuggestion(int nextKey, boolean hold) { return false; }
|
||||
public boolean shouldReplacePreviousSuggestion() { return false; }
|
||||
public boolean shouldAddTrailingSpace(boolean isWordAcceptedManually, int nextKey) { return false; }
|
||||
public boolean shouldAddPrecedingSpace() { return false; }
|
||||
public boolean shouldDeletePrecedingSpace() { return false; }
|
||||
|
|
@ -251,4 +256,5 @@ abstract public class InputMode {
|
|||
public boolean isStemFilterFuzzy() { return false; }
|
||||
public String getWordStem() { return ""; }
|
||||
public boolean setWordStem(String stem, boolean exact) { return false; }
|
||||
public boolean supportsFiltering() { return false; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ class ModeABC extends InputMode {
|
|||
|
||||
@Override
|
||||
public boolean changeLanguage(@Nullable Language newLanguage) {
|
||||
if (newLanguage != null && newLanguage.isSyllabary()) {
|
||||
if (newLanguage != null && newLanguage.isTranscribed()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -48,13 +48,15 @@ class ModeCheonjiin extends InputMode {
|
|||
|
||||
SPECIAL_CHAR_SEQUENCE_PREFIX = "11";
|
||||
|
||||
super.setLanguage(LanguageCollection.getLanguage(LanguageKind.KOREAN));
|
||||
|
||||
autoSpace = new AutoSpace(settings).setLanguage(language);
|
||||
digitSequence = "";
|
||||
allowedTextCases.add(CASE_LOWER);
|
||||
this.inputType = inputType;
|
||||
this.textField = textField;
|
||||
|
||||
initPredictions();
|
||||
setLanguage(LanguageCollection.getLanguage(LanguageKind.KOREAN));
|
||||
setSpecialCharacterConstants();
|
||||
|
||||
if (isEmailMode) {
|
||||
|
|
@ -64,8 +66,6 @@ class ModeCheonjiin extends InputMode {
|
|||
} else {
|
||||
setCustomSpecialCharacters();
|
||||
}
|
||||
|
||||
autoSpace = new AutoSpace(settings).setLanguage(language);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,210 @@
|
|||
package io.github.sspanak.tt9.ime.modes;
|
||||
|
||||
import androidx.annotation.NonNull;
|
||||
import androidx.annotation.Nullable;
|
||||
|
||||
import io.github.sspanak.tt9.hacks.InputType;
|
||||
import io.github.sspanak.tt9.ime.helpers.TextField;
|
||||
import io.github.sspanak.tt9.ime.modes.predictions.IdeogramPredictions;
|
||||
import io.github.sspanak.tt9.languages.Language;
|
||||
import io.github.sspanak.tt9.languages.LanguageKind;
|
||||
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
|
||||
import io.github.sspanak.tt9.util.Logger;
|
||||
import io.github.sspanak.tt9.util.Text;
|
||||
import io.github.sspanak.tt9.util.TextTools;
|
||||
|
||||
public class ModeIdeograms extends ModeWords {
|
||||
private static final String LOG_TAG = ModeIdeograms.class.getSimpleName();
|
||||
|
||||
private boolean isFiltering = false;
|
||||
|
||||
|
||||
protected ModeIdeograms(SettingsStore settings, Language lang, InputType inputType, TextField textField) {
|
||||
super(settings, lang, inputType, textField);
|
||||
}
|
||||
|
||||
|
||||
@Override public void determineNextWordTextCase() {}
|
||||
@Override protected String adjustSuggestionTextCase(String word, int newTextCase) { return word; }
|
||||
|
||||
|
||||
@Override
|
||||
protected void initPredictions() {
|
||||
predictions = new IdeogramPredictions(settings, textField);
|
||||
predictions.setWordsChangedHandler(this::onPredictions);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean changeLanguage(@Nullable Language newLanguage) {
|
||||
if (newLanguage != null && !newLanguage.isTranscribed() || LanguageKind.isKorean(newLanguage)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
setLanguage(newLanguage);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
super.reset();
|
||||
isFiltering = false;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected void onPredictions() {
|
||||
if (language.hasTranscriptionsEmbedded()) {
|
||||
if (isFiltering) {
|
||||
((IdeogramPredictions) predictions).stripNativeWords();
|
||||
} else {
|
||||
((IdeogramPredictions) predictions).stripTranscriptions();
|
||||
}
|
||||
}
|
||||
|
||||
if (!isFiltering) {
|
||||
// We can reorder by pairs only after stripping the transcriptions, if any.
|
||||
// Otherwise, the input field words will not match with any pair.
|
||||
((IdeogramPredictions) predictions).orderByPairs();
|
||||
}
|
||||
|
||||
super.onPredictions();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void onAcceptSuggestion(@NonNull String currentWord, boolean preserveWords) {
|
||||
if (currentWord.isEmpty() || new Text(currentWord).isNumeric()) {
|
||||
reset();
|
||||
Logger.i(LOG_TAG, "Current word is empty or numeric. Nothing to accept.");
|
||||
return;
|
||||
}
|
||||
|
||||
if (isFiltering) {
|
||||
isFiltering = false;
|
||||
stem = currentWord;
|
||||
loadSuggestions("");
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
String latinWord = ((IdeogramPredictions) predictions).getTranscription(currentWord);
|
||||
String digits = language.getDigitSequenceForWord(latinWord);
|
||||
((IdeogramPredictions) predictions).onAcceptTranscription(currentWord, latinWord, digits);
|
||||
} catch (Exception e) {
|
||||
Logger.e(LOG_TAG, "Failed incrementing priority of word: '" + currentWord + "'. " + e.getMessage());
|
||||
}
|
||||
|
||||
int len = digitSequence.length();
|
||||
if (preserveWords && len >= 2) {
|
||||
digitSequence = digitSequence.substring(len - 1);
|
||||
loadSuggestions("");
|
||||
} else {
|
||||
reset();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean shouldAcceptPreviousSuggestion(String s) {
|
||||
return
|
||||
!digitSequence.isEmpty()
|
||||
&& predictions.noDbWords()
|
||||
&& !digitSequence.equals(EMOJI_SEQUENCE)
|
||||
&& !digitSequence.equals(PUNCTUATION_SEQUENCE)
|
||||
&& !digitSequence.equals(SPECIAL_CHAR_SEQUENCE);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean shouldAcceptPreviousSuggestion(int nextKey, boolean hold) {
|
||||
if (digitSequence.isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (super.shouldAcceptPreviousSuggestion(nextKey, hold)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
String nextSequence = digitSequence + (char)(nextKey + '0');
|
||||
|
||||
return
|
||||
TextTools.containsOtherThan1(nextSequence)
|
||||
&& (
|
||||
nextSequence.endsWith(EMOJI_SEQUENCE) || nextSequence.startsWith(EMOJI_SEQUENCE) ||
|
||||
nextSequence.endsWith(PUNCTUATION_SEQUENCE) || nextSequence.startsWith(PUNCTUATION_SEQUENCE)
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* When we want to filter by a Latin transcription, we must have discarded it from the text field,
|
||||
* then give it to this method. It will filter the suggestions and show only the ones that match
|
||||
* the given Latin word.
|
||||
*/
|
||||
@Override
|
||||
public void onReplaceSuggestion(@NonNull String word) {
|
||||
if (word.isEmpty() || new Text(word).isNumeric()) {
|
||||
reset();
|
||||
Logger.i(LOG_TAG, "Can not replace an empty or numeric word.");
|
||||
return;
|
||||
}
|
||||
|
||||
isFiltering = false;
|
||||
stem = word;
|
||||
loadSuggestions("");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This should be called before accepting a word. It says whether we should discard the current
|
||||
* word. Discarding it means we want to erase it from the text field and instead display a
|
||||
* filtered list of suggestions that matches the word. If we don't discard it, usually we should
|
||||
* accept it.
|
||||
*/
|
||||
@Override
|
||||
public boolean shouldReplacePreviousSuggestion() {
|
||||
return isFiltering;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean setWordStem(String newStem, boolean fromScrolling) {
|
||||
if (!supportsFiltering()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!fromScrolling) {
|
||||
isFiltering = true;
|
||||
} else if (isFiltering) {
|
||||
stem = newStem;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean clearWordStem() {
|
||||
if (!supportsFiltering()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
isFiltering = false;
|
||||
stem = "";
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean supportsFiltering() {
|
||||
return language.hasTranscriptionsEmbedded();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean isStemFilterFuzzy() {
|
||||
return isFiltering;
|
||||
}
|
||||
}
|
||||
|
|
@ -10,7 +10,6 @@ class ModePassthrough extends InputMode {
|
|||
protected ModePassthrough(SettingsStore settings, InputType inputType) {
|
||||
super(settings, inputType);
|
||||
reset();
|
||||
allowedTextCases.add(CASE_LOWER);
|
||||
}
|
||||
|
||||
@Override public int getId() { return MODE_PASSTHROUGH; }
|
||||
|
|
|
|||
|
|
@ -0,0 +1,57 @@
|
|||
package io.github.sspanak.tt9.ime.modes;
|
||||
|
||||
import io.github.sspanak.tt9.hacks.InputType;
|
||||
import io.github.sspanak.tt9.ime.helpers.TextField;
|
||||
import io.github.sspanak.tt9.languages.Language;
|
||||
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
|
||||
import io.github.sspanak.tt9.util.chars.Characters;
|
||||
|
||||
public class ModePinyin extends ModeIdeograms {
|
||||
boolean ignoreNextSpace = false;
|
||||
|
||||
|
||||
protected ModePinyin(SettingsStore settings, Language lang, InputType inputType, TextField textField) {
|
||||
super(settings, lang, inputType, textField);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected void onNumberPress(int number) {
|
||||
if (ignoreNextSpace && number == SPECIAL_CHAR_SEQUENCE.charAt(0) - '0') {
|
||||
ignoreNextSpace = false;
|
||||
return;
|
||||
}
|
||||
|
||||
ignoreNextSpace = false;
|
||||
super.onNumberPress(number);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected void onNumberHold(int number) {
|
||||
ignoreNextSpace = false;
|
||||
super.onNumberHold(number);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean shouldAcceptPreviousSuggestion(int nextKey, boolean hold) {
|
||||
// In East Asian languages, 0-key must accept the current word, or type a space when there is no word.
|
||||
if (!digitSequence.isEmpty() && !digitSequence.endsWith(SPECIAL_CHAR_SEQUENCE) && nextKey == SPECIAL_CHAR_SEQUENCE.charAt(0) - '0') {
|
||||
ignoreNextSpace = true;
|
||||
}
|
||||
|
||||
return super.shouldAcceptPreviousSuggestion(nextKey, hold);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected String getPreferredChar() {
|
||||
final String preferredChar = settings.getDoubleZeroChar();
|
||||
return switch (preferredChar) {
|
||||
case "." -> Characters.ZH_FULL_STOP;
|
||||
case "," -> Characters.ZH_COMMA_LIST;
|
||||
default -> preferredChar;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -27,7 +27,7 @@ class ModeWords extends ModeCheonjiin {
|
|||
|
||||
// stem filter
|
||||
private boolean isStemFuzzy = false;
|
||||
private String stem = "";
|
||||
protected String stem = "";
|
||||
|
||||
// text analysis tools
|
||||
private final AutoTextCase autoTextCase;
|
||||
|
|
@ -110,12 +110,18 @@ class ModeWords extends ModeCheonjiin {
|
|||
|
||||
@Override
|
||||
public boolean changeLanguage(@Nullable Language newLanguage) {
|
||||
if (newLanguage != null && newLanguage.isSyllabary()) {
|
||||
if (newLanguage != null && newLanguage.isTranscribed()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
super.setLanguage(newLanguage);
|
||||
setLanguage(newLanguage);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected void setLanguage(@Nullable Language newLanguage) {
|
||||
super.setLanguage(newLanguage);
|
||||
autoSpace.setLanguage(language);
|
||||
|
||||
allowedTextCases.clear();
|
||||
|
|
@ -124,14 +130,11 @@ class ModeWords extends ModeCheonjiin {
|
|||
allowedTextCases.add(CASE_CAPITALIZE);
|
||||
allowedTextCases.add(CASE_UPPER);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean recompose(String word) {
|
||||
if (!language.hasSpaceBetweenWords() || language.isSyllabary()) {
|
||||
if (!language.hasSpaceBetweenWords() || language.isTranscribed()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -258,6 +261,12 @@ class ModeWords extends ModeCheonjiin {
|
|||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean supportsFiltering() {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* loadSuggestions
|
||||
* Loads the possible list of suggestions for the current digitSequence. "currentWord" is used
|
||||
|
|
@ -284,7 +293,7 @@ class ModeWords extends ModeCheonjiin {
|
|||
private boolean loadPreferredChar() {
|
||||
if (digitSequence.startsWith(NaturalLanguage.PREFERRED_CHAR_SEQUENCE)) {
|
||||
suggestions.clear();
|
||||
suggestions.add(settings.getDoubleZeroChar());
|
||||
suggestions.add(getPreferredChar());
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -292,6 +301,11 @@ class ModeWords extends ModeCheonjiin {
|
|||
}
|
||||
|
||||
|
||||
protected String getPreferredChar() {
|
||||
return settings.getDoubleZeroChar();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* onAcceptSuggestion
|
||||
* Bring this word up in the suggestions list next time and if necessary preserves the suggestion list
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ public class AutoSpace {
|
|||
public AutoSpace setLanguage(Language lang) {
|
||||
language = language == null ? new NullLanguage() : lang;
|
||||
isLanguageFrench = LanguageKind.isFrench(lang);
|
||||
isLanguageWithAlphabet = !language.isSyllabary();
|
||||
isLanguageWithAlphabet = !language.isTranscribed();
|
||||
isLanguageWithSpaceBetweenWords = language.hasSpaceBetweenWords();
|
||||
return this;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,200 @@
|
|||
package io.github.sspanak.tt9.ime.modes.predictions;
|
||||
|
||||
import androidx.annotation.NonNull;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
|
||||
import io.github.sspanak.tt9.db.DataStore;
|
||||
import io.github.sspanak.tt9.ime.helpers.TextField;
|
||||
import io.github.sspanak.tt9.languages.Language;
|
||||
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
|
||||
import io.github.sspanak.tt9.util.TextTools;
|
||||
|
||||
public class IdeogramPredictions extends WordPredictions {
|
||||
private boolean isTranscriptionFilterAllowed = false;
|
||||
private String lastTypedWord = "";
|
||||
@NonNull protected ArrayList<String> transcriptions = new ArrayList<>();
|
||||
|
||||
|
||||
public IdeogramPredictions(SettingsStore settings, TextField textField) {
|
||||
super(settings, textField);
|
||||
minWords = 1;
|
||||
maxWords = -1;
|
||||
onlyExactMatches = true;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Predictions setLanguage(@NonNull Language language) {
|
||||
isTranscriptionFilterAllowed = language.hasTranscriptionsEmbedded();
|
||||
return super.setLanguage(language);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void load() {
|
||||
transcriptions.clear();
|
||||
super.load();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected void onDbWords(ArrayList<String> dbWords, boolean isRetryAllowed) {
|
||||
transcriptions = onlyExactMatches ? reduceFuzzyMatches(dbWords, SettingsStore.SUGGESTIONS_MAX) : dbWords;
|
||||
words = new ArrayList<>(transcriptions);
|
||||
areThereDbWords = !words.isEmpty();
|
||||
onWordsChanged.run();
|
||||
}
|
||||
|
||||
|
||||
public void onAcceptTranscription(String word, String transcription, String sequence) {
|
||||
super.onAccept(transcription + word, sequence);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
@NonNull
|
||||
protected String getWordBeforeCursor(@NonNull String currentWord) {
|
||||
int currentWordLength = currentWord.length();
|
||||
int lastWordLength = lastTypedWord.length();
|
||||
int requiredTextLength = currentWordLength + lastWordLength;
|
||||
String text = textField.getStringBeforeCursor(requiredTextLength);
|
||||
// Logger.d("LOG_TAG", "====+> previous string: " + text);
|
||||
|
||||
return lastWordLength < text.length() ? text.substring(0, lastWordLength) : "";
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Tries to do a best guess what is the previous word and pairs it with the incoming one. Guessing
|
||||
* is because East Asian languages do not have spaces between words, so we try to match the
|
||||
* last typed or just give up.
|
||||
*/
|
||||
@Override
|
||||
protected void pairWithPreviousWord(@NonNull String word, @NonNull String sequence) {
|
||||
if (language.hasSpaceBetweenWords()) {
|
||||
super.pairWithPreviousWord(word, sequence);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!settings.getPredictWordPairs() || sequence.length() != digitSequence.length()) {
|
||||
// Logger.d("LOG_TAG", "====+> sequence length mismatch: " + sequence.length() + " != " + digitSequence.length());
|
||||
return;
|
||||
}
|
||||
|
||||
int latinEnd = TextTools.lastIndexOfLatin(word);
|
||||
String nativeWord = latinEnd < 0 || latinEnd >= word.length() ? word : word.substring(latinEnd + 1);
|
||||
|
||||
if (lastTypedWord.isEmpty() || (!words.isEmpty() && nativeWord.equals(words.get(0)))) {
|
||||
lastTypedWord = nativeWord;
|
||||
// Logger.d("LOG_TAG", "====+> Will not pair the first word. native word: " + nativeWord + " first suggestion: " + words.get(0));
|
||||
// if (lastTypedWord.isEmpty()) {
|
||||
// Logger.d("LOG_TAG", "====+> No previous word to pair with: " + lastTypedWord);
|
||||
// }
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
String previousWord = getWordBeforeCursor(nativeWord);
|
||||
if (previousWord.equals(lastTypedWord)) {
|
||||
// Logger.d("LOG_TAG", "====+> Pairing words: " + previousWord + " + " + nativeWord);
|
||||
DataStore.addWordPair(language, previousWord, nativeWord, sequence);
|
||||
// } else {
|
||||
// Logger.d("LOG_TAG", "===> Last word mismatch: " + previousWord + " != " + lastTypedWord + ". Not pairing.");
|
||||
}
|
||||
|
||||
lastTypedWord = nativeWord;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Keeps all exact matches and the first n fuzzy matches. Unlike Latin- or Cyrillic-based languages,
|
||||
* ideograms do not "start with" a sequence of characters, so fuzzy matches have little value.
|
||||
* Just keep some of them, in case there are no exact matches.
|
||||
*/
|
||||
@NonNull
|
||||
public ArrayList<String> reduceFuzzyMatches(ArrayList<String> allWords, int maxWords) {
|
||||
if (allWords.isEmpty()) {
|
||||
return allWords;
|
||||
}
|
||||
|
||||
ArrayList<String> shortWords = new ArrayList<>();
|
||||
final int MAX_LENGTH = Math.max(digitSequence.length() + 1, allWords.get(0).length());
|
||||
|
||||
for (int i = 0, longWords = 0, end = allWords.size(); i < end; i++) {
|
||||
String word = allWords.get(i);
|
||||
int trueLength = isTranscriptionFilterAllowed ? TextTools.lastIndexOfLatin(word) : word.length();
|
||||
|
||||
if (trueLength < MAX_LENGTH) {
|
||||
shortWords.add(word);
|
||||
}
|
||||
|
||||
if (trueLength >= MAX_LENGTH && longWords <= maxWords) {
|
||||
longWords++;
|
||||
shortWords.add(word);
|
||||
}
|
||||
}
|
||||
|
||||
return shortWords;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Removes the native words and keeps only the unique transcriptions. Directly modifies the words
|
||||
* list, but the original is preserved in this.transcriptions.
|
||||
* Example operation: [SHIWU食物, SHIWU事物, SHIWU事务, SHIZU十足] -> [SHIWU, SHIZU]
|
||||
*/
|
||||
public void stripNativeWords() {
|
||||
HashSet<String> uniqueTranscriptions = new HashSet<>();
|
||||
|
||||
for (int i = 0; i < transcriptions.size(); i++) {
|
||||
String transcription = transcriptions.get(i);
|
||||
int firstNative = TextTools.lastIndexOfLatin(transcription) + 1;
|
||||
uniqueTranscriptions.add(
|
||||
firstNative < 1 || firstNative >= transcription.length() ? transcription : transcription.substring(0, firstNative)
|
||||
);
|
||||
}
|
||||
|
||||
words.clear();
|
||||
words.addAll(uniqueTranscriptions);
|
||||
Collections.sort(words);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Removes the Latin transcriptions from native words. Directly modifies the words list, but the
|
||||
* original is preserved in this.transcriptions.
|
||||
* Example operation: [SHIWU食物, SHIZU十足] -> [食物, 十足]
|
||||
*/
|
||||
public void stripTranscriptions() {
|
||||
words.clear();
|
||||
for (int i = 0; i < transcriptions.size(); i++) {
|
||||
String transcription = transcriptions.get(i);
|
||||
int firstNative = TextTools.lastIndexOfLatin(transcription) + 1;
|
||||
words.add(firstNative >= transcription.length() ? transcription : transcription.substring(firstNative));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Similar to "stripNativeWords()", but finds and returns the transcription of the given word.
|
||||
* Returns an empty string if the word is not in the current suggestion list.
|
||||
*/
|
||||
@NonNull
|
||||
public String getTranscription(@NonNull String word) {
|
||||
for (String w : transcriptions) {
|
||||
if (w.endsWith(word)) {
|
||||
return w.replace(word, "");
|
||||
}
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
public void orderByPairs() {
|
||||
words = rearrangeByPairFrequency(words);
|
||||
}
|
||||
}
|
||||
|
|
@ -5,7 +5,6 @@ import androidx.annotation.NonNull;
|
|||
import java.util.ArrayList;
|
||||
|
||||
import io.github.sspanak.tt9.db.DataStore;
|
||||
import io.github.sspanak.tt9.db.words.WordStore;
|
||||
import io.github.sspanak.tt9.languages.Language;
|
||||
import io.github.sspanak.tt9.languages.NullLanguage;
|
||||
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
|
||||
|
|
@ -41,7 +40,7 @@ abstract public class Predictions {
|
|||
}
|
||||
|
||||
|
||||
public Predictions setLanguage(Language language) {
|
||||
public Predictions setLanguage(@NonNull Language language) {
|
||||
this.language = language;
|
||||
return this;
|
||||
}
|
||||
|
|
@ -110,7 +109,8 @@ abstract public class Predictions {
|
|||
(dbWords) -> onDbWords(dbWords, isRetryAllowed()),
|
||||
language,
|
||||
digitSequence,
|
||||
onlyExactMatches ? WordStore.FILTER_EXACT_MATCHES_ONLY : stem,
|
||||
onlyExactMatches,
|
||||
stem,
|
||||
minWords,
|
||||
maxWords
|
||||
);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package io.github.sspanak.tt9.ime.modes.predictions;
|
||||
|
||||
import androidx.annotation.NonNull;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
import io.github.sspanak.tt9.db.DataStore;
|
||||
|
|
@ -11,7 +13,7 @@ import io.github.sspanak.tt9.util.TextTools;
|
|||
import io.github.sspanak.tt9.util.chars.Characters;
|
||||
|
||||
public class WordPredictions extends Predictions {
|
||||
private final TextField textField;
|
||||
protected final TextField textField;
|
||||
private LocaleWordsSorter localeWordsSorter;
|
||||
|
||||
private String inputWord;
|
||||
|
|
@ -30,7 +32,7 @@ public class WordPredictions extends Predictions {
|
|||
|
||||
|
||||
@Override
|
||||
public Predictions setLanguage(Language language) {
|
||||
public Predictions setLanguage(@NonNull Language language) {
|
||||
super.setLanguage(language);
|
||||
localeWordsSorter = new LocaleWordsSorter(language);
|
||||
|
||||
|
|
@ -67,6 +69,7 @@ public class WordPredictions extends Predictions {
|
|||
},
|
||||
language,
|
||||
digitSequence.substring(1),
|
||||
onlyExactMatches,
|
||||
stem.length() > 1 ? stem.substring(1) : "",
|
||||
SettingsStore.SUGGESTIONS_MIN,
|
||||
SettingsStore.SUGGESTIONS_MAX
|
||||
|
|
@ -262,14 +265,16 @@ public class WordPredictions extends Predictions {
|
|||
return;
|
||||
}
|
||||
|
||||
// Second condition note: If the accepted word is longer than the sequence, it is some different word,
|
||||
// not a textonym of the fist suggestion. We don't need to store it.
|
||||
if (settings.getPredictWordPairs() && word.length() == digitSequence.length()) {
|
||||
DataStore.addWordPair(language, textField.getWordBeforeCursor(language, 1, true), word, sequence);
|
||||
}
|
||||
pairWithPreviousWord(word, sequence);
|
||||
makeTopWord(word, sequence);
|
||||
}
|
||||
|
||||
// Update the priority only if the user has selected the word, not when we have enforced it
|
||||
// because it is in a popular word pair.
|
||||
|
||||
/**
|
||||
* Update the priority only if the user has selected the word, not when we have enforced it
|
||||
* because it is in a popular word pair.
|
||||
*/
|
||||
protected void makeTopWord(String word, String sequence) {
|
||||
if (!word.equals(lastEnforcedTopWord)) {
|
||||
DataStore.makeTopWord(language, word, sequence);
|
||||
}
|
||||
|
|
@ -283,7 +288,7 @@ public class WordPredictions extends Predictions {
|
|||
* "onAccept()", we have remembered the "am" comes after "I" and "an" comes after "am", we will
|
||||
* not suggest the textonyms "am" or "an" twice (depending on which has the highest frequency).
|
||||
*/
|
||||
private ArrayList<String> rearrangeByPairFrequency(ArrayList<String> words) {
|
||||
protected ArrayList<String> rearrangeByPairFrequency(ArrayList<String> words) {
|
||||
lastEnforcedTopWord = "";
|
||||
|
||||
if (!settings.getPredictWordPairs() || words.size() < 2) {
|
||||
|
|
@ -291,7 +296,7 @@ public class WordPredictions extends Predictions {
|
|||
}
|
||||
|
||||
ArrayList<String> rearrangedWords = new ArrayList<>();
|
||||
String penultimateWord = textField.getWordBeforeCursor(language, 1, true);
|
||||
String penultimateWord = getWordBeforeCursor(words.get(0));
|
||||
|
||||
String pairWord = DataStore.getWord2(language, penultimateWord, digitSequence);
|
||||
int morePopularIndex = TextTools.indexOfIgnoreCase(words, pairWord);
|
||||
|
|
@ -310,4 +315,26 @@ public class WordPredictions extends Predictions {
|
|||
|
||||
return rearrangedWords;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Pairs the given word and its digit sequence to the last word in the text field.
|
||||
* Second condition note: If the accepted word is longer than the sequence, it is some different word,
|
||||
* not a textonym of the fist suggestion. We don't need to store it.
|
||||
*/
|
||||
protected void pairWithPreviousWord(@NonNull String word, @NonNull String sequence) {
|
||||
if (settings.getPredictWordPairs() && sequence.length() == digitSequence.length()) {
|
||||
DataStore.addWordPair(language, getWordBeforeCursor(word), word, sequence);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the last word in the text field. The way of finding it depends on the language, so
|
||||
* we have a separate method for that.
|
||||
*/
|
||||
@NonNull
|
||||
protected String getWordBeforeCursor(@NonNull String currentWord) {
|
||||
return textField.getWordBeforeCursor(language, 1, true);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,11 +13,13 @@ abstract public class Language {
|
|||
protected String code;
|
||||
protected String currency;
|
||||
protected String dictionaryFile;
|
||||
protected Locale locale = Locale.ROOT;
|
||||
protected String name;
|
||||
protected boolean hasABC = true;
|
||||
protected boolean hasSpaceBetweenWords = true;
|
||||
protected boolean hasUpperCase = true;
|
||||
protected boolean isSyllabary = false;
|
||||
protected boolean hasTranscriptionsEmbedded = false;
|
||||
protected boolean isTranscribed = false;
|
||||
protected Locale locale = Locale.ROOT;
|
||||
protected String name;
|
||||
|
||||
|
||||
public int getId() {
|
||||
|
|
@ -63,6 +65,10 @@ abstract public class Language {
|
|||
return name;
|
||||
}
|
||||
|
||||
final public boolean hasABC() {
|
||||
return hasABC;
|
||||
}
|
||||
|
||||
final public boolean hasSpaceBetweenWords() {
|
||||
return hasSpaceBetweenWords;
|
||||
}
|
||||
|
|
@ -71,8 +77,12 @@ abstract public class Language {
|
|||
return hasUpperCase;
|
||||
}
|
||||
|
||||
final public boolean isSyllabary() {
|
||||
return isSyllabary;
|
||||
final public boolean hasTranscriptionsEmbedded() {
|
||||
return hasTranscriptionsEmbedded;
|
||||
}
|
||||
|
||||
final public boolean isTranscribed() {
|
||||
return isTranscribed;
|
||||
}
|
||||
|
||||
@NonNull
|
||||
|
|
|
|||
|
|
@ -24,9 +24,11 @@ public class LanguageDefinition {
|
|||
public String abcString = "";
|
||||
public String currency = "";
|
||||
public String dictionaryFile = "";
|
||||
public boolean filterBySounds = false;
|
||||
public boolean hasABC = true;
|
||||
public boolean hasSpaceBetweenWords = true;
|
||||
public boolean hasUpperCase = true;
|
||||
public boolean isSyllabary = false;
|
||||
public boolean isTranscribed = false;
|
||||
public final ArrayList<ArrayList<String>> layout = new ArrayList<>();
|
||||
public String locale = "";
|
||||
public String name = "";
|
||||
|
|
@ -134,6 +136,12 @@ public class LanguageDefinition {
|
|||
case "dictionaryFile":
|
||||
dictionaryFile = value.replaceFirst("\\.\\w+$", "." + BuildConfig.DICTIONARY_EXTENSION);
|
||||
return;
|
||||
case "filterBySounds":
|
||||
filterBySounds = parseYamlBoolean(value);
|
||||
return;
|
||||
case "hasABC":
|
||||
hasABC = parseYamlBoolean(value);
|
||||
return;
|
||||
case "hasSpaceBetweenWords":
|
||||
hasSpaceBetweenWords = parseYamlBoolean(value);
|
||||
return;
|
||||
|
|
@ -141,7 +149,7 @@ public class LanguageDefinition {
|
|||
hasUpperCase = parseYamlBoolean(value);
|
||||
return;
|
||||
case "sounds":
|
||||
isSyllabary = true;
|
||||
isTranscribed = true;
|
||||
return;
|
||||
case "locale":
|
||||
locale = value;
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ public class LanguageKind {
|
|||
|
||||
public static boolean isArabic(Language language) { return language != null && language.getId() == 502337; }
|
||||
public static boolean isEnglish(Language language) { return language != null && language.getLocale().equals(Locale.ENGLISH); }
|
||||
public static boolean isChinese(Language language) { return language != null && language.getId() == 462106; }
|
||||
public static boolean isFarsi(Language language) { return language != null && language.getId() == 599078; }
|
||||
public static boolean isFrench(Language language) { return language != null && language.getId() == 596550; }
|
||||
public static boolean isGreek(Language language) { return language != null && language.getId() == 597381; }
|
||||
|
|
|
|||
|
|
@ -10,11 +10,10 @@ import java.util.Map;
|
|||
|
||||
import io.github.sspanak.tt9.languages.exceptions.InvalidLanguageCharactersException;
|
||||
import io.github.sspanak.tt9.util.Text;
|
||||
import io.github.sspanak.tt9.util.TextTools;
|
||||
import io.github.sspanak.tt9.util.chars.Characters;
|
||||
|
||||
|
||||
public class NaturalLanguage extends Language implements Comparable<NaturalLanguage> {
|
||||
public class NaturalLanguage extends TranscribedLanguage implements Comparable<NaturalLanguage> {
|
||||
final public static String SPECIAL_CHAR_KEY = "0";
|
||||
final public static String PUNCTUATION_KEY = "1";
|
||||
final public static String PREFERRED_CHAR_SEQUENCE = "00";
|
||||
|
|
@ -34,9 +33,11 @@ public class NaturalLanguage extends Language implements Comparable<NaturalLangu
|
|||
lang.abcString = definition.abcString.isEmpty() ? null : definition.abcString;
|
||||
lang.currency = definition.currency;
|
||||
lang.dictionaryFile = definition.getDictionaryFile();
|
||||
lang.hasABC = definition.hasABC;
|
||||
lang.hasSpaceBetweenWords = definition.hasSpaceBetweenWords;
|
||||
lang.hasUpperCase = definition.hasUpperCase;
|
||||
lang.isSyllabary = definition.isSyllabary;
|
||||
lang.hasTranscriptionsEmbedded = definition.filterBySounds;
|
||||
lang.isTranscribed = definition.isTranscribed;
|
||||
lang.name = definition.name.isEmpty() ? lang.name : definition.name;
|
||||
lang.numerals = definition.numerals;
|
||||
lang.setLocale(definition);
|
||||
|
|
@ -86,6 +87,7 @@ public class NaturalLanguage extends Language implements Comparable<NaturalLangu
|
|||
specialChars.put(SPECIAL_CHARS_PLACEHOLDER, Characters.Special);
|
||||
specialChars.put(PUNCTUATION_PLACEHOLDER, Characters.PunctuationEnglish);
|
||||
specialChars.put(PUNCTUATION_PLACEHOLDER + "_AR", Characters.PunctuationArabic);
|
||||
specialChars.put(PUNCTUATION_PLACEHOLDER + "_ZH", Characters.PunctuationChinese);
|
||||
specialChars.put(PUNCTUATION_PLACEHOLDER + "_FA", Characters.PunctuationFarsi);
|
||||
specialChars.put(PUNCTUATION_PLACEHOLDER + "_FR", Characters.PunctuationFrench);
|
||||
specialChars.put(PUNCTUATION_PLACEHOLDER + "_DE", Characters.PunctuationGerman);
|
||||
|
|
@ -257,7 +259,7 @@ public class NaturalLanguage extends Language implements Comparable<NaturalLangu
|
|||
if (
|
||||
word == null
|
||||
|| word.isEmpty()
|
||||
|| (isSyllabary && LanguageKind.isKorean(this) && TextTools.isHangul(word))
|
||||
|| (super.isValidWord(word))
|
||||
|| (word.length() == 1 && Character.isDigit(word.charAt(0)))
|
||||
) {
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,16 @@
|
|||
package io.github.sspanak.tt9.languages;
|
||||
|
||||
import io.github.sspanak.tt9.util.TextTools;
|
||||
|
||||
abstract class TranscribedLanguage extends Language {
|
||||
|
||||
@Override
|
||||
public boolean isValidWord(String word) {
|
||||
if (!isTranscribed) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return (LanguageKind.isKorean(this) && TextTools.isHangul(word))
|
||||
|| (LanguageKind.isChinese(this) && TextTools.isChinese(word));
|
||||
}
|
||||
}
|
||||
|
|
@ -21,8 +21,8 @@ class ItemSelectZeroKeyCharacter extends ItemDropDown {
|
|||
|
||||
public ItemSelectZeroKeyCharacter populate() {
|
||||
LinkedHashMap<String, String> items = new LinkedHashMap<>();
|
||||
items.put(".", context.getString(R.string.char_dot));
|
||||
items.put(",", context.getString(R.string.char_comma));
|
||||
items.put(".", ".");
|
||||
items.put(",", ",");
|
||||
items.put("\\n", context.getString(R.string.char_newline)); // SharedPreferences return a corrupted string when using the real "\n"... :(
|
||||
items.put(" ", context.getString(R.string.char_space));
|
||||
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ public class SoftKeyAddWord extends BaseSoftKeyWithIcons {
|
|||
@Override
|
||||
public void render() {
|
||||
if (tt9 != null) {
|
||||
setEnabled(!tt9.isVoiceInputActive() && tt9.notLanguageSyllabary() && !tt9.isTextEditingActive());
|
||||
setEnabled(!tt9.isVoiceInputActive() && tt9.isAddingWordsSupported() && !tt9.isTextEditingActive());
|
||||
}
|
||||
super.render();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ public class SoftKeyFilter extends BaseSoftKeyWithIcons {
|
|||
&& !tt9.isVoiceInputActive()
|
||||
&& (
|
||||
LanguageKind.isKorean(tt9.getLanguage())
|
||||
|| (tt9.notLanguageSyllabary() && !tt9.isTextEditingActive())
|
||||
|| (tt9.isFilteringSupported() && !tt9.isTextEditingActive())
|
||||
)
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,7 +36,12 @@ public class SoftKeyPunctuation extends SoftKey {
|
|||
@Override
|
||||
protected String getTitle() {
|
||||
String keyChar = getKeyChar();
|
||||
return "*".equals(keyChar) ? "✱" : keyChar;
|
||||
return switch (keyChar) {
|
||||
case "*" -> "✱";
|
||||
case Characters.ZH_QUESTION_MARK -> "?";
|
||||
case Characters.ZH_EXCLAMATION_MARK -> "!";
|
||||
default -> keyChar;
|
||||
};
|
||||
}
|
||||
|
||||
protected String getKeyChar() {
|
||||
|
|
@ -55,6 +60,8 @@ public class SoftKeyPunctuation extends SoftKey {
|
|||
if (tt9.isInputModePhone()) return "*";
|
||||
if (tt9.isInputModeNumeric()) return ",";
|
||||
|
||||
if (LanguageKind.isChinese(tt9.getLanguage())) return Characters.ZH_EXCLAMATION_MARK;
|
||||
|
||||
return "!";
|
||||
}
|
||||
|
||||
|
|
@ -63,8 +70,9 @@ public class SoftKeyPunctuation extends SoftKey {
|
|||
if (tt9.isInputModePhone()) return "#";
|
||||
if (tt9.isInputModeNumeric()) return ".";
|
||||
|
||||
if (LanguageKind.isArabic(tt9.getLanguage())) return "؟";
|
||||
if (LanguageKind.isArabic(tt9.getLanguage())) return Characters.AR_QUESTION_MARK;
|
||||
if (LanguageKind.isGreek(tt9.getLanguage())) return Characters.GR_QUESTION_MARK;
|
||||
if (LanguageKind.isChinese(tt9.getLanguage())) return Characters.ZH_QUESTION_MARK;
|
||||
|
||||
return "?";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -206,7 +206,7 @@ public class SuggestionsBar {
|
|||
return;
|
||||
}
|
||||
|
||||
stem = containsGenerated ? newSuggestions.get(0).substring(0, newSuggestions.get(0).length() - 1) : "";
|
||||
stem = containsGenerated && newSuggestions.get(0).length() > 1 ? newSuggestions.get(0).substring(0, newSuggestions.get(0).length() - 1) : "";
|
||||
|
||||
// Do not modify single letter + punctuation, such as "j'" or "l'". They look better as they are.
|
||||
stem = (stem.length() == 1 && newSuggestions.get(0).length() == 2 && !Character.isAlphabetic(newSuggestions.get(0).charAt(1))) ? "" : stem;
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import java.util.Locale;
|
|||
|
||||
import io.github.sspanak.tt9.ime.modes.InputMode;
|
||||
import io.github.sspanak.tt9.languages.Language;
|
||||
import io.github.sspanak.tt9.languages.LanguageKind;
|
||||
import io.github.sspanak.tt9.util.chars.Characters;
|
||||
|
||||
public class Text extends TextTools {
|
||||
|
|
@ -172,11 +173,22 @@ public class Text extends TextTools {
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the number of regular 8-bit chars
|
||||
*/
|
||||
public int length() {
|
||||
return text == null ? 0 : text.length();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the number of UTF-16 chars
|
||||
*/
|
||||
public int codePointLength() {
|
||||
return text == null ? 0 : text.codePointCount(0, text.length());
|
||||
}
|
||||
|
||||
|
||||
public int lastBoundaryIndex() {
|
||||
if (text == null || text.length() < 2) {
|
||||
return -1;
|
||||
|
|
@ -224,6 +236,28 @@ public class Text extends TextTools {
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* A safe substring method that works with code points (UTF-16 chars), instead of 8-bit chars.
|
||||
* Useful for languages with complex characters, like Chinese.
|
||||
*/
|
||||
public String substringCodePoints(int start, int end) {
|
||||
if (text == null) {
|
||||
return "";
|
||||
}
|
||||
|
||||
if (!LanguageKind.isChinese(language)) {
|
||||
return text.substring(start, end);
|
||||
}
|
||||
|
||||
StringBuilder output = new StringBuilder();
|
||||
for (int i = Math.max(start, 0), finish = Math.min(text.length(), end); i < finish; i++) {
|
||||
output.append(text.charAt(i));
|
||||
}
|
||||
|
||||
return output.toString();
|
||||
}
|
||||
|
||||
|
||||
public String subStringEndingWord(boolean keepApostrophe, boolean keepQuote) {
|
||||
if (text == null) {
|
||||
return "";
|
||||
|
|
|
|||
|
|
@ -11,22 +11,23 @@ import java.util.regex.Pattern;
|
|||
import io.github.sspanak.tt9.util.chars.Characters;
|
||||
|
||||
public class TextTools {
|
||||
private static final Pattern containsOtherThan1 = Pattern.compile("[02-9]");
|
||||
private static final Pattern combiningString = Pattern.compile("^\\p{M}+$");
|
||||
private static final Pattern nextIsPunctuation = Pattern.compile("^\\p{Punct}");
|
||||
private static final Pattern isHangul = Pattern.compile("[\u1100-\u11FF\u302E-\u302F\u3131-\u318F\u3200-\u321F\u3260-\u327E\uA960-\uA97F\uAC00-\uD7FB\uFFA0-\uFFDF]+");
|
||||
private static final Pattern nextToWord = Pattern.compile("\\b$");
|
||||
private static final Pattern previousIsLetter = Pattern.compile("[\\p{L}\\p{M}]$");
|
||||
private static final Pattern startOfSentence = Pattern.compile("(?<!\\.)(^|[.?!؟¿¡])\\s+$");
|
||||
private static final Pattern CONTAINS_OTHER_THAN_1 = Pattern.compile("[02-9]");
|
||||
private static final Pattern COMBINING_STRING = Pattern.compile("^\\p{M}+$");
|
||||
private static final Pattern NEXT_IS_PUNCTUATION = Pattern.compile("^\\p{Punct}");
|
||||
private static final Pattern IS_CHINESE = Pattern.compile("\\p{script=Han}+");
|
||||
private static final Pattern IS_HANGUL = Pattern.compile("[\u1100-\u11FF\u302E-\u302F\u3131-\u318F\u3200-\u321F\u3260-\u327E\uA960-\uA97F\uAC00-\uD7FB\uFFA0-\uFFDF]+");
|
||||
private static final Pattern NEXT_TO_WORD = Pattern.compile("\\b$");
|
||||
private static final Pattern PREVIOUS_IS_LETTER = Pattern.compile("[\\p{L}\\p{M}]$");
|
||||
private static final Pattern START_OF_SENTENCE = Pattern.compile("(?<!\\.)(^|[.?!؟¿¡])\\s+$");
|
||||
|
||||
|
||||
public static boolean containsOtherThan1(String str) {
|
||||
return str != null && containsOtherThan1.matcher(str).find();
|
||||
return str != null && CONTAINS_OTHER_THAN_1.matcher(str).find();
|
||||
}
|
||||
|
||||
|
||||
public static boolean isCombining(String str) {
|
||||
return str != null && combiningString.matcher(str).find();
|
||||
return str != null && COMBINING_STRING.matcher(str).find();
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -45,8 +46,13 @@ public class TextTools {
|
|||
}
|
||||
|
||||
|
||||
public static boolean isChinese(String str) {
|
||||
return str != null && IS_CHINESE.matcher(str).find();
|
||||
}
|
||||
|
||||
|
||||
public static boolean isHangul(String str) {
|
||||
return str != null && isHangul.matcher(str).find();
|
||||
return str != null && IS_HANGUL.matcher(str).find();
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -61,23 +67,35 @@ public class TextTools {
|
|||
}
|
||||
|
||||
|
||||
public static int lastIndexOfLatin(String str) {
|
||||
for (int i = str != null ? str.length() - 1 : -1; i >= 0; i--) {
|
||||
char ch = str.charAt(i);
|
||||
if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
public static boolean isStartOfSentence(String str) {
|
||||
return str != null && startOfSentence.matcher(str).find();
|
||||
return str != null && START_OF_SENTENCE.matcher(str).find();
|
||||
}
|
||||
|
||||
|
||||
public static boolean isNextToWord(String str) {
|
||||
return str != null && nextToWord.matcher(str).find();
|
||||
return str != null && NEXT_TO_WORD.matcher(str).find();
|
||||
}
|
||||
|
||||
|
||||
public static boolean nextIsPunctuation(String str) {
|
||||
return str != null && !str.isEmpty() && nextIsPunctuation.matcher(str).find();
|
||||
return str != null && !str.isEmpty() && NEXT_IS_PUNCTUATION.matcher(str).find();
|
||||
}
|
||||
|
||||
|
||||
public static boolean previousIsLetter(String str) {
|
||||
return str != null && previousIsLetter.matcher(str).find();
|
||||
return str != null && PREVIOUS_IS_LETTER.matcher(str).find();
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,13 @@ import io.github.sspanak.tt9.languages.Language;
|
|||
import io.github.sspanak.tt9.languages.LanguageKind;
|
||||
|
||||
class Punctuation {
|
||||
public static final String AR_QUESTION_MARK = "؟";
|
||||
public static final String GR_QUESTION_MARK = ";";
|
||||
public static final String ZH_COMMA_LIST = "、";
|
||||
public static final String ZH_FULL_STOP = "。";
|
||||
public static final String ZH_QUESTION_MARK = "?";
|
||||
public static final String ZH_EXCLAMATION_MARK = "!";
|
||||
|
||||
public static final String NEW_LINE = DeviceInfo.AT_LEAST_ANDROID_6 && new Paint().hasGlyph("⏎") ? "⏎" : "\\n";
|
||||
public static final String ZWJ = "\u200D";
|
||||
public static final String ZWJ_GRAPHIC = "ZWJ";
|
||||
|
|
@ -22,7 +28,7 @@ class Punctuation {
|
|||
));
|
||||
|
||||
final public static ArrayList<Character> CombiningPunctuationFarsi = new ArrayList<>(Arrays.asList(
|
||||
'،', ZWNJ.charAt(0), '-', '\'', ':', '؛', '!', '؟', '.'
|
||||
'،', ZWNJ.charAt(0), '-', '\'', ':', AR_QUESTION_MARK.charAt(0), '!', '؛', '.'
|
||||
));
|
||||
|
||||
final private static ArrayList<Character> CombiningPunctuationGujarati = new ArrayList<>(Arrays.asList(
|
||||
|
|
@ -38,7 +44,11 @@ class Punctuation {
|
|||
));
|
||||
|
||||
final public static ArrayList<String> PunctuationArabic = new ArrayList<>(Arrays.asList(
|
||||
"،", ".", "-", "(", ")", "&", "~", "`", "'", "\"", "؛", ":", "!", "؟"
|
||||
"،", ".", "-", "(", ")", "&", "~", "`", "'", "\"", "؛", ":", "!", AR_QUESTION_MARK
|
||||
));
|
||||
|
||||
final public static ArrayList<String> PunctuationChinese = new ArrayList<>(Arrays.asList(
|
||||
",", ZH_COMMA_LIST, ZH_FULL_STOP, "—", "~", "(", ")", ".", "「", "」", "『", "』", "•", "《", "》", "〈", "〉", "'", "“", "”", ";", ":", ZH_EXCLAMATION_MARK, ZH_QUESTION_MARK
|
||||
));
|
||||
|
||||
final public static ArrayList<String> PunctuationEnglish = new ArrayList<>(Arrays.asList(
|
||||
|
|
|
|||
|
|
@ -224,8 +224,6 @@
|
|||
<string name="key_yellow">Yellow Button</string>
|
||||
<string name="key_blue">Blue Button</string>
|
||||
|
||||
<string name="char_comma" translatable="false">,</string>
|
||||
<string name="char_dot" translatable="false">.</string>
|
||||
<string name="char_newline">New Line</string>
|
||||
<string name="char_space">Space</string>
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ ext.validateLanguageFiles = { definitionsDir, dictionariesDir, validationDir ->
|
|||
return "Too many errors! Skipping: ${definition}\n"
|
||||
}
|
||||
|
||||
def (alphabet, sounds, isAlphabeticLanguage, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageDefintion(definition, dictionariesDir)
|
||||
def (alphabet, sounds, _, isAlphabeticLanguage, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageDefintion(definition, dictionariesDir)
|
||||
|
||||
def languageHash = DictionaryTools.getLanguageHash(definition, dictionaryFile)
|
||||
def validationFile = new File("${validationDir}/${definition.name.replace(".yml", "")}.txt")
|
||||
|
|
@ -52,8 +52,11 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
|
|||
int errorCount = 0
|
||||
String errorMsg = ""
|
||||
|
||||
String abcString = ""
|
||||
boolean hasABC = true
|
||||
boolean hasLayout = false
|
||||
boolean hasSounds = false
|
||||
boolean filterBySounds = false
|
||||
boolean areNumeralsValid = true
|
||||
String localeString = ""
|
||||
String dictionaryFileName = ""
|
||||
|
|
@ -64,6 +67,8 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
|
|||
&& !rawLine.startsWith("abcString")
|
||||
&& !rawLine.startsWith("currency")
|
||||
&& !rawLine.startsWith("dictionaryFile")
|
||||
&& !rawLine.startsWith("filterBySounds")
|
||||
&& !rawLine.startsWith("hasABC")
|
||||
&& !rawLine.startsWith("hasSpaceBetweenWords")
|
||||
&& !rawLine.startsWith("hasUpperCase")
|
||||
&& !rawLine.startsWith("layout")
|
||||
|
|
@ -71,6 +76,7 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
|
|||
&& !rawLine.startsWith("name")
|
||||
&& !rawLine.startsWith("numerals")
|
||||
&& !rawLine.startsWith("sounds")
|
||||
|
||||
) {
|
||||
def parts = rawLine.split(":")
|
||||
def property = parts.length > 0 ? parts[0] : rawLine
|
||||
|
|
@ -81,14 +87,29 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
|
|||
|
||||
String line = rawLine.replaceFirst("#[\\s\\S]+\$", "")
|
||||
|
||||
if (
|
||||
(line.startsWith("hasUpperCase") || line.startsWith("hasSpaceBetweenWords"))
|
||||
&& !line.endsWith("yes") && !line.endsWith("no")
|
||||
) {
|
||||
def property = line.replaceAll(":.*\$", "")
|
||||
def invalidVal = line.replace("hasUpperCase:", "").trim()
|
||||
errorCount++
|
||||
errorMsg += "Language '${languageFile.name}' is invalid. Unrecognized '${property}' value: '${invalidVal}'. Only 'yes' and 'no' are allowed.\n"
|
||||
def booleanProperties = ["hasUpperCase", "hasSpaceBetweenWords", "filterBySounds", "hasABC"]
|
||||
for (String property : booleanProperties) {
|
||||
String booleanError = validateBooleanProperty(line, property, languageFile.name)
|
||||
if (booleanError) {
|
||||
errorCount++
|
||||
errorMsg += booleanError
|
||||
}
|
||||
}
|
||||
|
||||
if (line.startsWith("abcString")) {
|
||||
abcString = line.replace("abcString:", "").trim()
|
||||
}
|
||||
|
||||
if (line.startsWith("dictionaryFile")) {
|
||||
dictionaryFileName = line.replace("dictionaryFile:", "").trim()
|
||||
}
|
||||
|
||||
if (line.startsWith("filterBySounds")) {
|
||||
filterBySounds = line.endsWith("yes")
|
||||
}
|
||||
|
||||
if (line.startsWith("hasABC")) {
|
||||
hasABC = line.endsWith("yes")
|
||||
}
|
||||
|
||||
if (line.startsWith("numerals")) {
|
||||
|
|
@ -107,10 +128,6 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
|
|||
localeString = line.replace("locale:", "").trim()
|
||||
}
|
||||
|
||||
if (line.startsWith("dictionaryFile")) {
|
||||
dictionaryFileName = line.replace("dictionaryFile:", "").trim()
|
||||
}
|
||||
|
||||
// alphabet string
|
||||
def lineCharacters = extractAlphabetCharsFromLine(line)
|
||||
lineCharacters = lineCharacters.isEmpty() ? extractAlphabetExtraCharsFromLine(languageFile.name, line) : lineCharacters
|
||||
|
|
@ -135,6 +152,11 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
|
|||
}
|
||||
}
|
||||
|
||||
// An abcString was provided while hasABC is switched off: report the contradiction.
// Boolean properties in language definitions only accept 'yes' and 'no',
// so the message must ask for 'yes', not 'true'.
if (!hasABC && !abcString.isEmpty()) {
	errorCount++
	errorMsg += "Language '${languageFile.name}' is invalid. hasABC must be 'yes' when abcString is provided.\n"
}
|
||||
|
||||
if (!hasLayout) {
|
||||
errorCount++
|
||||
errorMsg += "Language '${languageFile.name}' is invalid. Missing 'layout' property.\n"
|
||||
|
|
@ -150,6 +172,11 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
|
|||
errorMsg += "Language '${languageFile.name}' is invalid. 'sounds' property must contain series of phonetic transcriptions per digit sequence in the format: ' - [Yae,1221]' and so on.\n"
|
||||
}
|
||||
|
||||
if (filterBySounds && !hasSounds) {
|
||||
errorCount++
|
||||
errorMsg += "Language '${languageFile.name}' is invalid. 'filterBySounds' property can only be used with 'sounds' property.\n"
|
||||
}
|
||||
|
||||
if (!localeString.matches("^[a-z]{2,3}(?:-[A-Z]{2})?\$")) {
|
||||
errorCount++
|
||||
def msg = localeString.isEmpty() ? "Missing 'locale' property." : "Unrecognized locale format: '${localeString}'"
|
||||
|
|
@ -176,7 +203,7 @@ ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
|
|||
}
|
||||
}
|
||||
|
||||
return [alphabet, sounds, !hasSounds, locale, dictionaryFile, errorCount, errorMsg]
|
||||
return [alphabet, sounds, filterBySounds, !hasSounds, locale, dictionaryFile, errorCount, errorMsg]
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -298,6 +325,16 @@ static def isLayoutLine(String line) {
|
|||
|
||||
//////////////////// VALIDATION ////////////////////
|
||||
|
||||
/**
 * Validates a yes/no property line of a language definition.
 *
 * @param line             the definition line, e.g. "hasABC: yes"
 * @param propertyName     the boolean property to validate, e.g. "hasABC"
 * @param languageFileName the definition file name, used only in the error message
 * @return an error message when the line starts with the property name, but does not
 *         end with "yes" or "no"; an empty string otherwise
 */
static def validateBooleanProperty(String line, String propertyName, String languageFileName) {
	// Lines for other properties and lines with a valid value produce no error.
	if (!line.startsWith(propertyName) || line.endsWith("yes") || line.endsWith("no")) {
		return ''
	}

	def property = line.replaceAll(":.*\$", "")
	// Strip the actual property prefix. It used to be hard-coded to "hasUpperCase:",
	// which left the property name inside the reported value for every other property.
	def invalidVal = line.replace(propertyName + ":", "").trim()

	return "Language '${languageFileName}' is invalid. Unrecognized '${property}' value: '${invalidVal}'. Only 'yes' and 'no' are allowed.\n"
}
|
||||
|
||||
static def validateNoWhitespace(String line, int lineNumber) {
|
||||
if (line == "") {
|
||||
return "There is no word on line ${lineNumber}. Remove all empty lines.\n"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue