diff --git a/app/src/main/java/io/github/sspanak/tt9/db/wordPairs/WordPair.java b/app/src/main/java/io/github/sspanak/tt9/db/wordPairs/WordPair.java index 97efa9ce..bc2cf100 100644 --- a/app/src/main/java/io/github/sspanak/tt9/db/wordPairs/WordPair.java +++ b/app/src/main/java/io/github/sspanak/tt9/db/wordPairs/WordPair.java @@ -24,8 +24,8 @@ public class WordPair { boolean isInvalid() { - Text w1 = new Text(word1); - Text w2 = new Text(word2); + Text w1 = new Text(language, word1); + Text w2 = new Text(language, word2); return language == null @@ -33,7 +33,7 @@ public class WordPair { || word1.equals(word2) || sequence2 == null || !(new Text(sequence2).isNumeric()) || (w1.codePointLength() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH && w2.codePointLength() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH) - || !w1.isAlphabetic() || !w2.isAlphabetic(); + || !w1.isWord() || !w2.isWord(); } diff --git a/app/src/main/java/io/github/sspanak/tt9/ime/modes/predictions/WordPredictions.java b/app/src/main/java/io/github/sspanak/tt9/ime/modes/predictions/WordPredictions.java index c3ffd364..220e2d5a 100644 --- a/app/src/main/java/io/github/sspanak/tt9/ime/modes/predictions/WordPredictions.java +++ b/app/src/main/java/io/github/sspanak/tt9/ime/modes/predictions/WordPredictions.java @@ -8,7 +8,9 @@ import io.github.sspanak.tt9.db.DataStore; import io.github.sspanak.tt9.ime.helpers.TextField; import io.github.sspanak.tt9.languages.EmojiLanguage; import io.github.sspanak.tt9.languages.Language; +import io.github.sspanak.tt9.languages.LanguageKind; import io.github.sspanak.tt9.preferences.settings.SettingsStore; +import io.github.sspanak.tt9.util.Text; import io.github.sspanak.tt9.util.TextTools; import io.github.sspanak.tt9.util.chars.Characters; @@ -348,6 +350,23 @@ public class WordPredictions extends Predictions { */ @NonNull protected String getPenultimateWord(@NonNull String currentWord) { - return textField.getWordBeforeCursor(language, 1, true); + // We are in the middle of a word or at the beginning of a new one. Pairing makes no sense. + Text after = textField.getTextAfterCursor(1); + if (after.startsWithWord()) { + return ""; + } + + Text before = textField.getTextBeforeCursor(); + + // We are at the end of word. The user is probably typing a compound word. We do not want to + // pair with the first part of the compound word. + if (before.length() > currentWord.length() && before.toString().endsWith(currentWord) && Character.isAlphabetic(before.toString().charAt(before.length() - currentWord.length() - 1))) { + return ""; + } + + return before.getPreviousWord( + !currentWord.isEmpty(), + LanguageKind.isUkrainian(language) || LanguageKind.isHebrew(language) + ); } } diff --git a/app/src/main/java/io/github/sspanak/tt9/util/Text.java b/app/src/main/java/io/github/sspanak/tt9/util/Text.java index fd5942f4..12e10be5 100644 --- a/app/src/main/java/io/github/sspanak/tt9/util/Text.java +++ b/app/src/main/java/io/github/sspanak/tt9/util/Text.java @@ -16,7 +16,11 @@ public class Text extends TextTools { private final Language language; private final String text; - private final static Pattern QUICK_DELETE_GROUP = Pattern.compile("(?:([\\s\\u3000]{2,})|([.,、。,،]{2,})|([^、。,\\s\\u3000]*.))$"); + private static final Pattern QUICK_DELETE_GROUP = Pattern.compile("(?:([\\s\\u3000]{2,})|([.,、。,،]{2,})|([^、。,\\s\\u3000]*.))$"); + private static final Pattern PREVIOUS_WORD = Pattern.compile("(?<=\\s|^)([\\p{L}\\p{Mc}\\p{Mn}\\p{Me}\\x{200D}\\x{200C}]+)$"); + private static final Pattern PREVIOUS_WORD_WITH_APOSTROPHES = Pattern.compile("(?<=\\s|^)([\\p{L}\\p{Mc}\\p{Mn}\\p{Me}\\x{200D}\\x{200C}']+)$"); + private static final Pattern PENULTIMATE_WORD = Pattern.compile("(?<=\\s|^)([\\p{L}\\p{Mc}\\p{Mn}\\p{Me}\\x{200D}\\x{200C}]+)[\\s'][^\\s']*$"); + private static final Pattern PENULTIMATE_WORD_WITH_APOSTROPHES = Pattern.compile("(?<=\\s|^)([\\p{L}\\p{Mc}\\p{Mn}\\p{Me}\\x{200D}\\x{200C}']+)\\s\\S*$"); public Text(Language language, String text) { @@ -58,6 +62,24 @@ public class Text extends TextTools { } + @NonNull + public String getPreviousWord(boolean skipOne, boolean isLanguageWithApostrophes) { + if (text == null || text.isEmpty()) { + return ""; + } + + Matcher matcher; + if (isLanguageWithApostrophes) { + matcher = skipOne ? PENULTIMATE_WORD_WITH_APOSTROPHES.matcher(text) : PREVIOUS_WORD_WITH_APOSTROPHES.matcher(text); + } else { + matcher = skipOne ? PENULTIMATE_WORD.matcher(text) : PREVIOUS_WORD.matcher(text); + } + + String word = matcher.find() ? matcher.group(1) : null; + return word == null ? "" : word; + } + + public int getTextCase() { if (isUpperCase()) { return InputMode.CASE_UPPER; @@ -97,6 +119,18 @@ public class Text extends TextTools { } + public boolean isWord() { + boolean isApostropheAllowed = LanguageKind.isUkrainian(language) || LanguageKind.isHebrew(language); + for (int i = 0, end = text == null ? 0 : text.length(); i < end; i++) { + if (!Character.isAlphabetic(text.charAt(i)) && !(isApostropheAllowed && text.charAt(i) == '\'')) { + return false; + } + } + + return true; + } + + public boolean isEmpty() { return text == null || text.isEmpty(); }