Fixed word pair predictions not working when one of the words contains an apostrophe. This results in significantly more accurate suggestions in Ukrainian and Hebrew, and slightly more accurate ones in French, Italian, and other languages that use apostrophes a lot.
This commit is contained in:
parent
c7c2613db4
commit
546d20a9bc
3 changed files with 58 additions and 5 deletions
|
|
@ -24,8 +24,8 @@ public class WordPair {
|
||||||
|
|
||||||
|
|
||||||
boolean isInvalid() {
|
boolean isInvalid() {
|
||||||
Text w1 = new Text(word1);
|
Text w1 = new Text(language, word1);
|
||||||
Text w2 = new Text(word2);
|
Text w2 = new Text(language, word2);
|
||||||
|
|
||||||
return
|
return
|
||||||
language == null
|
language == null
|
||||||
|
|
@ -33,7 +33,7 @@ public class WordPair {
|
||||||
|| word1.equals(word2)
|
|| word1.equals(word2)
|
||||||
|| sequence2 == null || !(new Text(sequence2).isNumeric())
|
|| sequence2 == null || !(new Text(sequence2).isNumeric())
|
||||||
|| (w1.codePointLength() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH && w2.codePointLength() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH)
|
|| (w1.codePointLength() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH && w2.codePointLength() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH)
|
||||||
|| !w1.isAlphabetic() || !w2.isAlphabetic();
|
|| !w1.isWord() || !w2.isWord();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,9 @@ import io.github.sspanak.tt9.db.DataStore;
|
||||||
import io.github.sspanak.tt9.ime.helpers.TextField;
|
import io.github.sspanak.tt9.ime.helpers.TextField;
|
||||||
import io.github.sspanak.tt9.languages.EmojiLanguage;
|
import io.github.sspanak.tt9.languages.EmojiLanguage;
|
||||||
import io.github.sspanak.tt9.languages.Language;
|
import io.github.sspanak.tt9.languages.Language;
|
||||||
|
import io.github.sspanak.tt9.languages.LanguageKind;
|
||||||
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
|
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
|
||||||
|
import io.github.sspanak.tt9.util.Text;
|
||||||
import io.github.sspanak.tt9.util.TextTools;
|
import io.github.sspanak.tt9.util.TextTools;
|
||||||
import io.github.sspanak.tt9.util.chars.Characters;
|
import io.github.sspanak.tt9.util.chars.Characters;
|
||||||
|
|
||||||
|
|
@ -348,6 +350,23 @@ public class WordPredictions extends Predictions {
|
||||||
*/
|
*/
|
||||||
@NonNull
|
@NonNull
|
||||||
protected String getPenultimateWord(@NonNull String currentWord) {
|
protected String getPenultimateWord(@NonNull String currentWord) {
|
||||||
return textField.getWordBeforeCursor(language, 1, true);
|
// We are in the middle of a word or at the beginning of a new one. Pairing makes no sense.
|
||||||
|
Text after = textField.getTextAfterCursor(1);
|
||||||
|
if (after.startsWithWord()) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
Text before = textField.getTextBeforeCursor();
|
||||||
|
|
||||||
|
// We are at the end of word. The user is probably typing a compound word. We do not want to
|
||||||
|
// pair with the first part of the compound word.
|
||||||
|
if (before.length() > currentWord.length() && before.toString().endsWith(currentWord) && Character.isAlphabetic(before.toString().charAt(before.length() - currentWord.length() - 1))) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
return before.getPreviousWord(
|
||||||
|
!currentWord.isEmpty(),
|
||||||
|
LanguageKind.isUkrainian(language) || LanguageKind.isHebrew(language)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,11 @@ public class Text extends TextTools {
|
||||||
private final Language language;
|
private final Language language;
|
||||||
private final String text;
|
private final String text;
|
||||||
|
|
||||||
private final static Pattern QUICK_DELETE_GROUP = Pattern.compile("(?:([\\s\\u3000]{2,})|([.,、。,،]{2,})|([^、。,\\s\\u3000]*.))$");
|
private static final Pattern QUICK_DELETE_GROUP = Pattern.compile("(?:([\\s\\u3000]{2,})|([.,、。,،]{2,})|([^、。,\\s\\u3000]*.))$");
|
||||||
|
// Captures the run of letters, combining marks and ZWJ/ZWNJ characters that ends the text,
// provided the run is preceded by whitespace or by the start of the text.
private static final Pattern PREVIOUS_WORD = Pattern.compile("(?<=\\s|^)([\\p{L}\\p{Mc}\\p{Mn}\\p{Me}\\x{200D}\\x{200C}]+)$");
|
||||||
|
// Captures the run of letters, combining marks, ZWJ/ZWNJ characters and apostrophes (')
// that ends the text, provided the run is preceded by whitespace or by the start of the text.
private static final Pattern PREVIOUS_WORD_WITH_APOSTROPHES = Pattern.compile("(?<=\\s|^)([\\p{L}\\p{Mc}\\p{Mn}\\p{Me}\\x{200D}\\x{200C}']+)$");
|
||||||
|
// Captures a run of letters, combining marks and ZWJ/ZWNJ characters that is followed by one
// whitespace character or apostrophe and then by a trailing run (possibly empty) of characters
// that are neither whitespace nor apostrophes, up to the end of the text.
private static final Pattern PENULTIMATE_WORD = Pattern.compile("(?<=\\s|^)([\\p{L}\\p{Mc}\\p{Mn}\\p{Me}\\x{200D}\\x{200C}]+)[\\s'][^\\s']*$");
|
||||||
|
// Captures a run of letters, combining marks, ZWJ/ZWNJ characters and apostrophes (') that is
// followed by one whitespace character and then by a trailing run (possibly empty) of
// non-whitespace characters, up to the end of the text.
private static final Pattern PENULTIMATE_WORD_WITH_APOSTROPHES = Pattern.compile("(?<=\\s|^)([\\p{L}\\p{Mc}\\p{Mn}\\p{Me}\\x{200D}\\x{200C}']+)\\s\\S*$");
|
||||||
|
|
||||||
|
|
||||||
public Text(Language language, String text) {
|
public Text(Language language, String text) {
|
||||||
|
|
@ -58,6 +62,24 @@ public class Text extends TextTools {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@NonNull
|
||||||
|
public String getPreviousWord(boolean skipOne, boolean isLanguageWithApostrophes) {
|
||||||
|
if (text == null || text.isEmpty()) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
Matcher matcher;
|
||||||
|
if (isLanguageWithApostrophes) {
|
||||||
|
matcher = skipOne ? PENULTIMATE_WORD_WITH_APOSTROPHES.matcher(text) : PREVIOUS_WORD_WITH_APOSTROPHES.matcher(text);
|
||||||
|
} else {
|
||||||
|
matcher = skipOne ? PENULTIMATE_WORD.matcher(text) : PREVIOUS_WORD.matcher(text);
|
||||||
|
}
|
||||||
|
|
||||||
|
String word = matcher.find() ? matcher.group(1) : null;
|
||||||
|
return word == null ? "" : word;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public int getTextCase() {
|
public int getTextCase() {
|
||||||
if (isUpperCase()) {
|
if (isUpperCase()) {
|
||||||
return InputMode.CASE_UPPER;
|
return InputMode.CASE_UPPER;
|
||||||
|
|
@ -97,6 +119,18 @@ public class Text extends TextTools {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public boolean isWord() {
|
||||||
|
boolean isApostropheAllowed = LanguageKind.isUkrainian(language) || LanguageKind.isHebrew(language);
|
||||||
|
for (int i = 0, end = text == null ? 0 : text.length(); i < end; i++) {
|
||||||
|
if (!Character.isAlphabetic(text.charAt(i)) && !(isApostropheAllowed && text.charAt(i) == '\'')) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public boolean isEmpty() {
|
public boolean isEmpty() {
|
||||||
return text == null || text.isEmpty();
|
return text == null || text.isEmpty();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue