1
0
Fork 0

Fixed the word pair predictions not working when one of the words contains an apostrophe. This will result in significantly more accurate suggestions in Ukrainian and Hebrew, and slightly more accurate ones in French, Italian, and other languages that use apostrophes a lot.

This commit is contained in:
sspanak 2025-04-16 17:15:19 +03:00 committed by Dimo Karaivanov
parent c7c2613db4
commit 546d20a9bc
3 changed files with 58 additions and 5 deletions

View file

@ -24,8 +24,8 @@ public class WordPair {
boolean isInvalid() { boolean isInvalid() {
Text w1 = new Text(word1); Text w1 = new Text(language, word1);
Text w2 = new Text(word2); Text w2 = new Text(language, word2);
return return
language == null language == null
@ -33,7 +33,7 @@ public class WordPair {
|| word1.equals(word2) || word1.equals(word2)
|| sequence2 == null || !(new Text(sequence2).isNumeric()) || sequence2 == null || !(new Text(sequence2).isNumeric())
|| (w1.codePointLength() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH && w2.codePointLength() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH) || (w1.codePointLength() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH && w2.codePointLength() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH)
|| !w1.isAlphabetic() || !w2.isAlphabetic(); || !w1.isWord() || !w2.isWord();
} }

View file

@ -8,7 +8,9 @@ import io.github.sspanak.tt9.db.DataStore;
import io.github.sspanak.tt9.ime.helpers.TextField; import io.github.sspanak.tt9.ime.helpers.TextField;
import io.github.sspanak.tt9.languages.EmojiLanguage; import io.github.sspanak.tt9.languages.EmojiLanguage;
import io.github.sspanak.tt9.languages.Language; import io.github.sspanak.tt9.languages.Language;
import io.github.sspanak.tt9.languages.LanguageKind;
import io.github.sspanak.tt9.preferences.settings.SettingsStore; import io.github.sspanak.tt9.preferences.settings.SettingsStore;
import io.github.sspanak.tt9.util.Text;
import io.github.sspanak.tt9.util.TextTools; import io.github.sspanak.tt9.util.TextTools;
import io.github.sspanak.tt9.util.chars.Characters; import io.github.sspanak.tt9.util.chars.Characters;
@ -348,6 +350,23 @@ public class WordPredictions extends Predictions {
*/ */
@NonNull @NonNull
protected String getPenultimateWord(@NonNull String currentWord) { protected String getPenultimateWord(@NonNull String currentWord) {
return textField.getWordBeforeCursor(language, 1, true); // We are in the middle of a word or at the beginning of a new one. Pairing makes no sense.
Text after = textField.getTextAfterCursor(1);
if (after.startsWithWord()) {
return "";
}
Text before = textField.getTextBeforeCursor();
// We are at the end of word. The user is probably typing a compound word. We do not want to
// pair with the first part of the compound word.
if (before.length() > currentWord.length() && before.toString().endsWith(currentWord) && Character.isAlphabetic(before.toString().charAt(before.length() - currentWord.length() - 1))) {
return "";
}
return before.getPreviousWord(
!currentWord.isEmpty(),
LanguageKind.isUkrainian(language) || LanguageKind.isHebrew(language)
);
} }
} }

View file

@ -16,7 +16,11 @@ public class Text extends TextTools {
private final Language language; private final Language language;
private final String text; private final String text;
private final static Pattern QUICK_DELETE_GROUP = Pattern.compile("(?:([\\s\\u3000]{2,})|([.,、。,،]{2,})|([^、。,\\s\\u3000]*.))$"); private static final Pattern QUICK_DELETE_GROUP = Pattern.compile("(?:([\\s\\u3000]{2,})|([.,、。,،]{2,})|([^、。,\\s\\u3000]*.))$");
private static final Pattern PREVIOUS_WORD = Pattern.compile("(?<=\\s|^)([\\p{L}\\p{Mc}\\p{Mn}\\p{Me}\\x{200D}\\x{200C}]+)$");
private static final Pattern PREVIOUS_WORD_WITH_APOSTROPHES = Pattern.compile("(?<=\\s|^)([\\p{L}\\p{Mc}\\p{Mn}\\p{Me}\\x{200D}\\x{200C}']+)$");
private static final Pattern PENULTIMATE_WORD = Pattern.compile("(?<=\\s|^)([\\p{L}\\p{Mc}\\p{Mn}\\p{Me}\\x{200D}\\x{200C}]+)[\\s'][^\\s']*$");
private static final Pattern PENULTIMATE_WORD_WITH_APOSTROPHES = Pattern.compile("(?<=\\s|^)([\\p{L}\\p{Mc}\\p{Mn}\\p{Me}\\x{200D}\\x{200C}']+)\\s\\S*$");
public Text(Language language, String text) { public Text(Language language, String text) {
@ -58,6 +62,24 @@ public class Text extends TextTools {
} }
/**
 * Extracts a word from the tail of this text using one of the precompiled word patterns.
 *
 * @param skipOne when {@code true}, the "penultimate word" patterns are used, i.e. the trailing
 *                fragment at the very end of the text is skipped and the word before it is
 *                returned; when {@code false}, the word that ends the text is returned.
 * @param isLanguageWithApostrophes when {@code true}, the pattern variants that accept an
 *                apostrophe as part of a word are used.
 * @return the captured word, or an empty string when the text is null or empty, or when the
 *         selected pattern does not match. Never {@code null}.
 */
@NonNull
public String getPreviousWord(boolean skipOne, boolean isLanguageWithApostrophes) {
	if (text == null || text.isEmpty()) {
		return "";
	}

	// Choose the pattern first, so that only one matcher is ever created.
	final Pattern pattern;
	if (skipOne) {
		pattern = isLanguageWithApostrophes ? PENULTIMATE_WORD_WITH_APOSTROPHES : PENULTIMATE_WORD;
	} else {
		pattern = isLanguageWithApostrophes ? PREVIOUS_WORD_WITH_APOSTROPHES : PREVIOUS_WORD;
	}

	final Matcher matcher = pattern.matcher(text);
	if (!matcher.find()) {
		return "";
	}

	// Keep the null guard on the capture group, so the @NonNull contract holds regardless of
	// how the patterns are written.
	final String previousWord = matcher.group(1);
	return previousWord != null ? previousWord : "";
}
public int getTextCase() { public int getTextCase() {
if (isUpperCase()) { if (isUpperCase()) {
return InputMode.CASE_UPPER; return InputMode.CASE_UPPER;
@ -97,6 +119,18 @@ public class Text extends TextTools {
} }
/**
 * Checks whether this text consists only of alphabetic characters, as reported by
 * {@link Character#isAlphabetic(int)}. When the language is Ukrainian or Hebrew (according to
 * {@code LanguageKind}), the ASCII apostrophe {@code '} is accepted as well.
 *
 * <p>The text is scanned code point by code point, not {@code char} by {@code char}. A letter
 * outside the Basic Multilingual Plane is stored as a surrogate pair, and a lone surrogate is
 * never alphabetic, so a per-{@code char} scan would wrongly reject such words.
 *
 * @return {@code true} when every code point is allowed; also {@code true} for null or empty
 *         text, because there is nothing to reject. {@code false} otherwise.
 */
public boolean isWord() {
	final boolean isApostropheAllowed = LanguageKind.isUkrainian(language) || LanguageKind.isHebrew(language);
	final int end = text == null ? 0 : text.length();

	for (int i = 0; i < end; ) {
		final int codePoint = text.codePointAt(i);
		final boolean isAllowedApostrophe = isApostropheAllowed && codePoint == '\'';

		if (!Character.isAlphabetic(codePoint) && !isAllowedApostrophe) {
			return false;
		}

		// Advance by 1 for BMP characters and by 2 for surrogate pairs.
		i += Character.charCount(codePoint);
	}

	return true;
}
public boolean isEmpty() { public boolean isEmpty() {
return text == null || text.isEmpty(); return text == null || text.isEmpty();
} }