Fixed word pair predictions not working when one of the words contains an apostrophe. This results in significantly more accurate suggestions in Ukrainian and Hebrew, and slightly more accurate ones in French, Italian and other languages that use apostrophes a lot.

2025-04-16 17:15:19 +03:00 · 2025-04-16 17:15:19 +03:00 · 546d20a9bc
commit 546d20a9bc
parent c7c2613db4
3 changed files with 58 additions and 5 deletions
--- a/app/src/main/java/io/github/sspanak/tt9/db/wordPairs/WordPair.java
+++ b/app/src/main/java/io/github/sspanak/tt9/db/wordPairs/WordPair.java
@ -24,8 +24,8 @@ public class WordPair {
 	boolean isInvalid() {
-		Text w1 = new Text(word1);
+		Text w1 = new Text(language, word1);
-		Text w2 = new Text(word2);
+		Text w2 = new Text(language, word2);
 		return
 			language == null
@ -33,7 +33,7 @@ public class WordPair {
 			|| word1.equals(word2)
 			|| sequence2 == null || !(new Text(sequence2).isNumeric())
 			|| (w1.codePointLength() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH && w2.codePointLength() > SettingsStore.WORD_PAIR_MAX_WORD_LENGTH)
-			|| !w1.isAlphabetic() || !w2.isAlphabetic();
+			|| !w1.isWord() || !w2.isWord();
 	}
--- a/app/src/main/java/io/github/sspanak/tt9/ime/modes/predictions/WordPredictions.java
+++ b/app/src/main/java/io/github/sspanak/tt9/ime/modes/predictions/WordPredictions.java
@ -8,7 +8,9 @@ import io.github.sspanak.tt9.db.DataStore;
 import io.github.sspanak.tt9.ime.helpers.TextField;
 import io.github.sspanak.tt9.languages.EmojiLanguage;
 import io.github.sspanak.tt9.languages.Language;
 import io.github.sspanak.tt9.languages.LanguageKind;
 import io.github.sspanak.tt9.preferences.settings.SettingsStore;
 import io.github.sspanak.tt9.util.Text;
 import io.github.sspanak.tt9.util.TextTools;
 import io.github.sspanak.tt9.util.chars.Characters;
@ -348,6 +350,23 @@ public class WordPredictions extends Predictions {
 	 */
 	@NonNull
 	protected String getPenultimateWord(@NonNull String currentWord) {
-		return textField.getWordBeforeCursor(language, 1, true);
+		// We are in the middle of a word or at the beginning of a new one. Pairing makes no sense.
 		Text after = textField.getTextAfterCursor(1);
 		if (after.startsWithWord()) {
 			return "";
 		}
 		Text before = textField.getTextBeforeCursor();
 		// We are at the end of a word, and the character right before the current word is a letter.
 		// The user is probably typing a compound word. We do not want to pair with the first part of
 		// the compound word.
 		if (before.length() > currentWord.length() && before.toString().endsWith(currentWord) && Character.isAlphabetic(before.toString().charAt(before.length() - currentWord.length() - 1))) {
 			return "";
 		}
 		// When there is a current word, skip it and return the word before it; otherwise, return the
 		// last word before the cursor. In Ukrainian and Hebrew, apostrophes count as part of a word.
 		return before.getPreviousWord(
 			!currentWord.isEmpty(),
 			LanguageKind.isUkrainian(language) || LanguageKind.isHebrew(language)
 		);
 	}
 }
--- a/app/src/main/java/io/github/sspanak/tt9/util/Text.java
+++ b/app/src/main/java/io/github/sspanak/tt9/util/Text.java
@ -16,7 +16,11 @@ public class Text extends TextTools {
 	private final Language language;
 	private final String text;
 	// Matches, at the end of the text, one of: a run of 2+ whitespace characters (including the
 	// ideographic space U+3000); a run of 2+ punctuation marks from the listed set; or any run of
 	// characters that are neither whitespace nor the listed CJK punctuation, plus one more character.
-	private final static Pattern QUICK_DELETE_GROUP = Pattern.compile("(?:([\\s\\u3000]{2,})|([.,、。，،]{2,})|([^、。，\\s\\u3000]*.))$");
+	private static final Pattern QUICK_DELETE_GROUP = Pattern.compile("(?:([\\s\\u3000]{2,})|([.,、。，،]{2,})|([^、。，\\s\\u3000]*.))$");
 	// Group 1: the word at the very end of the text. A word is a run of letters, combining marks,
 	// and zero-width (non-)joiners, preceded by whitespace or the beginning of the text.
 	private static final Pattern PREVIOUS_WORD = Pattern.compile("(?<=\\s|^)([\\p{L}\\p{Mc}\\p{Mn}\\p{Me}\\x{200D}\\x{200C}]+)$");
 	// Same as PREVIOUS_WORD, but apostrophes are also accepted as word characters.
 	private static final Pattern PREVIOUS_WORD_WITH_APOSTROPHES = Pattern.compile("(?<=\\s|^)([\\p{L}\\p{Mc}\\p{Mn}\\p{Me}\\x{200D}\\x{200C}']+)$");
 	// Group 1: the word before the last one. It must be followed by a single whitespace character or
 	// apostrophe, and then by a (possibly empty) run of characters containing neither, up to the end
 	// of the text.
 	private static final Pattern PENULTIMATE_WORD = Pattern.compile("(?<=\\s|^)([\\p{L}\\p{Mc}\\p{Mn}\\p{Me}\\x{200D}\\x{200C}]+)[\\s'][^\\s']*$");
 	// Same idea as PENULTIMATE_WORD, but apostrophes belong to the word, so only whitespace separates
 	// the captured word from the trailing run of non-whitespace characters.
 	private static final Pattern PENULTIMATE_WORD_WITH_APOSTROPHES = Pattern.compile("(?<=\\s|^)([\\p{L}\\p{Mc}\\p{Mn}\\p{Me}\\x{200D}\\x{200C}']+)\\s\\S*$");
 	public Text(Language language, String text) {
@ -58,6 +62,24 @@ public class Text extends TextTools {
 	}
 	/**
 	 * Extracts a word from the end of the text using one of the precompiled word patterns.
 	 *
 	 * @param skipOne when true, the trailing (last) word is skipped and the one before it is returned;
 	 *                when false, the word at the very end of the text is returned
 	 * @param isLanguageWithApostrophes when true, apostrophes are treated as part of a word
 	 * @return the matched word, or an empty string when the text is null, empty, or does not match
 	 */
 	@NonNull
 	public String getPreviousWord(boolean skipOne, boolean isLanguageWithApostrophes) {
 		if (text == null || text.isEmpty()) {
 			return "";
 		}

 		// Choose the pattern first, so that the matching logic is written only once.
 		final Pattern pattern;
 		if (skipOne) {
 			pattern = isLanguageWithApostrophes ? PENULTIMATE_WORD_WITH_APOSTROPHES : PENULTIMATE_WORD;
 		} else {
 			pattern = isLanguageWithApostrophes ? PREVIOUS_WORD_WITH_APOSTROPHES : PREVIOUS_WORD;
 		}

 		final Matcher found = pattern.matcher(text);
 		if (!found.find()) {
 			return "";
 		}

 		final String captured = found.group(1);
 		return captured != null ? captured : "";
 	}
 	public int getTextCase() {
 		if (isUpperCase()) {
 			return InputMode.CASE_UPPER;
@ -97,6 +119,18 @@ public class Text extends TextTools {
 	}
 	/**
 	 * Checks whether the text consists only of alphabetic characters. In Ukrainian and Hebrew, the
 	 * apostrophe (') is accepted as well. The text is examined per Unicode code point, not per
 	 * UTF-16 unit, so letters outside the Basic Multilingual Plane are recognized correctly instead
 	 * of being rejected as lone surrogates.
 	 *
 	 * @return false if any character is neither alphabetic nor an allowed apostrophe; true otherwise,
 	 *         including when the text is null or empty
 	 */
 	public boolean isWord() {
 		boolean isApostropheAllowed = LanguageKind.isUkrainian(language) || LanguageKind.isHebrew(language);
 		if (text == null) {
 			return true;
 		}

 		for (int i = 0, end = text.length(); i < end; ) {
 			final int codePoint = text.codePointAt(i);
 			if (!Character.isAlphabetic(codePoint) && !(isApostropheAllowed && codePoint == '\'')) {
 				return false;
 			}
 			// Advance by 2 for surrogate pairs, by 1 otherwise.
 			i += Character.charCount(codePoint);
 		}

 		return true;
 	}
 	/**
 	 * @return true when the wrapped text is null or has no characters
 	 */
 	public boolean isEmpty() {
 		if (text == null) {
 			return true;
 		}
 		return text.isEmpty();
 	}