Text and Language tools code cleanup

2024-02-15 11:45:46 +02:00 · 2024-02-15 11:45:46 +02:00 · 68e47cc49f
commit 68e47cc49f
parent c239233bbe
10 changed files with 184 additions and 153 deletions
--- a/app/src/main/java/io/github/sspanak/tt9/TextTools.java
+++ b/app/src/main/java/io/github/sspanak/tt9/TextTools.java
@ -1,54 +0,0 @@
-package io.github.sspanak.tt9;
-
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.Locale;
-import java.util.TimeZone;
-import java.util.regex.Pattern;
-
-public class TextTools {
-	private static final Pattern containsOtherThan1 = Pattern.compile("[02-9]");
-	private static final Pattern previousIsLetter = Pattern.compile("\\p{L}$");
-	private static final Pattern nextIsPunctuation = Pattern.compile("^\\p{Punct}");
-	private static final Pattern nextToWord = Pattern.compile("\\b$");
-	private static final Pattern startOfSentence = Pattern.compile("(?<!\\.)(^|[.?!؟¿¡])\\s+$");
-
-	public static boolean containsOtherThan1(String str) {
-		return str != null && containsOtherThan1.matcher(str).find();
-	}
-
-	public static boolean isNextToWord(String str) {
-		return str != null && nextToWord.matcher(str).find();
-	}
-
-	public static boolean isStartOfSentence(String str) {
-		return str != null && startOfSentence.matcher(str).find();
-	}
-
-	public static boolean nextIsPunctuation(String str) {
-		return str != null && nextIsPunctuation.matcher(str).find();
-	}
-
-	public static boolean previousIsLetter(String str) {
-		return str != null && previousIsLetter.matcher(str).find();
-	}
-
-	public static boolean startsWithWhitespace(String str) {
-		return str != null && !str.isEmpty() && (str.charAt(0) == ' ' || str.charAt(0) == '\n' || str.charAt(0) == '\t');
-	}
-
-	public static boolean startsWithNumber(String str) {
-		return str != null && !str.isEmpty() && (str.charAt(0) >= '0' && str.charAt(0) <= '9');
-	}
-
-	public static String unixTimestampToISODate(long timestamp) {
-		if (timestamp < 0) {
-			return "--";
-		}
-
-		SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US);
-		sdf.setTimeZone(TimeZone.getDefault());
-
-		return sdf.format(new Date(timestamp));
-	}
-}
--- a/app/src/main/java/io/github/sspanak/tt9/db/SlowQueryStats.java
+++ b/app/src/main/java/io/github/sspanak/tt9/db/SlowQueryStats.java
@ -3,7 +3,7 @@ package io.github.sspanak.tt9.db;
 import java.util.HashMap;

 import io.github.sspanak.tt9.Logger;
-import io.github.sspanak.tt9.TextTools;
+import io.github.sspanak.tt9.languages.Text;
 import io.github.sspanak.tt9.languages.Language;
 import io.github.sspanak.tt9.preferences.SettingsStore;

@ -63,7 +63,7 @@ public class SlowQueryStats {
 			"Queries: " + totalQueries + ". Average time: " + averageTime + " ms." +
 			"\nSlow: " + slowQueries.size() + ". Average time: " + slowAverageTime + " ms." +
 			"\nSlowest: " + maxQueryTime + " ms." +
-			"\nFirst: " + TextTools.unixTimestampToISODate(firstQueryTime);
+			"\nFirst: " + Text.unixTimestampToISODate(firstQueryTime);
 	}

 	public static String getList() {
--- a/app/src/main/java/io/github/sspanak/tt9/ime/helpers/AppHacks.java
+++ b/app/src/main/java/io/github/sspanak/tt9/ime/helpers/AppHacks.java
@ -141,7 +141,7 @@ public class AppHacks {
 			return false;
 		}

-		String oldText = textField.getTextBeforeCursor() + textField.getTextAfterCursor();
+		String oldText = textField.getStringBeforeCursor() + textField.getStringAfterCursor();

 		sendDownUpKeyEvents(KeyEvent.KEYCODE_DPAD_CENTER);

@ -155,7 +155,7 @@ public class AppHacks {
 			// In Android there is no strictly defined confirmation key, hence DPAD_CENTER may have done nothing.
 			// If so, send an alternative key code as a final resort.
 			Thread.sleep(80);
-			String newText = textField.getTextBeforeCursor() + textField.getTextAfterCursor();
+			String newText = textField.getStringBeforeCursor() + textField.getStringAfterCursor();
 			if (newText.equals(oldText)) {
 				sendDownUpKeyEvents(KeyEvent.KEYCODE_ENTER);
 			}
--- a/app/src/main/java/io/github/sspanak/tt9/ime/helpers/TextField.java
+++ b/app/src/main/java/io/github/sspanak/tt9/ime/helpers/TextField.java
@ -19,6 +19,7 @@ import java.util.regex.Pattern;
 import io.github.sspanak.tt9.Logger;
 import io.github.sspanak.tt9.ime.modes.InputMode;
 import io.github.sspanak.tt9.languages.Language;
+import io.github.sspanak.tt9.languages.Text;

 public class TextField {
 	public static final int IME_ACTION_ENTER = EditorInfo.IME_MASK_ACTION + 1;
@ -48,26 +49,6 @@ public class TextField {
 	}


-	/**
-	 * getPreviousChar
-	 * Gets the character before the cursor.
-	 */
-	public String getPreviousChars(int numberOfChars) {
-		CharSequence character = connection != null ? connection.getTextBeforeCursor(numberOfChars, 0) : null;
-		return character != null ? character.toString() : "";
-	}
-
-
-	/**
-	 * getNextChar
-	 * Gets the character after the cursor.
-	 */
-	public String getNextChars(int numberOfChars) {
-		CharSequence character = connection != null ? connection.getTextAfterCursor(numberOfChars, 0) : null;
-		return character != null ? character.toString() : "";
-	}
-
-
 	/**
 	 * determineInputModes
 	 * Determine the typing mode based on the input field being edited. Returns an ArrayList of the allowed modes.
@ -149,31 +130,43 @@ public class TextField {
 	}


-	/**
-	 * getTextBeforeCursor
-	 * A simplified helper that return up to 50 characters before the cursor and "just works".
-	 */
-	public String getTextBeforeCursor() {
-		if (connection == null) {
-			return "";
-		}
+	public String getStringAfterCursor(int numberOfChars) {
+		CharSequence character = connection != null ? connection.getTextAfterCursor(numberOfChars, 0) : null;
+		return character != null ? character.toString() : "";
+	}

-		CharSequence before = connection.getTextBeforeCursor(50, 0);
-		return before != null ? before.toString() : "";
+
+	public String getStringBeforeCursor(int numberOfChars) {
+		CharSequence character = connection != null ? connection.getTextBeforeCursor(numberOfChars, 0) : null;
+		return character != null ? character.toString() : "";
 	}


 	/**
-	 * getTextBeforeCursor
+	 * getStringAfterCursor
 	 * A simplified helper that return up to 50 characters after the cursor and "just works".
 	 */
-	public String getTextAfterCursor() {
-		if (connection == null) {
-			return "";
-		}
+	public String getStringAfterCursor() {
+		return getStringAfterCursor(50);
+	}

-		CharSequence before = connection.getTextAfterCursor(50, 0);
-		return before != null ? before.toString() : "";
+
+	/**
+	 * getStringBeforeCursor
+	 * A simplified helper that return up to 50 characters before the cursor and "just works".
+	 */
+	public String getStringBeforeCursor() {
+		return getStringBeforeCursor(50);
+	}
+
+
+	public Text getTextAfterCursor(int numberOfChars) {
+		return new Text(getStringAfterCursor(numberOfChars)); // read the text AFTER the cursor, as the method name promises (was wrapping the text before it)
+	}
+
+
+	public Text getTextBeforeCursor() {
+		return new Text(getStringBeforeCursor());
 	}


@ -187,13 +180,13 @@ public class TextField {

 		if (language != null && (language.isHebrew() || language.isUkrainian())) {
 			// Hebrew and Ukrainian use apostrophes as letters
-			before = beforeCursorUkrainianRegex.matcher(getTextBeforeCursor());
-			after = afterCursorUkrainianRegex.matcher(getTextAfterCursor());
+			before = beforeCursorUkrainianRegex.matcher(getStringBeforeCursor());
+			after = afterCursorUkrainianRegex.matcher(getStringAfterCursor());
 		} else {
 			// In other languages, special characters in words will cause automatic word break to fail,
 			// resulting in unexpected suggestions. Therefore, they are not allowed.
-			before = beforeCursorWordRegex.matcher(getTextBeforeCursor());
-			after = afterCursorWordRegex.matcher(getTextAfterCursor());
+			before = beforeCursorWordRegex.matcher(getStringBeforeCursor());
+			after = afterCursorWordRegex.matcher(getStringAfterCursor());
 		}

 		return (before.find() ? before.group(1) : "") + (after.find() ? after.group(1) : "");
--- a/app/src/main/java/io/github/sspanak/tt9/ime/modes/InputMode.java
+++ b/app/src/main/java/io/github/sspanak/tt9/ime/modes/InputMode.java
@ -8,6 +8,7 @@ import io.github.sspanak.tt9.Logger;
 import io.github.sspanak.tt9.ime.helpers.InputType;
 import io.github.sspanak.tt9.ime.helpers.TextField;
 import io.github.sspanak.tt9.languages.Language;
+import io.github.sspanak.tt9.languages.Text;
 import io.github.sspanak.tt9.preferences.SettingsStore;

 abstract public class InputMode {
@ -164,7 +165,7 @@ abstract public class InputMode {
 		return true;
 	}

-	public void determineNextWordTextCase(String textBeforeCursor) {}
+	public void determineNextWordTextCase(Text textBeforeCursor) {}

 	// Based on the internal logic of the mode (punctuation or grammar rules), re-adjust the text case for when getSuggestions() is called.
 	protected String adjustSuggestionTextCase(String word, int newTextCase) { return word; }
--- a/app/src/main/java/io/github/sspanak/tt9/ime/modes/ModePredictive.java
+++ b/app/src/main/java/io/github/sspanak/tt9/ime/modes/ModePredictive.java
@ -5,7 +5,7 @@ import androidx.annotation.NonNull;
 import java.util.ArrayList;

 import io.github.sspanak.tt9.Logger;
-import io.github.sspanak.tt9.TextTools;
+import io.github.sspanak.tt9.languages.Text;
 import io.github.sspanak.tt9.db.WordStoreAsync;
 import io.github.sspanak.tt9.ime.helpers.InputType;
 import io.github.sspanak.tt9.ime.helpers.TextField;
@ -311,7 +311,7 @@ public class ModePredictive extends InputMode {

 	@Override
 	protected String adjustSuggestionTextCase(String word, int newTextCase) {
-		return autoTextCase.adjustSuggestionTextCase(language, word, newTextCase);
+		return autoTextCase.adjustSuggestionTextCase(new Text(language, word), newTextCase);
 	}

 	@Override
@ -320,7 +320,7 @@ public class ModePredictive extends InputMode {
 	}

 	@Override
-	public void determineNextWordTextCase(String textBeforeCursor) {
+	public void determineNextWordTextCase(Text textBeforeCursor) {
 		textCase = autoTextCase.determineNextWordTextCase(textCase, textFieldTextCase, textBeforeCursor);
 	}

@ -382,7 +382,7 @@ public class ModePredictive extends InputMode {
 			!digitSequence.isEmpty()
 			&& predictions.noDbWords()
 			&& digitSequence.contains(Language.PUNCTUATION_KEY)
-			&& TextTools.containsOtherThan1(digitSequence);
+			&& Text.containsOtherThan1(digitSequence);
 	}


--- a/app/src/main/java/io/github/sspanak/tt9/ime/modes/helpers/AutoSpace.java
+++ b/app/src/main/java/io/github/sspanak/tt9/ime/modes/helpers/AutoSpace.java
@ -1,6 +1,6 @@
 package io.github.sspanak.tt9.ime.modes.helpers;

-import io.github.sspanak.tt9.TextTools;
+import io.github.sspanak.tt9.languages.Text;
 import io.github.sspanak.tt9.ime.helpers.InputType;
 import io.github.sspanak.tt9.ime.helpers.TextField;
 import io.github.sspanak.tt9.preferences.SettingsStore;
@ -44,14 +44,14 @@ public class AutoSpace {
 	 * See the helper functions for the list of rules.
 	 */
 	public boolean shouldAddAutoSpace(boolean isWordAcceptedManually, int nextKey) {
-		String previousChars = textField.getPreviousChars(2);
-		String nextChars = textField.getNextChars(2);
+		String previousChars = textField.getStringBeforeCursor(2);
+		Text nextChars = textField.getTextAfterCursor(2);

 		return
 			settings.getAutoSpace()
 			&& !inputType.isSpecialized()
 			&& nextKey != 0
-			&& !TextTools.startsWithWhitespace(nextChars)
+			&& !nextChars.startsWithWhitespace()
 			&& (
 				shouldAddAfterWord(isWordAcceptedManually, previousChars, nextChars, nextKey)
 				|| shouldAddAfterPunctuation(previousChars, nextChars, nextKey)
@ -65,13 +65,13 @@ public class AutoSpace {
 	 * The rules are similar to the ones in the standard Android keyboard (with some exceptions,
 	 * because we are not using a QWERTY keyboard here).
 	 */
-	private boolean shouldAddAfterPunctuation(String previousChars, String nextChars, int nextKey) {
+	private boolean shouldAddAfterPunctuation(String previousChars, Text nextChars, int nextKey) {
 		char previousChar = previousChars.isEmpty() ? 0 : previousChars.charAt(previousChars.length() - 1);

 		return
 			nextKey != 1
-			&& !TextTools.nextIsPunctuation(nextChars)
-			&& !TextTools.startsWithNumber(nextChars)
+			&& !nextChars.nextIsPunctuation()
+			&& !nextChars.startsWithNumber()
 			&& (
 				previousChar == '.'
 				|| previousChar == ','
@ -95,12 +95,12 @@ public class AutoSpace {
 	 * shouldAddAfterWord
 	 * Similar to "shouldAddAfterPunctuation()", but determines whether to add a space after words.
 	 */
-	private boolean shouldAddAfterWord(boolean isWordAcceptedManually, String previousChars, String nextChars, int nextKey) {
+	private boolean shouldAddAfterWord(boolean isWordAcceptedManually, String previousChars, Text nextChars, int nextKey) {
 		return
 			isWordAcceptedManually // Do not add space when auto-accepting words, because it feels very confusing when typing.
 			&& nextKey != 1
 			&& nextChars.isEmpty()
-			&& TextTools.previousIsLetter(previousChars);
+			&& Text.previousIsLetter(previousChars);
 	}


--- a/app/src/main/java/io/github/sspanak/tt9/ime/modes/helpers/AutoTextCase.java
+++ b/app/src/main/java/io/github/sspanak/tt9/ime/modes/helpers/AutoTextCase.java
@ -1,6 +1,6 @@
 package io.github.sspanak.tt9.ime.modes.helpers;

-import io.github.sspanak.tt9.TextTools;
+import io.github.sspanak.tt9.languages.Text;
 import io.github.sspanak.tt9.ime.modes.InputMode;
 import io.github.sspanak.tt9.languages.Language;
 import io.github.sspanak.tt9.preferences.SettingsStore;
@ -18,20 +18,20 @@ public class AutoTextCase {
 	 * In addition to uppercase/lowercase, here we use the result from determineNextWordTextCase(),
 	 * to conveniently start sentences with capitals or whatnot.
 	 *
-	 * Also, by default we preserve any  mixed case words in the dictionary,
+	 * Also, by default we preserve any mixed case words in the dictionary,
 	 * for example: "dB", "Mb", proper names, German nouns, that always start with a capital,
 	 * or Dutch words such as: "'s-Hertogenbosch".
 	 */
-	public String adjustSuggestionTextCase(Language language, String word, int newTextCase) {
+	public String adjustSuggestionTextCase(Text word, int newTextCase) {
 		switch (newTextCase) {
 			case InputMode.CASE_UPPER:
-				return word.toUpperCase(language.getLocale());
+				return word.toUpperCase();
 			case InputMode.CASE_LOWER:
-				return word.toLowerCase(language.getLocale());
+				return word.toLowerCase();
 			case InputMode.CASE_CAPITALIZE:
-				return language.isMixedCaseWord(word) || language.isUpperCaseWord(word) ? word : language.capitalize(word);
+				return word.isMixedCase() || word.isUpperCase() ? word.toString() : word.capitalize();
 			default:
-				return word;
+				return word.toString();
 		}
 	}

@ -42,7 +42,7 @@ public class AutoTextCase {
 	 * For example, this function will return CASE_LOWER by default, but CASE_UPPER at the beginning
 	 * of a sentence.
 	 */
-	public int determineNextWordTextCase(int currentTextCase, int textFieldTextCase, String textBeforeCursor) {
+	public int determineNextWordTextCase(int currentTextCase, int textFieldTextCase, Text textBeforeCursor) {
 		if (
 			// When the setting is off, don't do any changes.
 			!settings.getAutoTextCase()
@ -63,12 +63,12 @@ public class AutoTextCase {
 		}

 		// start of sentence, excluding after "..."
-		if (TextTools.isStartOfSentence(textBeforeCursor)) {
+		if (textBeforeCursor.isStartOfSentence()) {
 			return InputMode.CASE_CAPITALIZE;
 		}

 		// this is mostly for English "I"
-		if (TextTools.isNextToWord(textBeforeCursor)) {
+		if (textBeforeCursor.isNextToWord()) {
 			return InputMode.CASE_LOWER;
 		}

--- a/app/src/main/java/io/github/sspanak/tt9/languages/Language.java
+++ b/app/src/main/java/io/github/sspanak/tt9/languages/Language.java
@ -111,7 +111,7 @@ public class Language {

 	final public String getName() {
 		if (name == null) {
-			name = locale != null ? capitalize(locale.getDisplayLanguage(locale)) : "";
+			name = locale != null ? new Text(this, locale.getDisplayLanguage(locale)).capitalize() : ""; // a null locale yields an empty name instead of a NullPointerException
 		}

 		return name;
@ -214,35 +214,6 @@ public class Language {
 		}
 	}

-	public String capitalize(String word) {
-		if (word == null) {
-			return null;
-		}
-
-		String capitalizedWord = "";
-
-		if (!word.isEmpty()) {
-			capitalizedWord += word.substring(0, 1).toUpperCase(locale);
-		}
-
-		if (word.length() > 1) {
-			capitalizedWord += word.substring(1).toLowerCase(locale);
-		}
-
-		return capitalizedWord;
-	}
-
-	public boolean isMixedCaseWord(String word) {
-		return
-			word != null
-			&& !word.toLowerCase(locale).equals(word)
-			&& !word.toUpperCase(locale).equals(word);
-	}
-
-	public boolean isUpperCaseWord(String word) {
-		return word != null && word.toUpperCase(locale).equals(word);
-	}
-
 	public ArrayList<String> getKeyCharacters(int key, int characterGroup) {
 		if (key < 0 || key >= layout.size()) {
 			return new ArrayList<>();
--- a/app/src/main/java/io/github/sspanak/tt9/languages/Text.java
+++ b/app/src/main/java/io/github/sspanak/tt9/languages/Text.java
@ -0,0 +1,120 @@
+package io.github.sspanak.tt9.languages;
+
+import androidx.annotation.NonNull;
+
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Locale;
+import java.util.TimeZone;
+import java.util.regex.Pattern;
+
+public class Text { // Pairs a string with an optional Language (both fields are final) and offers casing, punctuation and position checks on it.
+	private static final Pattern containsOtherThan1 = Pattern.compile("[02-9]"); // any digit except 1
+	private static final Pattern previousIsLetter = Pattern.compile("\\p{L}$"); // a Unicode letter at the end of the string
+	private static final Pattern nextIsPunctuation = Pattern.compile("^\\p{Punct}"); // a punctuation character at the start of the string
+	private static final Pattern nextToWord = Pattern.compile("\\b$"); // a word boundary at the end of the string
+	private static final Pattern startOfSentence = Pattern.compile("(?<!\\.)(^|[.?!؟¿¡])\\s+$"); // trailing whitespace right after the text start or one of . ? ! ؟ ¿ ¡ that does not itself follow a dot
+
+
+	private final Language language; // null when the single-argument constructor was used
+	private final String text; // may be null; every instance method checks for that
+
+	public Text(Language language, String text) {
+		this.language = language;
+		this.text = text;
+	}
+
+	public Text(String text) { // language-less variant: capitalize() returns the text as is; isMixedCase() and isUpperCase() return false
+		this.language = null;
+		this.text = text;
+	}
+
+	public String capitalize() { // upper-cases only the first character, using the language locale; the rest is left untouched
+		if (language == null || text == null || text.isEmpty() || !language.hasUpperCase()) {
+			return text; // nothing to capitalize; note that this is null when the text is null
+		}
+
+		if (text.length() == 1) {
+			return text.toUpperCase(language.getLocale());
+		} else {
+			return text.substring(0, 1).toUpperCase(language.getLocale()) + text.substring(1);
+		}
+	}
+
+	public static boolean containsOtherThan1(String str) { // true when "str" contains any of the digits 0 or 2-9; false for null
+		return str != null && containsOtherThan1.matcher(str).find();
+	}
+
+	public boolean isEmpty() { // a null text counts as empty
+		return text == null || text.isEmpty();
+	}
+
+	public boolean isMixedCase() { // true when the text equals neither its lower-case nor its upper-case form in the language locale
+		return
+			language != null
+			&& text != null
+			&& !text.toLowerCase(language.getLocale()).equals(text)
+			&& !text.toUpperCase(language.getLocale()).equals(text);
+	}
+
+	public boolean isNextToWord() { // true when the text ends at a word boundary
+		return text != null && nextToWord.matcher(text).find();
+	}
+
+	public boolean isStartOfSentence() { // true when the text ends with whitespace that opens a new sentence (see the startOfSentence pattern)
+		return text != null && startOfSentence.matcher(text).find();
+	}
+
+	public boolean isUpperCase() { // also true for text without cased letters (e.g. digits or ""), because upper-casing leaves it unchanged
+		return language != null && text != null && text.toUpperCase(language.getLocale()).equals(text);
+	}
+
+	public boolean nextIsPunctuation() { // true when the first character of the text is punctuation
+		return text != null && !text.isEmpty() && nextIsPunctuation.matcher(text).find();
+	}
+
+	public static boolean previousIsLetter(String str) { // true when "str" ends with a letter; false for null
+		return str != null && previousIsLetter.matcher(str).find();
+	}
+
+	public boolean startsWithWhitespace() { // checks the first character with Character.isWhitespace()
+		return text != null && !text.isEmpty() && Character.isWhitespace(text.charAt(0));
+	}
+
+	public boolean startsWithNumber() { // checks the first character with Character.isDigit(), which accepts any Unicode digit, not only 0-9
+		return text != null && !text.isEmpty() && Character.isDigit(text.charAt(0));
+	}
+
+	public String toLowerCase() { // lower-cases the whole text; a null text becomes ""
+		if (text == null) {
+			return "";
+		} else {
+			return text.toLowerCase(language != null ? language.getLocale() : Locale.getDefault()); // without a language, fall back to Locale.getDefault()
+		}
+	}
+
+	public String toUpperCase() { // upper-cases the whole text; a null text becomes ""
+		if (text == null) {
+			return "";
+		} else {
+			return text.toUpperCase(language != null ? language.getLocale() : Locale.getDefault()); // without a language, fall back to Locale.getDefault()
+		}
+	}
+
+	public static String unixTimestampToISODate(long timestamp) { // formats the timestamp as "yyyy-MM-dd HH:mm:ss" in the default time zone
+		if (timestamp < 0) {
+			return "--"; // placeholder for negative timestamps
+		}
+
+		SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US);
+		sdf.setTimeZone(TimeZone.getDefault());
+
+		return sdf.format(new Date(timestamp)); // NOTE(review): Date(long) expects milliseconds since the epoch; confirm callers do not pass seconds
+	}
+
+	@NonNull
+	@Override
+	public String toString() { // never null: a null text becomes ""
+		return text == null ? "" : text;
+	}
+}