diff --git a/assets/uk-utf8.csv b/assets/uk-utf8.csv index f431addc..8e7fe3d1 100644 --- a/assets/uk-utf8.csv +++ b/assets/uk-utf8.csv @@ -4332,7 +4332,6 @@ маті 62 махи 38 маху 49 - маца 61 маци 44 мацу 51 diff --git a/build.gradle b/build.gradle index 3d2907a2..4adeda98 100644 --- a/build.gradle +++ b/build.gradle @@ -80,16 +80,41 @@ def getReleaseVersion = { -> return "${getVersionName()} (${getCurrentGitHash()})" } +def isPunctuationInWordsAllowed (String dictionaryFile) { + boolean isAllowed = false + + file("${project.projectDir}/src/io/github/sspanak/tt9/languages/definitions").listFiles().each { file -> + boolean isTheDefinitionFile = false + file.eachLine {line -> + if (line.contains(dictionaryFile)) { + isTheDefinitionFile = true + } + } + + if (isTheDefinitionFile) { + file.eachLine {line -> + if (line.matches(".+?isPunctuationPartOfWords\\s*=\\s*true.+?")) { + isAllowed = true + } + } + } + } + + return isAllowed +} + task validateDictionaries { inputs.dir fileTree(dir:'assets', excludes:['dict.properties']) outputs.file "${project.buildDir}/dict.validation.txt" doLast { - final String csvDelimiter = ' ' // TAB + final String CSV_DELIMITER = ' ' // TAB + final GEOGRAPHICAL_NAME = ~"[A-Z]\\w+-[^\\n]+" + final PUNCTUATION_CHARS = ~".*?\\p{Punct}(? 1 ? parts[1] : "" if (frequency.length() > 0 && !frequency.matches("^\\d+\$")) { isFileValid = false errorCount++ - errors += "Dictionary '" + file.name + "' is invalid. Found out-of-range word frequency: '" + frequency + "' on line " + lineNumber + ". Frequency must be a non-negative integer. \n" + errors += "Dictionary '" + file.name + "' is invalid. Found out-of-range word frequency: '" + frequency + "' on line " + lineNumber + ". Frequency must be a non-negative integer.\n" } if (word.matches("(\\d.+?|.+?\\d|\\d)")) { isFileValid = false errorCount++ - errors += "Dictionary '" + file.name + "' is invalid. Found numbers on line " + lineNumber + ". Please, remove all numbers.\n" + errors += "Dictionary '" + file.name + "' is invalid. Found numbers on line " + lineNumber + ". Remove all numbers.\n" } if (word.matches("^\\P{L}+\$")) { @@ -141,7 +173,13 @@ task validateDictionaries { errors += "Dictionary '" + file.name + "' is invalid. Found a single letter: '" + word + "' on line " + lineNumber + ". Only uppercase single letters are allowed. The rest of the alphabet will be added automatically.\n" } - String uniqueWordKey = word ==~ geographicalName ? word : word.toLowerCase() + if (!isPunctuationAllowed && word.matches(PUNCTUATION_CHARS)) { + isFileValid = false + errorCount++ + errors += "Dictionary '" + file.name + "' is invalid. Found a punctuation mark in word: '" + word + "' on line " + lineNumber + ". Remove all punctuation characters when the language definition disallows them or update the definition.\n" + } + + String uniqueWordKey = word ==~ GEOGRAPHICAL_NAME ? word : word.toLowerCase() if (uniqueWords[uniqueWordKey] != null && uniqueWords[uniqueWordKey] == true) { isFileValid = false errorCount++ @@ -150,7 +188,7 @@ task validateDictionaries { uniqueWords[uniqueWordKey] = true } - if (errorCount >= MAX_ERRORS ) { + if (errorCount >= MAX_ERRORS) { errors += "Too many errors! Aborting.\n" } } diff --git a/src/io/github/sspanak/tt9/db/DictionaryLoader.java b/src/io/github/sspanak/tt9/db/DictionaryLoader.java index 0fa31234..a9824030 100644 --- a/src/io/github/sspanak/tt9/db/DictionaryLoader.java +++ b/src/io/github/sspanak/tt9/db/DictionaryLoader.java @@ -25,7 +25,6 @@ public class DictionaryLoader { private final AssetManager assets; private final SettingsStore settings; - private final Pattern containsPunctuation = Pattern.compile("\\p{Punct}(?