From e74a532e665c919c82c54c3fea0516d7c9581d45 Mon Sep 17 00:00:00 2001 From: sspanak Date: Tue, 14 May 2024 11:31:00 +0300 Subject: [PATCH] fixed incorrect validation of words with capital letters --- app/validate-languages.gradle | 15 +++++++++------ scripts/remove-dictionary-repeating-words.js | 10 +++++++--- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/app/validate-languages.gradle b/app/validate-languages.gradle index 4a81f777..d2e90bc8 100644 --- a/app/validate-languages.gradle +++ b/app/validate-languages.gradle @@ -119,13 +119,16 @@ static def parseLanguageFile(File languageFile, String dictionariesDir) { errorMsg += "Could not find dictionary file: '${dictionaryFileName}' in: '${dictionariesDir}'. Make sure 'dictionaryFile' is set correctly in: '${languageFile.name}'.\n" } - return [alphabet, dictionaryFile, errorCount, errorMsg] + String[] localeParts = localeString.split(("[-_]")) + Locale locale = new Locale(localeParts[0], localeParts.length > 1 ? localeParts[1] : "") + + return [alphabet, locale, dictionaryFile, errorCount, errorMsg] } -static def parseDictionaryFile(String alphabet, File dictionaryFile, int MAX_ERRORS, String CSV_DELIMITER, int MAX_WORD_FREQUENCY) { +static def parseDictionaryFile(String alphabet, Locale locale, File dictionaryFile, int MAX_ERRORS, String CSV_DELIMITER, int MAX_WORD_FREQUENCY) { final GEOGRAPHICAL_NAME = ~"[A-Z]\\w+-[^\\n]+" - final VALID_CHARS = alphabet.toUpperCase() == alphabet ? "^[${alphabet}\\-\\.']+\$" : "^[${alphabet}${alphabet.toUpperCase()}\\-\\.']+\$" + final VALID_CHARS = alphabet.toUpperCase(locale) == alphabet ? "^[${alphabet}\\-\\.']+\$" : "^[${alphabet}${alphabet.toUpperCase(locale)}\\-\\.']+\$" final int MAX_SORTING_ERRORS = Math.ceil(MAX_ERRORS / 10) def uniqueWords = [:] @@ -175,7 +178,7 @@ static def parseDictionaryFile(String alphabet, File dictionaryFile, int MAX_ERR errorCount += wordErrorCount errorMsg += wordErrors - String uniqueWordKey = word ==~ GEOGRAPHICAL_NAME ? word : word.toLowerCase() + String uniqueWordKey = word ==~ GEOGRAPHICAL_NAME ? word : word.toLowerCase(locale) if (uniqueWords[uniqueWordKey] != null && uniqueWords[uniqueWordKey] == true) { errorCount++ errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Found a repeating word: '${word}' on line ${lineNumber}. Ensure all words appear only once.\n" @@ -205,14 +208,14 @@ ext.validateLanguageFiles = { definitionsDir, dictionariesDir, validationDir -> return "Too many errors! Skipping: ${languageFile}\n" } - def (alphabet, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageFile(languageFile, dictionariesDir) + def (alphabet, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageFile(languageFile, dictionariesDir) errorCount += langFileErrorCount if (!langFileErrorMsg.isEmpty()) { outputFile.text += "${contentHash} INVALID" return langFileErrorMsg } - def (dictionaryErrorMsg, dictionaryErrorCount) = parseDictionaryFile(alphabet, dictionaryFile, MAX_ERRORS, CSV_DELIMITER, MAX_WORD_FREQUENCY) + def (dictionaryErrorMsg, dictionaryErrorCount) = parseDictionaryFile(alphabet, locale, dictionaryFile, MAX_ERRORS, CSV_DELIMITER, MAX_WORD_FREQUENCY) errorCount += dictionaryErrorCount if (!dictionaryErrorMsg.isEmpty()) { outputFile.text += "${contentHash} INVALID" diff --git a/scripts/remove-dictionary-repeating-words.js b/scripts/remove-dictionary-repeating-words.js index 5142ebd9..d46b0153 100644 --- a/scripts/remove-dictionary-repeating-words.js +++ b/scripts/remove-dictionary-repeating-words.js @@ -66,11 +66,15 @@ async function removeRepeatingWords({ fileName, locale, preferLowercase }) { continue; } - if (!wordMap.has(lowercaseKey)) { - wordMap.set(lowercaseKey, line); + if (wordMap.has(lowercaseKey) && wordMap.get(lowercaseKey) !== line) { + if (preferLowercase && lowercaseKey === line) { + wordMap.set(lowercaseKey, line); + } else if (!preferLowercase && lowercaseKey !== line) { + wordMap.set(lowercaseKey, line); + } } - if (!preferLowercase && wordMap.has(lowercaseKey) && !wordMap.has(line)) { + if (!wordMap.has(lowercaseKey)) { wordMap.set(lowercaseKey, line); } }