1
0
Fork 0

fixed incorrect validation of words with capital letters

This commit is contained in:
sspanak 2024-05-14 11:31:00 +03:00
parent 0830c7a87b
commit e74a532e66
2 changed files with 16 additions and 9 deletions

View file

@@ -119,13 +119,16 @@ static def parseLanguageFile(File languageFile, String dictionariesDir) {
errorMsg += "Could not find dictionary file: '${dictionaryFileName}' in: '${dictionariesDir}'. Make sure 'dictionaryFile' is set correctly in: '${languageFile.name}'.\n"
}
return [alphabet, dictionaryFile, errorCount, errorMsg]
String[] localeParts = localeString.split(("[-_]"))
Locale locale = new Locale(localeParts[0], localeParts.length > 1 ? localeParts[1] : "")
return [alphabet, locale, dictionaryFile, errorCount, errorMsg]
}
static def parseDictionaryFile(String alphabet, File dictionaryFile, int MAX_ERRORS, String CSV_DELIMITER, int MAX_WORD_FREQUENCY) {
static def parseDictionaryFile(String alphabet, Locale locale, File dictionaryFile, int MAX_ERRORS, String CSV_DELIMITER, int MAX_WORD_FREQUENCY) {
final GEOGRAPHICAL_NAME = ~"[A-Z]\\w+-[^\\n]+"
final VALID_CHARS = alphabet.toUpperCase() == alphabet ? "^[${alphabet}\\-\\.']+\$" : "^[${alphabet}${alphabet.toUpperCase()}\\-\\.']+\$"
final VALID_CHARS = alphabet.toUpperCase(locale) == alphabet ? "^[${alphabet}\\-\\.']+\$" : "^[${alphabet}${alphabet.toUpperCase(locale)}\\-\\.']+\$"
final int MAX_SORTING_ERRORS = Math.ceil(MAX_ERRORS / 10)
def uniqueWords = [:]
@@ -175,7 +178,7 @@ static def parseDictionaryFile(String alphabet, File dictionaryFile, int MAX_ERR
errorCount += wordErrorCount
errorMsg += wordErrors
String uniqueWordKey = word ==~ GEOGRAPHICAL_NAME ? word : word.toLowerCase()
String uniqueWordKey = word ==~ GEOGRAPHICAL_NAME ? word : word.toLowerCase(locale)
if (uniqueWords[uniqueWordKey] != null && uniqueWords[uniqueWordKey] == true) {
errorCount++
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Found a repeating word: '${word}' on line ${lineNumber}. Ensure all words appear only once.\n"
@@ -205,14 +208,14 @@ ext.validateLanguageFiles = { definitionsDir, dictionariesDir, validationDir ->
return "Too many errors! Skipping: ${languageFile}\n"
}
def (alphabet, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageFile(languageFile, dictionariesDir)
def (alphabet, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageFile(languageFile, dictionariesDir)
errorCount += langFileErrorCount
if (!langFileErrorMsg.isEmpty()) {
outputFile.text += "${contentHash} INVALID"
return langFileErrorMsg
}
def (dictionaryErrorMsg, dictionaryErrorCount) = parseDictionaryFile(alphabet, dictionaryFile, MAX_ERRORS, CSV_DELIMITER, MAX_WORD_FREQUENCY)
def (dictionaryErrorMsg, dictionaryErrorCount) = parseDictionaryFile(alphabet, locale, dictionaryFile, MAX_ERRORS, CSV_DELIMITER, MAX_WORD_FREQUENCY)
errorCount += dictionaryErrorCount
if (!dictionaryErrorMsg.isEmpty()) {
outputFile.text += "${contentHash} INVALID"

View file

@@ -66,11 +66,15 @@ async function removeRepeatingWords({ fileName, locale, preferLowercase }) {
continue;
}
if (!wordMap.has(lowercaseKey)) {
if (wordMap.has(lowercaseKey) && wordMap.get(lowercaseKey) !== line) {
if (preferLowercase && lowercaseKey === line) {
wordMap.set(lowercaseKey, line);
} else if (!preferLowercase && lowercaseKey !== line) {
wordMap.set(lowercaseKey, line);
}
}
if (!preferLowercase && wordMap.has(lowercaseKey) && !wordMap.has(line)) {
if (!wordMap.has(lowercaseKey)) {
wordMap.set(lowercaseKey, line);
}
}