1
0
Fork 0

dictionary word order is now validated during build

This commit is contained in:
sspanak 2024-04-14 11:33:21 +03:00 committed by Dimo Karaivanov
parent 4422d41918
commit 2418c9c4c3

View file

@ -126,20 +126,22 @@ static def parseLanguageFile(File languageFile, String dictionariesDir) {
static def parseDictionaryFile(String alphabet, File dictionaryFile, int MAX_ERRORS, String CSV_DELIMITER, int MAX_WORD_FREQUENCY) {
final GEOGRAPHICAL_NAME = ~"[A-Z]\\w+-[^\\n]+"
final VALID_CHARS = alphabet.toUpperCase() == alphabet ? "^[${alphabet}\\-\\.']+\$" : "^[${alphabet}${alphabet.toUpperCase()}\\-\\.']+\$"
final int MAX_SORTING_ERRORS = Math.ceil(MAX_ERRORS / 10)
def uniqueWords = [:]
int errorCount = 0
int sortingErrorCount = 0
String errorMsg = ""
def fileContents = dictionaryFile.readLines()
for (int lineNumber = 1; lineNumber <= fileContents.size() && errorCount < MAX_ERRORS; lineNumber++) {
for (int lineNumber = 1, previousWordLength = 0; lineNumber <= fileContents.size() && errorCount < MAX_ERRORS; lineNumber++) {
String line = fileContents.get(lineNumber - 1)
String error = validateDictionaryLine(line, lineNumber)
if (!error.isEmpty()) {
errorCount++
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. ${error}.\n"
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. ${error}\n"
break
}
@ -157,6 +159,18 @@ static def parseDictionaryFile(String alphabet, File dictionaryFile, int MAX_ERR
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Found out-of-range word frequency: '${parts[1]}' on line ${lineNumber}. Frequency must be an integer between 0 and ${MAX_WORD_FREQUENCY}.\n"
}
if (sortingErrorCount < MAX_SORTING_ERRORS && word.length() < previousWordLength) {
sortingErrorCount++
errorCount++
if (sortingErrorCount == MAX_SORTING_ERRORS) {
errorMsg += "Too many sorting errors in '${dictionaryFile.name}'. Disabling sorting check until the end of the file.\n"
} else {
errorMsg += "Dictionary '${dictionaryFile.name}' is not sorted. Word: '${word}' on line ${lineNumber} is shorter than the previous one. Ensure all words are sorted by length and sequence.\n"
}
}
previousWordLength = word.length()
def (wordErrorCount, wordErrors) = validateDictionaryWord(word, lineNumber, VALID_CHARS, "Dictionary '${dictionaryFile.name}' is invalid")
errorCount += wordErrorCount
errorMsg += wordErrors