Dictionary word order is now validated during the build
This commit is contained in:
parent
4422d41918
commit
2418c9c4c3
1 changed files with 41 additions and 27 deletions
|
|
@ -126,20 +126,22 @@ static def parseLanguageFile(File languageFile, String dictionariesDir) {
|
||||||
static def parseDictionaryFile(String alphabet, File dictionaryFile, int MAX_ERRORS, String CSV_DELIMITER, int MAX_WORD_FREQUENCY) {
|
static def parseDictionaryFile(String alphabet, File dictionaryFile, int MAX_ERRORS, String CSV_DELIMITER, int MAX_WORD_FREQUENCY) {
|
||||||
final GEOGRAPHICAL_NAME = ~"[A-Z]\\w+-[^\\n]+"
|
final GEOGRAPHICAL_NAME = ~"[A-Z]\\w+-[^\\n]+"
|
||||||
final VALID_CHARS = alphabet.toUpperCase() == alphabet ? "^[${alphabet}\\-\\.']+\$" : "^[${alphabet}${alphabet.toUpperCase()}\\-\\.']+\$"
|
final VALID_CHARS = alphabet.toUpperCase() == alphabet ? "^[${alphabet}\\-\\.']+\$" : "^[${alphabet}${alphabet.toUpperCase()}\\-\\.']+\$"
|
||||||
|
final int MAX_SORTING_ERRORS = Math.ceil(MAX_ERRORS / 10)
|
||||||
|
|
||||||
def uniqueWords = [:]
|
def uniqueWords = [:]
|
||||||
|
|
||||||
int errorCount = 0
|
int errorCount = 0
|
||||||
|
int sortingErrorCount = 0
|
||||||
String errorMsg = ""
|
String errorMsg = ""
|
||||||
|
|
||||||
def fileContents = dictionaryFile.readLines()
|
def fileContents = dictionaryFile.readLines()
|
||||||
for (int lineNumber = 1; lineNumber <= fileContents.size() && errorCount < MAX_ERRORS; lineNumber++) {
|
for (int lineNumber = 1, previousWordLength = 0; lineNumber <= fileContents.size() && errorCount < MAX_ERRORS; lineNumber++) {
|
||||||
String line = fileContents.get(lineNumber - 1)
|
String line = fileContents.get(lineNumber - 1)
|
||||||
|
|
||||||
String error = validateDictionaryLine(line, lineNumber)
|
String error = validateDictionaryLine(line, lineNumber)
|
||||||
if (!error.isEmpty()) {
|
if (!error.isEmpty()) {
|
||||||
errorCount++
|
errorCount++
|
||||||
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. ${error}.\n"
|
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. ${error}\n"
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -157,6 +159,18 @@ static def parseDictionaryFile(String alphabet, File dictionaryFile, int MAX_ERR
|
||||||
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Found out-of-range word frequency: '${parts[1]}' on line ${lineNumber}. Frequency must be an integer between 0 and ${MAX_WORD_FREQUENCY}.\n"
|
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Found out-of-range word frequency: '${parts[1]}' on line ${lineNumber}. Frequency must be an integer between 0 and ${MAX_WORD_FREQUENCY}.\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (sortingErrorCount < MAX_SORTING_ERRORS && word.length() < previousWordLength) {
|
||||||
|
sortingErrorCount++
|
||||||
|
errorCount++
|
||||||
|
|
||||||
|
if (sortingErrorCount == MAX_SORTING_ERRORS) {
|
||||||
|
errorMsg += "Too many sorting errors in '${dictionaryFile.name}'. Disabling sorting check until the end of the file.\n"
|
||||||
|
} else {
|
||||||
|
errorMsg += "Dictionary '${dictionaryFile.name}' is not sorted. Word: '${word}' on line ${lineNumber} is shorter than the previous one. Ensure all words are sorted by length and sequence.\n"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
previousWordLength = word.length()
|
||||||
|
|
||||||
def (wordErrorCount, wordErrors) = validateDictionaryWord(word, lineNumber, VALID_CHARS, "Dictionary '${dictionaryFile.name}' is invalid")
|
def (wordErrorCount, wordErrors) = validateDictionaryWord(word, lineNumber, VALID_CHARS, "Dictionary '${dictionaryFile.name}' is invalid")
|
||||||
errorCount += wordErrorCount
|
errorCount += wordErrorCount
|
||||||
errorMsg += wordErrors
|
errorMsg += wordErrors
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue