diff --git a/gradle/scripts/validate-languages.gradle b/gradle/scripts/validate-languages.gradle index a08fbb4e..c6ceb77a 100644 --- a/gradle/scripts/validate-languages.gradle +++ b/gradle/scripts/validate-languages.gradle @@ -1,21 +1,3 @@ -static def validateDictionaryLine(String line, int lineNumber) { - if (line == "") { - return "There is no word on line ${lineNumber}. Remove all empty lines." - } else if (line.contains(" ")) { - return "Found space on line ${lineNumber}. Make sure each word is on a new line. Phrases are not allowed." - } - - return '' -} - -static def extractAlphabetCharsFromLine(String line) { - if (line.contains('PUNCTUATION') || line.contains('SPECIAL') || !line.matches('\\s+- \\[.+?\\].*')) { - return '' - } - - return line.replaceFirst('^\\s+- \\[', '').replaceFirst('\\].*', '').replace(',', '').replace(' ', '') -} - static def validateDictionaryWord(String word, int lineNumber, String validCharacters, String errorMsgPrefix) { int errorCount = 0 def errors = '' @@ -43,117 +25,122 @@ static def validateDictionaryWord(String word, int lineNumber, String validChara return [errorCount, errors] } -ext.validateLanguageFiles = { definitionsDir, dictionariesDir, outputFile -> - final GEOGRAPHICAL_NAME = ~"[A-Z]\\w+-[^\\n]+" - String errors = "" +static def validateDictionaryLine(String line, int lineNumber) { + if (line == "") { + return "There is no word on line ${lineNumber}. Remove all empty lines." + } else if (line.contains(" ")) { + return "Found space on line ${lineNumber}. Make sure each word is on a new line. Phrases are not allowed." + } + + return '' +} + + +static def extractAlphabetCharsFromLine(String line) { + if (line.contains('PUNCTUATION') || line.contains('SPECIAL') || !line.matches('\\s+- \\[.+?\\].*')) { + return '' + } + + return line.replaceFirst('^\\s+- \\[', '').replaceFirst('\\].*', '').replace(',', '').replace(' ', '') +} + + +static def parseLanguageFile(File languageFile, String dictionariesDir) { + String alphabet = languageFile.name.contains("Hebrew") ? '"' : '' + File dictionaryFile int errorCount = 0 + String errorMsg = "" - outputFile.text = "" + boolean hasLayout = false + boolean isLocaleValid = false + String localeString = "" + String dictionaryFileName = "" - fileTree(definitionsDir).getFiles().each { File languageFile -> - if (errorCount >= MAX_ERRORS) { - return - } + for (String line : languageFile.readLines()) { + if ( + line.matches("^[a-zA-Z].*") + && !line.startsWith("abcString") + && !line.startsWith("dictionaryFile") + && !line.startsWith("hasUpperCase") + && !line.startsWith("layout") + && !line.startsWith("locale") + && !line.startsWith("name") + ) { + def parts = line.split(":") + def property = parts.length > 0 ? parts[0] : line - println "Validating language: ${languageFile.name}" - - boolean isFileValid = true - - boolean hasLayout = false - boolean isLocaleValid = false - String localeString = '' - String dictionaryFileName = '' - - String alphabet = languageFile.name.contains("Hebrew") ? '"' : '' - - languageFile.eachLine { line -> - if ( - line.matches("^[a-zA-Z].*") - && !line.startsWith("abcString") - && !line.startsWith("dictionaryFile") - && !line.startsWith("hasUpperCase") - && !line.startsWith("layout") - && !line.startsWith("locale") - && !line.startsWith("name") - ) { - isFileValid = false - def parts = line.split(":") - def property = parts.length > 0 ? parts[0] : line - - errorCount++ - errors += "Language '${languageFile.name}' is invalid. Found unknown property: '${property}'.\n" - } - - if (line.startsWith("hasUpperCase") && !line.endsWith("yes") && !line.endsWith("no")) { - def invalidVal = line.replace("hasUpperCase:", "").trim() - isFileValid = false - errorCount++ - errors += "Language '${languageFile.name}' is invalid. Unrecognized 'hasUpperCase' value: '${invalidVal}'. Only 'yes' and 'no' are allowed.\n" - } - - if (line.startsWith("layout")) { - hasLayout = true - } - - if (line.startsWith("locale")) { - localeString = line.replace("locale:", "").trim() - isLocaleValid = line.matches("^locale:\\s*[a-z]{2}(?:-[A-Z]{2})?") - } - - if (line.startsWith("dictionaryFile")) { - dictionaryFileName = line.replace("dictionaryFile:", "").trim() - } - - def lineCharacters = extractAlphabetCharsFromLine(line) - alphabet += lineCharacters - } - - if (!hasLayout) { - isFileValid = false errorCount++ - errors += "Language '${languageFile.name}' is invalid. Missing 'layout' property.\n" + errorMsg += "Language '${languageFile.name}' is invalid. Found unknown property: '${property}'.\n" } - if (alphabet.isEmpty()) { - isFileValid = false + if (line.startsWith("hasUpperCase") && !line.endsWith("yes") && !line.endsWith("no")) { + def invalidVal = line.replace("hasUpperCase:", "").trim() errorCount++ - errors += "Language '${languageFile.name}' is invalid. No language characters found. Make sure 'layout' contains series of characters per each key in the format: ' - [a, b, c]' and so on\n" + errorMsg += "Language '${languageFile.name}' is invalid. Unrecognized 'hasUpperCase' value: '${invalidVal}'. Only 'yes' and 'no' are allowed.\n" } - if (!isLocaleValid) { - isFileValid = false - errorCount++ - def msg = localeString.isEmpty() ? "Missing 'locale' property." : "Unrecognized locale format: '${localeString}'" - errors += "Language '${languageFile.name}' is invalid. ${msg}\n" + if (line.startsWith("layout")) { + hasLayout = true } - def dictionaryFile = new File("$dictionariesDir/${dictionaryFileName}") - if (dictionaryFileName.isEmpty() || !dictionaryFile.exists()) { - errorCount++ - errors += "Could not find dictionary file: '${dictionaryFileName}' in: '${dictionariesDir}'. Make sure 'dictionaryFile' is set correctly in: '${languageFile.name}'.\n" - - outputFile.text += "${languageFile.name} INVALID \n" - return + if (line.startsWith("locale")) { + localeString = line.replace("locale:", "").trim() + isLocaleValid = line.matches("^locale:\\s*[a-z]{2}(?:-[A-Z]{2})?") } - def validChars = alphabet.toUpperCase() == alphabet ? "^[${alphabet}\\-']+\$" : "^[${alphabet}${alphabet.toUpperCase()}\\-']+\$" - def uniqueWords = [:] - int lineNumber = 0 + if (line.startsWith("dictionaryFile")) { + dictionaryFileName = line.replace("dictionaryFile:", "").trim() + } - dictionaryFile.eachLine {line -> - if (errorCount >= MAX_ERRORS) { - return - } + def lineCharacters = extractAlphabetCharsFromLine(line) + alphabet += lineCharacters + } - lineNumber++ + if (!hasLayout) { + errorCount++ + errorMsg += "Language '${languageFile.name}' is invalid. Missing 'layout' property.\n" + } + + if (alphabet.isEmpty()) { + errorCount++ + errorMsg += "Language '${languageFile.name}' is invalid. No language characters found. Make sure 'layout' contains series of characters per each key in the format: ' - [a, b, c]' and so on\n" + } + + if (!isLocaleValid) { + errorCount++ + def msg = localeString.isEmpty() ? "Missing 'locale' property." : "Unrecognized locale format: '${localeString}'" + errorMsg += "Language '${languageFile.name}' is invalid. ${msg}\n" + } + + dictionaryFile = new File("$dictionariesDir/${dictionaryFileName}") + if (dictionaryFileName.isEmpty() || !dictionaryFile.exists()) { + errorCount++ + errorMsg += "Could not find dictionary file: '${dictionaryFileName}' in: '${dictionariesDir}'. Make sure 'dictionaryFile' is set correctly in: '${languageFile.name}'.\n" + } + + return [alphabet, dictionaryFile, errorCount, errorMsg] +} + + +static def parseDictionaryFile(String alphabet, File dictionaryFile, int MAX_ERRORS, String CSV_DELIMITER, int MAX_WORD_FREQUENCY) { + final GEOGRAPHICAL_NAME = ~"[A-Z]\\w+-[^\\n]+" + final VALID_CHARS = alphabet.toUpperCase() == alphabet ? "^[${alphabet}\\-']+\$" : "^[${alphabet}${alphabet.toUpperCase()}\\-']+\$" + + def uniqueWords = [:] + + int errorCount = 0 + String errorMsg = "" + + def fileContents = dictionaryFile.readLines() + for (int lineNumber = 0; lineNumber < fileContents.size() && errorCount < MAX_ERRORS; lineNumber++) { + String line = fileContents.get(lineNumber) String error = validateDictionaryLine(line, lineNumber) if (!error.isEmpty()) { - isFileValid = false errorCount++ - errors += "Dictionary '${dictionaryFile.name}' is invalid. ${error}.\n" - return + errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. ${error}.\n" + break } String[] parts = line.split(CSV_DELIMITER, 2) @@ -161,34 +148,57 @@ ext.validateLanguageFiles = { definitionsDir, dictionariesDir, outputFile -> final frequency = (parts.length > 1 ? parts[1] : "0") as int if (frequency < 0 || frequency > MAX_WORD_FREQUENCY) { - isFileValid = false errorCount++ - errors += "Dictionary '${dictionaryFile.name}' is invalid. Found out-of-range word frequency: '${frequency}' on line ${lineNumber}. Frequency must be an integer between 0 and ${MAX_WORD_FREQUENCY}.\n" + errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Found out-of-range word frequency: '${frequency}' on line ${lineNumber}. Frequency must be an integer between 0 and ${MAX_WORD_FREQUENCY}.\n" } - def (wordErrorCount, wordErrors) = validateDictionaryWord(word, lineNumber, validChars, "Dictionary '${dictionaryFile.name}' is invalid") - isFileValid = wordErrorCount > 0 ? false : isFileValid + def (wordErrorCount, wordErrors) = validateDictionaryWord(word, lineNumber, VALID_CHARS, "Dictionary '${dictionaryFile.name}' is invalid") errorCount += wordErrorCount - errors += wordErrors + errorMsg += wordErrors String uniqueWordKey = word ==~ GEOGRAPHICAL_NAME ? word : word.toLowerCase() if (uniqueWords[uniqueWordKey] != null && uniqueWords[uniqueWordKey] == true) { - isFileValid = false errorCount++ - errors += "Dictionary '${dictionaryFile.name}' is invalid. Found a repeating word: '${word}' on line ${lineNumber}. Ensure all words appear only once.\n" + errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Found a repeating word: '${word}' on line ${lineNumber}. Ensure all words appear only once.\n" } else { uniqueWords[uniqueWordKey] = true } - - if (errorCount >= MAX_ERRORS) { - errors += "Too many errors! Aborting.\n" - } } - outputFile.text += "${languageFile.name} ${isFileValid ? 'OK' : 'INVALID'}\n" + return [errorMsg, errorCount] +} + + +ext.validateLanguageFiles = { definitionsDir, dictionariesDir, outputFile -> + int errorCount = 0 + + outputFile.text = "" + + def errorStream = fileTree(definitionsDir).getFiles().parallelStream().map { File languageFile -> + if (errorCount >= MAX_ERRORS) { + return "Too many errors! Skipping: ${languageFile}\n" + } + + def (alphabet, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageFile(languageFile, dictionariesDir) + errorCount += langFileErrorCount + if (!langFileErrorMsg.isEmpty()) { + outputFile.text += "${languageFile.name} INVALID \n" + return langFileErrorMsg + } + + def (dictionaryErrorMsg, dictionaryErrorCount) = parseDictionaryFile(alphabet, dictionaryFile, MAX_ERRORS, CSV_DELIMITER, MAX_WORD_FREQUENCY) + errorCount += dictionaryErrorCount + if (!dictionaryErrorMsg.isEmpty()) { + outputFile.text += "${languageFile.name} INVALID \n" + return dictionaryErrorMsg + } + + outputFile.text += "${languageFile.name} OK\n" + return "" } - if (errors != "") { - throw new GradleException(errors) + String errorsMsg = errorStream.reduce("", String::concat) + if (errorsMsg) { + throw new GradleException(errorsMsg) } -} \ No newline at end of file +}