Build scripts cleanup and dictionary loading optimization
* moved the source languages out of assets/ into their own directory (#356) * split build.gradle into several smaller files * improved word frequency validation during build time * slightly optimized dictionary loading speed using pre-calculated file size * fixed a potential crash when loading invalid assets * fixed dictionary loading progress starting at 100% then jumping to 0% when manually loading two dictionaries one after another * documentation update
This commit is contained in:
parent
d8c2f7fc15
commit
44ecb8999e
50 changed files with 367 additions and 320 deletions
16
gradle/scripts/constants.gradle
Normal file
16
gradle/scripts/constants.gradle
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
// Names of the directories that hold the language sources and build artifacts.
ext.LANGUAGES_DIR_NAME = 'languages'
ext.DEFINITIONS_DIR_NAME = 'definitions'
ext.DICTIONARIES_DIR_NAME = 'dictionaries'
ext.DICTIONARY_SIZES_DIR_NAME = 'dictionary-sizes'

// Input locations, relative to the project root.
ext.LANGUAGES_INPUT_DIR = "${project.rootDir}/${LANGUAGES_DIR_NAME}"
ext.DEFINITIONS_INPUT_DIR = "${LANGUAGES_INPUT_DIR}/${DEFINITIONS_DIR_NAME}"
ext.DICTIONARIES_INPUT_DIR = "${LANGUAGES_INPUT_DIR}/${DICTIONARIES_DIR_NAME}"

// Output locations mirror the input layout under "<root>/assets".
// They are built directly instead of with String.replace(): replace() substitutes
// EVERY occurrence of the root path, so a short root such as "/d" would also
// corrupt "/definitions" and "/dictionaries".
ext.LANGUAGES_OUTPUT_DIR = "${project.rootDir}/assets/${LANGUAGES_DIR_NAME}".toString()
ext.DEFINITIONS_OUTPUT_DIR = "${LANGUAGES_OUTPUT_DIR}/${DEFINITIONS_DIR_NAME}".toString()
ext.DICTIONARIES_OUTPUT_DIR = "${LANGUAGES_OUTPUT_DIR}/${DICTIONARIES_DIR_NAME}".toString()

// Word and frequency are TAB-separated. The escape sequence is used so that editors
// or formatters cannot silently turn the delimiter into spaces.
ext.CSV_DELIMITER = '\t'
ext.MAX_WORD_FREQUENCY = 255
ext.MAX_ERRORS = 50
|
||||
6
gradle/scripts/dictionary-tools.gradle
Normal file
6
gradle/scripts/dictionary-tools.gradle
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
/**
 * Writes one "<dictionary file name>.size" file per dictionary, containing the
 * number of "\n"-separated lines in that dictionary.
 *
 * @param dictionariesDir directory that is scanned (recursively, via fileTree) for dictionaries
 * @param sizesDir        directory (File or path string) that receives the ".size" files;
 *                        it is created when it does not exist yet
 */
ext.getDictionarySizes = { dictionariesDir, sizesDir ->
    // Writing into a missing directory would fail with FileNotFoundException,
    // so make sure the destination is there before producing any output.
    def outputDir = sizesDir instanceof File ? sizesDir : new File(sizesDir.toString())
    outputDir.mkdirs()

    fileTree(dir: dictionariesDir).forEach { dictionary ->
        def dictionarySize = dictionary.exists() ? dictionary.text.split("\n").length : 0
        // File.text expects a String; convert explicitly instead of relying on coercion.
        new File(outputDir, "${dictionary.getName()}.size").text = String.valueOf(dictionarySize)
    }
}
|
||||
194
gradle/scripts/validate-languages.gradle
Normal file
194
gradle/scripts/validate-languages.gradle
Normal file
|
|
@ -0,0 +1,194 @@
|
|||
/**
 * Checks the overall shape of a single dictionary line.
 *
 * @param line       the raw line, as read from the dictionary file
 * @param lineNumber the 1-based position of the line, used in the message
 * @return an error description, or an empty string when the line looks fine
 */
static def validateDictionaryLine(String line, int lineNumber) {
    // An empty line carries no word at all.
    if (line == "") {
        return "There is no word on line ${lineNumber}. Remove all empty lines."
    }

    // A space means either several words on one line or a phrase.
    if (line.contains(" ")) {
        return "Found space on line ${lineNumber}. Make sure each word is on a new line. Phrases are not allowed."
    }

    return ''
}
|
||||
|
||||
/**
 * Extracts the characters listed on one layout line of a language definition,
 * i.e. a line of the form "  - [a, b, c]".
 *
 * @param line a single line of the language definition file
 * @return the listed characters with commas and spaces removed, or an empty
 *         string for PUNCTUATION/SPECIAL lines and for lines that are not layout entries
 */
static def extractAlphabetCharsFromLine(String line) {
    // Placeholder keys do not contribute letters to the alphabet.
    boolean isPlaceholder = line.contains('PUNCTUATION') || line.contains('SPECIAL')
    if (isPlaceholder) {
        return ''
    }

    boolean isLayoutEntry = line.matches('\\s+- \\[.+?\\].*')
    if (!isLayoutEntry) {
        return ''
    }

    // Drop the leading "  - [" and everything from the first "]" onwards...
    String withoutPrefix = line.replaceFirst('^\\s+- \\[', '')
    String listedChars = withoutPrefix.replaceFirst('\\].*', '')

    // ...then remove the list separators.
    return listedChars.replace(',', '').replace(' ', '')
}
|
||||
|
||||
/**
 * Validates a single dictionary word.
 *
 * @param word            the word to check (without the frequency column)
 * @param lineNumber      the 1-based line of the word, used in the messages
 * @param validCharacters a regular expression that the whole word must match
 * @param errorMsgPrefix  text put in front of every reported problem
 * @return a two-element list: [number of problems found, newline-terminated messages]
 */
static def validateDictionaryWord(String word, int lineNumber, String validCharacters, String errorMsgPrefix) {
    int errorCount = 0
    def errors = ''

    // Look for a digit ANYWHERE in the word. The previous pattern only caught digits
    // at the beginning or at the end, so "a1b" was misreported as an alphabet problem.
    if (word.matches(".*\\d.*")) {
        errorCount++
        errors += "${errorMsgPrefix}. Found numbers on line ${lineNumber}. Remove all numbers.\n"
    }

    // A word that contains no letters at all.
    if (word.matches("^\\P{L}+\$")) {
        errorCount++
        errors += "${errorMsgPrefix}. Found a garbage word: '${word}' on line ${lineNumber}.\n"
    }

    // A word that consists of a single character.
    if (word.matches("^.\$")) {
        errorCount++
        errors += "${errorMsgPrefix}. Found a single letter: '${word}' on line ${lineNumber}. Only uppercase single letters are allowed. The rest of the alphabet will be added automatically.\n"
    }

    // Check the alphabet only when nothing else is wrong, to avoid reporting
    // the same word twice.
    if (errorCount == 0 && !word.matches(validCharacters)) {
        errorCount++
        errors += "${errorMsgPrefix}. Word '${word}' on line ${lineNumber} contain characters outside of the defined alphabet: $validCharacters.\n"
    }

    return [errorCount, errors]
}
|
||||
|
||||
/**
 * Validates every language definition found in definitionsDir together with the
 * dictionary it refers to.
 *
 * For each definition, a "<file name> OK" or "<file name> INVALID" line is appended to
 * outputFile. Validation stops collecting problems once MAX_ERRORS is reached.
 *
 * @param definitionsDir  directory containing the language definition files
 * @param dictionariesDir directory containing the dictionary files
 * @param outputFile      file that receives the per-language result; it is truncated first
 * @throws GradleException with all collected messages when at least one problem was found
 */
ext.validateLanguageFiles = { definitionsDir, dictionariesDir, outputFile ->
    final GEOGRAPHICAL_NAME = ~"[A-Z]\\w+-[^\\n]+"

    String errors = ""
    int errorCount = 0

    outputFile.text = ""

    fileTree(definitionsDir).getFiles().each { File languageFile ->
        if (errorCount >= MAX_ERRORS) {
            return
        }

        println "Validating language: ${languageFile.name}"

        boolean isFileValid = true

        boolean hasLayout = false
        boolean isLocaleValid = false
        String localeString = ''
        String dictionaryFileName = ''

        // Definitions whose file name contains "Hebrew" start with the double quote
        // as a valid character.
        String alphabet = languageFile.name.contains("Hebrew") ? '"' : ''

        // ---- pass 1: the language definition itself ----
        languageFile.eachLine { line ->
            // Any top-level key that is not one of the known properties is an error.
            if (
                line.matches("^[a-zA-Z].*")
                && !line.startsWith("abcString")
                && !line.startsWith("dictionaryFile")
                && !line.startsWith("hasUpperCase")
                && !line.startsWith("layout")
                && !line.startsWith("locale")
                && !line.startsWith("name")
            ) {
                isFileValid = false
                def parts = line.split(":")
                def property = parts.length > 0 ? parts[0] : line

                errorCount++
                errors += "Language '${languageFile.name}' is invalid. Found unknown property: '${property}'.\n"
            }

            if (line.startsWith("hasUpperCase") && !line.endsWith("yes") && !line.endsWith("no")) {
                def invalidVal = line.replace("hasUpperCase:", "").trim()
                isFileValid = false
                errorCount++
                errors += "Language '${languageFile.name}' is invalid. Unrecognized 'hasUpperCase' value: '${invalidVal}'. Only 'yes' and 'no' are allowed.\n"
            }

            if (line.startsWith("layout")) {
                hasLayout = true
            }

            if (line.startsWith("locale")) {
                localeString = line.replace("locale:", "").trim()
                isLocaleValid = line.matches("^locale:\\s*[a-z]{2}(?:-[A-Z]{2})?")
            }

            if (line.startsWith("dictionaryFile")) {
                dictionaryFileName = line.replace("dictionaryFile:", "").trim()
            }

            // Layout entries ("  - [a, b, c]") contribute to the alphabet.
            def lineCharacters = extractAlphabetCharsFromLine(line)
            alphabet += lineCharacters
        }

        if (!hasLayout) {
            isFileValid = false
            errorCount++
            errors += "Language '${languageFile.name}' is invalid. Missing 'layout' property.\n"
        }

        if (alphabet.isEmpty()) {
            isFileValid = false
            errorCount++
            errors += "Language '${languageFile.name}' is invalid. No language characters found. Make sure 'layout' contains series of characters per each key in the format: ' - [a, b, c]' and so on\n"
        }

        if (!isLocaleValid) {
            isFileValid = false
            errorCount++
            def msg = localeString.isEmpty() ? "Missing 'locale' property." : "Unrecognized locale format: '${localeString}'"
            errors += "Language '${languageFile.name}' is invalid. ${msg}\n"
        }

        // ---- pass 2: the dictionary that the definition points to ----
        def dictionaryFile = new File("$dictionariesDir/${dictionaryFileName}")
        if (dictionaryFileName.isEmpty() || !dictionaryFile.exists()) {
            errorCount++
            errors += "Could not find dictionary file: '${dictionaryFileName}' in: '${dictionariesDir}'. Make sure 'dictionaryFile' is set correctly in: '${languageFile.name}'.\n"

            outputFile.text += "${languageFile.name} INVALID \n"
            return
        }

        // Words may use the alphabet (in both cases, when the alphabet has a distinct
        // upper case), hyphens and apostrophes.
        def validChars = alphabet.toUpperCase() == alphabet ? "^[${alphabet}\\-']+\$" : "^[${alphabet}${alphabet.toUpperCase()}\\-']+\$"
        def uniqueWords = new HashSet<String>()
        int lineNumber = 0

        dictionaryFile.eachLine { line ->
            if (errorCount >= MAX_ERRORS) {
                return
            }

            lineNumber++

            String error = validateDictionaryLine(line, lineNumber)
            if (!error.isEmpty()) {
                isFileValid = false
                errorCount++
                errors += "Dictionary '${dictionaryFile.name}' is invalid. ${error}.\n"
                return
            }

            String[] parts = line.split(CSV_DELIMITER, 2)
            String word = parts[0]

            // Parse the frequency defensively: a non-numeric value must be reported as a
            // validation error, not abort the whole build with a NumberFormatException.
            String rawFrequency = parts.length > 1 ? parts[1] : "0"
            Integer frequency = rawFrequency.isInteger() ? rawFrequency.toInteger() : null

            if (frequency == null || frequency < 0 || frequency > MAX_WORD_FREQUENCY) {
                def shownFrequency = frequency != null ? frequency : rawFrequency
                isFileValid = false
                errorCount++
                errors += "Dictionary '${dictionaryFile.name}' is invalid. Found out-of-range word frequency: '${shownFrequency}' on line ${lineNumber}. Frequency must be an integer between 0 and ${MAX_WORD_FREQUENCY}.\n"
            }

            def (wordErrorCount, wordErrors) = validateDictionaryWord(word, lineNumber, validChars, "Dictionary '${dictionaryFile.name}' is invalid")
            isFileValid = wordErrorCount > 0 ? false : isFileValid
            errorCount += wordErrorCount
            errors += wordErrors

            // Words matching GEOGRAPHICAL_NAME are compared case-sensitively,
            // everything else case-insensitively.
            String uniqueWordKey = word ==~ GEOGRAPHICAL_NAME ? word : word.toLowerCase()
            // Set.add() returns false when the key is already present.
            if (!uniqueWords.add(uniqueWordKey)) {
                isFileValid = false
                errorCount++
                errors += "Dictionary '${dictionaryFile.name}' is invalid. Found a repeating word: '${word}' on line ${lineNumber}. Ensure all words appear only once.\n"
            }

            if (errorCount >= MAX_ERRORS) {
                errors += "Too many errors! Aborting.\n"
            }
        }

        outputFile.text += "${languageFile.name} ${isFileValid ? 'OK' : 'INVALID'}\n"
    }

    if (errors != "") {
        throw new GradleException(errors)
    }
}
|
||||
62
gradle/scripts/version-tools.gradle
Normal file
62
gradle/scripts/version-tools.gradle
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
/**
 * Runs a command and returns what it printed on standard output.
 *
 * When the GITCMDPREFIX environment variable is set, its value is put in front
 * of the command before it is split into arguments.
 *
 * @param cmdStr the command line; it is tokenized on whitespace
 * @return the trimmed standard output of the command
 */
def execThing (String cmdStr) {
    String prefix = System.getenv("GITCMDPREFIX")
    String fullCommand = prefix == null ? cmdStr : prefix + cmdStr

    def captured = new ByteArrayOutputStream()
    exec {
        commandLine fullCommand.tokenize()
        standardOutput = captured
    }

    return captured.toString().trim()
}
|
||||
|
||||
/**
 * @return the output of "git log -1 --format=%h", i.e. the abbreviated hash of
 *         the most recent commit
 */
def getCurrentGitHash() {
    String abbreviatedHash = execThing('git log -1 --format=%h')
    return abbreviatedHash
}
|
||||
|
||||
/**
 * Builds the version name from the git history, in the form
 * "<major>.<minor>[-beta]".
 *
 * major - the number of tags matching "v[0-9]*"
 * minor - the number of commits since the latest such tag; stays "0" unless
 *         there is more than one version tag
 * -beta - appended when the latest tag of any kind points at the current commit
 *         and its name contains "-beta"
 *
 * @return the generated version name
 */
def generateVersionName() {
    // major version
    String rawVersionTags = execThing('git tag --list v[0-9]*')
    int majorVersion = 0
    if (rawVersionTags != "") {
        majorVersion = rawVersionTags.split('\n').size()
    }

    // minor version
    String minorVersion = "0"
    if (majorVersion > 1) {
        String latestVersionTag = execThing('git describe --match v[0-9]* --tags --abbrev=0')
        String commitLog = execThing("git log ${latestVersionTag}..HEAD --oneline")
        if (commitLog != '') {
            minorVersion = String.valueOf(commitLog.split('\n').size())
        }
    }

    // beta string, if this is a beta
    String lastTagName = ""
    if (execThing('git tag --list') != "") {
        lastTagName = execThing('git describe --tags --abbrev=0')
    }

    String lastTagHash = ""
    if (lastTagName != "") {
        lastTagHash = execThing("git log -1 --format=%h $lastTagName")
    }

    boolean isBetaBuild = lastTagHash == getCurrentGitHash() && lastTagName.contains("-beta")
    String betaSuffix = isBetaBuild ? '-beta' : ''

    return "${majorVersion}.${minorVersion}${betaSuffix}"
}
|
||||
|
||||
// Exposes generateVersionName() to the build scripts that apply this file.
ext.getVersionName = { ->
    generateVersionName()
}
|
||||
|
||||
// The version code is the number of commits reachable from HEAD.
ext.getVersionCode = { ->
    return Integer.valueOf(execThing("git rev-list --count HEAD"))
}
|
||||
|
||||
// Version label for debug builds: the current abbreviated commit hash, marked as "(debug)".
ext.getDebugVersion = { ->
    def currentHash = getCurrentGitHash()
    return "git-${currentHash} (debug)"
}
|
||||
|
||||
// Version label for release builds: the generated version name followed by the
// current abbreviated commit hash in parentheses.
ext.getReleaseVersion = { ->
    def versionName = generateVersionName()
    def currentHash = getCurrentGitHash()
    return "${versionName} (${currentHash})"
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue