Fixed the .gradle file indentation and updated the EditorConfig rules
This commit is contained in:
parent
e3d0bac90f
commit
3a25c9f52f
7 changed files with 525 additions and 525 deletions
|
|
@@ -1,7 +1,7 @@
|
||||||
# Editor configuration, see https://editorconfig.org
|
# Editor configuration, see https://editorconfig.org
|
||||||
root = true
|
root = true
|
||||||
|
|
||||||
[*.{java,properties,xml}]
|
[*.{gradle,java,properties,xml}]
|
||||||
charset = utf-8
|
charset = utf-8
|
||||||
indent_style = tab
|
indent_style = tab
|
||||||
indent_size = 2
|
indent_size = 2
|
||||||
|
|
|
||||||
|
|
@@ -5,121 +5,121 @@ import java.util.zip.ZipOutputStream
|
||||||
apply from: 'dictionary-tools.gradle'
|
apply from: 'dictionary-tools.gradle'
|
||||||
|
|
||||||
ext.convertDictionaries = { definitionsInputDir, dictionariesInputDir, dictionariesOutputDir, dictionariesMetaDir ->
|
ext.convertDictionaries = { definitionsInputDir, dictionariesInputDir, dictionariesOutputDir, dictionariesMetaDir ->
|
||||||
int errorCount = 0
|
int errorCount = 0
|
||||||
|
|
||||||
def errorStream = fileTree(dir: definitionsInputDir).getFiles().parallelStream().map { definition ->
|
def errorStream = fileTree(dir: definitionsInputDir).getFiles().parallelStream().map { definition ->
|
||||||
def (_, sounds, noSyllables, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageDefintion(definition, dictionariesInputDir)
|
def (_, sounds, noSyllables, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageDefintion(definition, dictionariesInputDir)
|
||||||
errorCount += langFileErrorCount
|
errorCount += langFileErrorCount
|
||||||
if (!langFileErrorMsg.isEmpty()) {
|
if (!langFileErrorMsg.isEmpty()) {
|
||||||
return langFileErrorMsg
|
return langFileErrorMsg
|
||||||
}
|
}
|
||||||
|
|
||||||
def (conversionErrorCount, conversionErrorMessages) = convertDictionary(definition, dictionaryFile, dictionariesOutputDir, dictionariesMetaDir, DICTIONARY_OUTPUT_EXTENSION, sounds, noSyllables, locale, MAX_ERRORS, CSV_DELIMITER)
|
def (conversionErrorCount, conversionErrorMessages) = convertDictionary(definition, dictionaryFile, dictionariesOutputDir, dictionariesMetaDir, DICTIONARY_OUTPUT_EXTENSION, sounds, noSyllables, locale, MAX_ERRORS, CSV_DELIMITER)
|
||||||
errorCount += conversionErrorCount
|
errorCount += conversionErrorCount
|
||||||
if (!conversionErrorMessages.isEmpty()) {
|
if (!conversionErrorMessages.isEmpty()) {
|
||||||
return conversionErrorMessages
|
return conversionErrorMessages
|
||||||
}
|
}
|
||||||
|
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
String errorsMsg = errorStream.reduce("", String::concat)
|
String errorsMsg = errorStream.reduce("", String::concat)
|
||||||
if (errorsMsg) {
|
if (errorsMsg) {
|
||||||
throw new GradleException(errorsMsg)
|
throw new GradleException(errorsMsg)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// this cannot be static, because DictionaryTools will not be visible
|
// this cannot be static, because DictionaryTools will not be visible
|
||||||
def convertDictionary(File definition, File csvDictionary, String dictionariesOutputDir, String dictionariesMetaDir, String outputDictionaryExtension, HashMap<String, String> sounds, boolean noSyllables, Locale locale, int maxErrors, String csvDelimiter) {
|
def convertDictionary(File definition, File csvDictionary, String dictionariesOutputDir, String dictionariesMetaDir, String outputDictionaryExtension, HashMap<String, String> sounds, boolean noSyllables, Locale locale, int maxErrors, String csvDelimiter) {
|
||||||
if (isDictionaryUpToDate(definition, csvDictionary, dictionariesMetaDir)) {
|
if (isDictionaryUpToDate(definition, csvDictionary, dictionariesMetaDir)) {
|
||||||
return [0, ""]
|
return [0, ""]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int errorCount = 0
|
int errorCount = 0
|
||||||
String errorMsg = ''
|
String errorMsg = ''
|
||||||
|
|
||||||
List<String> fileContents = csvDictionary.readLines()
|
List<String> fileContents = csvDictionary.readLines()
|
||||||
LinkedHashMap<String, ArrayList<String>> outputDictionary = new LinkedHashMap<>()
|
LinkedHashMap<String, ArrayList<String>> outputDictionary = new LinkedHashMap<>()
|
||||||
int wordCount = 0
|
int wordCount = 0
|
||||||
|
|
||||||
for (int lineNumber = 1; lineNumber <= fileContents.size() && errorCount < maxErrors; lineNumber++) {
|
for (int lineNumber = 1; lineNumber <= fileContents.size() && errorCount < maxErrors; lineNumber++) {
|
||||||
String line = fileContents.get(lineNumber - 1)
|
String line = fileContents.get(lineNumber - 1)
|
||||||
|
|
||||||
def (word, transcription, frequency) = DictionaryTools.getDictionaryLineData(line, csvDelimiter)
|
def (word, transcription, frequency) = DictionaryTools.getDictionaryLineData(line, csvDelimiter)
|
||||||
|
|
||||||
String digitSequence = ""
|
String digitSequence = ""
|
||||||
try {
|
try {
|
||||||
def transcribedWord = transcription.isEmpty() ? word : transcription
|
def transcribedWord = transcription.isEmpty() ? word : transcription
|
||||||
digitSequence = DictionaryTools.wordToDigitSequence(locale, transcribedWord, sounds, !transcription.isEmpty())
|
digitSequence = DictionaryTools.wordToDigitSequence(locale, transcribedWord, sounds, !transcription.isEmpty())
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
errorCount++
|
errorCount++
|
||||||
errorMsg += "Dictionary '${csvDictionary.name}' is invalid. Failed generating digit sequence for word '${word}' on line ${lineNumber}. ${e.message}\n"
|
errorMsg += "Dictionary '${csvDictionary.name}' is invalid. Failed generating digit sequence for word '${word}' on line ${lineNumber}. ${e.message}\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
if (errorCount == 0) {
|
if (errorCount == 0) {
|
||||||
if (!outputDictionary.containsKey(digitSequence)) {
|
if (!outputDictionary.containsKey(digitSequence)) {
|
||||||
outputDictionary.put(digitSequence, new ArrayList<>())
|
outputDictionary.put(digitSequence, new ArrayList<>())
|
||||||
}
|
}
|
||||||
// prefix the frequency to sort the words later
|
// prefix the frequency to sort the words later
|
||||||
outputDictionary.get(digitSequence).add("${String.format('%03d', frequency)}${word}")
|
outputDictionary.get(digitSequence).add("${String.format('%03d', frequency)}${word}")
|
||||||
wordCount++
|
wordCount++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
outputDictionary = sortDictionary(outputDictionary)
|
outputDictionary = sortDictionary(outputDictionary)
|
||||||
|
|
||||||
def (assetError, zippedDictionary) = writeZippedDictionary(dictionariesOutputDir, csvDictionary, outputDictionary, outputDictionaryExtension, noSyllables)
|
def (assetError, zippedDictionary) = writeZippedDictionary(dictionariesOutputDir, csvDictionary, outputDictionary, outputDictionaryExtension, noSyllables)
|
||||||
if (assetError) {
|
if (assetError) {
|
||||||
errorCount++
|
errorCount++
|
||||||
errorMsg += assetError
|
errorMsg += assetError
|
||||||
}
|
}
|
||||||
|
|
||||||
def propertiesError = writeDictionaryProperties(definition, csvDictionary, zippedDictionary, dictionariesMetaDir, outputDictionary.size(), wordCount)
|
def propertiesError = writeDictionaryProperties(definition, csvDictionary, zippedDictionary, dictionariesMetaDir, outputDictionary.size(), wordCount)
|
||||||
if (propertiesError) {
|
if (propertiesError) {
|
||||||
errorCount++
|
errorCount++
|
||||||
errorMsg += propertiesError
|
errorMsg += propertiesError
|
||||||
}
|
}
|
||||||
|
|
||||||
return [errorCount, errorMsg]
|
return [errorCount, errorMsg]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//////////////////// DICTIONARY PROCESSING ////////////////////
|
//////////////////// DICTIONARY PROCESSING ////////////////////
|
||||||
|
|
||||||
static byte[] compressDictionaryLine(String digitSequence, List<String> words, boolean noSyllables) {
|
static byte[] compressDictionaryLine(String digitSequence, List<String> words, boolean noSyllables) {
|
||||||
if (words.isEmpty()) {
|
if (words.isEmpty()) {
|
||||||
throw new IllegalArgumentException("No words for digit sequence: ${digitSequence}")
|
throw new IllegalArgumentException("No words for digit sequence: ${digitSequence}")
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean shouldSeparateWords = !noSyllables
|
boolean shouldSeparateWords = !noSyllables
|
||||||
|
|
||||||
for (def i = 0; i < words.size(); i++) {
|
for (def i = 0; i < words.size(); i++) {
|
||||||
if (words.get(i).length() != digitSequence.length()) {
|
if (words.get(i).length() != digitSequence.length()) {
|
||||||
shouldSeparateWords = true
|
shouldSeparateWords = true
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return (
|
return (
|
||||||
digitSequence +
|
digitSequence +
|
||||||
(shouldSeparateWords && noSyllables ? ' ' : '') + // if the language definition has sounds (aka the characters are syllables), we separate the words for sure, so the initial hint is not needed
|
(shouldSeparateWords && noSyllables ? ' ' : '') + // if the language definition has sounds (aka the characters are syllables), we separate the words for sure, so the initial hint is not needed
|
||||||
words.join(shouldSeparateWords ? ' ' : null)
|
words.join(shouldSeparateWords ? ' ' : null)
|
||||||
).getBytes(StandardCharsets.UTF_8)
|
).getBytes(StandardCharsets.UTF_8)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def isDictionaryUpToDate(File definition, File csvDictionary, String dictionaryPropertiesDir) {
|
def isDictionaryUpToDate(File definition, File csvDictionary, String dictionaryPropertiesDir) {
|
||||||
def dictionaryProperties = new File(dictionaryPropertiesDir, getPropertyFileName(csvDictionary))
|
def dictionaryProperties = new File(dictionaryPropertiesDir, getPropertyFileName(csvDictionary))
|
||||||
if (!dictionaryProperties.exists()) {
|
if (!dictionaryProperties.exists()) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
Properties props = new Properties()
|
Properties props = new Properties()
|
||||||
dictionaryProperties.withInputStream { stream -> props.load(stream) }
|
dictionaryProperties.withInputStream { stream -> props.load(stream) }
|
||||||
|
|
||||||
return props.getProperty("hash", "") == DictionaryTools.getLanguageHash(definition, csvDictionary)
|
return props.getProperty("hash", "") == DictionaryTools.getLanguageHash(definition, csvDictionary)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@@ -128,38 +128,38 @@ def isDictionaryUpToDate(File definition, File csvDictionary, String dictionaryP
|
||||||
* Also, it removes the frequency prefix from each word. The input dictionary is not modified.
|
* Also, it removes the frequency prefix from each word. The input dictionary is not modified.
|
||||||
*/
|
*/
|
||||||
static LinkedHashMap<String, ArrayList<String>> sortDictionary(LinkedHashMap<String, ArrayList<String>> dictionary) {
|
static LinkedHashMap<String, ArrayList<String>> sortDictionary(LinkedHashMap<String, ArrayList<String>> dictionary) {
|
||||||
// sort the sequences in ascending order of length, then lexicographically
|
// sort the sequences in ascending order of length, then lexicographically
|
||||||
def sequences = dictionary.keySet().toList()
|
def sequences = dictionary.keySet().toList()
|
||||||
Collections.sort(sequences, { a, b ->
|
Collections.sort(sequences, { a, b ->
|
||||||
a.length() == b.length() ? a.compareTo(b) : a.length() - b.length()
|
a.length() == b.length() ? a.compareTo(b) : a.length() - b.length()
|
||||||
})
|
})
|
||||||
def sortedDictionary = new LinkedHashMap<String, ArrayList<String>>()
|
def sortedDictionary = new LinkedHashMap<String, ArrayList<String>>()
|
||||||
sequences.each { sequence -> sortedDictionary.put(sequence, dictionary.get(sequence)) }
|
sequences.each { sequence -> sortedDictionary.put(sequence, dictionary.get(sequence)) }
|
||||||
|
|
||||||
// sort the words for each sequence in descending order of frequency
|
// sort the words for each sequence in descending order of frequency
|
||||||
sortedDictionary.forEach { _, words -> {
|
sortedDictionary.forEach { _, words -> {
|
||||||
Collections.sort(words, Collections.reverseOrder())
|
Collections.sort(words, Collections.reverseOrder())
|
||||||
words.replaceAll { word -> word.replaceFirst("^\\d+", "") }
|
words.replaceAll { word -> word.replaceFirst("^\\d+", "") }
|
||||||
}}
|
}}
|
||||||
|
|
||||||
return sortedDictionary
|
return sortedDictionary
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//////////////////// FILE I/O ////////////////////
|
//////////////////// FILE I/O ////////////////////
|
||||||
|
|
||||||
static getDictionaryFileName(csvDictionary) {
|
static getDictionaryFileName(csvDictionary) {
|
||||||
return "${csvDictionary.getName().replaceFirst("\\.\\w+\$", "")}"
|
return "${csvDictionary.getName().replaceFirst("\\.\\w+\$", "")}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static getPropertyFileName(csvDictionary) {
|
static getPropertyFileName(csvDictionary) {
|
||||||
return "${getDictionaryFileName(csvDictionary)}.props.yml"
|
return "${getDictionaryFileName(csvDictionary)}.props.yml"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static getZipDictionaryFile(dictionariesOutputDir, csvDictionary, outputDictionaryExtension) {
|
static getZipDictionaryFile(dictionariesOutputDir, csvDictionary, outputDictionaryExtension) {
|
||||||
return new File(dictionariesOutputDir, "${getDictionaryFileName(csvDictionary)}.${outputDictionaryExtension}")
|
return new File(dictionariesOutputDir, "${getDictionaryFileName(csvDictionary)}.${outputDictionaryExtension}")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@@ -167,37 +167,37 @@ static getZipDictionaryFile(dictionariesOutputDir, csvDictionary, outputDictiona
|
||||||
* Zipping the text files results in a smaller APK in comparison to the uncompressed text files.
|
* Zipping the text files results in a smaller APK in comparison to the uncompressed text files.
|
||||||
*/
|
*/
|
||||||
static def writeZippedDictionary(dictionariesOutputDir, csvDictionaryFile, outputDictionary, outputDictionaryExtension, noSyllables) {
|
static def writeZippedDictionary(dictionariesOutputDir, csvDictionaryFile, outputDictionary, outputDictionaryExtension, noSyllables) {
|
||||||
def fileName = getDictionaryFileName(csvDictionaryFile)
|
def fileName = getDictionaryFileName(csvDictionaryFile)
|
||||||
def outputFile = getZipDictionaryFile(dictionariesOutputDir, csvDictionaryFile, outputDictionaryExtension)
|
def outputFile = getZipDictionaryFile(dictionariesOutputDir, csvDictionaryFile, outputDictionaryExtension)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
def zipOutputStream = new ZipOutputStream(new FileOutputStream(outputFile))
|
def zipOutputStream = new ZipOutputStream(new FileOutputStream(outputFile))
|
||||||
zipOutputStream.putNextEntry(new ZipEntry("${fileName}.txt"))
|
zipOutputStream.putNextEntry(new ZipEntry("${fileName}.txt"))
|
||||||
outputDictionary.each { digitSequence, words ->
|
outputDictionary.each { digitSequence, words ->
|
||||||
zipOutputStream.write(compressDictionaryLine(digitSequence, words, noSyllables))
|
zipOutputStream.write(compressDictionaryLine(digitSequence, words, noSyllables))
|
||||||
}
|
}
|
||||||
zipOutputStream.closeEntry()
|
zipOutputStream.closeEntry()
|
||||||
zipOutputStream.close()
|
zipOutputStream.close()
|
||||||
return ["", outputFile]
|
return ["", outputFile]
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
return ["Failed writing to '${outputFile.path}'. ${e.message}\n", outputFile]
|
return ["Failed writing to '${outputFile.path}'. ${e.message}\n", outputFile]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// this cannot be static, because it requires access to exec() and DictionaryTools
|
// this cannot be static, because it requires access to exec() and DictionaryTools
|
||||||
def writeDictionaryProperties(File definition, File csvDictionary, File zipDictionary, outputDir, int sequences, int words) {
|
def writeDictionaryProperties(File definition, File csvDictionary, File zipDictionary, outputDir, int sequences, int words) {
|
||||||
def name = getPropertyFileName(csvDictionary)
|
def name = getPropertyFileName(csvDictionary)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
def hash = DictionaryTools.getLanguageHash(definition, csvDictionary)
|
def hash = DictionaryTools.getLanguageHash(definition, csvDictionary)
|
||||||
def revision = zipDictionary.exists() ? exec("git log --pretty=tformat:%H -n 1 ${zipDictionary}") : ""
|
def revision = zipDictionary.exists() ? exec("git log --pretty=tformat:%H -n 1 ${zipDictionary}") : ""
|
||||||
def size = zipDictionary.exists() ? zipDictionary.length() : 0
|
def size = zipDictionary.exists() ? zipDictionary.length() : 0
|
||||||
|
|
||||||
new File(outputDir, name).text = "hash: ${hash}\nrevision: ${revision}\nsequences: ${sequences}\nsize: ${size}\nwords: ${words}"
|
new File(outputDir, name).text = "hash: ${hash}\nrevision: ${revision}\nsequences: ${sequences}\nsize: ${size}\nwords: ${words}"
|
||||||
|
|
||||||
return ""
|
return ""
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
return "Failed writing dictionary properties to: '${outputDir}/${name}'. ${e.message}\n"
|
return "Failed writing dictionary properties to: '${outputDir}/${name}'. ${e.message}\n"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@@ -1,65 +1,65 @@
|
||||||
class Wrapper {
|
class Wrapper {
|
||||||
static def getDictionaryLineData(String line, String delimiter) {
|
static def getDictionaryLineData(String line, String delimiter) {
|
||||||
String[] parts = line.split(delimiter, 2)
|
String[] parts = line.split(delimiter, 2)
|
||||||
String word = parts[0]
|
String word = parts[0]
|
||||||
String transcription = parts.length > 1 && parts[1] =~ "^[a-zA-Z]+\$" ? parts[1] : ""
|
String transcription = parts.length > 1 && parts[1] =~ "^[a-zA-Z]+\$" ? parts[1] : ""
|
||||||
|
|
||||||
int frequency
|
int frequency
|
||||||
try {
|
try {
|
||||||
int partsElement = transcription.isEmpty() ? 1 : 2
|
int partsElement = transcription.isEmpty() ? 1 : 2
|
||||||
frequency = (parts.length > partsElement ? parts[partsElement] : "0") as int
|
frequency = (parts.length > partsElement ? parts[partsElement] : "0") as int
|
||||||
} catch (Exception ignored) {
|
} catch (Exception ignored) {
|
||||||
frequency = -1
|
frequency = -1
|
||||||
}
|
}
|
||||||
|
|
||||||
return [word, transcription, frequency]
|
return [word, transcription, frequency]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static def wordToDigitSequence(Locale locale, String word, HashMap<String, String> sounds, boolean isTranscribed) {
|
static def wordToDigitSequence(Locale locale, String word, HashMap<String, String> sounds, boolean isTranscribed) {
|
||||||
def sequence = new StringBuilder()
|
def sequence = new StringBuilder()
|
||||||
|
|
||||||
final String normalizedWord = isTranscribed ? word : word.toUpperCase(locale)
|
final String normalizedWord = isTranscribed ? word : word.toUpperCase(locale)
|
||||||
String currentSound = ""
|
String currentSound = ""
|
||||||
|
|
||||||
for (int i = 0, end = normalizedWord.length() - 1; i <= end; i++) {
|
for (int i = 0, end = normalizedWord.length() - 1; i <= end; i++) {
|
||||||
char currentChar = normalizedWord.charAt(i)
|
char currentChar = normalizedWord.charAt(i)
|
||||||
char nextChar = i < end ? normalizedWord.charAt(i + 1) : 0
|
char nextChar = i < end ? normalizedWord.charAt(i + 1) : 0
|
||||||
int nextCharType = Character.getType(nextChar)
|
int nextCharType = Character.getType(nextChar)
|
||||||
|
|
||||||
currentSound += currentChar
|
currentSound += currentChar
|
||||||
|
|
||||||
// charAt(i) returns "ΐ" as three separate characters, but they must be treated as one.
|
// charAt(i) returns "ΐ" as three separate characters, but they must be treated as one.
|
||||||
if (
|
if (
|
||||||
locale.getLanguage() == "el"
|
locale.getLanguage() == "el"
|
||||||
&& (nextCharType == Character.NON_SPACING_MARK || nextCharType == Character.ENCLOSING_MARK || nextCharType == Character.COMBINING_SPACING_MARK)
|
&& (nextCharType == Character.NON_SPACING_MARK || nextCharType == Character.ENCLOSING_MARK || nextCharType == Character.COMBINING_SPACING_MARK)
|
||||||
) {
|
) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!isTranscribed || i == end || Character.isUpperCase(nextChar)) {
|
if (!isTranscribed || i == end || Character.isUpperCase(nextChar)) {
|
||||||
if (!sounds.containsKey(currentSound)) {
|
if (!sounds.containsKey(currentSound)) {
|
||||||
throw new IllegalArgumentException("Sound or layout entry '${currentSound}' does not belong to the language sound list: ${sounds}.")
|
throw new IllegalArgumentException("Sound or layout entry '${currentSound}' does not belong to the language sound list: ${sounds}.")
|
||||||
} else {
|
} else {
|
||||||
sequence << sounds.get(currentSound)
|
sequence << sounds.get(currentSound)
|
||||||
currentSound = ""
|
currentSound = ""
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sequence.isEmpty()) {
|
if (sequence.isEmpty()) {
|
||||||
throw new IllegalArgumentException("The word does not contain any valid sounds.")
|
throw new IllegalArgumentException("The word does not contain any valid sounds.")
|
||||||
}
|
}
|
||||||
|
|
||||||
return sequence.toString()
|
return sequence.toString()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static def getLanguageHash(File definitionFile, File dictionaryFile) {
|
static def getLanguageHash(File definitionFile, File dictionaryFile) {
|
||||||
def definitionHash = definitionFile != null && definitionFile.exists() ? definitionFile.text.digest("SHA-256") : ""
|
def definitionHash = definitionFile != null && definitionFile.exists() ? definitionFile.text.digest("SHA-256") : ""
|
||||||
def dictionaryHash = dictionaryFile != null && dictionaryFile.exists() ? dictionaryFile.text.digest("SHA-256") : ""
|
def dictionaryHash = dictionaryFile != null && dictionaryFile.exists() ? dictionaryFile.text.digest("SHA-256") : ""
|
||||||
return definitionHash + dictionaryHash
|
return definitionHash + dictionaryHash
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ext.DictionaryTools = Wrapper
|
ext.DictionaryTools = Wrapper
|
||||||
|
|
|
||||||
|
|
@@ -1,172 +1,172 @@
|
||||||
ext.convertHelpDocs = {markdownDir, htmlDir ->
|
ext.convertHelpDocs = {markdownDir, htmlDir ->
|
||||||
fileTree(markdownDir).getFiles().parallelStream().forEach { File markdownPath ->
|
fileTree(markdownDir).getFiles().parallelStream().forEach { File markdownPath ->
|
||||||
markdownToHtml(markdownPath.path, "${htmlDir}/${markdownPath.name.replaceAll("\\.md\$", ".html")}")
|
markdownToHtml(markdownPath.path, "${htmlDir}/${markdownPath.name.replaceAll("\\.md\$", ".html")}")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static markdownToHtml(markdownPath, htmlPath) {
|
static markdownToHtml(markdownPath, htmlPath) {
|
||||||
def text = new File(markdownPath).text
|
def text = new File(markdownPath).text
|
||||||
|
|
||||||
text = convertHeaders(text)
|
text = convertHeaders(text)
|
||||||
text = convertOrderedLists(text)
|
text = convertOrderedLists(text)
|
||||||
text = convertUnorderedLists(text)
|
text = convertUnorderedLists(text)
|
||||||
text = convertInlineTags(text)
|
text = convertInlineTags(text)
|
||||||
text = addStylesToTags(text)
|
text = addStylesToTags(text)
|
||||||
text = insertIndex(text, generateIndex(text))
|
text = insertIndex(text, generateIndex(text))
|
||||||
text = removeWhitespace(text)
|
text = removeWhitespace(text)
|
||||||
|
|
||||||
new File(htmlPath).text = "<!DOCTYPE html><html lang=\"en\"><head><meta charset=\"UTF-8\"><style>${getStyles()}</style><title>Help</title></head><body>${text}</body></html>"
|
new File(htmlPath).text = "<!DOCTYPE html><html lang=\"en\"><head><meta charset=\"UTF-8\"><style>${getStyles()}</style><title>Help</title></head><body>${text}</body></html>"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static getStyles() {
|
static getStyles() {
|
||||||
return "body {padding: 0 6px; background-color: #f4f4f4; color: #000;}" +
|
return "body {padding: 0 6px; background-color: #f4f4f4; color: #000;}" +
|
||||||
"a {color: #225682}" +
|
"a {color: #225682}" +
|
||||||
"a:visited {color: #644280}" +
|
"a:visited {color: #644280}" +
|
||||||
"li {margin: 4px 0; padding: 1px;}" +
|
"li {margin: 4px 0; padding: 1px;}" +
|
||||||
"p {text-align: left;}" +
|
"p {text-align: left;}" +
|
||||||
"p.wrap{word-wrap: break-word;}" +
|
"p.wrap{word-wrap: break-word;}" +
|
||||||
".toc {border: 1px solid; display: inline-block; padding: 12px 20px 12px 0; margin: 12px 0;}" +
|
".toc {border: 1px solid; display: inline-block; padding: 12px 20px 12px 0; margin: 12px 0;}" +
|
||||||
".toc > h3 {text-align: center; margin: 0;}" +
|
".toc > h3 {text-align: center; margin: 0;}" +
|
||||||
"@media (prefers-color-scheme: dark) {" +
|
"@media (prefers-color-scheme: dark) {" +
|
||||||
"body { background-color: #333; color: #c8c8c8; }" +
|
"body { background-color: #333; color: #c8c8c8; }" +
|
||||||
"a {color: #a0c1de}" +
|
"a {color: #a0c1de}" +
|
||||||
"a:visited {color: #d9bce1}" +
|
"a:visited {color: #d9bce1}" +
|
||||||
"}"
|
"}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static generateIndex(html) {
|
static generateIndex(html) {
|
||||||
def entries = html.split("\n").collect( { line ->
|
def entries = html.split("\n").collect( { line ->
|
||||||
def matches = line =~ "<h2 id=\"(\\S+)\">(.+)</h2>"
|
def matches = line =~ "<h2 id=\"(\\S+)\">(.+)</h2>"
|
||||||
if (matches.size() > 0 && matches[0].size() > 2) {
|
if (matches.size() > 0 && matches[0].size() > 2) {
|
||||||
return "<a href=\"#${matches[0][1]}\">${matches[0][2]}</a>"
|
return "<a href=\"#${matches[0][1]}\">${matches[0][2]}</a>"
|
||||||
} else {
|
} else {
|
||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
}).findAll { it != null }
|
}).findAll { it != null }
|
||||||
|
|
||||||
return "<section class=\"toc\"><h3>Contents</h3>" +
|
return "<section class=\"toc\"><h3>Contents</h3>" +
|
||||||
"<ol>${entries.collect { "<li>${it}</li>" }.join("\n")}</ol>" +
|
"<ol>${entries.collect { "<li>${it}</li>" }.join("\n")}</ol>" +
|
||||||
"</section>"
|
"</section>"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static insertIndex(html, index) {
|
static insertIndex(html, index) {
|
||||||
return html.replaceFirst("<h2", "${index}<h2")
|
return html.replaceFirst("<h2", "${index}<h2")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static convertHeaders(markdown) {
|
static convertHeaders(markdown) {
|
||||||
def html = markdown.split("\n").collect { line ->
|
def html = markdown.split("\n").collect { line ->
|
||||||
if (line.startsWith("#")) {
|
if (line.startsWith("#")) {
|
||||||
def headerNumber = 0
|
def headerNumber = 0
|
||||||
for (int i = 0; i < line.length(); i++) {
|
for (int i = 0; i < line.length(); i++) {
|
||||||
if (line[i] != '#') {
|
if (line[i] != '#') {
|
||||||
headerNumber = i
|
headerNumber = i
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def header = line.replaceAll("^#+", "").trim()
|
def header = line.replaceAll("^#+", "").trim()
|
||||||
def anchor = header.toLowerCase().replaceAll("[^\\d\\p{L}]+", "-").replaceAll("[\\-]+\$", "")
|
def anchor = header.toLowerCase().replaceAll("[^\\d\\p{L}]+", "-").replaceAll("[\\-]+\$", "")
|
||||||
|
|
||||||
return "<h${headerNumber} id=\"${anchor}\">${header}</h${headerNumber}>"
|
return "<h${headerNumber} id=\"${anchor}\">${header}</h${headerNumber}>"
|
||||||
} else {
|
} else {
|
||||||
return line
|
return line
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return html.join("\n")
|
return html.join("\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static convertOrderedLists(markdown) {
|
static convertOrderedLists(markdown) {
|
||||||
def html = markdown.split("\n").collect { line ->
|
def html = markdown.split("\n").collect { line ->
|
||||||
if (line.matches("^\\d+\\..*")) {
|
if (line.matches("^\\d+\\..*")) {
|
||||||
return "<li>${line.replaceAll("^\\d+\\.\\s*", "")}</li>"
|
return "<li>${line.replaceAll("^\\d+\\.\\s*", "")}</li>"
|
||||||
} else {
|
} else {
|
||||||
return line
|
return line
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return html.join("\n").replaceAll("(?<!li>\n)<li>", "<ol><li>").replaceAll("</li>(?!\n<li)", "</li></ol>")
|
return html.join("\n").replaceAll("(?<!li>\n)<li>", "<ol><li>").replaceAll("</li>(?!\n<li)", "</li></ol>")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static convertUnorderedLists(markdown) {
|
static convertUnorderedLists(markdown) {
|
||||||
boolean inList = false
|
boolean inList = false
|
||||||
boolean inNestedList = false
|
boolean inNestedList = false
|
||||||
|
|
||||||
def html = ""
|
def html = ""
|
||||||
|
|
||||||
markdown.split("\n").each { line ->
|
markdown.split("\n").each { line ->
|
||||||
def convertedLine = ""
|
def convertedLine = ""
|
||||||
|
|
||||||
def innerLi = line.replaceAll("^\\s*-\\s*", "")
|
def innerLi = line.replaceAll("^\\s*-\\s*", "")
|
||||||
|
|
||||||
if (line.matches("^-.*")) {
|
if (line.matches("^-.*")) {
|
||||||
if (!inList) {
|
if (!inList) {
|
||||||
convertedLine += "<ul>"
|
convertedLine += "<ul>"
|
||||||
inList = true
|
inList = true
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inNestedList) {
|
if (inNestedList) {
|
||||||
convertedLine += "</ul></li>"
|
convertedLine += "</ul></li>"
|
||||||
inNestedList = false
|
inNestedList = false
|
||||||
}
|
}
|
||||||
|
|
||||||
convertedLine += "<li>${innerLi}</li>"
|
convertedLine += "<li>${innerLi}</li>"
|
||||||
} else if (line.matches("^\\s+-.*")) {
|
} else if (line.matches("^\\s+-.*")) {
|
||||||
if (!inNestedList) {
|
if (!inNestedList) {
|
||||||
if (html.endsWith("</li>")) {
|
if (html.endsWith("</li>")) {
|
||||||
html = html.substring(0, html.length() - 5)
|
html = html.substring(0, html.length() - 5)
|
||||||
} else if (html.endsWith("</li>\n")) {
|
} else if (html.endsWith("</li>\n")) {
|
||||||
html = html.substring(0, html.length() - 6)
|
html = html.substring(0, html.length() - 6)
|
||||||
}
|
}
|
||||||
|
|
||||||
convertedLine += "<ul>"
|
convertedLine += "<ul>"
|
||||||
inNestedList = true
|
inNestedList = true
|
||||||
}
|
}
|
||||||
|
|
||||||
convertedLine += "<li>${innerLi}</li>"
|
convertedLine += "<li>${innerLi}</li>"
|
||||||
} else {
|
} else {
|
||||||
if (inNestedList) {
|
if (inNestedList) {
|
||||||
inNestedList = false
|
inNestedList = false
|
||||||
convertedLine += "</ul></li>"
|
convertedLine += "</ul></li>"
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inList) {
|
if (inList) {
|
||||||
inList = false
|
inList = false
|
||||||
convertedLine += "</ul>"
|
convertedLine += "</ul>"
|
||||||
}
|
}
|
||||||
|
|
||||||
convertedLine += line
|
convertedLine += line
|
||||||
}
|
}
|
||||||
|
|
||||||
html += convertedLine + "\n"
|
html += convertedLine + "\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
return html
|
return html
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static convertInlineTags(markdown) {
|
static convertInlineTags(markdown) {
|
||||||
return markdown
|
return markdown
|
||||||
.replaceAll("\n([^\n<]+?)(\n|\$)", "<p>\$1</p>")
|
.replaceAll("\n([^\n<]+?)(\n|\$)", "<p>\$1</p>")
|
||||||
.replaceAll("_([^_]+)_", "<i>\$1</i>")
|
.replaceAll("_([^_]+)_", "<i>\$1</i>")
|
||||||
.replaceAll("[*]{2}(.+?)[*]{2}", "<b>\$1</b>")
|
.replaceAll("[*]{2}(.+?)[*]{2}", "<b>\$1</b>")
|
||||||
.replaceAll("\\[([^]]+)\\]\\(([^)]+)\\)", "<a href=\"\$2\">\$1</a>")
|
.replaceAll("\\[([^]]+)\\]\\(([^)]+)\\)", "<a href=\"\$2\">\$1</a>")
|
||||||
.replaceAll("href=\"([^\"]+)-\"", "href=\"\$1\"")
|
.replaceAll("href=\"([^\"]+)-\"", "href=\"\$1\"")
|
||||||
.replaceAll("href=\"([^\"]+?)--([^\"]+?)\"", "href=\"\$1-\$2\"")
|
.replaceAll("href=\"([^\"]+?)--([^\"]+?)\"", "href=\"\$1-\$2\"")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static addStylesToTags(html) {
|
static addStylesToTags(html) {
|
||||||
return html.replaceAll("<p>([^<]+?googlequicksearchbox[^<]+?)</p>", "<p class=\"wrap\">\$1</p>")
|
return html.replaceAll("<p>([^<]+?googlequicksearchbox[^<]+?)</p>", "<p class=\"wrap\">\$1</p>")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static removeWhitespace(html) {
|
static removeWhitespace(html) {
|
||||||
return html.replaceAll("\\s+", " ").replaceAll("/> <", "/><")
|
return html.replaceAll("\\s+", " ").replaceAll("/> <", "/><")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,330 +2,330 @@ apply from: 'dictionary-tools.gradle'
|
||||||
|
|
||||||
|
|
||||||
ext.validateLanguageFiles = { definitionsDir, dictionariesDir, validationDir ->
|
ext.validateLanguageFiles = { definitionsDir, dictionariesDir, validationDir ->
|
||||||
int errorCount = 0
|
int errorCount = 0
|
||||||
|
|
||||||
def errorStream = fileTree(dir: definitionsDir).getFiles().parallelStream().map { definition ->
|
def errorStream = fileTree(dir: definitionsDir).getFiles().parallelStream().map { definition ->
|
||||||
if (errorCount >= MAX_ERRORS) {
|
if (errorCount >= MAX_ERRORS) {
|
||||||
return "Too many errors! Skipping: ${definition}\n"
|
return "Too many errors! Skipping: ${definition}\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
def (alphabet, sounds, isAlphabeticLanguage, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageDefintion(definition, dictionariesDir)
|
def (alphabet, sounds, isAlphabeticLanguage, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageDefintion(definition, dictionariesDir)
|
||||||
|
|
||||||
def languageHash = DictionaryTools.getLanguageHash(definition, dictionaryFile)
|
def languageHash = DictionaryTools.getLanguageHash(definition, dictionaryFile)
|
||||||
def validationFile = new File("${validationDir}/${definition.name.replace(".yml", "")}.txt")
|
def validationFile = new File("${validationDir}/${definition.name.replace(".yml", "")}.txt")
|
||||||
|
|
||||||
errorCount += langFileErrorCount
|
errorCount += langFileErrorCount
|
||||||
if (!langFileErrorMsg.isEmpty()) {
|
if (!langFileErrorMsg.isEmpty()) {
|
||||||
validationFile.text = "${languageHash} INVALID"
|
validationFile.text = "${languageHash} INVALID"
|
||||||
return langFileErrorMsg
|
return langFileErrorMsg
|
||||||
}
|
}
|
||||||
|
|
||||||
if (validationFile.exists() && validationFile.text == "${languageHash} OK") {
|
if (validationFile.exists() && validationFile.text == "${languageHash} OK") {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
def (dictionaryErrorCount, dictionaryErrorMesages) = validateDictionary(dictionaryFile, alphabet, sounds, isAlphabeticLanguage, locale, MAX_ERRORS, CSV_DELIMITER, MAX_WORD_FREQUENCY)
|
def (dictionaryErrorCount, dictionaryErrorMesages) = validateDictionary(dictionaryFile, alphabet, sounds, isAlphabeticLanguage, locale, MAX_ERRORS, CSV_DELIMITER, MAX_WORD_FREQUENCY)
|
||||||
errorCount += dictionaryErrorCount
|
errorCount += dictionaryErrorCount
|
||||||
if (!dictionaryErrorMesages.isEmpty()) {
|
if (!dictionaryErrorMesages.isEmpty()) {
|
||||||
validationFile.text = "${languageHash} INVALID"
|
validationFile.text = "${languageHash} INVALID"
|
||||||
return dictionaryErrorMesages
|
return dictionaryErrorMesages
|
||||||
}
|
}
|
||||||
|
|
||||||
validationFile.text = "${languageHash} OK"
|
validationFile.text = "${languageHash} OK"
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
String errorsMsg = errorStream.reduce("", String::concat)
|
String errorsMsg = errorStream.reduce("", String::concat)
|
||||||
if (errorsMsg) {
|
if (errorsMsg) {
|
||||||
throw new GradleException(errorsMsg)
|
throw new GradleException(errorsMsg)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
|
ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
|
||||||
String alphabet = ''
|
String alphabet = ''
|
||||||
int layoutKey = 0
|
int layoutKey = 0
|
||||||
HashMap<String, String> sounds = new HashMap<>()
|
HashMap<String, String> sounds = new HashMap<>()
|
||||||
HashMap<String, String> layoutSounds = new HashMap<>()
|
HashMap<String, String> layoutSounds = new HashMap<>()
|
||||||
|
|
||||||
File dictionaryFile
|
File dictionaryFile
|
||||||
int errorCount = 0
|
int errorCount = 0
|
||||||
String errorMsg = ""
|
String errorMsg = ""
|
||||||
|
|
||||||
boolean hasLayout = false
|
boolean hasLayout = false
|
||||||
boolean hasSounds = false
|
boolean hasSounds = false
|
||||||
boolean isLocaleValid = false
|
boolean isLocaleValid = false
|
||||||
String localeString = ""
|
String localeString = ""
|
||||||
String dictionaryFileName = ""
|
String dictionaryFileName = ""
|
||||||
|
|
||||||
for (String rawLine : languageFile.readLines()) {
|
for (String rawLine : languageFile.readLines()) {
|
||||||
if (
|
if (
|
||||||
rawLine.matches("^[a-zA-Z].*")
|
rawLine.matches("^[a-zA-Z].*")
|
||||||
&& !rawLine.startsWith("abcString")
|
&& !rawLine.startsWith("abcString")
|
||||||
&& !rawLine.startsWith("dictionaryFile")
|
&& !rawLine.startsWith("dictionaryFile")
|
||||||
&& !rawLine.startsWith("hasSpaceBetweenWords")
|
&& !rawLine.startsWith("hasSpaceBetweenWords")
|
||||||
&& !rawLine.startsWith("hasUpperCase")
|
&& !rawLine.startsWith("hasUpperCase")
|
||||||
&& !rawLine.startsWith("layout")
|
&& !rawLine.startsWith("layout")
|
||||||
&& !rawLine.startsWith("locale")
|
&& !rawLine.startsWith("locale")
|
||||||
&& !rawLine.startsWith("name")
|
&& !rawLine.startsWith("name")
|
||||||
&& !rawLine.startsWith("sounds")
|
&& !rawLine.startsWith("sounds")
|
||||||
) {
|
) {
|
||||||
def parts = rawLine.split(":")
|
def parts = rawLine.split(":")
|
||||||
def property = parts.length > 0 ? parts[0] : rawLine
|
def property = parts.length > 0 ? parts[0] : rawLine
|
||||||
|
|
||||||
errorCount++
|
errorCount++
|
||||||
errorMsg += "Language '${languageFile.name}' is invalid. Found unknown property: '${property}'.\n"
|
errorMsg += "Language '${languageFile.name}' is invalid. Found unknown property: '${property}'.\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
String line = rawLine.replaceFirst("#[\\s\\S]+\$", "")
|
String line = rawLine.replaceFirst("#[\\s\\S]+\$", "")
|
||||||
|
|
||||||
if (
|
if (
|
||||||
(line.startsWith("hasUpperCase") || line.startsWith("hasSpaceBetweenWords"))
|
(line.startsWith("hasUpperCase") || line.startsWith("hasSpaceBetweenWords"))
|
||||||
&& !line.endsWith("yes") && !line.endsWith("no")
|
&& !line.endsWith("yes") && !line.endsWith("no")
|
||||||
) {
|
) {
|
||||||
def property = line.replaceAll(":.*\$", "")
|
def property = line.replaceAll(":.*\$", "")
|
||||||
def invalidVal = line.replace("hasUpperCase:", "").trim()
|
def invalidVal = line.replace("hasUpperCase:", "").trim()
|
||||||
errorCount++
|
errorCount++
|
||||||
errorMsg += "Language '${languageFile.name}' is invalid. Unrecognized '${property}' value: '${invalidVal}'. Only 'yes' and 'no' are allowed.\n"
|
errorMsg += "Language '${languageFile.name}' is invalid. Unrecognized '${property}' value: '${invalidVal}'. Only 'yes' and 'no' are allowed.\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
if (line.startsWith("layout")) {
|
if (line.startsWith("layout")) {
|
||||||
hasLayout = true
|
hasLayout = true
|
||||||
}
|
}
|
||||||
|
|
||||||
if (line.startsWith("sounds")) {
|
if (line.startsWith("sounds")) {
|
||||||
hasSounds = true
|
hasSounds = true
|
||||||
}
|
}
|
||||||
|
|
||||||
if (line.startsWith("locale")) {
|
if (line.startsWith("locale")) {
|
||||||
localeString = line.replace("locale:", "").trim()
|
localeString = line.replace("locale:", "").trim()
|
||||||
isLocaleValid = localeString.matches("^[a-z]{2,3}(?:-[A-Z]{2})?\$")
|
isLocaleValid = localeString.matches("^[a-z]{2,3}(?:-[A-Z]{2})?\$")
|
||||||
}
|
}
|
||||||
|
|
||||||
if (line.startsWith("dictionaryFile")) {
|
if (line.startsWith("dictionaryFile")) {
|
||||||
dictionaryFileName = line.replace("dictionaryFile:", "").trim()
|
dictionaryFileName = line.replace("dictionaryFile:", "").trim()
|
||||||
}
|
}
|
||||||
|
|
||||||
// alphabet string
|
// alphabet string
|
||||||
def lineCharacters = extractAlphabetCharsFromLine(line)
|
def lineCharacters = extractAlphabetCharsFromLine(line)
|
||||||
lineCharacters = lineCharacters.isEmpty() ? extractAlphabetExtraCharsFromLine(languageFile.name, line) : lineCharacters
|
lineCharacters = lineCharacters.isEmpty() ? extractAlphabetExtraCharsFromLine(languageFile.name, line) : lineCharacters
|
||||||
|
|
||||||
alphabet += lineCharacters
|
alphabet += lineCharacters
|
||||||
|
|
||||||
// sounds, single letters or special characters that are treated as letters
|
// sounds, single letters or special characters that are treated as letters
|
||||||
if (lineCharacters) {
|
if (lineCharacters) {
|
||||||
lineCharacters.each { letter ->
|
lineCharacters.each { letter ->
|
||||||
layoutSounds.put(letter, layoutKey.toString())
|
layoutSounds.put(letter, layoutKey.toString())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isLayoutLine(line)) {
|
if (isLayoutLine(line)) {
|
||||||
layoutKey++
|
layoutKey++
|
||||||
}
|
}
|
||||||
|
|
||||||
// sounds, syllables
|
// sounds, syllables
|
||||||
def (sound, sequence) = extractSoundFromLine(line)
|
def (sound, sequence) = extractSoundFromLine(line)
|
||||||
if (!sound.isEmpty() && !sequence.isEmpty()) {
|
if (!sound.isEmpty() && !sequence.isEmpty()) {
|
||||||
sounds.put(sound, sequence)
|
sounds.put(sound, sequence)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!hasLayout) {
|
if (!hasLayout) {
|
||||||
errorCount++
|
errorCount++
|
||||||
errorMsg += "Language '${languageFile.name}' is invalid. Missing 'layout' property.\n"
|
errorMsg += "Language '${languageFile.name}' is invalid. Missing 'layout' property.\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
if (alphabet.isEmpty()) {
|
if (alphabet.isEmpty()) {
|
||||||
errorCount++
|
errorCount++
|
||||||
errorMsg += "Language '${languageFile.name}' is invalid. No language characters found. Make sure 'layout' contains series of characters per each key in the format: ' - [a, b, c]' and so on\n"
|
errorMsg += "Language '${languageFile.name}' is invalid. No language characters found. Make sure 'layout' contains series of characters per each key in the format: ' - [a, b, c]' and so on\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hasSounds && sounds.isEmpty()) {
|
if (hasSounds && sounds.isEmpty()) {
|
||||||
errorCount++
|
errorCount++
|
||||||
errorMsg += "Language '${languageFile.name}' is invalid. 'sounds' property must contain series of phonetic transcriptions per digit sequence in the format: ' - [Yae,1221]' and so on.\n"
|
errorMsg += "Language '${languageFile.name}' is invalid. 'sounds' property must contain series of phonetic transcriptions per digit sequence in the format: ' - [Yae,1221]' and so on.\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!isLocaleValid) {
|
if (!isLocaleValid) {
|
||||||
errorCount++
|
errorCount++
|
||||||
def msg = localeString.isEmpty() ? "Missing 'locale' property." : "Unrecognized locale format: '${localeString}'"
|
def msg = localeString.isEmpty() ? "Missing 'locale' property." : "Unrecognized locale format: '${localeString}'"
|
||||||
errorMsg += "Language '${languageFile.name}' is invalid. ${msg}\n"
|
errorMsg += "Language '${languageFile.name}' is invalid. ${msg}\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
dictionaryFile = new File("$dictionariesDir/${dictionaryFileName}")
|
dictionaryFile = new File("$dictionariesDir/${dictionaryFileName}")
|
||||||
if (dictionaryFileName.isEmpty() || !dictionaryFile.exists()) {
|
if (dictionaryFileName.isEmpty() || !dictionaryFile.exists()) {
|
||||||
errorCount++
|
errorCount++
|
||||||
errorMsg += "Could not find dictionary file: '${dictionaryFileName}' in: '${dictionariesDir}'. Make sure 'dictionaryFile' is set correctly in: '${languageFile.name}'.\n"
|
errorMsg += "Could not find dictionary file: '${dictionaryFileName}' in: '${dictionariesDir}'. Make sure 'dictionaryFile' is set correctly in: '${languageFile.name}'.\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
String[] localeParts = localeString.split(("[-_]"))
|
String[] localeParts = localeString.split(("[-_]"))
|
||||||
Locale locale = new Locale(localeParts[0], localeParts.length > 1 ? localeParts[1] : "")
|
Locale locale = new Locale(localeParts[0], localeParts.length > 1 ? localeParts[1] : "")
|
||||||
|
|
||||||
if (!hasSounds && locale != null) {
|
if (!hasSounds && locale != null) {
|
||||||
layoutSounds.forEach { sound, sequence ->
|
layoutSounds.forEach { sound, sequence ->
|
||||||
sounds.put(sound.toUpperCase(locale), sequence)
|
sounds.put(sound.toUpperCase(locale), sequence)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return [alphabet, sounds, !hasSounds, locale, dictionaryFile, errorCount, errorMsg]
|
return [alphabet, sounds, !hasSounds, locale, dictionaryFile, errorCount, errorMsg]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// this cannot be static, because DictionaryTools will not be visible
|
// this cannot be static, because DictionaryTools will not be visible
|
||||||
def validateDictionary(File dictionaryFile, String alphabet, HashMap<String, String> sounds, boolean isAlphabeticLanguage, Locale locale, int maxErrors, String csvDelimiter, int maxWordFrequency) {
|
def validateDictionary(File dictionaryFile, String alphabet, HashMap<String, String> sounds, boolean isAlphabeticLanguage, Locale locale, int maxErrors, String csvDelimiter, int maxWordFrequency) {
|
||||||
String regexSafeAlphabet = alphabet.replaceAll("([\\[\\]\\-\\.])", "")
|
String regexSafeAlphabet = alphabet.replaceAll("([\\[\\]\\-\\.])", "")
|
||||||
final VALID_CHARS = alphabet.toUpperCase(locale) == alphabet ? "^[${regexSafeAlphabet}\\.\\-]+\$" : "^[${regexSafeAlphabet}${regexSafeAlphabet.toUpperCase(locale)}\\.\\-]+\$"
|
final VALID_CHARS = alphabet.toUpperCase(locale) == alphabet ? "^[${regexSafeAlphabet}\\.\\-]+\$" : "^[${regexSafeAlphabet}${regexSafeAlphabet.toUpperCase(locale)}\\.\\-]+\$"
|
||||||
|
|
||||||
int errorCount = 0
|
int errorCount = 0
|
||||||
String errorMsg = ''
|
String errorMsg = ''
|
||||||
Set<String> uniqueWords = new HashSet<>()
|
Set<String> uniqueWords = new HashSet<>()
|
||||||
|
|
||||||
List<String> fileContents = dictionaryFile.readLines()
|
List<String> fileContents = dictionaryFile.readLines()
|
||||||
for (int lineNumber = 1; lineNumber <= fileContents.size() && errorCount < maxErrors; lineNumber++) {
|
for (int lineNumber = 1; lineNumber <= fileContents.size() && errorCount < maxErrors; lineNumber++) {
|
||||||
String line = fileContents.get(lineNumber - 1)
|
String line = fileContents.get(lineNumber - 1)
|
||||||
boolean lineHasErrors = false
|
boolean lineHasErrors = false
|
||||||
|
|
||||||
String whiteSpaceError = validateNoWhitespace(line, lineNumber)
|
String whiteSpaceError = validateNoWhitespace(line, lineNumber)
|
||||||
if (whiteSpaceError) {
|
if (whiteSpaceError) {
|
||||||
lineHasErrors = true
|
lineHasErrors = true
|
||||||
errorCount++
|
errorCount++
|
||||||
errorMsg += whiteSpaceError
|
errorMsg += whiteSpaceError
|
||||||
}
|
}
|
||||||
|
|
||||||
def (word, transcription, frequency) = DictionaryTools.getDictionaryLineData(line, csvDelimiter)
|
def (word, transcription, frequency) = DictionaryTools.getDictionaryLineData(line, csvDelimiter)
|
||||||
|
|
||||||
String frequencyError = validateFrequency(frequency, maxWordFrequency, dictionaryFile.name, lineNumber)
|
String frequencyError = validateFrequency(frequency, maxWordFrequency, dictionaryFile.name, lineNumber)
|
||||||
if (frequencyError) {
|
if (frequencyError) {
|
||||||
lineHasErrors = true
|
lineHasErrors = true
|
||||||
errorCount++
|
errorCount++
|
||||||
errorMsg += frequencyError
|
errorMsg += frequencyError
|
||||||
}
|
}
|
||||||
|
|
||||||
def (wordErrorCount, wordErrors) = validateWord(word, VALID_CHARS, isAlphabeticLanguage, lineNumber, "Dictionary '${dictionaryFile.name}' is invalid")
|
def (wordErrorCount, wordErrors) = validateWord(word, VALID_CHARS, isAlphabeticLanguage, lineNumber, "Dictionary '${dictionaryFile.name}' is invalid")
|
||||||
if (wordErrorCount > 0) {
|
if (wordErrorCount > 0) {
|
||||||
errorCount += wordErrorCount
|
errorCount += wordErrorCount
|
||||||
errorMsg += wordErrors
|
errorMsg += wordErrors
|
||||||
}
|
}
|
||||||
|
|
||||||
if (uniqueWords.contains(word)) {
|
if (uniqueWords.contains(word)) {
|
||||||
lineHasErrors = true
|
lineHasErrors = true
|
||||||
errorCount++
|
errorCount++
|
||||||
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Found duplicate word: '${word}' on line ${lineNumber}. Remove all duplicates.\n"
|
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Found duplicate word: '${word}' on line ${lineNumber}. Remove all duplicates.\n"
|
||||||
} else {
|
} else {
|
||||||
uniqueWords.add(word)
|
uniqueWords.add(word)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lineHasErrors) {
|
if (lineHasErrors) {
|
||||||
// the validations below make no sense if the previous ones have failed
|
// the validations below make no sense if the previous ones have failed
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
def transcribedWord = transcription.isEmpty() ? word : transcription
|
def transcribedWord = transcription.isEmpty() ? word : transcription
|
||||||
DictionaryTools.wordToDigitSequence(locale, transcribedWord, sounds, !transcription.isEmpty())
|
DictionaryTools.wordToDigitSequence(locale, transcribedWord, sounds, !transcription.isEmpty())
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
errorCount++
|
errorCount++
|
||||||
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Failed generating digit sequence for word '${word}' on line ${lineNumber}. ${e.message}\n"
|
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Failed generating digit sequence for word '${word}' on line ${lineNumber}. ${e.message}\n"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return [errorCount, errorMsg]
|
return [errorCount, errorMsg]
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////// PARSING ////////////////////
|
//////////////////// PARSING ////////////////////
|
||||||
|
|
||||||
static def extractAlphabetExtraCharsFromLine(String languageName, String line) {
|
static def extractAlphabetExtraCharsFromLine(String languageName, String line) {
|
||||||
if (languageName == null || !line.contains('PUNCTUATION') || !isLayoutLine(line)) {
|
if (languageName == null || !line.contains('PUNCTUATION') || !isLayoutLine(line)) {
|
||||||
return ''
|
return ''
|
||||||
}
|
}
|
||||||
|
|
||||||
final DEFAULT = "'-."
|
final DEFAULT = "'-."
|
||||||
|
|
||||||
if (languageName.contains('Korean')) {
|
if (languageName.contains('Korean')) {
|
||||||
return DEFAULT
|
return DEFAULT
|
||||||
} else if (languageName.contains("Hebrew") || languageName.contains("Yiddish")) {
|
} else if (languageName.contains("Hebrew") || languageName.contains("Yiddish")) {
|
||||||
return DEFAULT + '"'
|
return DEFAULT + '"'
|
||||||
}
|
}
|
||||||
|
|
||||||
String allChars = line
|
String allChars = line
|
||||||
.replaceFirst('\\].*', '')
|
.replaceFirst('\\].*', '')
|
||||||
.replaceFirst('^\\s+- \\[', '')
|
.replaceFirst('^\\s+- \\[', '')
|
||||||
.replaceFirst("PUNCTUATION[^,\\s]*", '')
|
.replaceFirst("PUNCTUATION[^,\\s]*", '')
|
||||||
.replace(',', '')
|
.replace(',', '')
|
||||||
.replace(' ', '')
|
.replace(' ', '')
|
||||||
|
|
||||||
return DEFAULT + allChars
|
return DEFAULT + allChars
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static def extractAlphabetCharsFromLine(String line) {
|
static def extractAlphabetCharsFromLine(String line) {
|
||||||
if (line.contains('PUNCTUATION') || line.contains('SPECIAL') || !isLayoutLine(line)) {
|
if (line.contains('PUNCTUATION') || line.contains('SPECIAL') || !isLayoutLine(line)) {
|
||||||
return ''
|
return ''
|
||||||
}
|
}
|
||||||
|
|
||||||
return line.replaceFirst('^\\s+- \\[', '').replaceFirst('\\].*', '').replace(',', '').replace(' ', '')
|
return line.replaceFirst('^\\s+- \\[', '').replaceFirst('\\].*', '').replace(',', '').replace(' ', '')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static def extractSoundFromLine(String line) {
|
static def extractSoundFromLine(String line) {
|
||||||
if (!line.matches('\\s+- \\[\\w+\\s*,\\s*\\d+\\].*')) {
|
if (!line.matches('\\s+- \\[\\w+\\s*,\\s*\\d+\\].*')) {
|
||||||
return ['', '']
|
return ['', '']
|
||||||
}
|
}
|
||||||
|
|
||||||
def cleanLine = line.replaceFirst('^\\s+- \\[', '').replaceFirst('\\].*', '').replace(' ', '')
|
def cleanLine = line.replaceFirst('^\\s+- \\[', '').replaceFirst('\\].*', '').replace(' ', '')
|
||||||
def parts = cleanLine.split(',')
|
def parts = cleanLine.split(',')
|
||||||
return parts.length > 1 ? [parts[0], parts[1]] : ['', '']
|
return parts.length > 1 ? [parts[0], parts[1]] : ['', '']
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static def isLayoutLine(String line) {
|
static def isLayoutLine(String line) {
|
||||||
return line.matches('\\s+- \\[.+?\\].*') && !line.find('\\d+]')
|
return line.matches('\\s+- \\[.+?\\].*') && !line.find('\\d+]')
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////// VALIDATION ////////////////////
|
//////////////////// VALIDATION ////////////////////
|
||||||
|
|
||||||
static def validateNoWhitespace(String line, int lineNumber) {
|
static def validateNoWhitespace(String line, int lineNumber) {
|
||||||
if (line == "") {
|
if (line == "") {
|
||||||
return "There is no word on line ${lineNumber}. Remove all empty lines.\n"
|
return "There is no word on line ${lineNumber}. Remove all empty lines.\n"
|
||||||
} else if (line.contains(" ")) {
|
} else if (line.contains(" ")) {
|
||||||
return "Found space on line ${lineNumber}. Make sure each word is on a new line. Phrases are not allowed.\n"
|
return "Found space on line ${lineNumber}. Make sure each word is on a new line. Phrases are not allowed.\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
return ''
|
return ''
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static def validateFrequency(int frequency, int maxFrequency, String dictionaryFileName, int lineNumber) {
|
static def validateFrequency(int frequency, int maxFrequency, String dictionaryFileName, int lineNumber) {
|
||||||
if (frequency < 0 || frequency > maxFrequency) {
|
if (frequency < 0 || frequency > maxFrequency) {
|
||||||
return "Dictionary '${dictionaryFileName}' is invalid. Found out-of-range word frequency: '${frequency}' on line ${lineNumber}. Frequency must be an integer between 0 and ${maxFrequency}.\n"
|
return "Dictionary '${dictionaryFileName}' is invalid. Found out-of-range word frequency: '${frequency}' on line ${lineNumber}. Frequency must be an integer between 0 and ${maxFrequency}.\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
return ''
|
return ''
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static def validateWord(String word, String validCharacters, boolean isAlphabeticLanguage, int lineNumber, String errorMsgPrefix) {
|
static def validateWord(String word, String validCharacters, boolean isAlphabeticLanguage, int lineNumber, String errorMsgPrefix) {
|
||||||
int errorCount = 0
|
int errorCount = 0
|
||||||
def errors = ''
|
def errors = ''
|
||||||
|
|
||||||
if (word.matches("(\\d.+?|.+?\\d|\\d)")) {
|
if (word.matches("(\\d.+?|.+?\\d|\\d)")) {
|
||||||
errorCount++
|
errorCount++
|
||||||
errors += "${errorMsgPrefix}. Found numbers on line ${lineNumber}. Remove all numbers.\n"
|
errors += "${errorMsgPrefix}. Found numbers on line ${lineNumber}. Remove all numbers.\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
if (word.matches("^\\P{L}+\$") && !validCharacters.contains(word)) {
|
if (word.matches("^\\P{L}+\$") && !validCharacters.contains(word)) {
|
||||||
errorCount++
|
errorCount++
|
||||||
errors += "${errorMsgPrefix}. Found a garbage word: '${word}' on line ${lineNumber}.\n"
|
errors += "${errorMsgPrefix}. Found a garbage word: '${word}' on line ${lineNumber}.\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isAlphabeticLanguage && word.trim().length() == 1) {
|
if (isAlphabeticLanguage && word.trim().length() == 1) {
|
||||||
errorCount++
|
errorCount++
|
||||||
errors += "${errorMsgPrefix}. Found a single letter: '${word}' on line ${lineNumber}. Only uppercase single letters are allowed. The rest of the alphabet will be added automatically.\n"
|
errors += "${errorMsgPrefix}. Found a single letter: '${word}' on line ${lineNumber}. Only uppercase single letters are allowed. The rest of the alphabet will be added automatically.\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
if (errorCount == 0 && isAlphabeticLanguage && !word.matches(validCharacters)) {
|
if (errorCount == 0 && isAlphabeticLanguage && !word.matches(validCharacters)) {
|
||||||
errorCount++
|
errorCount++
|
||||||
errors += "${errorMsgPrefix}. Word '${word}' on line ${lineNumber} contains characters outside of the defined alphabet: $validCharacters.\n"
|
errors += "${errorMsgPrefix}. Word '${word}' on line ${lineNumber} contains characters outside of the defined alphabet: $validCharacters.\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
return [errorCount, errors]
|
return [errorCount, errors]
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,16 @@
|
||||||
pluginManagement {
|
pluginManagement {
|
||||||
repositories {
|
repositories {
|
||||||
google()
|
google()
|
||||||
mavenCentral()
|
mavenCentral()
|
||||||
gradlePluginPortal()
|
gradlePluginPortal()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
dependencyResolutionManagement {
|
dependencyResolutionManagement {
|
||||||
repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
|
repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
|
||||||
repositories {
|
repositories {
|
||||||
google()
|
google()
|
||||||
mavenCentral()
|
mavenCentral()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
rootProject.name = "tt9"
|
rootProject.name = "tt9"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue