fixed the .gradle file indentation and updated the Editorconfig rules
This commit is contained in:
parent
e3d0bac90f
commit
3a25c9f52f
7 changed files with 525 additions and 525 deletions
|
|
@ -1,7 +1,7 @@
|
|||
# Editor configuration, see https://editorconfig.org
|
||||
root = true
|
||||
|
||||
[*.{java,properties,xml}]
|
||||
[*.{gradle,java,properties,xml}]
|
||||
charset = utf-8
|
||||
indent_style = tab
|
||||
indent_size = 2
|
||||
|
|
|
|||
|
|
@ -5,121 +5,121 @@ import java.util.zip.ZipOutputStream
|
|||
apply from: 'dictionary-tools.gradle'
|
||||
|
||||
ext.convertDictionaries = { definitionsInputDir, dictionariesInputDir, dictionariesOutputDir, dictionariesMetaDir ->
|
||||
int errorCount = 0
|
||||
int errorCount = 0
|
||||
|
||||
def errorStream = fileTree(dir: definitionsInputDir).getFiles().parallelStream().map { definition ->
|
||||
def (_, sounds, noSyllables, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageDefintion(definition, dictionariesInputDir)
|
||||
errorCount += langFileErrorCount
|
||||
if (!langFileErrorMsg.isEmpty()) {
|
||||
return langFileErrorMsg
|
||||
}
|
||||
def errorStream = fileTree(dir: definitionsInputDir).getFiles().parallelStream().map { definition ->
|
||||
def (_, sounds, noSyllables, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageDefintion(definition, dictionariesInputDir)
|
||||
errorCount += langFileErrorCount
|
||||
if (!langFileErrorMsg.isEmpty()) {
|
||||
return langFileErrorMsg
|
||||
}
|
||||
|
||||
def (conversionErrorCount, conversionErrorMessages) = convertDictionary(definition, dictionaryFile, dictionariesOutputDir, dictionariesMetaDir, DICTIONARY_OUTPUT_EXTENSION, sounds, noSyllables, locale, MAX_ERRORS, CSV_DELIMITER)
|
||||
errorCount += conversionErrorCount
|
||||
if (!conversionErrorMessages.isEmpty()) {
|
||||
return conversionErrorMessages
|
||||
}
|
||||
def (conversionErrorCount, conversionErrorMessages) = convertDictionary(definition, dictionaryFile, dictionariesOutputDir, dictionariesMetaDir, DICTIONARY_OUTPUT_EXTENSION, sounds, noSyllables, locale, MAX_ERRORS, CSV_DELIMITER)
|
||||
errorCount += conversionErrorCount
|
||||
if (!conversionErrorMessages.isEmpty()) {
|
||||
return conversionErrorMessages
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
String errorsMsg = errorStream.reduce("", String::concat)
|
||||
if (errorsMsg) {
|
||||
throw new GradleException(errorsMsg)
|
||||
}
|
||||
String errorsMsg = errorStream.reduce("", String::concat)
|
||||
if (errorsMsg) {
|
||||
throw new GradleException(errorsMsg)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// this cannot be static, because DictionaryTools will not be visible
|
||||
def convertDictionary(File definition, File csvDictionary, String dictionariesOutputDir, String dictionariesMetaDir, String outputDictionaryExtension, HashMap<String, String> sounds, boolean noSyllables, Locale locale, int maxErrors, String csvDelimiter) {
|
||||
if (isDictionaryUpToDate(definition, csvDictionary, dictionariesMetaDir)) {
|
||||
return [0, ""]
|
||||
}
|
||||
if (isDictionaryUpToDate(definition, csvDictionary, dictionariesMetaDir)) {
|
||||
return [0, ""]
|
||||
}
|
||||
|
||||
|
||||
int errorCount = 0
|
||||
String errorMsg = ''
|
||||
int errorCount = 0
|
||||
String errorMsg = ''
|
||||
|
||||
List<String> fileContents = csvDictionary.readLines()
|
||||
LinkedHashMap<String, ArrayList<String>> outputDictionary = new LinkedHashMap<>()
|
||||
int wordCount = 0
|
||||
List<String> fileContents = csvDictionary.readLines()
|
||||
LinkedHashMap<String, ArrayList<String>> outputDictionary = new LinkedHashMap<>()
|
||||
int wordCount = 0
|
||||
|
||||
for (int lineNumber = 1; lineNumber <= fileContents.size() && errorCount < maxErrors; lineNumber++) {
|
||||
String line = fileContents.get(lineNumber - 1)
|
||||
for (int lineNumber = 1; lineNumber <= fileContents.size() && errorCount < maxErrors; lineNumber++) {
|
||||
String line = fileContents.get(lineNumber - 1)
|
||||
|
||||
def (word, transcription, frequency) = DictionaryTools.getDictionaryLineData(line, csvDelimiter)
|
||||
def (word, transcription, frequency) = DictionaryTools.getDictionaryLineData(line, csvDelimiter)
|
||||
|
||||
String digitSequence = ""
|
||||
try {
|
||||
def transcribedWord = transcription.isEmpty() ? word : transcription
|
||||
digitSequence = DictionaryTools.wordToDigitSequence(locale, transcribedWord, sounds, !transcription.isEmpty())
|
||||
} catch (IllegalArgumentException e) {
|
||||
errorCount++
|
||||
errorMsg += "Dictionary '${csvDictionary.name}' is invalid. Failed generating digit sequence for word '${word}' on line ${lineNumber}. ${e.message}\n"
|
||||
}
|
||||
String digitSequence = ""
|
||||
try {
|
||||
def transcribedWord = transcription.isEmpty() ? word : transcription
|
||||
digitSequence = DictionaryTools.wordToDigitSequence(locale, transcribedWord, sounds, !transcription.isEmpty())
|
||||
} catch (IllegalArgumentException e) {
|
||||
errorCount++
|
||||
errorMsg += "Dictionary '${csvDictionary.name}' is invalid. Failed generating digit sequence for word '${word}' on line ${lineNumber}. ${e.message}\n"
|
||||
}
|
||||
|
||||
if (errorCount == 0) {
|
||||
if (!outputDictionary.containsKey(digitSequence)) {
|
||||
outputDictionary.put(digitSequence, new ArrayList<>())
|
||||
}
|
||||
// prefix the frequency to sort the words later
|
||||
outputDictionary.get(digitSequence).add("${String.format('%03d', frequency)}${word}")
|
||||
wordCount++
|
||||
}
|
||||
}
|
||||
if (errorCount == 0) {
|
||||
if (!outputDictionary.containsKey(digitSequence)) {
|
||||
outputDictionary.put(digitSequence, new ArrayList<>())
|
||||
}
|
||||
// prefix the frequency to sort the words later
|
||||
outputDictionary.get(digitSequence).add("${String.format('%03d', frequency)}${word}")
|
||||
wordCount++
|
||||
}
|
||||
}
|
||||
|
||||
outputDictionary = sortDictionary(outputDictionary)
|
||||
outputDictionary = sortDictionary(outputDictionary)
|
||||
|
||||
def (assetError, zippedDictionary) = writeZippedDictionary(dictionariesOutputDir, csvDictionary, outputDictionary, outputDictionaryExtension, noSyllables)
|
||||
if (assetError) {
|
||||
errorCount++
|
||||
errorMsg += assetError
|
||||
}
|
||||
def (assetError, zippedDictionary) = writeZippedDictionary(dictionariesOutputDir, csvDictionary, outputDictionary, outputDictionaryExtension, noSyllables)
|
||||
if (assetError) {
|
||||
errorCount++
|
||||
errorMsg += assetError
|
||||
}
|
||||
|
||||
def propertiesError = writeDictionaryProperties(definition, csvDictionary, zippedDictionary, dictionariesMetaDir, outputDictionary.size(), wordCount)
|
||||
if (propertiesError) {
|
||||
errorCount++
|
||||
errorMsg += propertiesError
|
||||
}
|
||||
def propertiesError = writeDictionaryProperties(definition, csvDictionary, zippedDictionary, dictionariesMetaDir, outputDictionary.size(), wordCount)
|
||||
if (propertiesError) {
|
||||
errorCount++
|
||||
errorMsg += propertiesError
|
||||
}
|
||||
|
||||
return [errorCount, errorMsg]
|
||||
return [errorCount, errorMsg]
|
||||
}
|
||||
|
||||
|
||||
//////////////////// DICTIONARY PROCESSING ////////////////////
|
||||
|
||||
static byte[] compressDictionaryLine(String digitSequence, List<String> words, boolean noSyllables) {
|
||||
if (words.isEmpty()) {
|
||||
throw new IllegalArgumentException("No words for digit sequence: ${digitSequence}")
|
||||
}
|
||||
if (words.isEmpty()) {
|
||||
throw new IllegalArgumentException("No words for digit sequence: ${digitSequence}")
|
||||
}
|
||||
|
||||
boolean shouldSeparateWords = !noSyllables
|
||||
boolean shouldSeparateWords = !noSyllables
|
||||
|
||||
for (def i = 0; i < words.size(); i++) {
|
||||
if (words.get(i).length() != digitSequence.length()) {
|
||||
shouldSeparateWords = true
|
||||
break
|
||||
}
|
||||
}
|
||||
for (def i = 0; i < words.size(); i++) {
|
||||
if (words.get(i).length() != digitSequence.length()) {
|
||||
shouldSeparateWords = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
digitSequence +
|
||||
(shouldSeparateWords && noSyllables ? ' ' : '') + // if the language definition has sounds (aka the characters are syllables), we separate the words for sure, so the initial hint is not needed
|
||||
words.join(shouldSeparateWords ? ' ' : null)
|
||||
).getBytes(StandardCharsets.UTF_8)
|
||||
return (
|
||||
digitSequence +
|
||||
(shouldSeparateWords && noSyllables ? ' ' : '') + // if the language definition has sounds (aka the characters are syllables), we separate the words for sure, so the initial hint is not needed
|
||||
words.join(shouldSeparateWords ? ' ' : null)
|
||||
).getBytes(StandardCharsets.UTF_8)
|
||||
}
|
||||
|
||||
|
||||
def isDictionaryUpToDate(File definition, File csvDictionary, String dictionaryPropertiesDir) {
|
||||
def dictionaryProperties = new File(dictionaryPropertiesDir, getPropertyFileName(csvDictionary))
|
||||
if (!dictionaryProperties.exists()) {
|
||||
return false
|
||||
}
|
||||
def dictionaryProperties = new File(dictionaryPropertiesDir, getPropertyFileName(csvDictionary))
|
||||
if (!dictionaryProperties.exists()) {
|
||||
return false
|
||||
}
|
||||
|
||||
Properties props = new Properties()
|
||||
dictionaryProperties.withInputStream { stream -> props.load(stream) }
|
||||
Properties props = new Properties()
|
||||
dictionaryProperties.withInputStream { stream -> props.load(stream) }
|
||||
|
||||
return props.getProperty("hash", "") == DictionaryTools.getLanguageHash(definition, csvDictionary)
|
||||
return props.getProperty("hash", "") == DictionaryTools.getLanguageHash(definition, csvDictionary)
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -128,38 +128,38 @@ def isDictionaryUpToDate(File definition, File csvDictionary, String dictionaryP
|
|||
* Also, it removes the frequency prefix from each word. The input dictionary is not modified.
|
||||
*/
|
||||
static LinkedHashMap<String, ArrayList<String>> sortDictionary(LinkedHashMap<String, ArrayList<String>> dictionary) {
|
||||
// sort the sequences in ascending order of length, then lexicographically
|
||||
def sequences = dictionary.keySet().toList()
|
||||
Collections.sort(sequences, { a, b ->
|
||||
a.length() == b.length() ? a.compareTo(b) : a.length() - b.length()
|
||||
})
|
||||
def sortedDictionary = new LinkedHashMap<String, ArrayList<String>>()
|
||||
sequences.each { sequence -> sortedDictionary.put(sequence, dictionary.get(sequence)) }
|
||||
// sort the sequences in ascending order of length, then lexicographically
|
||||
def sequences = dictionary.keySet().toList()
|
||||
Collections.sort(sequences, { a, b ->
|
||||
a.length() == b.length() ? a.compareTo(b) : a.length() - b.length()
|
||||
})
|
||||
def sortedDictionary = new LinkedHashMap<String, ArrayList<String>>()
|
||||
sequences.each { sequence -> sortedDictionary.put(sequence, dictionary.get(sequence)) }
|
||||
|
||||
// sort the words for each sequence in descending order of frequency
|
||||
sortedDictionary.forEach { _, words -> {
|
||||
Collections.sort(words, Collections.reverseOrder())
|
||||
words.replaceAll { word -> word.replaceFirst("^\\d+", "") }
|
||||
}}
|
||||
// sort the words for each sequence in descending order of frequency
|
||||
sortedDictionary.forEach { _, words -> {
|
||||
Collections.sort(words, Collections.reverseOrder())
|
||||
words.replaceAll { word -> word.replaceFirst("^\\d+", "") }
|
||||
}}
|
||||
|
||||
return sortedDictionary
|
||||
return sortedDictionary
|
||||
}
|
||||
|
||||
|
||||
//////////////////// FILE I/O ////////////////////
|
||||
|
||||
static getDictionaryFileName(csvDictionary) {
|
||||
return "${csvDictionary.getName().replaceFirst("\\.\\w+\$", "")}"
|
||||
return "${csvDictionary.getName().replaceFirst("\\.\\w+\$", "")}"
|
||||
}
|
||||
|
||||
|
||||
static getPropertyFileName(csvDictionary) {
|
||||
return "${getDictionaryFileName(csvDictionary)}.props.yml"
|
||||
return "${getDictionaryFileName(csvDictionary)}.props.yml"
|
||||
}
|
||||
|
||||
|
||||
static getZipDictionaryFile(dictionariesOutputDir, csvDictionary, outputDictionaryExtension) {
|
||||
return new File(dictionariesOutputDir, "${getDictionaryFileName(csvDictionary)}.${outputDictionaryExtension}")
|
||||
return new File(dictionariesOutputDir, "${getDictionaryFileName(csvDictionary)}.${outputDictionaryExtension}")
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -167,37 +167,37 @@ static getZipDictionaryFile(dictionariesOutputDir, csvDictionary, outputDictiona
|
|||
* Zipping the text files results in a smaller APK in comparison to the uncompressed text files.
|
||||
*/
|
||||
static def writeZippedDictionary(dictionariesOutputDir, csvDictionaryFile, outputDictionary, outputDictionaryExtension, noSyllables) {
|
||||
def fileName = getDictionaryFileName(csvDictionaryFile)
|
||||
def outputFile = getZipDictionaryFile(dictionariesOutputDir, csvDictionaryFile, outputDictionaryExtension)
|
||||
def fileName = getDictionaryFileName(csvDictionaryFile)
|
||||
def outputFile = getZipDictionaryFile(dictionariesOutputDir, csvDictionaryFile, outputDictionaryExtension)
|
||||
|
||||
try {
|
||||
def zipOutputStream = new ZipOutputStream(new FileOutputStream(outputFile))
|
||||
zipOutputStream.putNextEntry(new ZipEntry("${fileName}.txt"))
|
||||
outputDictionary.each { digitSequence, words ->
|
||||
zipOutputStream.write(compressDictionaryLine(digitSequence, words, noSyllables))
|
||||
}
|
||||
zipOutputStream.closeEntry()
|
||||
zipOutputStream.close()
|
||||
return ["", outputFile]
|
||||
} catch (Exception e) {
|
||||
return ["Failed writing to '${outputFile.path}'. ${e.message}\n", outputFile]
|
||||
}
|
||||
try {
|
||||
def zipOutputStream = new ZipOutputStream(new FileOutputStream(outputFile))
|
||||
zipOutputStream.putNextEntry(new ZipEntry("${fileName}.txt"))
|
||||
outputDictionary.each { digitSequence, words ->
|
||||
zipOutputStream.write(compressDictionaryLine(digitSequence, words, noSyllables))
|
||||
}
|
||||
zipOutputStream.closeEntry()
|
||||
zipOutputStream.close()
|
||||
return ["", outputFile]
|
||||
} catch (Exception e) {
|
||||
return ["Failed writing to '${outputFile.path}'. ${e.message}\n", outputFile]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// this cannot be static, because it requires access to exec() and DictionaryTools
|
||||
def writeDictionaryProperties(File definition, File csvDictionary, File zipDictionary, outputDir, int sequences, int words) {
|
||||
def name = getPropertyFileName(csvDictionary)
|
||||
def name = getPropertyFileName(csvDictionary)
|
||||
|
||||
try {
|
||||
def hash = DictionaryTools.getLanguageHash(definition, csvDictionary)
|
||||
def revision = zipDictionary.exists() ? exec("git log --pretty=tformat:%H -n 1 ${zipDictionary}") : ""
|
||||
def size = zipDictionary.exists() ? zipDictionary.length() : 0
|
||||
try {
|
||||
def hash = DictionaryTools.getLanguageHash(definition, csvDictionary)
|
||||
def revision = zipDictionary.exists() ? exec("git log --pretty=tformat:%H -n 1 ${zipDictionary}") : ""
|
||||
def size = zipDictionary.exists() ? zipDictionary.length() : 0
|
||||
|
||||
new File(outputDir, name).text = "hash: ${hash}\nrevision: ${revision}\nsequences: ${sequences}\nsize: ${size}\nwords: ${words}"
|
||||
new File(outputDir, name).text = "hash: ${hash}\nrevision: ${revision}\nsequences: ${sequences}\nsize: ${size}\nwords: ${words}"
|
||||
|
||||
return ""
|
||||
} catch (Exception e) {
|
||||
return "Failed writing dictionary properties to: '${outputDir}/${name}'. ${e.message}\n"
|
||||
}
|
||||
return ""
|
||||
} catch (Exception e) {
|
||||
return "Failed writing dictionary properties to: '${outputDir}/${name}'. ${e.message}\n"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -163,4 +163,4 @@ android {
|
|||
dependencies {
|
||||
implementation 'androidx.preference:preference:1.2.1'
|
||||
implementation 'androidx.constraintlayout:constraintlayout:2.1.4'
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,65 +1,65 @@
|
|||
class Wrapper {
|
||||
static def getDictionaryLineData(String line, String delimiter) {
|
||||
String[] parts = line.split(delimiter, 2)
|
||||
String word = parts[0]
|
||||
String transcription = parts.length > 1 && parts[1] =~ "^[a-zA-Z]+\$" ? parts[1] : ""
|
||||
static def getDictionaryLineData(String line, String delimiter) {
|
||||
String[] parts = line.split(delimiter, 2)
|
||||
String word = parts[0]
|
||||
String transcription = parts.length > 1 && parts[1] =~ "^[a-zA-Z]+\$" ? parts[1] : ""
|
||||
|
||||
int frequency
|
||||
try {
|
||||
int partsElement = transcription.isEmpty() ? 1 : 2
|
||||
frequency = (parts.length > partsElement ? parts[partsElement] : "0") as int
|
||||
} catch (Exception ignored) {
|
||||
frequency = -1
|
||||
}
|
||||
int frequency
|
||||
try {
|
||||
int partsElement = transcription.isEmpty() ? 1 : 2
|
||||
frequency = (parts.length > partsElement ? parts[partsElement] : "0") as int
|
||||
} catch (Exception ignored) {
|
||||
frequency = -1
|
||||
}
|
||||
|
||||
return [word, transcription, frequency]
|
||||
}
|
||||
return [word, transcription, frequency]
|
||||
}
|
||||
|
||||
|
||||
static def wordToDigitSequence(Locale locale, String word, HashMap<String, String> sounds, boolean isTranscribed) {
|
||||
def sequence = new StringBuilder()
|
||||
static def wordToDigitSequence(Locale locale, String word, HashMap<String, String> sounds, boolean isTranscribed) {
|
||||
def sequence = new StringBuilder()
|
||||
|
||||
final String normalizedWord = isTranscribed ? word : word.toUpperCase(locale)
|
||||
String currentSound = ""
|
||||
final String normalizedWord = isTranscribed ? word : word.toUpperCase(locale)
|
||||
String currentSound = ""
|
||||
|
||||
for (int i = 0, end = normalizedWord.length() - 1; i <= end; i++) {
|
||||
char currentChar = normalizedWord.charAt(i)
|
||||
char nextChar = i < end ? normalizedWord.charAt(i + 1) : 0
|
||||
int nextCharType = Character.getType(nextChar)
|
||||
for (int i = 0, end = normalizedWord.length() - 1; i <= end; i++) {
|
||||
char currentChar = normalizedWord.charAt(i)
|
||||
char nextChar = i < end ? normalizedWord.charAt(i + 1) : 0
|
||||
int nextCharType = Character.getType(nextChar)
|
||||
|
||||
currentSound += currentChar
|
||||
currentSound += currentChar
|
||||
|
||||
// charAt(i) returns "ΐ" as three separate characters, but they must be treated as one.
|
||||
if (
|
||||
locale.getLanguage() == "el"
|
||||
&& (nextCharType == Character.NON_SPACING_MARK || nextCharType == Character.ENCLOSING_MARK || nextCharType == Character.COMBINING_SPACING_MARK)
|
||||
) {
|
||||
continue
|
||||
}
|
||||
// charAt(i) returns "ΐ" as three separate characters, but they must be treated as one.
|
||||
if (
|
||||
locale.getLanguage() == "el"
|
||||
&& (nextCharType == Character.NON_SPACING_MARK || nextCharType == Character.ENCLOSING_MARK || nextCharType == Character.COMBINING_SPACING_MARK)
|
||||
) {
|
||||
continue
|
||||
}
|
||||
|
||||
if (!isTranscribed || i == end || Character.isUpperCase(nextChar)) {
|
||||
if (!sounds.containsKey(currentSound)) {
|
||||
throw new IllegalArgumentException("Sound or layout entry '${currentSound}' does not belong to the language sound list: ${sounds}.")
|
||||
} else {
|
||||
sequence << sounds.get(currentSound)
|
||||
currentSound = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!isTranscribed || i == end || Character.isUpperCase(nextChar)) {
|
||||
if (!sounds.containsKey(currentSound)) {
|
||||
throw new IllegalArgumentException("Sound or layout entry '${currentSound}' does not belong to the language sound list: ${sounds}.")
|
||||
} else {
|
||||
sequence << sounds.get(currentSound)
|
||||
currentSound = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (sequence.isEmpty()) {
|
||||
throw new IllegalArgumentException("The word does not contain any valid sounds.")
|
||||
}
|
||||
if (sequence.isEmpty()) {
|
||||
throw new IllegalArgumentException("The word does not contain any valid sounds.")
|
||||
}
|
||||
|
||||
return sequence.toString()
|
||||
}
|
||||
return sequence.toString()
|
||||
}
|
||||
|
||||
|
||||
static def getLanguageHash(File definitionFile, File dictionaryFile) {
|
||||
def definitionHash = definitionFile != null && definitionFile.exists() ? definitionFile.text.digest("SHA-256") : ""
|
||||
def dictionaryHash = dictionaryFile != null && dictionaryFile.exists() ? dictionaryFile.text.digest("SHA-256") : ""
|
||||
return definitionHash + dictionaryHash
|
||||
}
|
||||
static def getLanguageHash(File definitionFile, File dictionaryFile) {
|
||||
def definitionHash = definitionFile != null && definitionFile.exists() ? definitionFile.text.digest("SHA-256") : ""
|
||||
def dictionaryHash = dictionaryFile != null && dictionaryFile.exists() ? dictionaryFile.text.digest("SHA-256") : ""
|
||||
return definitionHash + dictionaryHash
|
||||
}
|
||||
}
|
||||
|
||||
ext.DictionaryTools = Wrapper
|
||||
|
|
|
|||
|
|
@ -1,172 +1,172 @@
|
|||
ext.convertHelpDocs = {markdownDir, htmlDir ->
|
||||
fileTree(markdownDir).getFiles().parallelStream().forEach { File markdownPath ->
|
||||
markdownToHtml(markdownPath.path, "${htmlDir}/${markdownPath.name.replaceAll("\\.md\$", ".html")}")
|
||||
}
|
||||
fileTree(markdownDir).getFiles().parallelStream().forEach { File markdownPath ->
|
||||
markdownToHtml(markdownPath.path, "${htmlDir}/${markdownPath.name.replaceAll("\\.md\$", ".html")}")
|
||||
}
|
||||
}
|
||||
|
||||
static markdownToHtml(markdownPath, htmlPath) {
|
||||
def text = new File(markdownPath).text
|
||||
def text = new File(markdownPath).text
|
||||
|
||||
text = convertHeaders(text)
|
||||
text = convertOrderedLists(text)
|
||||
text = convertUnorderedLists(text)
|
||||
text = convertInlineTags(text)
|
||||
text = addStylesToTags(text)
|
||||
text = insertIndex(text, generateIndex(text))
|
||||
text = removeWhitespace(text)
|
||||
text = convertHeaders(text)
|
||||
text = convertOrderedLists(text)
|
||||
text = convertUnorderedLists(text)
|
||||
text = convertInlineTags(text)
|
||||
text = addStylesToTags(text)
|
||||
text = insertIndex(text, generateIndex(text))
|
||||
text = removeWhitespace(text)
|
||||
|
||||
new File(htmlPath).text = "<!DOCTYPE html><html lang=\"en\"><head><meta charset=\"UTF-8\"><style>${getStyles()}</style><title>Help</title></head><body>${text}</body></html>"
|
||||
new File(htmlPath).text = "<!DOCTYPE html><html lang=\"en\"><head><meta charset=\"UTF-8\"><style>${getStyles()}</style><title>Help</title></head><body>${text}</body></html>"
|
||||
}
|
||||
|
||||
|
||||
static getStyles() {
|
||||
return "body {padding: 0 6px; background-color: #f4f4f4; color: #000;}" +
|
||||
"a {color: #225682}" +
|
||||
"a:visited {color: #644280}" +
|
||||
"li {margin: 4px 0; padding: 1px;}" +
|
||||
"p {text-align: left;}" +
|
||||
"p.wrap{word-wrap: break-word;}" +
|
||||
".toc {border: 1px solid; display: inline-block; padding: 12px 20px 12px 0; margin: 12px 0;}" +
|
||||
".toc > h3 {text-align: center; margin: 0;}" +
|
||||
"@media (prefers-color-scheme: dark) {" +
|
||||
"body { background-color: #333; color: #c8c8c8; }" +
|
||||
"a {color: #a0c1de}" +
|
||||
"a:visited {color: #d9bce1}" +
|
||||
"}"
|
||||
return "body {padding: 0 6px; background-color: #f4f4f4; color: #000;}" +
|
||||
"a {color: #225682}" +
|
||||
"a:visited {color: #644280}" +
|
||||
"li {margin: 4px 0; padding: 1px;}" +
|
||||
"p {text-align: left;}" +
|
||||
"p.wrap{word-wrap: break-word;}" +
|
||||
".toc {border: 1px solid; display: inline-block; padding: 12px 20px 12px 0; margin: 12px 0;}" +
|
||||
".toc > h3 {text-align: center; margin: 0;}" +
|
||||
"@media (prefers-color-scheme: dark) {" +
|
||||
"body { background-color: #333; color: #c8c8c8; }" +
|
||||
"a {color: #a0c1de}" +
|
||||
"a:visited {color: #d9bce1}" +
|
||||
"}"
|
||||
}
|
||||
|
||||
|
||||
static generateIndex(html) {
|
||||
def entries = html.split("\n").collect( { line ->
|
||||
def matches = line =~ "<h2 id=\"(\\S+)\">(.+)</h2>"
|
||||
if (matches.size() > 0 && matches[0].size() > 2) {
|
||||
return "<a href=\"#${matches[0][1]}\">${matches[0][2]}</a>"
|
||||
} else {
|
||||
return null
|
||||
}
|
||||
}).findAll { it != null }
|
||||
def entries = html.split("\n").collect( { line ->
|
||||
def matches = line =~ "<h2 id=\"(\\S+)\">(.+)</h2>"
|
||||
if (matches.size() > 0 && matches[0].size() > 2) {
|
||||
return "<a href=\"#${matches[0][1]}\">${matches[0][2]}</a>"
|
||||
} else {
|
||||
return null
|
||||
}
|
||||
}).findAll { it != null }
|
||||
|
||||
return "<section class=\"toc\"><h3>Contents</h3>" +
|
||||
"<ol>${entries.collect { "<li>${it}</li>" }.join("\n")}</ol>" +
|
||||
"</section>"
|
||||
return "<section class=\"toc\"><h3>Contents</h3>" +
|
||||
"<ol>${entries.collect { "<li>${it}</li>" }.join("\n")}</ol>" +
|
||||
"</section>"
|
||||
}
|
||||
|
||||
|
||||
static insertIndex(html, index) {
|
||||
return html.replaceFirst("<h2", "${index}<h2")
|
||||
return html.replaceFirst("<h2", "${index}<h2")
|
||||
}
|
||||
|
||||
|
||||
static convertHeaders(markdown) {
|
||||
def html = markdown.split("\n").collect { line ->
|
||||
if (line.startsWith("#")) {
|
||||
def headerNumber = 0
|
||||
for (int i = 0; i < line.length(); i++) {
|
||||
if (line[i] != '#') {
|
||||
headerNumber = i
|
||||
break
|
||||
}
|
||||
}
|
||||
def html = markdown.split("\n").collect { line ->
|
||||
if (line.startsWith("#")) {
|
||||
def headerNumber = 0
|
||||
for (int i = 0; i < line.length(); i++) {
|
||||
if (line[i] != '#') {
|
||||
headerNumber = i
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
def header = line.replaceAll("^#+", "").trim()
|
||||
def anchor = header.toLowerCase().replaceAll("[^\\d\\p{L}]+", "-").replaceAll("[\\-]+\$", "")
|
||||
def header = line.replaceAll("^#+", "").trim()
|
||||
def anchor = header.toLowerCase().replaceAll("[^\\d\\p{L}]+", "-").replaceAll("[\\-]+\$", "")
|
||||
|
||||
return "<h${headerNumber} id=\"${anchor}\">${header}</h${headerNumber}>"
|
||||
} else {
|
||||
return line
|
||||
}
|
||||
}
|
||||
return "<h${headerNumber} id=\"${anchor}\">${header}</h${headerNumber}>"
|
||||
} else {
|
||||
return line
|
||||
}
|
||||
}
|
||||
|
||||
return html.join("\n")
|
||||
return html.join("\n")
|
||||
}
|
||||
|
||||
|
||||
static convertOrderedLists(markdown) {
|
||||
def html = markdown.split("\n").collect { line ->
|
||||
if (line.matches("^\\d+\\..*")) {
|
||||
return "<li>${line.replaceAll("^\\d+\\.\\s*", "")}</li>"
|
||||
} else {
|
||||
return line
|
||||
}
|
||||
}
|
||||
def html = markdown.split("\n").collect { line ->
|
||||
if (line.matches("^\\d+\\..*")) {
|
||||
return "<li>${line.replaceAll("^\\d+\\.\\s*", "")}</li>"
|
||||
} else {
|
||||
return line
|
||||
}
|
||||
}
|
||||
|
||||
return html.join("\n").replaceAll("(?<!li>\n)<li>", "<ol><li>").replaceAll("</li>(?!\n<li)", "</li></ol>")
|
||||
return html.join("\n").replaceAll("(?<!li>\n)<li>", "<ol><li>").replaceAll("</li>(?!\n<li)", "</li></ol>")
|
||||
}
|
||||
|
||||
|
||||
static convertUnorderedLists(markdown) {
|
||||
boolean inList = false
|
||||
boolean inNestedList = false
|
||||
boolean inList = false
|
||||
boolean inNestedList = false
|
||||
|
||||
def html = ""
|
||||
def html = ""
|
||||
|
||||
markdown.split("\n").each { line ->
|
||||
def convertedLine = ""
|
||||
markdown.split("\n").each { line ->
|
||||
def convertedLine = ""
|
||||
|
||||
def innerLi = line.replaceAll("^\\s*-\\s*", "")
|
||||
def innerLi = line.replaceAll("^\\s*-\\s*", "")
|
||||
|
||||
if (line.matches("^-.*")) {
|
||||
if (!inList) {
|
||||
convertedLine += "<ul>"
|
||||
inList = true
|
||||
}
|
||||
if (line.matches("^-.*")) {
|
||||
if (!inList) {
|
||||
convertedLine += "<ul>"
|
||||
inList = true
|
||||
}
|
||||
|
||||
if (inNestedList) {
|
||||
convertedLine += "</ul></li>"
|
||||
inNestedList = false
|
||||
}
|
||||
if (inNestedList) {
|
||||
convertedLine += "</ul></li>"
|
||||
inNestedList = false
|
||||
}
|
||||
|
||||
convertedLine += "<li>${innerLi}</li>"
|
||||
} else if (line.matches("^\\s+-.*")) {
|
||||
if (!inNestedList) {
|
||||
if (html.endsWith("</li>")) {
|
||||
html = html.substring(0, html.length() - 5)
|
||||
} else if (html.endsWith("</li>\n")) {
|
||||
html = html.substring(0, html.length() - 6)
|
||||
}
|
||||
convertedLine += "<li>${innerLi}</li>"
|
||||
} else if (line.matches("^\\s+-.*")) {
|
||||
if (!inNestedList) {
|
||||
if (html.endsWith("</li>")) {
|
||||
html = html.substring(0, html.length() - 5)
|
||||
} else if (html.endsWith("</li>\n")) {
|
||||
html = html.substring(0, html.length() - 6)
|
||||
}
|
||||
|
||||
convertedLine += "<ul>"
|
||||
inNestedList = true
|
||||
}
|
||||
convertedLine += "<ul>"
|
||||
inNestedList = true
|
||||
}
|
||||
|
||||
convertedLine += "<li>${innerLi}</li>"
|
||||
} else {
|
||||
if (inNestedList) {
|
||||
inNestedList = false
|
||||
convertedLine += "</ul></li>"
|
||||
}
|
||||
convertedLine += "<li>${innerLi}</li>"
|
||||
} else {
|
||||
if (inNestedList) {
|
||||
inNestedList = false
|
||||
convertedLine += "</ul></li>"
|
||||
}
|
||||
|
||||
if (inList) {
|
||||
inList = false
|
||||
convertedLine += "</ul>"
|
||||
}
|
||||
if (inList) {
|
||||
inList = false
|
||||
convertedLine += "</ul>"
|
||||
}
|
||||
|
||||
convertedLine += line
|
||||
}
|
||||
convertedLine += line
|
||||
}
|
||||
|
||||
html += convertedLine + "\n"
|
||||
}
|
||||
html += convertedLine + "\n"
|
||||
}
|
||||
|
||||
return html
|
||||
return html
|
||||
}
|
||||
|
||||
|
||||
static convertInlineTags(markdown) {
|
||||
return markdown
|
||||
.replaceAll("\n([^\n<]+?)(\n|\$)", "<p>\$1</p>")
|
||||
.replaceAll("_([^_]+)_", "<i>\$1</i>")
|
||||
.replaceAll("[*]{2}(.+?)[*]{2}", "<b>\$1</b>")
|
||||
.replaceAll("\\[([^]]+)\\]\\(([^)]+)\\)", "<a href=\"\$2\">\$1</a>")
|
||||
.replaceAll("href=\"([^\"]+)-\"", "href=\"\$1\"")
|
||||
.replaceAll("href=\"([^\"]+?)--([^\"]+?)\"", "href=\"\$1-\$2\"")
|
||||
return markdown
|
||||
.replaceAll("\n([^\n<]+?)(\n|\$)", "<p>\$1</p>")
|
||||
.replaceAll("_([^_]+)_", "<i>\$1</i>")
|
||||
.replaceAll("[*]{2}(.+?)[*]{2}", "<b>\$1</b>")
|
||||
.replaceAll("\\[([^]]+)\\]\\(([^)]+)\\)", "<a href=\"\$2\">\$1</a>")
|
||||
.replaceAll("href=\"([^\"]+)-\"", "href=\"\$1\"")
|
||||
.replaceAll("href=\"([^\"]+?)--([^\"]+?)\"", "href=\"\$1-\$2\"")
|
||||
}
|
||||
|
||||
|
||||
static addStylesToTags(html) {
|
||||
return html.replaceAll("<p>([^<]+?googlequicksearchbox[^<]+?)</p>", "<p class=\"wrap\">\$1</p>")
|
||||
return html.replaceAll("<p>([^<]+?googlequicksearchbox[^<]+?)</p>", "<p class=\"wrap\">\$1</p>")
|
||||
}
|
||||
|
||||
|
||||
static removeWhitespace(html) {
|
||||
return html.replaceAll("\\s+", " ").replaceAll("/> <", "/><")
|
||||
return html.replaceAll("\\s+", " ").replaceAll("/> <", "/><")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,330 +2,330 @@ apply from: 'dictionary-tools.gradle'
|
|||
|
||||
|
||||
// Validates every language definition found in "definitionsDir" together with its dictionary.
//
// For each definition, a "<definition name without .yml>.txt" file in "validationDir" stores
// "<languageHash> OK" or "<languageHash> INVALID". When the stored text already equals
// "<languageHash> OK", the dictionary validation is skipped.
//
// Throws a GradleException containing all collected error messages, if there are any.
ext.validateLanguageFiles = { definitionsDir, dictionariesDir, validationDir ->
	// The definitions are processed by a parallel stream, so the shared counter must be
	// thread-safe. A plain "int" updated with "+=" from several threads can lose increments.
	def errorCount = new java.util.concurrent.atomic.AtomicInteger(0)

	def errorStream = fileTree(dir: definitionsDir).getFiles().parallelStream().map { definition ->
		// best-effort cutoff: definitions already being processed are not interrupted
		if (errorCount.get() >= MAX_ERRORS) {
			return "Too many errors! Skipping: ${definition}\n"
		}

		def (alphabet, sounds, isAlphabeticLanguage, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageDefintion(definition, dictionariesDir)

		// NOTE(review): presumably a fingerprint of the definition and the dictionary contents;
		// confirm in dictionary-tools.gradle
		def languageHash = DictionaryTools.getLanguageHash(definition, dictionaryFile)
		def validationFile = new File("${validationDir}/${definition.name.replace(".yml", "")}.txt")

		errorCount.addAndGet(langFileErrorCount as int)
		if (!langFileErrorMsg.isEmpty()) {
			validationFile.text = "${languageHash} INVALID"
			return langFileErrorMsg
		}

		// nothing to do when this exact hash has already been validated successfully
		if (validationFile.exists() && validationFile.text == "${languageHash} OK") {
			return ""
		}

		def (dictionaryErrorCount, dictionaryErrorMesages) = validateDictionary(dictionaryFile, alphabet, sounds, isAlphabeticLanguage, locale, MAX_ERRORS, CSV_DELIMITER, MAX_WORD_FREQUENCY)
		errorCount.addAndGet(dictionaryErrorCount as int)
		if (!dictionaryErrorMesages.isEmpty()) {
			validationFile.text = "${languageHash} INVALID"
			return dictionaryErrorMesages
		}

		validationFile.text = "${languageHash} OK"
		return ""
	}

	String errorsMsg = errorStream.reduce("", String::concat)
	if (errorsMsg) {
		throw new GradleException(errorsMsg)
	}
}
|
||||
|
||||
|
||||
// Parses a language definition file line by line and validates its structure.
//
// Returns a list of:
//   [alphabet, sounds, !hasSounds, locale, dictionaryFile, errorCount, errorMsg]
// where "alphabet" is the concatenation of all characters found on the layout lines, and
// "sounds" maps each sound to its digit sequence. When the file has no "sounds" property,
// "sounds" is filled from the layout instead: every character, upper-cased for the locale,
// is mapped to the index of the layout line it was found on.
ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
	String alphabet = ''
	int layoutKey = 0
	HashMap<String, String> sounds = new HashMap<>()
	HashMap<String, String> layoutSounds = new HashMap<>()

	File dictionaryFile
	int errorCount = 0
	String errorMsg = ""

	boolean hasLayout = false
	boolean hasSounds = false
	boolean isLocaleValid = false
	String localeString = ""
	String dictionaryFileName = ""

	for (String rawLine : languageFile.readLines()) {
		// any line starting with a letter is a property; report the ones we do not know
		if (
			rawLine.matches("^[a-zA-Z].*")
			&& !rawLine.startsWith("abcString")
			&& !rawLine.startsWith("dictionaryFile")
			&& !rawLine.startsWith("hasSpaceBetweenWords")
			&& !rawLine.startsWith("hasUpperCase")
			&& !rawLine.startsWith("layout")
			&& !rawLine.startsWith("locale")
			&& !rawLine.startsWith("name")
			&& !rawLine.startsWith("sounds")
		) {
			def parts = rawLine.split(":")
			def property = parts.length > 0 ? parts[0] : rawLine

			errorCount++
			errorMsg += "Language '${languageFile.name}' is invalid. Found unknown property: '${property}'.\n"
		}

		// strip the trailing comment, if any
		String line = rawLine.replaceFirst("#[\\s\\S]+\$", "")

		if (
			(line.startsWith("hasUpperCase") || line.startsWith("hasSpaceBetweenWords"))
			&& !line.endsWith("yes") && !line.endsWith("no")
		) {
			def property = line.replaceAll(":.*\$", "")
			// strip the name of whichever property matched, not only "hasUpperCase:",
			// so the message shows just the offending value
			def invalidVal = line.replace(property + ":", "").trim()
			errorCount++
			errorMsg += "Language '${languageFile.name}' is invalid. Unrecognized '${property}' value: '${invalidVal}'. Only 'yes' and 'no' are allowed.\n"
		}

		if (line.startsWith("layout")) {
			hasLayout = true
		}

		if (line.startsWith("sounds")) {
			hasSounds = true
		}

		if (line.startsWith("locale")) {
			localeString = line.replace("locale:", "").trim()
			isLocaleValid = localeString.matches("^[a-z]{2,3}(?:-[A-Z]{2})?\$")
		}

		if (line.startsWith("dictionaryFile")) {
			dictionaryFileName = line.replace("dictionaryFile:", "").trim()
		}

		// alphabet string
		def lineCharacters = extractAlphabetCharsFromLine(line)
		lineCharacters = lineCharacters.isEmpty() ? extractAlphabetExtraCharsFromLine(languageFile.name, line) : lineCharacters

		alphabet += lineCharacters

		// sounds, single letters or special characters that are treated as letters
		if (lineCharacters) {
			lineCharacters.each { letter ->
				layoutSounds.put(letter, layoutKey.toString())
			}
		}

		// the key index advances once per layout line
		if (isLayoutLine(line)) {
			layoutKey++
		}

		// sounds, syllables
		def (sound, sequence) = extractSoundFromLine(line)
		if (!sound.isEmpty() && !sequence.isEmpty()) {
			sounds.put(sound, sequence)
		}
	}

	if (!hasLayout) {
		errorCount++
		errorMsg += "Language '${languageFile.name}' is invalid. Missing 'layout' property.\n"
	}

	if (alphabet.isEmpty()) {
		errorCount++
		errorMsg += "Language '${languageFile.name}' is invalid. No language characters found. Make sure 'layout' contains series of characters per each key in the format: ' - [a, b, c]' and so on\n"
	}

	if (hasSounds && sounds.isEmpty()) {
		errorCount++
		errorMsg += "Language '${languageFile.name}' is invalid. 'sounds' property must contain series of phonetic transcriptions per digit sequence in the format: ' - [Yae,1221]' and so on.\n"
	}

	if (!isLocaleValid) {
		errorCount++
		def msg = localeString.isEmpty() ? "Missing 'locale' property." : "Unrecognized locale format: '${localeString}'"
		errorMsg += "Language '${languageFile.name}' is invalid. ${msg}\n"
	}

	dictionaryFile = new File("$dictionariesDir/${dictionaryFileName}")
	if (dictionaryFileName.isEmpty() || !dictionaryFile.exists()) {
		errorCount++
		errorMsg += "Could not find dictionary file: '${dictionaryFileName}' in: '${dictionariesDir}'. Make sure 'dictionaryFile' is set correctly in: '${languageFile.name}'.\n"
	}

	String[] localeParts = localeString.split(("[-_]"))
	Locale locale = new Locale(localeParts[0], localeParts.length > 1 ? localeParts[1] : "")

	// no explicit sounds: fall back to the layout letters, upper-cased for the locale
	if (!hasSounds && locale != null) {
		layoutSounds.forEach { sound, sequence ->
			sounds.put(sound.toUpperCase(locale), sequence)
		}
	}

	return [alphabet, sounds, !hasSounds, locale, dictionaryFile, errorCount, errorMsg]
}
|
||||
|
||||
|
||||
// this cannot be static, because DictionaryTools will not be visible
|
||||
// Validates a dictionary file line by line and stops reading once "maxErrors" is reached.
// Returns [errorCount, errorMsg], where "errorMsg" is the concatenation of all error messages.
def validateDictionary(File dictionaryFile, String alphabet, HashMap<String, String> sounds, boolean isAlphabeticLanguage, Locale locale, int maxErrors, String csvDelimiter, int maxWordFrequency) {
	String regexSafeAlphabet = alphabet.replaceAll("([\\[\\]\\-\\.])", "")
	// the upper-case letters are added only when the alphabet is not upper case already
	String upperCaseAlphabet = alphabet.toUpperCase(locale) == alphabet ? "" : regexSafeAlphabet.toUpperCase(locale)
	final VALID_CHARS = "^[${regexSafeAlphabet}${upperCaseAlphabet}\\.\\-]+\$"

	int errorCount = 0
	String errorMsg = ''
	Set<String> seenWords = new HashSet<>()

	List<String> dictionaryLines = dictionaryFile.readLines()
	int lineNumber = 0
	while (lineNumber < dictionaryLines.size() && errorCount < maxErrors) {
		String line = dictionaryLines.get(lineNumber)
		lineNumber++ // from here on, this is the 1-based number of the current line
		boolean skipDigitSequence = false

		String whiteSpaceError = validateNoWhitespace(line, lineNumber)
		if (whiteSpaceError) {
			skipDigitSequence = true
			errorCount++
			errorMsg += whiteSpaceError
		}

		def (word, transcription, frequency) = DictionaryTools.getDictionaryLineData(line, csvDelimiter)

		String frequencyError = validateFrequency(frequency, maxWordFrequency, dictionaryFile.name, lineNumber)
		if (frequencyError) {
			skipDigitSequence = true
			errorCount++
			errorMsg += frequencyError
		}

		// word errors are counted, but they do not prevent the digit sequence check below
		def (wordErrorCount, wordErrors) = validateWord(word, VALID_CHARS, isAlphabeticLanguage, lineNumber, "Dictionary '${dictionaryFile.name}' is invalid")
		if (wordErrorCount > 0) {
			errorCount += wordErrorCount
			errorMsg += wordErrors
		}

		// Set.add() returns false when the word is already present
		if (!seenWords.add(word)) {
			skipDigitSequence = true
			errorCount++
			errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Found duplicate word: '${word}' on line ${lineNumber}. Remove all duplicates.\n"
		}

		if (skipDigitSequence) {
			// the validations below make no sense if the previous ones have failed
			continue
		}

		try {
			boolean hasTranscription = !transcription.isEmpty()
			DictionaryTools.wordToDigitSequence(locale, hasTranscription ? transcription : word, sounds, hasTranscription)
		} catch (IllegalArgumentException e) {
			errorCount++
			errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Failed generating digit sequence for word '${word}' on line ${lineNumber}. ${e.message}\n"
		}
	}

	return [errorCount, errorMsg]
}
|
||||
|
||||
//////////////////// PARSING ////////////////////
|
||||
|
||||
// Returns the punctuation characters that count as letters for the given language, or an
// empty string when "line" is not a layout line containing a PUNCTUATION entry.
static def extractAlphabetExtraCharsFromLine(String languageName, String line) {
	boolean isPunctuationLayoutLine = languageName != null && line.contains('PUNCTUATION') && isLayoutLine(line)
	if (!isPunctuationLayoutLine) {
		return ''
	}

	final DEFAULT = "'-."

	if (languageName.contains('Korean')) {
		return DEFAULT
	}

	if (languageName.contains("Hebrew") || languageName.contains("Yiddish")) {
		return DEFAULT + '"'
	}

	// keep only what is between the brackets, then drop the PUNCTUATION entry and the separators
	String bracketContents = line.replaceFirst('\\].*', '').replaceFirst('^\\s+- \\[', '')
	String extraChars = bracketContents.replaceFirst("PUNCTUATION[^,\\s]*", '').replace(',', '').replace(' ', '')

	return DEFAULT + extraChars
}
|
||||
|
||||
|
||||
// Returns the characters listed on a layout line, without brackets, commas and spaces.
// Lines mentioning PUNCTUATION or SPECIAL, as well as non-layout lines, yield an empty string.
static def extractAlphabetCharsFromLine(String line) {
	boolean hasSpecialEntry = line.contains('PUNCTUATION') || line.contains('SPECIAL')
	if (hasSpecialEntry || !isLayoutLine(line)) {
		return ''
	}

	def bracketContents = line.replaceFirst('^\\s+- \\[', '').replaceFirst('\\].*', '')
	return bracketContents.replace(',', '').replace(' ', '')
}
|
||||
|
||||
|
||||
// Extracts a [sound, digitSequence] pair from a line in the format: " - [Yae,1221]".
// Returns ['', ''] when the line does not have this format.
static def extractSoundFromLine(String line) {
	final NO_SOUND = ['', '']

	if (!line.matches('\\s+- \\[\\w+\\s*,\\s*\\d+\\].*')) {
		return NO_SOUND
	}

	def parts = line
		.replaceFirst('^\\s+- \\[', '')
		.replaceFirst('\\].*', '')
		.replace(' ', '')
		.split(',')

	if (parts.length > 1) {
		return [parts[0], parts[1]]
	}

	return NO_SOUND
}
|
||||
|
||||
|
||||
// A layout line is a list item in the format " - [...]" that contains no digits
// immediately before a closing bracket.
static def isLayoutLine(String line) {
	if (!line.matches('\\s+- \\[.+?\\].*')) {
		return false
	}

	return line.find('\\d+]') == null
}
|
||||
|
||||
//////////////////// VALIDATION ////////////////////
|
||||
|
||||
// Returns an error message when the line is empty or contains a space; otherwise an empty string.
static def validateNoWhitespace(String line, int lineNumber) {
	if (line == "") {
		return "There is no word on line ${lineNumber}. Remove all empty lines.\n"
	}

	if (line.contains(" ")) {
		return "Found space on line ${lineNumber}. Make sure each word is on a new line. Phrases are not allowed.\n"
	}

	return ''
}
|
||||
|
||||
|
||||
// Returns an error message when the frequency is outside of [0, maxFrequency]; otherwise an empty string.
static def validateFrequency(int frequency, int maxFrequency, String dictionaryFileName, int lineNumber) {
	boolean isInRange = frequency >= 0 && frequency <= maxFrequency
	if (isInRange) {
		return ''
	}

	return "Dictionary '${dictionaryFileName}' is invalid. Found out-of-range word frequency: '${frequency}' on line ${lineNumber}. Frequency must be an integer between 0 and ${maxFrequency}.\n"
}
|
||||
|
||||
|
||||
// Runs the word-level checks and returns [errorCount, errors], where "errors" is the
// concatenation of the messages of all failed checks.
static def validateWord(String word, String validCharacters, boolean isAlphabeticLanguage, int lineNumber, String errorMsgPrefix) {
	def problems = []

	if (word.matches("(\\d.+?|.+?\\d|\\d)")) {
		problems << "${errorMsgPrefix}. Found numbers on line ${lineNumber}. Remove all numbers.\n"
	}

	if (word.matches("^\\P{L}+\$") && !validCharacters.contains(word)) {
		problems << "${errorMsgPrefix}. Found a garbage word: '${word}' on line ${lineNumber}.\n"
	}

	if (isAlphabeticLanguage && word.trim().length() == 1) {
		problems << "${errorMsgPrefix}. Found a single letter: '${word}' on line ${lineNumber}. Only uppercase single letters are allowed. The rest of the alphabet will be added automatically.\n"
	}

	// the alphabet check runs only when none of the checks above has failed
	if (problems.isEmpty() && isAlphabeticLanguage && !word.matches(validCharacters)) {
		problems << "${errorMsgPrefix}. Word '${word}' on line ${lineNumber} contains characters outside of the defined alphabet: $validCharacters.\n"
	}

	return [problems.size(), problems.join('')]
}
|
||||
|
|
|
|||
|
|
@ -1,16 +1,16 @@
|
|||
// Repositories used for resolving the build plugins.
pluginManagement {
	repositories {
		google()
		mavenCentral()
		gradlePluginPortal()
	}
}
|
||||
// Repositories used for resolving the dependencies of all projects in the build.
dependencyResolutionManagement {
	// NOTE(review): per the Gradle documentation, this mode fails the build when a project
	// declares its own repositories — confirm against the Gradle version in use
	repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
	repositories {
		google()
		mavenCentral()
	}
}
|
||||
|
||||
rootProject.name = "tt9"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue