1
0
Fork 0
tt9/app/dictionary-tools.gradle
Dimo Karaivanov da5b4f17b7
New dictionary format (#662)
* new dictionary format that supports syllabaries

* optimized the dictionary build cache significantly to truly build only the changed language files

* code style fixes
2024-11-06 10:43:16 +02:00

65 lines
2.5 KiB
Groovy

/**
 * Static helpers for parsing dictionary source lines, converting words to
 * digit sequences, and fingerprinting language files.
 */
class Wrapper {
    /**
     * Parses one dictionary line into its word, optional transcription and frequency.
     *
     * Accepted layouts (fields separated by {@code delimiter}):
     * <pre>
     *   word
     *   word, frequency
     *   word, transcription
     *   word, transcription, frequency
     * </pre>
     * The second field is treated as a transcription only when it consists solely of
     * the letters a-z / A-Z; otherwise it is treated as the frequency.
     *
     * @param line      the raw dictionary line
     * @param delimiter the field separator; it is passed to {@code String.split()}, so it is interpreted as a regular expression
     * @return a list of {@code [word, transcription, frequency]}. The transcription is "" when absent.
     *         The frequency is 0 when absent and -1 when it is not a valid integer or when the line
     *         has unexpected trailing fields.
     */
    static def getDictionaryLineData(String line, String delimiter) {
        // Up to three fields are meaningful. A limit of 2 would glue the transcription and the
        // frequency together, making "word, transcription, frequency" lines impossible to parse.
        String[] parts = line.split(delimiter, 3)
        String word = parts[0]
        String transcription = parts.length > 1 && parts[1] =~ "^[a-zA-Z]+\$" ? parts[1] : ""

        int frequency
        try {
            // The frequency follows the transcription when there is one; otherwise it follows the word.
            int frequencyIndex = transcription.isEmpty() ? 1 : 2
            if (parts.length > frequencyIndex + 1) {
                // Something follows the frequency field, so the line is malformed.
                frequency = -1
            } else {
                frequency = (parts.length > frequencyIndex ? parts[frequencyIndex] : "0") as int
            }
        } catch (Exception ignored) {
            // Not a number: report it with the -1 sentinel instead of failing here.
            frequency = -1
        }

        return [word, transcription, frequency]
    }

    /**
     * Converts a word (or its transcription) to a digit sequence using the given sound map.
     *
     * When {@code isTranscribed} is false, the word is upper-cased with {@code locale} and every
     * character is looked up individually. When it is true, the word is used as is and is cut
     * into sounds at every upper-case letter, i.e. a sound ends right before the next upper-case
     * character or at the end of the word.
     *
     * @param locale        the language locale, used for upper-casing and for the Greek special case
     * @param word          the word or transcription to convert
     * @param sounds        map of a sound (or a layout character) to its digit sequence
     * @param isTranscribed whether {@code word} is a transcription made of capitalized sounds
     * @return the concatenated digit sequences of all sounds in the word
     * @throws IllegalArgumentException when a sound is missing from {@code sounds}, or when the
     *         word produces no digits at all (for example, when it is empty)
     */
    static def wordToDigitSequence(Locale locale, String word, HashMap<String, String> sounds, boolean isTranscribed) {
        String sequence = ""

        final String normalizedWord = isTranscribed ? word : word.toUpperCase(locale)
        String currentSound = ""

        for (int i = 0, end = normalizedWord.length() - 1; i <= end; i++) {
            char currentChar = normalizedWord.charAt(i)
            char nextChar = i < end ? normalizedWord.charAt(i + 1) : 0
            int nextCharType = Character.getType(nextChar)

            currentSound += currentChar

            // charAt(i) returns "ΐ" as three separate characters, but they must be treated as one.
            // Keep accumulating while the next character is a combining mark.
            if (
                locale.getLanguage() == "el"
                && (nextCharType == Character.NON_SPACING_MARK || nextCharType == Character.ENCLOSING_MARK || nextCharType == Character.COMBINING_SPACING_MARK)
            ) {
                continue
            }

            // The current sound is complete: always for plain words, and at an upper-case
            // boundary or at the end of the word for transcriptions.
            if (!isTranscribed || i == end || Character.isUpperCase(nextChar)) {
                if (!sounds.containsKey(currentSound)) {
                    throw new IllegalArgumentException("Sound or layout entry '${currentSound}' does not belong to the language sound list: ${sounds}.")
                } else {
                    sequence += sounds.get(currentSound)
                    currentSound = ""
                }
            }
        }

        if (sequence.isEmpty()) {
            throw new IllegalArgumentException("The word does not contain any valid sounds.")
        }

        return sequence
    }

    /**
     * Builds a fingerprint of a language from its definition and dictionary files.
     *
     * @param definitionFile the language definition file; may be null or missing
     * @param dictionaryFile the dictionary file; may be null or missing
     * @return the SHA-256 digest of the definition text followed by the SHA-256 digest of the
     *         dictionary text; a null or non-existent file contributes an empty string
     */
    static def getLanguageHash(File definitionFile, File dictionaryFile) {
        def definitionHash = definitionFile != null && definitionFile.exists() ? definitionFile.text.digest("SHA-256") : ""
        def dictionaryHash = dictionaryFile != null && dictionaryFile.exists() ? dictionaryFile.text.digest("SHA-256") : ""
        return definitionHash + dictionaryHash
    }
}
// Publish the Wrapper class as the "DictionaryTools" extra property, so its static helpers
// can be called as DictionaryTools.<method>() — presumably from the build scripts that apply
// this file; verify against the callers.
ext.DictionaryTools = Wrapper