1
0
Fork 0
* added Japanese (Hiragana, Katakana, Kanji)

* improved dictionary validation: it is now possible to have the same ideogram with two different transcriptions

* fixed frequency updates sometimes not working (this also affected Chinese)
This commit is contained in:
Dimo Karaivanov 2025-04-12 11:59:13 +03:00 committed by GitHub
parent efa1fb4d79
commit 0ec912f9c9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
33 changed files with 1603029 additions and 89 deletions

View file

@ -243,12 +243,13 @@ def validateDictionary(File dictionaryFile, String alphabet, HashMap<String, Str
errorMsg += wordErrors
}
if (uniqueWords.contains(word)) {
final uniqueWord = transcription + word
if (uniqueWords.contains(uniqueWord)) {
lineHasErrors = true
errorCount++
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Found duplicate word: '${word}' on line ${lineNumber}. Remove all duplicates.\n"
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Found duplicate word: '${word}${!transcription.isEmpty() ? ' [' + transcription + ']' : ''}' on line ${lineNumber}. Remove all duplicates.\n"
} else {
uniqueWords.add(word)
uniqueWords.add(uniqueWord)
}
if (lineHasErrors) {
@ -295,6 +296,10 @@ static def extractAlphabetExtraCharsFromLine(String languageName, String line) {
allChars += '\u200C'
}
if (line.contains("PUNCTUATION") && languageName.contains("Japanese")) {
allChars += ''
}
return DEFAULT + allChars
}