New dictionary format (#662)
* new dictionary format that supports syllabaries * optimized the dictionary build cache significantly to truly build only the changed language files * code style fixes
This commit is contained in:
parent
56b355631a
commit
da5b4f17b7
62 changed files with 871 additions and 397 deletions
4
.github/workflows/build.yml
vendored
4
.github/workflows/build.yml
vendored
|
|
@ -24,10 +24,12 @@ jobs:
|
||||||
uses: gradle/gradle-build-action@v3
|
uses: gradle/gradle-build-action@v3
|
||||||
|
|
||||||
# validate and build
|
# validate and build
|
||||||
|
- name: Validate Help
|
||||||
|
run: ./gradlew convertHelp
|
||||||
- name: Validate Dictionaries
|
- name: Validate Dictionaries
|
||||||
run: ./gradlew validateLanguages
|
run: ./gradlew validateLanguages
|
||||||
- name: Build Languages
|
- name: Build Languages
|
||||||
run: ./gradlew copyDefinitions copyDictionaries writeDictionaryProperties
|
run: ./gradlew copyDefinitions convertHelp buildDictionaryDownloads copyDownloadsToAssets
|
||||||
- name: Lint
|
- name: Lint
|
||||||
run: ./gradlew lint # this actually runs mergeResources, so it must come after the dictionary tasks
|
run: ./gradlew lint # this actually runs mergeResources, so it must come after the dictionary tasks
|
||||||
- name: Build all APK variants
|
- name: Build all APK variants
|
||||||
|
|
|
||||||
203
app/build-dictionary.gradle
Normal file
203
app/build-dictionary.gradle
Normal file
|
|
@ -0,0 +1,203 @@
|
||||||
|
import java.nio.charset.StandardCharsets
|
||||||
|
import java.util.zip.ZipEntry
|
||||||
|
import java.util.zip.ZipOutputStream
|
||||||
|
|
||||||
|
apply from: 'dictionary-tools.gradle'
|
||||||
|
|
||||||
|
/**
 * Converts the dictionaries of all languages to their compressed form.
 *
 * Every definition file found under definitionsInputDir is parsed and its dictionary is converted.
 * The languages are processed in parallel. The error messages of all languages are concatenated
 * and, if there are any, the build is failed with a GradleException containing them.
 */
ext.convertDictionaries = { definitionsInputDir, dictionariesInputDir, dictionariesOutputDir, dictionariesMetaDir ->
	def errorStream = fileTree(dir: definitionsInputDir).getFiles().parallelStream().map { definition ->
		// This closure runs concurrently on several threads, so it must not update shared counters.
		// The returned message is the only result that is used: empty means success.
		def (_, sounds, __, locale, dictionaryFile, ignoredErrorCount, langFileErrorMsg) = parseLanguageDefintion(definition, dictionariesInputDir)
		if (!langFileErrorMsg.isEmpty()) {
			return langFileErrorMsg
		}

		String conversionErrorMessages = convertDictionary(definition, dictionaryFile, dictionariesOutputDir, dictionariesMetaDir, DICTIONARY_OUTPUT_EXTENSION, sounds, locale, MAX_ERRORS, CSV_DELIMITER)[1]
		if (!conversionErrorMessages.isEmpty()) {
			return conversionErrorMessages
		}

		return ""
	}

	String errorsMsg = errorStream.reduce("", String::concat)
	if (errorsMsg) {
		throw new GradleException(errorsMsg)
	}
}
|
||||||
|
|
||||||
|
|
||||||
|
// this cannot be static, because DictionaryTools will not be visible
/**
 * Converts a single CSV dictionary into a zipped "digit sequence -> words" dictionary and writes
 * its properties file.
 *
 * Nothing is done when isDictionaryUpToDate() reports that the stored hash matches the sources.
 * When any step fails, the later steps are skipped, so that no partial dictionary and no
 * properties file (whose hash would mark the language as up to date) are left behind.
 *
 * @return a two-element list: [number of errors, concatenated error messages]. [0, ""] on success
 *         or when the dictionary is up to date.
 */
def convertDictionary(File definition, File csvDictionary, String dictionariesOutputDir, String dictionariesMetaDir, String outputDictionaryExtension, HashMap<String, String> sounds, Locale locale, int maxErrors, String csvDelimiter) {
	if (isDictionaryUpToDate(definition, csvDictionary, dictionariesMetaDir)) {
		return [0, ""]
	}

	int errorCount = 0
	String errorMsg = ''

	List<String> fileContents = csvDictionary.readLines()
	LinkedHashMap<String, ArrayList<String>> outputDictionary = new LinkedHashMap<>()
	int wordCount = 0

	// keep scanning after the first error to report as many problems as possible, up to maxErrors
	for (int lineNumber = 1; lineNumber <= fileContents.size() && errorCount < maxErrors; lineNumber++) {
		String line = fileContents.get(lineNumber - 1)

		def (word, transcription, frequency) = DictionaryTools.getDictionaryLineData(line, csvDelimiter)

		String digitSequence = ""
		try {
			def transcribedWord = transcription.isEmpty() ? word : transcription
			digitSequence = DictionaryTools.wordToDigitSequence(locale, transcribedWord, sounds, !transcription.isEmpty())
		} catch (IllegalArgumentException e) {
			errorCount++
			errorMsg += "Dictionary '${csvDictionary.name}' is invalid. Failed generating digit sequence for word '${word}' on line ${lineNumber}. ${e.message}\n"
		}

		// once something has failed, the output is useless, so stop collecting words
		if (errorCount == 0) {
			if (!outputDictionary.containsKey(digitSequence)) {
				outputDictionary.put(digitSequence, new ArrayList<>())
			}
			// prefix the frequency to sort the words later
			outputDictionary.get(digitSequence).add("${String.format('%03d', frequency)}${word}")
			wordCount++
		}
	}

	// Do not write anything for an invalid dictionary. Writing the properties would store the
	// current hash and the next build would wrongly consider this language up to date.
	if (errorCount > 0) {
		return [errorCount, errorMsg]
	}

	outputDictionary = sortDictionary(outputDictionary)

	def (assetError, zippedDictionary) = writeZippedDictionary(dictionariesOutputDir, csvDictionary, outputDictionary, outputDictionaryExtension)
	if (assetError) {
		// same reasoning as above: no properties for a dictionary that could not be written
		errorCount++
		errorMsg += assetError
		return [errorCount, errorMsg]
	}

	def propertiesError = writeDictionaryProperties(definition, csvDictionary, zippedDictionary, dictionariesMetaDir, outputDictionary.size(), wordCount)
	if (propertiesError) {
		errorCount++
		errorMsg += propertiesError
	}

	return [errorCount, errorMsg]
}
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////// DICTIONARY PROCESSING ////////////////////
|
||||||
|
|
||||||
|
/**
 * Serializes one dictionary entry as UTF-8 bytes: the digit sequence followed by its words.
 *
 * When every word is exactly as long as the digit sequence, the words are written back to back
 * with no separators. Otherwise, a single space is written after the sequence and between the words.
 *
 * @throws IllegalArgumentException when the list of words is empty
 */
static byte[] compressDictionaryLine(String digitSequence, List<String> words) {
	if (words.isEmpty()) {
		throw new IllegalArgumentException("No words for digit sequence: ${digitSequence}")
	}

	final int sequenceLength = digitSequence.length()
	final boolean hasOddLengthWord = words.any { it.length() != sequenceLength }
	final String separator = hasOddLengthWord ? ' ' : ''

	String line = digitSequence + separator + words.join(separator)
	return line.getBytes(StandardCharsets.UTF_8)
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Tells whether the dictionary has already been built from the current sources.
 *
 * Compares the "hash" value stored in the dictionary properties file with the hash of the current
 * definition and CSV files. A missing properties file or a missing "hash" value means "not up to date".
 */
def isDictionaryUpToDate(File definition, File csvDictionary, String dictionaryPropertiesDir) {
	File propertiesFile = new File(dictionaryPropertiesDir, getPropertyFileName(csvDictionary))
	if (propertiesFile.exists()) {
		Properties storedProperties = new Properties()
		propertiesFile.withInputStream { input -> storedProperties.load(input) }

		String storedHash = storedProperties.getProperty("hash", "")
		return storedHash == DictionaryTools.getLanguageHash(definition, csvDictionary)
	}

	return false
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Sorts the dictionary in ascending order of sequence length and in descending order of word frequency.
 * Also, it removes the frequency prefix from each word. The input dictionary is not modified:
 * neither the map, nor the word lists in it. The result contains new lists.
 */
static LinkedHashMap<String, ArrayList<String>> sortDictionary(LinkedHashMap<String, ArrayList<String>> dictionary) {
	// sort the sequences in ascending order of length, then lexicographically
	def sequences = dictionary.keySet().toList()
	Collections.sort(sequences, { a, b ->
		a.length() == b.length() ? a.compareTo(b) : a.length() - b.length()
	})

	def sortedDictionary = new LinkedHashMap<String, ArrayList<String>>()
	sequences.each { sequence ->
		// work on a copy, so the caller's lists keep their order and their frequency prefixes
		ArrayList<String> words = new ArrayList<>(dictionary.get(sequence))

		// The zero-padded frequency prefix makes the plain string order match the frequency order,
		// so a reversed natural sort puts the most frequent words first.
		Collections.sort(words, Collections.reverseOrder())
		words.replaceAll { word -> word.replaceFirst("^\\d+", "") }

		sortedDictionary.put(sequence, words)
	}

	return sortedDictionary
}
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////// FILE I/O ////////////////////
|
||||||
|
|
||||||
|
/**
 * Returns the name of the given dictionary file without its last extension,
 * for example: "en-utf8.csv" -> "en-utf8".
 */
static getDictionaryFileName(csvDictionary) {
	def nameWithoutExtension = csvDictionary.getName().replaceFirst("\\.\\w+\$", "")
	return "${nameWithoutExtension}"
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Returns the name of the properties file that describes the given dictionary:
 * the dictionary name without extension, followed by ".props.yml".
 */
static getPropertyFileName(csvDictionary) {
	def dictionaryName = getDictionaryFileName(csvDictionary)
	return "${dictionaryName}.props.yml"
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Returns the output File of the compressed dictionary: it is located in dictionariesOutputDir and
 * named after the CSV dictionary, but with outputDictionaryExtension as the extension.
 */
static getZipDictionaryFile(dictionariesOutputDir, csvDictionary, outputDictionaryExtension) {
	def dictionaryName = getDictionaryFileName(csvDictionary)
	return new File(dictionariesOutputDir, "${dictionaryName}.${outputDictionaryExtension}")
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Writes the dictionary as a single "<dictionary name>.txt" entry in a ZIP archive.
 * Zipping the text files results in a smaller APK in comparison to the uncompressed text files.
 *
 * The entries of outputDictionary are written in iteration order, each one serialized by
 * compressDictionaryLine(). The stream is always closed, even when writing fails.
 *
 * @return a two-element list: [error message, output File]. The message is empty on success.
 */
static def writeZippedDictionary(dictionariesOutputDir, csvDictionaryFile, outputDictionary, outputDictionaryExtension) {
	def fileName = getDictionaryFileName(csvDictionaryFile)
	def outputFile = getZipDictionaryFile(dictionariesOutputDir, csvDictionaryFile, outputDictionaryExtension)

	try {
		// withCloseable() closes the stream (and the underlying file) also when the closure throws
		new ZipOutputStream(new FileOutputStream(outputFile)).withCloseable { zipOutputStream ->
			zipOutputStream.putNextEntry(new ZipEntry("${fileName}.txt"))
			outputDictionary.each { digitSequence, words ->
				zipOutputStream.write(compressDictionaryLine(digitSequence, words))
			}
			zipOutputStream.closeEntry()
		}

		return ["", outputFile]
	} catch (Exception e) {
		return ["Failed writing to '${outputFile.path}'. ${e.message}\n", outputFile]
	}
}
|
||||||
|
|
||||||
|
|
||||||
|
// this cannot be static, because it requires access to exec() and DictionaryTools
/**
 * Writes the properties file of a dictionary into outputDir. It contains: the hash of the language
 * sources, the output of "git log" for the zipped dictionary, the number of digit sequences,
 * the size of the zipped dictionary in bytes and the number of words.
 *
 * When the zipped dictionary does not exist, the revision is empty and the size is 0.
 *
 * @return an empty string on success or an error message on failure
 */
def writeDictionaryProperties(File definition, File csvDictionary, File zipDictionary, outputDir, int sequences, int words) {
	def name = getPropertyFileName(csvDictionary)

	try {
		def hash = DictionaryTools.getLanguageHash(definition, csvDictionary)

		def revision = ""
		if (zipDictionary.exists()) {
			revision = exec("git log --pretty=tformat:%H -n 1 ${zipDictionary}")
		}

		def size = 0
		if (zipDictionary.exists()) {
			size = zipDictionary.length()
		}

		def propertiesFile = new File(outputDir, name)
		propertiesFile.text = "hash: ${hash}\nrevision: ${revision}\nsequences: ${sequences}\nsize: ${size}\nwords: ${words}"
	} catch (Exception e) {
		return "Failed writing dictionary properties to: '${outputDir}/${name}'. ${e.message}\n"
	}

	return ""
}
|
||||||
|
|
@ -3,12 +3,19 @@ plugins {
|
||||||
}
|
}
|
||||||
|
|
||||||
apply from: 'constants.gradle'
|
apply from: 'constants.gradle'
|
||||||
apply from: 'dictionary-tools.gradle'
|
apply from: 'build-dictionary.gradle'
|
||||||
apply from: 'help-tools.gradle'
|
|
||||||
apply from: 'validate-languages.gradle'
|
apply from: 'validate-languages.gradle'
|
||||||
|
apply from: 'help-tools.gradle'
|
||||||
apply from: 'version-tools.gradle'
|
apply from: 'version-tools.gradle'
|
||||||
|
|
||||||
|
|
||||||
|
tasks.register('copyDefinitions', Copy) {
|
||||||
|
from LANGUAGES_INPUT_DIR
|
||||||
|
include '**/*.yml'
|
||||||
|
into LANGUAGES_OUTPUT_DIR
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
tasks.register('validateLanguages') {
|
tasks.register('validateLanguages') {
|
||||||
inputs.dir LANGUAGES_INPUT_DIR
|
inputs.dir LANGUAGES_INPUT_DIR
|
||||||
outputs.dir LANGUAGE_VALIDATION_DIR
|
outputs.dir LANGUAGE_VALIDATION_DIR
|
||||||
|
|
@ -18,19 +25,30 @@ tasks.register('validateLanguages') {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tasks.register('copyDefinitions', Copy) {
|
|
||||||
from LANGUAGES_INPUT_DIR
|
tasks.register('buildDictionaryDownloads') {
|
||||||
include '**/*.yml'
|
inputs.dir DICTIONARIES_INPUT_DIR
|
||||||
into LANGUAGES_OUTPUT_DIR
|
outputs.dir DICTIONARIES_DOWNLOAD_DIR
|
||||||
|
outputs.dir DICTIONARY_META_OUTPUT_DIR
|
||||||
|
|
||||||
|
dependsOn validateLanguages
|
||||||
|
mustRunAfter validateLanguages
|
||||||
|
|
||||||
|
doLast {
|
||||||
|
convertDictionaries(DEFINITIONS_INPUT_DIR, DICTIONARIES_INPUT_DIR, DICTIONARIES_DOWNLOAD_DIR, DICTIONARY_META_OUTPUT_DIR)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tasks.register('copyDictionaries', Copy) {
|
|
||||||
from DICTIONARIES_INPUT_DIR
|
tasks.register('copyDownloadsToAssets', Copy) {
|
||||||
include '**/*.csv'
|
from DICTIONARIES_DOWNLOAD_DIR
|
||||||
include '**/*.txt'
|
include '**/*.zip'
|
||||||
into DICTIONARIES_OUTPUT_DIR
|
into DICTIONARIES_OUTPUT_DIR
|
||||||
|
dependsOn buildDictionaryDownloads
|
||||||
|
mustRunAfter buildDictionaryDownloads
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
tasks.register('convertHelp') {
|
tasks.register('convertHelp') {
|
||||||
inputs.dir HELP_MARKDOWN_DIR
|
inputs.dir HELP_MARKDOWN_DIR
|
||||||
outputs.dir HELP_HTML_DIR
|
outputs.dir HELP_HTML_DIR
|
||||||
|
|
@ -40,15 +58,6 @@ tasks.register('convertHelp') {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tasks.register('writeDictionaryProperties') {
|
|
||||||
inputs.dir fileTree(dir: DICTIONARIES_INPUT_DIR)
|
|
||||||
outputs.dir DICTIONARY_META_OUTPUT_DIR
|
|
||||||
|
|
||||||
doLast {
|
|
||||||
getDictionaryProperties(DICTIONARIES_INPUT_DIR, DICTIONARY_META_OUTPUT_DIR)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
tasks.register('updateManifest') {
|
tasks.register('updateManifest') {
|
||||||
doLast {
|
doLast {
|
||||||
updateManifestVersion(getVersionCode(), getVersionName())
|
updateManifestVersion(getVersionCode(), getVersionName())
|
||||||
|
|
@ -58,6 +67,7 @@ tasks.register('updateManifest') {
|
||||||
clean {
|
clean {
|
||||||
delete LANGUAGES_OUTPUT_DIR
|
delete LANGUAGES_OUTPUT_DIR
|
||||||
delete DICTIONARIES_OUTPUT_DIR
|
delete DICTIONARIES_OUTPUT_DIR
|
||||||
|
delete DICTIONARIES_DOWNLOAD_DIR
|
||||||
delete HELP_HTML_DIR
|
delete HELP_HTML_DIR
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -84,10 +94,12 @@ android {
|
||||||
}
|
}
|
||||||
buildTypes {
|
buildTypes {
|
||||||
debug {
|
debug {
|
||||||
|
buildConfigField 'String', 'DICTIONARY_EXTENSION', "\"${DICTIONARY_OUTPUT_EXTENSION}\""
|
||||||
buildConfigField 'String', 'VERSION_FULL', "\"${getVersionString('debug')}\""
|
buildConfigField 'String', 'VERSION_FULL', "\"${getVersionString('debug')}\""
|
||||||
}
|
}
|
||||||
|
|
||||||
release {
|
release {
|
||||||
|
buildConfigField 'String', 'DICTIONARY_EXTENSION', "\"${DICTIONARY_OUTPUT_EXTENSION}\""
|
||||||
buildConfigField 'String', 'VERSION_FULL', "\"${getVersionString('release')}\""
|
buildConfigField 'String', 'VERSION_FULL', "\"${getVersionString('release')}\""
|
||||||
|
|
||||||
debuggable false
|
debuggable false
|
||||||
|
|
@ -124,7 +136,11 @@ android {
|
||||||
].each { taskName ->
|
].each { taskName ->
|
||||||
try {
|
try {
|
||||||
tasks.named(taskName)?.configure {
|
tasks.named(taskName)?.configure {
|
||||||
dependsOn(validateLanguages, copyDefinitions, copyDictionaries, writeDictionaryProperties, convertHelp)
|
dependsOn(copyDefinitions, convertHelp, validateLanguages, buildDictionaryDownloads)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (taskName.toLowerCase().contains("full")) {
|
||||||
|
tasks.named(taskName)?.configure {dependsOn(copyDownloadsToAssets) }
|
||||||
}
|
}
|
||||||
} catch (UnknownTaskException ignored) {}
|
} catch (UnknownTaskException ignored) {}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,19 +4,22 @@ ext.PACKAGE_NAME = "io.github.sspanak.${APP_NAME}"
|
||||||
ext.LANGUAGES_DIR_NAME = 'languages'
|
ext.LANGUAGES_DIR_NAME = 'languages'
|
||||||
ext.DEFINITIONS_DIR_NAME = 'definitions'
|
ext.DEFINITIONS_DIR_NAME = 'definitions'
|
||||||
ext.DICTIONARIES_DIR_NAME = 'dictionaries'
|
ext.DICTIONARIES_DIR_NAME = 'dictionaries'
|
||||||
|
ext.DICTIONARIES_DOWNLOAD_DIR_NAME = 'downloads'
|
||||||
ext.DICTIONARY_SIZES_DIR_NAME = 'dictionary-sizes'
|
ext.DICTIONARY_SIZES_DIR_NAME = 'dictionary-sizes'
|
||||||
|
|
||||||
def ROOT_DIR = "${project.rootDir}/app"
|
def APP_ROOT_DIR = "${project.rootDir}/app"
|
||||||
def MAIN_ASSETS_DIR = "${ROOT_DIR}/src/main/assets"
|
def MAIN_ASSETS_DIR = "${APP_ROOT_DIR}/src/main/assets"
|
||||||
def FULL_VERSION_ASSETS_DIR = "${ROOT_DIR}/src/full/assets"
|
def FULL_VERSION_ASSETS_DIR = "${APP_ROOT_DIR}/src/full/assets"
|
||||||
|
|
||||||
ext.HELP_MARKDOWN_DIR = "${project.rootDir}/docs/help"
|
ext.HELP_MARKDOWN_DIR = "${project.rootDir}/docs/help"
|
||||||
ext.HELP_HTML_DIR = "${MAIN_ASSETS_DIR}/help"
|
ext.HELP_HTML_DIR = "${MAIN_ASSETS_DIR}/help"
|
||||||
|
|
||||||
ext.LANGUAGES_INPUT_DIR = "${ROOT_DIR}/${LANGUAGES_DIR_NAME}"
|
ext.LANGUAGES_INPUT_DIR = "${APP_ROOT_DIR}/${LANGUAGES_DIR_NAME}"
|
||||||
ext.DEFINITIONS_INPUT_DIR = "${LANGUAGES_INPUT_DIR}/${DEFINITIONS_DIR_NAME}"
|
ext.DEFINITIONS_INPUT_DIR = "${LANGUAGES_INPUT_DIR}/${DEFINITIONS_DIR_NAME}"
|
||||||
ext.DICTIONARIES_INPUT_DIR = "${LANGUAGES_INPUT_DIR}/${DICTIONARIES_DIR_NAME}"
|
ext.DICTIONARIES_INPUT_DIR = "${LANGUAGES_INPUT_DIR}/${DICTIONARIES_DIR_NAME}"
|
||||||
|
|
||||||
|
ext.DICTIONARIES_DOWNLOAD_DIR = "${project.rootDir}/${DICTIONARIES_DOWNLOAD_DIR_NAME}"
|
||||||
|
|
||||||
ext.LANGUAGES_OUTPUT_DIR = "${MAIN_ASSETS_DIR}/${LANGUAGES_DIR_NAME}"
|
ext.LANGUAGES_OUTPUT_DIR = "${MAIN_ASSETS_DIR}/${LANGUAGES_DIR_NAME}"
|
||||||
ext.DEFINITIONS_OUTPUT_DIR = "${LANGUAGES_OUTPUT_DIR}/${DEFINITIONS_DIR_NAME}"
|
ext.DEFINITIONS_OUTPUT_DIR = "${LANGUAGES_OUTPUT_DIR}/${DEFINITIONS_DIR_NAME}"
|
||||||
ext.DICTIONARY_META_OUTPUT_DIR = "${LANGUAGES_OUTPUT_DIR}/${DICTIONARIES_DIR_NAME}"
|
ext.DICTIONARY_META_OUTPUT_DIR = "${LANGUAGES_OUTPUT_DIR}/${DICTIONARIES_DIR_NAME}"
|
||||||
|
|
@ -25,5 +28,6 @@ ext.DICTIONARIES_OUTPUT_DIR = "${FULL_VERSION_ASSETS_DIR}/${LANGUAGES_DIR_NAME}/
|
||||||
ext.LANGUAGE_VALIDATION_DIR = layout.buildDirectory.dir("langValidation")
|
ext.LANGUAGE_VALIDATION_DIR = layout.buildDirectory.dir("langValidation")
|
||||||
|
|
||||||
ext.CSV_DELIMITER = ' ' // TAB
|
ext.CSV_DELIMITER = ' ' // TAB
|
||||||
|
ext.DICTIONARY_OUTPUT_EXTENSION = 'zip'
|
||||||
ext.MAX_WORD_FREQUENCY = 255
|
ext.MAX_WORD_FREQUENCY = 255
|
||||||
ext.MAX_ERRORS = 50
|
ext.MAX_ERRORS = 50
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,65 @@
|
||||||
ext.getDictionaryProperties = { dictionariesDir, sizesDir ->
|
/**
 * Static helpers for parsing dictionary lines and hashing language files.
 */
class Wrapper {
	/**
	 * Parses one dictionary line. The supported formats are:
	 *   word
	 *   word<delimiter>frequency
	 *   word<delimiter>transcription
	 *   word<delimiter>transcription<delimiter>frequency
	 *
	 * The second column is treated as a transcription only when it consists entirely of Latin letters.
	 *
	 * @return a three-element list: [word, transcription, frequency]. The transcription is "" when
	 *         missing. The frequency is 0 when missing and -1 when it is not an integer, or when
	 *         the line has more columns than its format allows.
	 */
	static def getDictionaryLineData(String line, String delimiter) {
		// Up to 3 columns are possible. With a smaller limit, the frequency of a transcribed word
		// stays glued to the transcription and can never be read.
		String[] parts = line.split(delimiter, 3)
		String word = parts[0]
		String transcription = parts.length > 1 && parts[1] =~ "^[a-zA-Z]+\$" ? parts[1] : ""

		int frequency
		try {
			int frequencyIndex = transcription.isEmpty() ? 1 : 2
			if (parts.length > frequencyIndex + 1) {
				// unexpected extra column after the frequency
				frequency = -1
			} else {
				frequency = (parts.length > frequencyIndex ? parts[frequencyIndex] : "0") as int
			}
		} catch (Exception ignored) {
			// not a number
			frequency = -1
		}

		return [word, transcription, frequency]
	}


	/**
	 * Converts a word to the sequence of digits that types it, using the "sound -> digits" map.
	 *
	 * A regular word is converted to uppercase using the given locale and is then looked up
	 * character by character. A transcribed word is used as is and it is split into sounds
	 * before each uppercase letter, for example: "KaKi" -> "Ka" + "Ki".
	 *
	 * @throws IllegalArgumentException when a sound is not in the map or when the result is empty
	 */
	static def wordToDigitSequence(Locale locale, String word, HashMap<String, String> sounds, boolean isTranscribed) {
		String sequence = ""

		final String normalizedWord = isTranscribed ? word : word.toUpperCase(locale)
		String currentSound = ""

		for (int i = 0, end = normalizedWord.length() - 1; i <= end; i++) {
			char currentChar = normalizedWord.charAt(i)
			char nextChar = i < end ? normalizedWord.charAt(i + 1) : 0
			int nextCharType = Character.getType(nextChar)

			currentSound += currentChar

			// charAt(i) returns "ΐ" as three separate characters, but they must be treated as one.
			if (
				locale.getLanguage() == "el"
				&& (nextCharType == Character.NON_SPACING_MARK || nextCharType == Character.ENCLOSING_MARK || nextCharType == Character.COMBINING_SPACING_MARK)
			) {
				continue
			}

			// a sound is complete after every character of a regular word, or at the end of the
			// word, or just before the uppercase letter that starts the next transcribed sound
			if (!isTranscribed || i == end || Character.isUpperCase(nextChar)) {
				if (!sounds.containsKey(currentSound)) {
					throw new IllegalArgumentException("Sound or layout entry '${currentSound}' does not belong to the language sound list: ${sounds}.")
				} else {
					sequence += sounds.get(currentSound)
					currentSound = ""
				}
			}
		}

		if (sequence.isEmpty()) {
			throw new IllegalArgumentException("The word does not contain any valid sounds.")
		}

		return sequence
	}


	/**
	 * Returns the SHA-256 digest of the definition file text followed by the SHA-256 digest of the
	 * dictionary file text. A null or non-existing file contributes an empty string.
	 */
	static def getLanguageHash(File definitionFile, File dictionaryFile) {
		def definitionHash = definitionFile != null && definitionFile.exists() ? definitionFile.text.digest("SHA-256") : ""
		def dictionaryHash = dictionaryFile != null && dictionaryFile.exists() ? dictionaryFile.text.digest("SHA-256") : ""
		return definitionHash + dictionaryHash
	}
}
|
||||||
|
|
||||||
|
ext.DictionaryTools = Wrapper
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<manifest xmlns:tools="http://schemas.android.com/tools"
|
<manifest xmlns:tools="http://schemas.android.com/tools"
|
||||||
android:versionCode="774"
|
android:versionCode="775"
|
||||||
android:versionName="40.0"
|
android:versionName="40.1"
|
||||||
xmlns:android="http://schemas.android.com/apk/res/android">
|
xmlns:android="http://schemas.android.com/apk/res/android">
|
||||||
|
|
||||||
<uses-permission android:name="android.permission.POST_NOTIFICATIONS"/> <!-- allows displaying notifications on Android >= 13 -->
|
<uses-permission android:name="android.permission.POST_NOTIFICATIONS"/> <!-- allows displaying notifications on Android >= 13 -->
|
||||||
|
|
|
||||||
|
|
@ -70,7 +70,7 @@ public class CustomWordFile {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
String[] parts = WordFile.splitLine(line);
|
String[] parts = WordFile.getLineData(line);
|
||||||
if (parts == null || parts.length < 2) {
|
if (parts == null || parts.length < 2) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
@ -79,7 +79,7 @@ public class CustomWordFile {
|
||||||
}
|
}
|
||||||
|
|
||||||
@NonNull public static String getWord(String line) {
|
@NonNull public static String getWord(String line) {
|
||||||
String[] parts = WordFile.splitLine(line);
|
String[] parts = WordFile.getLineData(line);
|
||||||
return parts != null && parts.length > 0 ? parts[0] : "";
|
return parts != null && parts.length > 0 ? parts[0] : "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,16 +4,14 @@ import androidx.annotation.NonNull;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
||||||
import io.github.sspanak.tt9.languages.exceptions.InvalidLanguageCharactersException;
|
|
||||||
import io.github.sspanak.tt9.languages.Language;
|
import io.github.sspanak.tt9.languages.Language;
|
||||||
|
import io.github.sspanak.tt9.languages.exceptions.InvalidLanguageCharactersException;
|
||||||
|
|
||||||
public class WordBatch {
|
public class WordBatch {
|
||||||
@NonNull private final Language language;
|
@NonNull private final Language language;
|
||||||
@NonNull private final ArrayList<Word> words;
|
@NonNull private final ArrayList<Word> words;
|
||||||
@NonNull private final ArrayList<WordPosition> positions;
|
@NonNull private final ArrayList<WordPosition> positions;
|
||||||
|
|
||||||
private WordPosition lastWordPosition;
|
|
||||||
|
|
||||||
public WordBatch(@NonNull Language language, int size) {
|
public WordBatch(@NonNull Language language, int size) {
|
||||||
this.language = language;
|
this.language = language;
|
||||||
words = size > 0 ? new ArrayList<>(size) : new ArrayList<>();
|
words = size > 0 ? new ArrayList<>(size) : new ArrayList<>();
|
||||||
|
|
@ -24,31 +22,25 @@ public class WordBatch {
|
||||||
this(language, 0);
|
this(language, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean add(@NonNull String word, int frequency, int position) throws InvalidLanguageCharactersException {
|
public void add(String word, int frequency, int position) throws InvalidLanguageCharactersException {
|
||||||
words.add(Word.create(word, frequency, position));
|
words.add(Word.create(word, frequency, position));
|
||||||
|
positions.add(WordPosition.create(language.getDigitSequenceForWord(word), position, position));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void add(@NonNull ArrayList<String> words, @NonNull String digitSequence, int position) {
|
||||||
|
if (words.isEmpty() || digitSequence.isEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0, size = words.size(); i < size; i++) {
|
||||||
|
this.words.add(Word.create(words.get(i), size - i, position + i));
|
||||||
|
}
|
||||||
|
|
||||||
if (position == 0) {
|
if (position == 0) {
|
||||||
return true;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
String sequence = language.getDigitSequenceForWord(word);
|
positions.add(WordPosition.create(digitSequence, position, position + words.size() - 1));
|
||||||
|
|
||||||
if (position == 1 || lastWordPosition == null) {
|
|
||||||
lastWordPosition = WordPosition.create(sequence, position);
|
|
||||||
} else {
|
|
||||||
lastWordPosition.end = position;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!sequence.equals(lastWordPosition.sequence)) {
|
|
||||||
lastWordPosition.end--;
|
|
||||||
positions.add(lastWordPosition);
|
|
||||||
|
|
||||||
lastWordPosition = WordPosition.create(sequence, position);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void clear() {
|
public void clear() {
|
||||||
|
|
|
||||||
|
|
@ -4,13 +4,17 @@ import android.content.Context;
|
||||||
import android.content.res.AssetManager;
|
import android.content.res.AssetManager;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.net.URLConnection;
|
import java.net.URLConnection;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
import java.util.zip.ZipEntry;
|
||||||
|
import java.util.zip.ZipInputStream;
|
||||||
|
|
||||||
import io.github.sspanak.tt9.R;
|
import io.github.sspanak.tt9.R;
|
||||||
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
|
import io.github.sspanak.tt9.preferences.settings.SettingsStore;
|
||||||
|
|
@ -21,21 +25,29 @@ public class WordFile {
|
||||||
|
|
||||||
private final AssetManager assets;
|
private final AssetManager assets;
|
||||||
private final Context context;
|
private final Context context;
|
||||||
private final String name;
|
private final String path;
|
||||||
|
|
||||||
|
private int lastCharCode;
|
||||||
|
private BufferedReader reader;
|
||||||
|
|
||||||
private String hash = null;
|
private String hash = null;
|
||||||
private String downloadUrl = null;
|
private String downloadUrl = null;
|
||||||
private int totalLines = -1;
|
private int words = -1;
|
||||||
private long size = -1;
|
private long size = -1;
|
||||||
|
private int sequences = -1;
|
||||||
|
|
||||||
|
|
||||||
public WordFile(Context context, String name, AssetManager assets) {
|
public WordFile(Context context, String path, AssetManager assets) {
|
||||||
this.assets = assets;
|
this.assets = assets;
|
||||||
this.context = context;
|
this.context = context;
|
||||||
this.name = name;
|
this.path = path;
|
||||||
|
|
||||||
|
lastCharCode = 0;
|
||||||
|
reader = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static String[] splitLine(String line) {
|
public static String[] getLineData(String line) {
|
||||||
String[] parts = {line, ""};
|
String[] parts = {line, ""};
|
||||||
|
|
||||||
// This is faster than String.split() by around 10%, so it's worth having it.
|
// This is faster than String.split() by around 10%, so it's worth having it.
|
||||||
|
|
@ -52,18 +64,9 @@ public class WordFile {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static short getFrequencyFromLineParts(String[] frequencyParts) {
|
|
||||||
try {
|
|
||||||
return Short.parseShort(frequencyParts[1]);
|
|
||||||
} catch (Exception e) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public boolean exists() {
|
public boolean exists() {
|
||||||
try {
|
try {
|
||||||
assets.open(name).close();
|
assets.open(path).close();
|
||||||
return true;
|
return true;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
return false;
|
return false;
|
||||||
|
|
@ -80,8 +83,17 @@ public class WordFile {
|
||||||
|
|
||||||
|
|
||||||
public BufferedReader getReader() throws IOException {
|
public BufferedReader getReader() throws IOException {
|
||||||
InputStream stream = exists() ? assets.open(name) : getRemoteStream();
|
if (reader != null) {
|
||||||
return new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
|
return reader;
|
||||||
|
}
|
||||||
|
|
||||||
|
InputStream stream = exists() ? assets.open(path) : getRemoteStream();
|
||||||
|
ZipInputStream zipStream = new ZipInputStream(stream);
|
||||||
|
ZipEntry entry = zipStream.getNextEntry();
|
||||||
|
if (entry == null) {
|
||||||
|
throw new IOException("Dictionary ZIP file: " + path + " is empty.");
|
||||||
|
}
|
||||||
|
return reader = new BufferedReader(new InputStreamReader(zipStream, StandardCharsets.UTF_8));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -99,12 +111,20 @@ public class WordFile {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
String revision = rawValue == null || rawValue.isEmpty() ? "" : rawValue;
|
downloadUrl = null;
|
||||||
downloadUrl = revision.isEmpty() ? null : context.getString(R.string.dictionary_url, revision, name);
|
|
||||||
|
|
||||||
|
String revision = rawValue == null || rawValue.isEmpty() ? "" : rawValue;
|
||||||
if (revision.isEmpty()) {
|
if (revision.isEmpty()) {
|
||||||
Logger.w(LOG_TAG, "Invalid 'revision' property of: " + name + ". Expecting a string, got: '" + rawValue + "'.");
|
Logger.w(LOG_TAG, "Invalid 'revision' property of: " + path + ". Expecting a string, got: '" + rawValue + "'.");
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (path == null || path.isEmpty()) {
|
||||||
|
Logger.w(LOG_TAG, "Cannot generate a download URL for an empty path.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
downloadUrl = context.getString(R.string.dictionary_url, revision, new File(path).getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -125,39 +145,62 @@ public class WordFile {
|
||||||
hash = rawValue == null || rawValue.isEmpty() ? "" : rawValue;
|
hash = rawValue == null || rawValue.isEmpty() ? "" : rawValue;
|
||||||
|
|
||||||
if (hash.isEmpty()) {
|
if (hash.isEmpty()) {
|
||||||
Logger.w(LOG_TAG, "Invalid 'hash' property of: " + name + ". Expecting a string, got: '" + rawValue + "'.");
|
Logger.w(LOG_TAG, "Invalid 'hash' property of: " + path + ". Expecting a string, got: '" + rawValue + "'.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public int getTotalLines() {
|
public int getSequences() {
|
||||||
if (totalLines < 0) {
|
if (sequences < 0) {
|
||||||
loadProperties();
|
loadProperties();
|
||||||
}
|
}
|
||||||
|
|
||||||
return totalLines;
|
return sequences;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public String getFormattedTotalLines(String suffix) {
|
private void setSequences(String rawProperty, String rawValue) {
|
||||||
if (getTotalLines() > 1000000) {
|
if (!rawProperty.equals("sequences")) {
|
||||||
return String.format(Locale.ROOT, "%1.2fM %s", getTotalLines() / 1000000.0, suffix);
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
sequences = Integer.parseInt(rawValue);
|
||||||
|
} catch (Exception e) {
|
||||||
|
Logger.w(LOG_TAG, "Invalid 'sequences' property of: " + path + ". Expecting an integer, got: '" + rawValue + "'.");
|
||||||
|
sequences = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public int getWords() {
|
||||||
|
if (words < 0) {
|
||||||
|
loadProperties();
|
||||||
|
}
|
||||||
|
|
||||||
|
return words;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public String getFormattedWords(String suffix) {
|
||||||
|
if (getWords() > 1000000) {
|
||||||
|
return String.format(Locale.ROOT, "%1.2fM %s", getWords() / 1000000.0, suffix);
|
||||||
} else {
|
} else {
|
||||||
return getTotalLines() / 1000 + "k " + suffix;
|
return getWords() / 1000 + "k " + suffix;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void setTotalLines(String rawProperty, String rawValue) {
|
private void setWords(String rawProperty, String rawValue) {
|
||||||
if (!rawProperty.equals("words")) {
|
if (!rawProperty.equals("words")) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
totalLines = Integer.parseInt(rawValue);
|
words = Integer.parseInt(rawValue);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
Logger.w(LOG_TAG, "Invalid 'words' property of: " + name + ". Expecting an integer, got: '" + rawValue + "'.");
|
Logger.w(LOG_TAG, "Invalid 'words' property of: " + path + ". Expecting an integer, got: '" + rawValue + "'.");
|
||||||
totalLines = 0;
|
words = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -184,14 +227,14 @@ public class WordFile {
|
||||||
try {
|
try {
|
||||||
size = Long.parseLong(rawValue);
|
size = Long.parseLong(rawValue);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
Logger.w(LOG_TAG, "Invalid 'size' property of: " + name + ". Expecting an integer, got: '" + rawValue + "'.");
|
Logger.w(LOG_TAG, "Invalid 'size' property of: " + path + ". Expecting an integer, got: '" + rawValue + "'.");
|
||||||
size = 0;
|
size = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void loadProperties() {
|
private void loadProperties() {
|
||||||
String propertyFilename = name + ".props.yml";
|
String propertyFilename = path.replaceFirst("\\.\\w+$", "") + ".props.yml";
|
||||||
|
|
||||||
try (BufferedReader reader = new BufferedReader(new InputStreamReader(assets.open(propertyFilename)))) {
|
try (BufferedReader reader = new BufferedReader(new InputStreamReader(assets.open(propertyFilename)))) {
|
||||||
for (String line; (line = reader.readLine()) != null; ) {
|
for (String line; (line = reader.readLine()) != null; ) {
|
||||||
|
|
@ -202,11 +245,102 @@ public class WordFile {
|
||||||
|
|
||||||
setDownloadUrl(parts[0], parts[1]);
|
setDownloadUrl(parts[0], parts[1]);
|
||||||
setHash(parts[0], parts[1]);
|
setHash(parts[0], parts[1]);
|
||||||
setTotalLines(parts[0], parts[1]);
|
setWords(parts[0], parts[1]);
|
||||||
|
setSequences(parts[0], parts[1]);
|
||||||
setSize(parts[0], parts[1]);
|
setSize(parts[0], parts[1]);
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
Logger.w(LOG_TAG, "Could not read the property file: " + propertyFilename + ". " + e.getMessage());
|
Logger.w(LOG_TAG, "Could not read the property file: " + propertyFilename + ". " + e.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public String getNextSequence() throws IOException {
|
||||||
|
if (reader == null || !notEOF()) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
StringBuilder sequence = new StringBuilder();
|
||||||
|
|
||||||
|
// use the last char from getNextWords() if it's a digit
|
||||||
|
if (Character.isDigit(lastCharCode)) {
|
||||||
|
sequence.append((char) lastCharCode);
|
||||||
|
}
|
||||||
|
|
||||||
|
while ((lastCharCode = reader.read()) != -1) {
|
||||||
|
if (Character.isDigit(lastCharCode)) {
|
||||||
|
sequence.append((char) lastCharCode);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sequence.length() == 0) {
|
||||||
|
throw new IOException("Could not find next sequence. Unexpected end of file.");
|
||||||
|
}
|
||||||
|
|
||||||
|
return sequence.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public ArrayList<String> getNextWords(String digitSequence) throws IOException {
|
||||||
|
ArrayList<String> words = new ArrayList<>();
|
||||||
|
|
||||||
|
if (reader == null || !notEOF()) {
|
||||||
|
return words;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean areWordsSeparated = false;
|
||||||
|
StringBuilder word = new StringBuilder();
|
||||||
|
|
||||||
|
// If the word string starts with a space, it means there are words longer than the sequence.
|
||||||
|
// We must make sure to extract them correctly.
|
||||||
|
if (lastCharCode == ' ') {
|
||||||
|
areWordsSeparated = true;
|
||||||
|
}
|
||||||
|
// use the last char from getNextSequence() if it's a letter
|
||||||
|
else if (!Character.isDigit(lastCharCode)) {
|
||||||
|
word.append((char) lastCharCode);
|
||||||
|
}
|
||||||
|
|
||||||
|
int sequenceLength = digitSequence.length();
|
||||||
|
|
||||||
|
// start extracting the words
|
||||||
|
int wordLength = word.length();
|
||||||
|
while ((lastCharCode = reader.read()) != -1) {
|
||||||
|
if (Character.isDigit(lastCharCode)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lastCharCode == ' ') {
|
||||||
|
areWordsSeparated = true;
|
||||||
|
} else {
|
||||||
|
word.append((char) lastCharCode);
|
||||||
|
wordLength++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((areWordsSeparated && lastCharCode == ' ' && wordLength > 0) || (!areWordsSeparated && wordLength == sequenceLength)) {
|
||||||
|
words.add(word.toString());
|
||||||
|
wordLength = 0;
|
||||||
|
word.setLength(wordLength);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((areWordsSeparated && wordLength > 0) || (!areWordsSeparated && wordLength == sequenceLength)) {
|
||||||
|
words.add(word.toString());
|
||||||
|
} else if (wordLength > 0) {
|
||||||
|
throw new IOException("Unexpected end of file. Word: '" + word + "' length (" + wordLength + ") differs from the length of sequence: " + digitSequence);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (words.isEmpty()) {
|
||||||
|
throw new IOException("Could not find any words for sequence: " + digitSequence);
|
||||||
|
}
|
||||||
|
|
||||||
|
return words;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public boolean notEOF() {
|
||||||
|
return lastCharCode != -1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,10 +7,11 @@ public class WordPosition {
|
||||||
public int start;
|
public int start;
|
||||||
public int end;
|
public int end;
|
||||||
|
|
||||||
public static WordPosition create(@NonNull String sequence, int start) {
|
public static WordPosition create(@NonNull String sequence, int start, int end) {
|
||||||
WordPosition position = new WordPosition();
|
WordPosition position = new WordPosition();
|
||||||
position.sequence = sequence;
|
position.sequence = sequence;
|
||||||
position.start = start;
|
position.start = start;
|
||||||
|
position.end = end;
|
||||||
|
|
||||||
return position;
|
return position;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,10 @@
|
||||||
package io.github.sspanak.tt9.db.exceptions;
|
package io.github.sspanak.tt9.db.exceptions;
|
||||||
|
|
||||||
public class DictionaryImportException extends Exception {
|
public class DictionaryImportException extends Exception {
|
||||||
public final String word;
|
|
||||||
public final long line;
|
public final long line;
|
||||||
|
|
||||||
public DictionaryImportException(String word, long line) {
|
public DictionaryImportException(String message, long line) {
|
||||||
super("Dictionary import failed");
|
super(message);
|
||||||
this.word = word;
|
|
||||||
this.line = line;
|
this.line = line;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ import android.os.Bundle;
|
||||||
import android.os.Handler;
|
import android.os.Handler;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
|
@ -79,9 +80,14 @@ public class DictionaryLoader {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
loadThread = new Thread() {
|
loadThread = new Thread(() -> loadSync(context, languages));
|
||||||
@Override
|
loadThread.start();
|
||||||
public void run() {
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void loadSync(Context context, ArrayList<Language> languages) {
|
||||||
currentFile = 0;
|
currentFile = 0;
|
||||||
Timer.start(IMPORT_TIMER);
|
Timer.start(IMPORT_TIMER);
|
||||||
|
|
||||||
|
|
@ -89,7 +95,7 @@ public class DictionaryLoader {
|
||||||
|
|
||||||
// SQLite does not support parallel queries, so let's import them one by one
|
// SQLite does not support parallel queries, so let's import them one by one
|
||||||
for (Language lang : languages) {
|
for (Language lang : languages) {
|
||||||
if (isInterrupted()) {
|
if (loadThread.isInterrupted()) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
importAll(context, lang);
|
importAll(context, lang);
|
||||||
|
|
@ -98,11 +104,6 @@ public class DictionaryLoader {
|
||||||
|
|
||||||
Timer.stop(IMPORT_TIMER);
|
Timer.stop(IMPORT_TIMER);
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
|
||||||
loadThread.start();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public static void load(Context context, Language language) {
|
public static void load(Context context, Language language) {
|
||||||
|
|
@ -210,13 +211,11 @@ public class DictionaryLoader {
|
||||||
} catch (DictionaryImportException e) {
|
} catch (DictionaryImportException e) {
|
||||||
stop();
|
stop();
|
||||||
sqlite.failTransaction();
|
sqlite.failTransaction();
|
||||||
sendImportError(DictionaryImportException.class.getSimpleName(), language.getId(), e.line, e.word);
|
sendImportError(DictionaryImportException.class.getSimpleName(), language.getId(), e.line);
|
||||||
|
|
||||||
Logger.e(
|
Logger.e(
|
||||||
LOG_TAG,
|
LOG_TAG,
|
||||||
" Invalid word: '" + e.word
|
" Invalid word in dictionary: '" + language.getDictionaryFile() + "'"
|
||||||
+ "' in dictionary: '" + language.getDictionaryFile() + "'"
|
|
||||||
+ " on line " + e.line
|
|
||||||
+ " of language '" + language.getName() + "'. "
|
+ " of language '" + language.getName() + "'. "
|
||||||
+ e.getMessage()
|
+ e.getMessage()
|
||||||
);
|
);
|
||||||
|
|
@ -256,34 +255,32 @@ public class DictionaryLoader {
|
||||||
|
|
||||||
private void importWordFile(Context context, Language language, int positionShift, float minProgress, float maxProgress) throws Exception {
|
private void importWordFile(Context context, Language language, int positionShift, float minProgress, float maxProgress) throws Exception {
|
||||||
WordFile wordFile = new WordFile(context, language.getDictionaryFile(), assets);
|
WordFile wordFile = new WordFile(context, language.getDictionaryFile(), assets);
|
||||||
WordBatch batch = new WordBatch(language, wordFile.getTotalLines());
|
WordBatch batch = new WordBatch(language, SettingsStore.DICTIONARY_IMPORT_BATCH_SIZE + 1);
|
||||||
int currentLine = 1;
|
float progressRatio = (maxProgress - minProgress) / wordFile.getWords();
|
||||||
float progressRatio = (maxProgress - minProgress) / wordFile.getTotalLines();
|
int wordCount = 0;
|
||||||
|
|
||||||
try (BufferedReader br = wordFile.getReader()) {
|
try (BufferedReader ignored = wordFile.getReader()) {
|
||||||
for (String line; (line = br.readLine()) != null; currentLine++) {
|
while (wordFile.notEOF()) {
|
||||||
if (loadThread.isInterrupted()) {
|
if (loadThread.isInterrupted()) {
|
||||||
sendProgressMessage(language, 0, 0);
|
sendProgressMessage(language, 0, 0);
|
||||||
throw new DictionaryImportAbortedException();
|
throw new DictionaryImportAbortedException();
|
||||||
}
|
}
|
||||||
|
|
||||||
String[] parts = WordFile.splitLine(line);
|
|
||||||
String word = parts[0];
|
|
||||||
short frequency = WordFile.getFrequencyFromLineParts(parts);
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
boolean isFinalized = batch.add(word, frequency, currentLine + positionShift);
|
String digitSequence = wordFile.getNextSequence();
|
||||||
if (isFinalized && batch.getWords().size() > SettingsStore.DICTIONARY_IMPORT_BATCH_SIZE) {
|
ArrayList<String> words = wordFile.getNextWords(digitSequence);
|
||||||
|
batch.add(words, digitSequence, wordCount + positionShift);
|
||||||
|
wordCount += words.size();
|
||||||
|
|
||||||
|
if (batch.getWords().size() > SettingsStore.DICTIONARY_IMPORT_BATCH_SIZE) {
|
||||||
saveWordBatch(batch);
|
saveWordBatch(batch);
|
||||||
batch.clear();
|
batch.clear();
|
||||||
}
|
}
|
||||||
} catch (InvalidLanguageCharactersException e) {
|
} catch (IOException e) {
|
||||||
throw new DictionaryImportException(word, currentLine);
|
throw new DictionaryImportException(e.getMessage(), wordCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (wordFile.getTotalLines() > 0) {
|
sendProgressMessage(language, minProgress + progressRatio * wordCount, SettingsStore.DICTIONARY_IMPORT_PROGRESS_UPDATE_TIME);
|
||||||
sendProgressMessage(language, minProgress + progressRatio * currentLine, SettingsStore.DICTIONARY_IMPORT_PROGRESS_UPDATE_TIME);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -353,7 +350,7 @@ public class DictionaryLoader {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void sendImportError(String message, int langId, long fileLine, String word) {
|
private void sendImportError(String message, int langId, long fileLine) {
|
||||||
if (onStatusChange == null) {
|
if (onStatusChange == null) {
|
||||||
Logger.w(LOG_TAG, "Cannot send an import error without a status Handler. Ignoring message.");
|
Logger.w(LOG_TAG, "Cannot send an import error without a status Handler. Ignoring message.");
|
||||||
return;
|
return;
|
||||||
|
|
@ -363,14 +360,13 @@ public class DictionaryLoader {
|
||||||
errorMsg.putString("error", message);
|
errorMsg.putString("error", message);
|
||||||
errorMsg.putLong("fileLine", fileLine + 1);
|
errorMsg.putLong("fileLine", fileLine + 1);
|
||||||
errorMsg.putInt("languageId", langId);
|
errorMsg.putInt("languageId", langId);
|
||||||
errorMsg.putString("word", word);
|
|
||||||
asyncHandler.post(() -> onStatusChange.accept(errorMsg));
|
asyncHandler.post(() -> onStatusChange.accept(errorMsg));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void logLoadingStep(String message, Language language, long time) {
|
private void logLoadingStep(String message, Language language, long time) {
|
||||||
if (Logger.isDebugLevel()) {
|
if (Logger.isDebugLevel()) {
|
||||||
Logger.d(LOG_TAG, message + " for language '" + language.getName() + "' in: " + time + " ms");
|
Logger.d(LOG_TAG, message + " for language '" + language.getName() + "' (" + language.getId() + ") in: " + time + " ms");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ import java.nio.charset.StandardCharsets;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import io.github.sspanak.tt9.BuildConfig;
|
||||||
import io.github.sspanak.tt9.util.Logger;
|
import io.github.sspanak.tt9.util.Logger;
|
||||||
|
|
||||||
public class LanguageDefinition {
|
public class LanguageDefinition {
|
||||||
|
|
@ -93,6 +94,9 @@ public class LanguageDefinition {
|
||||||
definition.locale = getPropertyFromYaml(yaml, "locale", definition.locale);
|
definition.locale = getPropertyFromYaml(yaml, "locale", definition.locale);
|
||||||
definition.name = getPropertyFromYaml(yaml, "name", definition.name);
|
definition.name = getPropertyFromYaml(yaml, "name", definition.name);
|
||||||
|
|
||||||
|
if (definition.dictionaryFile != null) {
|
||||||
|
definition.dictionaryFile = definition.dictionaryFile.replaceFirst("\\.\\w+$", "." + BuildConfig.DICTIONARY_EXTENSION);
|
||||||
|
}
|
||||||
return definition;
|
return definition;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,7 @@ public class PreferenceSwitchLanguage extends SwitchPreferenceCompat {
|
||||||
summary
|
summary
|
||||||
.append(", ")
|
.append(", ")
|
||||||
.append(
|
.append(
|
||||||
wordFile.getFormattedTotalLines(activity.getString(R.string.language_selection_words))
|
wordFile.getFormattedWords(activity.getString(R.string.language_selection_words))
|
||||||
);
|
);
|
||||||
|
|
||||||
// download size
|
// download size
|
||||||
|
|
|
||||||
|
|
@ -75,8 +75,7 @@ public class DictionaryLoadingBar extends DictionaryProgressNotification {
|
||||||
context,
|
context,
|
||||||
error,
|
error,
|
||||||
data.getInt("languageId", -1),
|
data.getInt("languageId", -1),
|
||||||
data.getLong("fileLine", -1),
|
data.getLong("fileLine", -1)
|
||||||
data.getString("word", "")
|
|
||||||
);
|
);
|
||||||
} else if (progress >= 0) {
|
} else if (progress >= 0) {
|
||||||
hasFailed = false;
|
hasFailed = false;
|
||||||
|
|
@ -133,13 +132,13 @@ public class DictionaryLoadingBar extends DictionaryProgressNotification {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void showError(Context context, String errorType, int langId, long line, String word) {
|
private void showError(Context context, String errorType, int langId, long line) {
|
||||||
Language lang = LanguageCollection.getLanguage(context, langId);
|
Language lang = LanguageCollection.getLanguage(context, langId);
|
||||||
|
|
||||||
if (lang == null || errorType.equals(InvalidLanguageException.class.getSimpleName())) {
|
if (lang == null || errorType.equals(InvalidLanguageException.class.getSimpleName())) {
|
||||||
message = resources.getString(R.string.add_word_invalid_language);
|
message = resources.getString(R.string.add_word_invalid_language);
|
||||||
} else if (errorType.equals(DictionaryImportException.class.getSimpleName()) || errorType.equals(InvalidLanguageCharactersException.class.getSimpleName())) {
|
} else if (errorType.equals(DictionaryImportException.class.getSimpleName()) || errorType.equals(InvalidLanguageCharactersException.class.getSimpleName())) {
|
||||||
message = resources.getString(R.string.dictionary_load_bad_char, word, line, lang.getName());
|
message = resources.getString(R.string.dictionary_load_bad_char, line, lang.getName());
|
||||||
} else if (errorType.equals(UnknownHostException.class.getSimpleName()) || errorType.equals(SocketException.class.getSimpleName())) {
|
} else if (errorType.equals(UnknownHostException.class.getSimpleName()) || errorType.equals(SocketException.class.getSimpleName())) {
|
||||||
message = resources.getString(R.string.dictionary_load_no_internet, lang.getName());
|
message = resources.getString(R.string.dictionary_load_no_internet, lang.getName());
|
||||||
} else if (errorType.equals(IOException.class.getSimpleName()) || errorType.equals(FileNotFoundException.class.getSimpleName())) {
|
} else if (errorType.equals(IOException.class.getSimpleName()) || errorType.equals(FileNotFoundException.class.getSimpleName())) {
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@
|
||||||
<string name="pref_status_icon">Икона за състояние</string>
|
<string name="pref_status_icon">Икона за състояние</string>
|
||||||
<string name="pref_status_icon_summary">Показвай икона, когато въвеждането с клавиатура е активно.</string>
|
<string name="pref_status_icon_summary">Показвай икона, когато въвеждането с клавиатура е активно.</string>
|
||||||
<string name="dictionary_cancel_load">Отмени зареждането</string>
|
<string name="dictionary_cancel_load">Отмени зареждането</string>
|
||||||
<string name="dictionary_load_bad_char">Неуспешно зареждане. Невалидна дума „%1$s“ на ред %2$d за език „%3$s“.</string>
|
<string name="dictionary_load_bad_char">Неуспешно зареждане. Невалидна дума на ред %1$d за език „%2$s“.</string>
|
||||||
<string name="dictionary_load_error">Несупешно зареждане на речник за език „%1$s“ (%2$s).</string>
|
<string name="dictionary_load_error">Несупешно зареждане на речник за език „%1$s“ (%2$s).</string>
|
||||||
<string name="dictionary_loaded">Зареждането на речник приключи.</string>
|
<string name="dictionary_loaded">Зареждането на речник приключи.</string>
|
||||||
<string name="dictionary_loading">Зареждане на речник (%1$s)…</string>
|
<string name="dictionary_loading">Зареждане на речник (%1$s)…</string>
|
||||||
|
|
|
||||||
|
|
@ -47,7 +47,7 @@
|
||||||
<string name="pref_upside_down_keys">Die Reihenfolge der Tasten umkehren</string>
|
<string name="pref_upside_down_keys">Die Reihenfolge der Tasten umkehren</string>
|
||||||
<string name="pref_upside_down_keys_summary">Aktivieren Sie, wenn die Tastatur in der ersten Zeile 7–8–9 anstelle von 1–2–3 hat.</string>
|
<string name="pref_upside_down_keys_summary">Aktivieren Sie, wenn die Tastatur in der ersten Zeile 7–8–9 anstelle von 1–2–3 hat.</string>
|
||||||
<string name="dictionary_cancel_load">Laden abbrechen</string>
|
<string name="dictionary_cancel_load">Laden abbrechen</string>
|
||||||
<string name="dictionary_load_bad_char">Laden fehlgeschlagen. Ungültiges Wort „%1$s“ in Zeile %2$d der Sprache „%3$s“.</string>
|
<string name="dictionary_load_bad_char">Laden fehlgeschlagen. Ungültiges Wort in Zeile %1$d der Sprache „%2$s“.</string>
|
||||||
<string name="dictionary_load_error">Fehler beim Laden des Wörterbuchs für die Sprache „%1$s“ (%2$s).</string>
|
<string name="dictionary_load_error">Fehler beim Laden des Wörterbuchs für die Sprache „%1$s“ (%2$s).</string>
|
||||||
<string name="dictionary_load_no_internet">Fehler beim Herunterladen des Wörterbuchs für die Sprache „%1$s“. Überprüfen Sie die Internetverbindung.</string>
|
<string name="dictionary_load_no_internet">Fehler beim Herunterladen des Wörterbuchs für die Sprache „%1$s“. Überprüfen Sie die Internetverbindung.</string>
|
||||||
<string name="dictionary_load_cancelled">Laden abgebrochen.</string>
|
<string name="dictionary_load_cancelled">Laden abgebrochen.</string>
|
||||||
|
|
|
||||||
|
|
@ -68,7 +68,7 @@
|
||||||
<string name="pref_auto_space">Espacio automático</string>
|
<string name="pref_auto_space">Espacio automático</string>
|
||||||
<string name="pref_auto_space_summary">Insertar un espacio automático después de palabras y signos de puntuación.</string>
|
<string name="pref_auto_space_summary">Insertar un espacio automático después de palabras y signos de puntuación.</string>
|
||||||
<string name="pref_double_zero_char">Carácter cuando se presiona \"0\" dos veces</string>
|
<string name="pref_double_zero_char">Carácter cuando se presiona \"0\" dos veces</string>
|
||||||
<string name="dictionary_load_bad_char">Error al cargar. Palabra inválida \"%1$s\" en la línea %2$d del idioma \"%3$s\".</string>
|
<string name="dictionary_load_bad_char">Error al cargar. Palabra inválida en la línea %1$d del idioma \"%2$s\".</string>
|
||||||
<string name="dictionary_load_error">Error al cargar el diccionario para el idioma \"%1$s\" (%2$s).</string>
|
<string name="dictionary_load_error">Error al cargar el diccionario para el idioma \"%1$s\" (%2$s).</string>
|
||||||
<string name="dictionary_load_no_internet">Error al descargar el diccionario para el idioma \"%1$s\". Verifique la conexión a Internet.</string>
|
<string name="dictionary_load_no_internet">Error al descargar el diccionario para el idioma \"%1$s\". Verifique la conexión a Internet.</string>
|
||||||
<string name="dictionary_load_cancelled">Carga del diccionario cancelada.</string>
|
<string name="dictionary_load_cancelled">Carga del diccionario cancelada.</string>
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@
|
||||||
<string name="pref_category_delete_words">Supprimer des mots ajoutés</string>
|
<string name="pref_category_delete_words">Supprimer des mots ajoutés</string>
|
||||||
<string name="pref_category_hacks">Compatibilité</string>
|
<string name="pref_category_hacks">Compatibilité</string>
|
||||||
<string name="pref_category_appearance">Apparance</string>
|
<string name="pref_category_appearance">Apparance</string>
|
||||||
<string name="dictionary_load_bad_char">Echec du chargement. Mot inadmissible «%1$s» à la ligne %2$d de langue «%3$s».</string>
|
<string name="dictionary_load_bad_char">Echec du chargement. Mot inadmissible à la ligne %1$d de langue «%2$s».</string>
|
||||||
<string name="dictionary_truncated">Le dictionaire est supprimé avec succès.</string>
|
<string name="dictionary_truncated">Le dictionaire est supprimé avec succès.</string>
|
||||||
<string name="pref_hack_fb_messenger">Envoyer avec «OK» dans Facebook Messenger</string>
|
<string name="pref_hack_fb_messenger">Envoyer avec «OK» dans Facebook Messenger</string>
|
||||||
<string name="pref_hack_always_on_top">Toujours au premier plan</string>
|
<string name="pref_hack_always_on_top">Toujours au premier plan</string>
|
||||||
|
|
|
||||||
|
|
@ -61,7 +61,7 @@
|
||||||
<string name="pref_category_predictive_mode">Scrittura facilitata</string>
|
<string name="pref_category_predictive_mode">Scrittura facilitata</string>
|
||||||
<string name="pref_category_function_keys">Scorciatoie da tastiera</string>
|
<string name="pref_category_function_keys">Scorciatoie da tastiera</string>
|
||||||
<string name="dictionary_loading_indeterminate">Caricamento del dizionario</string>
|
<string name="dictionary_loading_indeterminate">Caricamento del dizionario</string>
|
||||||
<string name="dictionary_load_bad_char">Caricamento non riuscito. Parola non valida \"%1$s\" alla riga %2$d della lingua \"%3$s\".</string>
|
<string name="dictionary_load_bad_char">Caricamento non riuscito. Parola non valida alla riga %1$d della lingua \"%2$s\".</string>
|
||||||
<string name="dictionary_load_error">Caricamento del dizionario per la lingua \"%1$s\" non riuscito (%2$s).</string>
|
<string name="dictionary_load_error">Caricamento del dizionario per la lingua \"%1$s\" non riuscito (%2$s).</string>
|
||||||
<string name="dictionary_load_no_internet">"Download del dizionario per la lingua \"%1$s\" non riuscito. Controlla la connessione Internet. "</string>
|
<string name="dictionary_load_no_internet">"Download del dizionario per la lingua \"%1$s\" non riuscito. Controlla la connessione Internet. "</string>
|
||||||
<string name="dictionary_load_cancelled">Caricamento annullato.</string>
|
<string name="dictionary_load_cancelled">Caricamento annullato.</string>
|
||||||
|
|
|
||||||
|
|
@ -60,7 +60,7 @@
|
||||||
<string name="pref_upside_down_keys">להפוך את סדר המקשים</string>
|
<string name="pref_upside_down_keys">להפוך את סדר המקשים</string>
|
||||||
<string name="pref_upside_down_keys_summary">הפעל את ההגדרה אם המקלדת כוללת את המספרים 7-8-9 בשורה הראשונה, במקום 1-2-3.</string>
|
<string name="pref_upside_down_keys_summary">הפעל את ההגדרה אם המקלדת כוללת את המספרים 7-8-9 בשורה הראשונה, במקום 1-2-3.</string>
|
||||||
<string name="dictionary_cancel_load">ביטול טעינה</string>
|
<string name="dictionary_cancel_load">ביטול טעינה</string>
|
||||||
<string name="dictionary_load_bad_char">הטעינה נכשלה. מילה לא חוקית \"%1$s\" בשורה %2$d עבור \"%3$s\".</string>
|
<string name="dictionary_load_bad_char">הטעינה נכשלה. מילה לא חוקית בשורה %1$d עבור \"%2$s\".</string>
|
||||||
<string name="dictionary_load_error">נכשלה טעינת המילון עבור \"%1$s\" (%2$s).</string>
|
<string name="dictionary_load_error">נכשלה טעינת המילון עבור \"%1$s\" (%2$s).</string>
|
||||||
<string name="dictionary_load_no_internet">נכשל בהורדת המילון עבור השפה \"%1$s\". בדוק את חיבור האינטרנט.</string>
|
<string name="dictionary_load_no_internet">נכשל בהורדת המילון עבור השפה \"%1$s\". בדוק את חיבור האינטרנט.</string>
|
||||||
<string name="dictionary_load_cancelled">טעינת המילון בוטלה</string>
|
<string name="dictionary_load_cancelled">טעינת המילון בוטלה</string>
|
||||||
|
|
|
||||||
|
|
@ -64,7 +64,7 @@
|
||||||
<string name="pref_status_icon">Būsenos piktograma</string>
|
<string name="pref_status_icon">Būsenos piktograma</string>
|
||||||
<string name="pref_status_icon_summary">Rodyti piktogramą, kai aktyvus klaviatūros įvedimas</string>
|
<string name="pref_status_icon_summary">Rodyti piktogramą, kai aktyvus klaviatūros įvedimas</string>
|
||||||
<string name="dictionary_cancel_load">Atšaukti įkėlimą</string>
|
<string name="dictionary_cancel_load">Atšaukti įkėlimą</string>
|
||||||
<string name="dictionary_load_bad_char">Įkelti \"%3$s\" kalbos nepavyko. Klaida %2$d eilutėje, netinkamas žodis - \"%1$s\".</string>
|
<string name="dictionary_load_bad_char">Įkelti \"%2$s\" kalbos nepavyko. Klaida %1$d eilutėje, netinkamas žodis.</string>
|
||||||
<string name="dictionary_load_error">Klaida įkeliant \"%1$s\" kalbos žodyną (%2$s).</string>
|
<string name="dictionary_load_error">Klaida įkeliant \"%1$s\" kalbos žodyną (%2$s).</string>
|
||||||
<string name="dictionary_load_no_internet">Nepavyko atsisiųsti žodyno kalbai „%1$s“. Patikrinkite interneto ryšį.</string>
|
<string name="dictionary_load_no_internet">Nepavyko atsisiųsti žodyno kalbai „%1$s“. Patikrinkite interneto ryšį.</string>
|
||||||
<string name="dictionary_load_cancelled">Žodyno įkėlimas atšauktas.</string>
|
<string name="dictionary_load_cancelled">Žodyno įkėlimas atšauktas.</string>
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,7 @@
|
||||||
<string name="pref_upside_down_keys">De volgorde van de toetsen omkeren</string>
|
<string name="pref_upside_down_keys">De volgorde van de toetsen omkeren</string>
|
||||||
<string name="pref_upside_down_keys_summary">Activeer als het toetsenbord 7–8–9 op de eerste rij heeft, in plaats van 1–2–3.</string>
|
<string name="pref_upside_down_keys_summary">Activeer als het toetsenbord 7–8–9 op de eerste rij heeft, in plaats van 1–2–3.</string>
|
||||||
<string name="dictionary_cancel_load">Laden annuleren</string>
|
<string name="dictionary_cancel_load">Laden annuleren</string>
|
||||||
<string name="dictionary_load_bad_char">Laden mislukt. Ongeldig woord \"%1$s\" op regel %2$d van taal \"%3$s\".</string>
|
<string name="dictionary_load_bad_char">Laden mislukt. Ongeldig woord op regel %1$d van taal \"%2$s\".</string>
|
||||||
<string name="dictionary_load_error">Het laden van het woordenboek voor de taal \"%1$s\" is mislukt (%2$s).</string>
|
<string name="dictionary_load_error">Het laden van het woordenboek voor de taal \"%1$s\" is mislukt (%2$s).</string>
|
||||||
<string name="dictionary_load_no_internet">Het downloaden van het woordenboek voor de taal \"%1$s\" is mislukt. Controleer de internetverbinding.</string>
|
<string name="dictionary_load_no_internet">Het downloaden van het woordenboek voor de taal \"%1$s\" is mislukt. Controleer de internetverbinding.</string>
|
||||||
<string name="dictionary_load_cancelled">Laden geannuleerd.</string>
|
<string name="dictionary_load_cancelled">Laden geannuleerd.</string>
|
||||||
|
|
|
||||||
|
|
@ -59,7 +59,7 @@
|
||||||
<string name="pref_status_icon">Ícone de status</string>
|
<string name="pref_status_icon">Ícone de status</string>
|
||||||
<string name="pref_status_icon_summary">Mostrar um ícone quando a digitação estiver ativa.</string>
|
<string name="pref_status_icon_summary">Mostrar um ícone quando a digitação estiver ativa.</string>
|
||||||
<string name="dictionary_cancel_load">Cancelar Carregamento</string>
|
<string name="dictionary_cancel_load">Cancelar Carregamento</string>
|
||||||
<string name="dictionary_load_bad_char">Falha no carregamento. \"%1$s\" na linha %2$d do idioma \"%3$s\".</string>
|
<string name="dictionary_load_bad_char">Falha no carregamento. Palavra inválida na linha %1$d do idioma \"%2$s\".</string>
|
||||||
<string name="dictionary_load_error">Falha no carregamento do dicionário para o idioma \"%1$s\" (%2$s).</string>
|
<string name="dictionary_load_error">Falha no carregamento do dicionário para o idioma \"%1$s\" (%2$s).</string>
|
||||||
<string name="dictionary_load_no_internet">Falha ao baixar o dicionário para o idioma \"%1$s\". Verifique a conexão com a Internet.</string>
|
<string name="dictionary_load_no_internet">Falha ao baixar o dicionário para o idioma \"%1$s\". Verifique a conexão com a Internet.</string>
|
||||||
<string name="dictionary_load_cancelled">Carregamento de dicionário cancelado.</string>
|
<string name="dictionary_load_cancelled">Carregamento de dicionário cancelado.</string>
|
||||||
|
|
|
||||||
|
|
@ -67,7 +67,7 @@
|
||||||
<string name="pref_auto_text_case_summary">Автоматически начинать предложение с заглавной буквы.</string>
|
<string name="pref_auto_text_case_summary">Автоматически начинать предложение с заглавной буквы.</string>
|
||||||
<string name="pref_double_zero_char">Символ при двойном нажатии клавиши 0</string>
|
<string name="pref_double_zero_char">Символ при двойном нажатии клавиши 0</string>
|
||||||
<string name="pref_hack_fb_messenger">Отправка с «ОК» в Messenger</string>
|
<string name="pref_hack_fb_messenger">Отправка с «ОК» в Messenger</string>
|
||||||
<string name="dictionary_load_bad_char">Не удалось загрузить словарь. Проблема в слове «%1$s» в строке %2$d для языка «%3$s».</string>
|
<string name="dictionary_load_bad_char">Не удалось загрузить словарь. Проблема в слове в строке %1$d для языка «%2$s».</string>
|
||||||
<string name="function_backspace">Стереть</string>
|
<string name="function_backspace">Стереть</string>
|
||||||
<string name="dictionary_no_notifications">Уведомления словаря</string>
|
<string name="dictionary_no_notifications">Уведомления словаря</string>
|
||||||
<string name="dictionary_no_notifications_summary">Получать уведомления о обновлениях словаря и о процессе загрузки.</string>
|
<string name="dictionary_no_notifications_summary">Получать уведомления о обновлениях словаря и о процессе загрузки.</string>
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,7 @@
|
||||||
<string name="pref_status_icon">Durum</string>
|
<string name="pref_status_icon">Durum</string>
|
||||||
<string name="pref_status_icon_summary">Klavye girişi etkin olduğunda bir simge göster.</string>
|
<string name="pref_status_icon_summary">Klavye girişi etkin olduğunda bir simge göster.</string>
|
||||||
<string name="dictionary_cancel_load">Yüklemeyi İptal Et</string>
|
<string name="dictionary_cancel_load">Yüklemeyi İptal Et</string>
|
||||||
<string name="dictionary_load_bad_char">Yükleme başarısız. \"%1$s\" sözcüğü \"%3$s\" dilinin %2$d satırında geçersiz.</string>
|
<string name="dictionary_load_bad_char">Yükleme başarısız. \"%2$s\" dilinde %1$d. satırda geçersiz kelime.</string>
|
||||||
<string name="dictionary_load_error">“%1$s” dili için sözlük yüklenemedi (%2$s).</string>
|
<string name="dictionary_load_error">“%1$s” dili için sözlük yüklenemedi (%2$s).</string>
|
||||||
<string name="dictionary_load_no_internet">“%1$s” dili için sözlük indirilemedi. İnternet bağlantısını kontrol edin.</string>
|
<string name="dictionary_load_no_internet">“%1$s” dili için sözlük indirilemedi. İnternet bağlantısını kontrol edin.</string>
|
||||||
<string name="dictionary_load_cancelled">Yükleme iptal edildi.</string>
|
<string name="dictionary_load_cancelled">Yükleme iptal edildi.</string>
|
||||||
|
|
|
||||||
|
|
@ -70,7 +70,7 @@
|
||||||
<string name="pref_status_icon">Іконка статусу</string>
|
<string name="pref_status_icon">Іконка статусу</string>
|
||||||
<string name="pref_status_icon_summary">Показати іконку, коли активне введення з клавіатури.</string>
|
<string name="pref_status_icon_summary">Показати іконку, коли активне введення з клавіатури.</string>
|
||||||
<string name="dictionary_cancel_load">Скасувати завантаження</string>
|
<string name="dictionary_cancel_load">Скасувати завантаження</string>
|
||||||
<string name="dictionary_load_bad_char">Завантаження не вдалося. Невірне слово \"%1$s\" у рядку %2$d мови \"%3$s\".</string>
|
<string name="dictionary_load_bad_char">Завантаження не вдалося. Невірне слово у рядку %1$d мови \"%2$s\".</string>
|
||||||
<string name="dictionary_load_error">Не вдалося завантажити словник для мови \"%1$s\" (%2$s).</string>
|
<string name="dictionary_load_error">Не вдалося завантажити словник для мови \"%1$s\" (%2$s).</string>
|
||||||
<string name="dictionary_load_no_internet">Не вдалося завантажити словник для мови \"%1$s\". Перевірте підключення до Інтернету.</string>
|
<string name="dictionary_load_no_internet">Не вдалося завантажити словник для мови \"%1$s\". Перевірте підключення до Інтернету.</string>
|
||||||
<string name="dictionary_load_cancelled">Завантаження словника скасовано.</string>
|
<string name="dictionary_load_cancelled">Завантаження словника скасовано.</string>
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<resources>
|
<resources>
|
||||||
<string translatable="false" name="dictionary_url">https://raw.githubusercontent.com/sspanak/tt9/%1$s/app/%2$s</string>
|
<string translatable="false" name="dictionary_url">https://raw.githubusercontent.com/sspanak/tt9/%1$s/downloads/%2$s</string>
|
||||||
<string name="app_name" translatable="false">Traditional T9</string>
|
<string name="app_name" translatable="false">Traditional T9</string>
|
||||||
<string name="app_name_short" translatable="false">TT9</string>
|
<string name="app_name_short" translatable="false">TT9</string>
|
||||||
<string name="app_settings">TT9 Settings</string>
|
<string name="app_settings">TT9 Settings</string>
|
||||||
|
|
@ -90,7 +90,7 @@
|
||||||
<string name="pref_upside_down_keys_summary">Enable if the keypad has 7–8–9 on the first row, instead of 1–2–3.</string>
|
<string name="pref_upside_down_keys_summary">Enable if the keypad has 7–8–9 on the first row, instead of 1–2–3.</string>
|
||||||
|
|
||||||
<string name="dictionary_cancel_load">Cancel Loading</string>
|
<string name="dictionary_cancel_load">Cancel Loading</string>
|
||||||
<string name="dictionary_load_bad_char">Loading failed. Invalid word \"%1$s\" on line %2$d of language \"%3$s\".</string>
|
<string name="dictionary_load_bad_char">Loading failed. Invalid word on line %1$d of language \"%2$s\".</string>
|
||||||
<string name="dictionary_load_error">Failed loading the dictionary for language \"%1$s\" (%2$s).</string>
|
<string name="dictionary_load_error">Failed loading the dictionary for language \"%1$s\" (%2$s).</string>
|
||||||
<string name="dictionary_load_no_internet">Failed downloading the dictionary for language \"%1$s\". Check the Internet connection.</string>
|
<string name="dictionary_load_no_internet">Failed downloading the dictionary for language \"%1$s\". Check the Internet connection.</string>
|
||||||
<string name="dictionary_load_cancelled">Dictionary load cancelled.</string>
|
<string name="dictionary_load_cancelled">Dictionary load cancelled.</string>
|
||||||
|
|
|
||||||
|
|
@ -1,58 +1,59 @@
|
||||||
static def validateDictionaryWord(String word, int lineNumber, String validCharacters, String errorMsgPrefix) {
|
apply from: 'dictionary-tools.gradle'
|
||||||
|
|
||||||
|
|
||||||
|
ext.validateLanguageFiles = { definitionsDir, dictionariesDir, validationDir ->
|
||||||
int errorCount = 0
|
int errorCount = 0
|
||||||
def errors = ''
|
|
||||||
|
|
||||||
if (word.matches("(\\d.+?|.+?\\d|\\d)")) {
|
def errorStream = fileTree(dir: definitionsDir).getFiles().parallelStream().map { definition ->
|
||||||
errorCount++
|
if (errorCount >= MAX_ERRORS) {
|
||||||
errors += "${errorMsgPrefix}. Found numbers on line ${lineNumber}. Remove all numbers.\n"
|
return "Too many errors! Skipping: ${definition}\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
if (word.matches("^\\P{L}+\$")) {
|
def (alphabet, sounds, isAlphabeticLanguage, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageDefintion(definition, dictionariesDir)
|
||||||
errorCount++
|
|
||||||
errors += "${errorMsgPrefix}. Found a garbage word: '${word}' on line ${lineNumber}.\n"
|
def languageHash = DictionaryTools.getLanguageHash(definition, dictionaryFile)
|
||||||
|
def validationFile = new File("${validationDir}/${definition.name.replace(".yml", "")}.txt")
|
||||||
|
|
||||||
|
errorCount += langFileErrorCount
|
||||||
|
if (!langFileErrorMsg.isEmpty()) {
|
||||||
|
validationFile.text = "${languageHash} INVALID"
|
||||||
|
return langFileErrorMsg
|
||||||
}
|
}
|
||||||
|
|
||||||
if (word.matches("^(.|\\p{L}\\p{M}?)\$")) {
|
if (validationFile.exists() && validationFile.text == "${languageHash} OK") {
|
||||||
errorCount++
|
return ""
|
||||||
errors += "${errorMsgPrefix}. Found a single letter: '${word}' on line ${lineNumber}. Only uppercase single letters are allowed. The rest of the alphabet will be added automatically.\n"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (errorCount == 0 && !word.matches(validCharacters)) {
|
def (dictionaryErrorCount, dictionaryErrorMesages) = validateDictionary(dictionaryFile, alphabet, sounds, isAlphabeticLanguage, locale, MAX_ERRORS, CSV_DELIMITER, MAX_WORD_FREQUENCY)
|
||||||
errorCount++
|
errorCount += dictionaryErrorCount
|
||||||
errors += "${errorMsgPrefix}. Word '${word}' on line ${lineNumber} contains characters outside of the defined alphabet: $validCharacters.\n"
|
if (!dictionaryErrorMesages.isEmpty()) {
|
||||||
|
validationFile.text = "${languageHash} INVALID"
|
||||||
|
return dictionaryErrorMesages
|
||||||
}
|
}
|
||||||
|
|
||||||
return [errorCount, errors]
|
validationFile.text = "${languageHash} OK"
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
String errorsMsg = errorStream.reduce("", String::concat)
|
||||||
|
if (errorsMsg) {
|
||||||
|
throw new GradleException(errorsMsg)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static def validateDictionaryLine(String line, int lineNumber) {
|
ext.parseLanguageDefintion = { File languageFile, String dictionariesDir ->
|
||||||
if (line == "") {
|
|
||||||
return "There is no word on line ${lineNumber}. Remove all empty lines."
|
|
||||||
} else if (line.contains(" ")) {
|
|
||||||
return "Found space on line ${lineNumber}. Make sure each word is on a new line. Phrases are not allowed."
|
|
||||||
}
|
|
||||||
|
|
||||||
return ''
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static def extractAlphabetCharsFromLine(String line) {
|
|
||||||
if (line.contains('PUNCTUATION') || line.contains('SPECIAL') || !line.matches('\\s+- \\[.+?\\].*')) {
|
|
||||||
return ''
|
|
||||||
}
|
|
||||||
|
|
||||||
return line.replaceFirst('^\\s+- \\[', '').replaceFirst('\\].*', '').replace(',', '').replace(' ', '')
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static def parseLanguageFile(File languageFile, String dictionariesDir) {
|
|
||||||
String alphabet = ""
|
String alphabet = ""
|
||||||
|
int layoutKey = 0
|
||||||
|
HashMap<String, String> sounds = new HashMap<>()
|
||||||
|
HashMap<String, String> layoutSounds = new HashMap<>()
|
||||||
|
|
||||||
File dictionaryFile
|
File dictionaryFile
|
||||||
int errorCount = 0
|
int errorCount = 0
|
||||||
String errorMsg = ""
|
String errorMsg = ""
|
||||||
|
|
||||||
boolean hasLayout = false
|
boolean hasLayout = false
|
||||||
|
boolean hasSounds = false
|
||||||
boolean isLocaleValid = false
|
boolean isLocaleValid = false
|
||||||
String localeString = ""
|
String localeString = ""
|
||||||
String dictionaryFileName = ""
|
String dictionaryFileName = ""
|
||||||
|
|
@ -70,6 +71,7 @@ static def parseLanguageFile(File languageFile, String dictionariesDir) {
|
||||||
&& !line.startsWith("layout")
|
&& !line.startsWith("layout")
|
||||||
&& !line.startsWith("locale")
|
&& !line.startsWith("locale")
|
||||||
&& !line.startsWith("name")
|
&& !line.startsWith("name")
|
||||||
|
&& !line.startsWith("sounds")
|
||||||
) {
|
) {
|
||||||
def parts = line.split(":")
|
def parts = line.split(":")
|
||||||
def property = parts.length > 0 ? parts[0] : line
|
def property = parts.length > 0 ? parts[0] : line
|
||||||
|
|
@ -92,6 +94,10 @@ static def parseLanguageFile(File languageFile, String dictionariesDir) {
|
||||||
hasLayout = true
|
hasLayout = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (line.startsWith("sounds")) {
|
||||||
|
hasSounds = true
|
||||||
|
}
|
||||||
|
|
||||||
if (line.startsWith("locale")) {
|
if (line.startsWith("locale")) {
|
||||||
localeString = line.replace("locale:", "").trim()
|
localeString = line.replace("locale:", "").trim()
|
||||||
isLocaleValid = line.matches("^locale:\\s*[a-z]{2}(?:-[A-Z]{2})?")
|
isLocaleValid = line.matches("^locale:\\s*[a-z]{2}(?:-[A-Z]{2})?")
|
||||||
|
|
@ -101,8 +107,32 @@ static def parseLanguageFile(File languageFile, String dictionariesDir) {
|
||||||
dictionaryFileName = line.replace("dictionaryFile:", "").trim()
|
dictionaryFileName = line.replace("dictionaryFile:", "").trim()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// alphabet string
|
||||||
def lineCharacters = extractAlphabetCharsFromLine(line)
|
def lineCharacters = extractAlphabetCharsFromLine(line)
|
||||||
alphabet += lineCharacters
|
alphabet += lineCharacters
|
||||||
|
|
||||||
|
// sounds, single letters
|
||||||
|
if (lineCharacters) {
|
||||||
|
lineCharacters.each { letter ->
|
||||||
|
layoutSounds.put(letter, layoutKey.toString())
|
||||||
|
}
|
||||||
|
} else if (line.contains("PUNCTUATION")) {
|
||||||
|
layoutSounds.put("-", layoutKey.toString())
|
||||||
|
layoutSounds.put(".", layoutKey.toString())
|
||||||
|
layoutSounds.put("'", layoutKey.toString())
|
||||||
|
layoutSounds.put('"', layoutKey.toString())
|
||||||
|
layoutSounds.put('·', layoutKey.toString())
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isLayoutLine(line)) {
|
||||||
|
layoutKey++
|
||||||
|
}
|
||||||
|
|
||||||
|
// sounds, syllables
|
||||||
|
def (sound, sequence) = extractSoundFromLine(line)
|
||||||
|
if (!sound.isEmpty() && !sequence.isEmpty()) {
|
||||||
|
sounds.put(sound, sequence)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!hasLayout) {
|
if (!hasLayout) {
|
||||||
|
|
@ -115,6 +145,11 @@ static def parseLanguageFile(File languageFile, String dictionariesDir) {
|
||||||
errorMsg += "Language '${languageFile.name}' is invalid. No language characters found. Make sure 'layout' contains series of characters per each key in the format: ' - [a, b, c]' and so on\n"
|
errorMsg += "Language '${languageFile.name}' is invalid. No language characters found. Make sure 'layout' contains series of characters per each key in the format: ' - [a, b, c]' and so on\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (hasSounds && sounds.isEmpty()) {
|
||||||
|
errorCount++
|
||||||
|
errorMsg += "Language '${languageFile.name}' is invalid. 'sounds' property must contain series of phonetic transcriptions per digit sequence in the format: ' - [Yae,1221]' and so on.\n"
|
||||||
|
}
|
||||||
|
|
||||||
if (!isLocaleValid) {
|
if (!isLocaleValid) {
|
||||||
errorCount++
|
errorCount++
|
||||||
def msg = localeString.isEmpty() ? "Missing 'locale' property." : "Unrecognized locale format: '${localeString}'"
|
def msg = localeString.isEmpty() ? "Missing 'locale' property." : "Unrecognized locale format: '${localeString}'"
|
||||||
|
|
@ -130,112 +165,147 @@ static def parseLanguageFile(File languageFile, String dictionariesDir) {
|
||||||
String[] localeParts = localeString.split(("[-_]"))
|
String[] localeParts = localeString.split(("[-_]"))
|
||||||
Locale locale = new Locale(localeParts[0], localeParts.length > 1 ? localeParts[1] : "")
|
Locale locale = new Locale(localeParts[0], localeParts.length > 1 ? localeParts[1] : "")
|
||||||
|
|
||||||
return [alphabet, locale, dictionaryFile, errorCount, errorMsg]
|
if (!hasSounds && locale != null) {
|
||||||
|
layoutSounds.forEach { sound, sequence ->
|
||||||
|
sounds.put(sound.toUpperCase(locale), sequence)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return [alphabet, sounds, !hasSounds, locale, dictionaryFile, errorCount, errorMsg]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static def parseDictionaryFile(String alphabet, Locale locale, File dictionaryFile, int MAX_ERRORS, String CSV_DELIMITER, int MAX_WORD_FREQUENCY) {
|
// this cannot be static, because DictionaryTools will not be visible
|
||||||
final GEOGRAPHICAL_NAME = ~"[A-Z]\\w+-[^\\n]+"
|
def validateDictionary(File dictionaryFile, String alphabet, HashMap<String, String> sounds, boolean isAlphabeticLanguage, Locale locale, int maxErrors, String csvDelimiter, int maxWordFrequency) {
|
||||||
final VALID_CHARS = alphabet.toUpperCase(locale) == alphabet ? "^[${alphabet}\\-\\.']+\$" : "^[${alphabet}${alphabet.toUpperCase(locale)}\\-\\.']+\$"
|
final VALID_CHARS = alphabet.toUpperCase(locale) == alphabet ? "^[${alphabet}\\-\\.']+\$" : "^[${alphabet}${alphabet.toUpperCase(locale)}\\-\\.']+\$"
|
||||||
final int MAX_SORTING_ERRORS = Math.ceil(MAX_ERRORS / 10)
|
|
||||||
|
|
||||||
def uniqueWords = [:]
|
|
||||||
|
|
||||||
int errorCount = 0
|
int errorCount = 0
|
||||||
int sortingErrorCount = 0
|
String errorMsg = ''
|
||||||
String errorMsg = ""
|
Set<String> uniqueWords = new HashSet<>()
|
||||||
|
|
||||||
def fileContents = dictionaryFile.readLines()
|
List<String> fileContents = dictionaryFile.readLines()
|
||||||
for (int lineNumber = 1, previousWordLength = 0; lineNumber <= fileContents.size() && errorCount < MAX_ERRORS; lineNumber++) {
|
for (int lineNumber = 1; lineNumber <= fileContents.size() && errorCount < maxErrors; lineNumber++) {
|
||||||
String line = fileContents.get(lineNumber - 1)
|
String line = fileContents.get(lineNumber - 1)
|
||||||
|
boolean lineHasErrors = false
|
||||||
|
|
||||||
String error = validateDictionaryLine(line, lineNumber)
|
String whiteSpaceError = validateNoWhitespace(line, lineNumber)
|
||||||
if (!error.isEmpty()) {
|
if (whiteSpaceError) {
|
||||||
|
lineHasErrors = true
|
||||||
errorCount++
|
errorCount++
|
||||||
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. ${error}\n"
|
errorMsg += whiteSpaceError
|
||||||
break
|
|
||||||
}
|
}
|
||||||
|
|
||||||
String[] parts = line.split(CSV_DELIMITER, 2)
|
def (word, transcription, frequency) = DictionaryTools.getDictionaryLineData(line, csvDelimiter)
|
||||||
String word = parts[0]
|
|
||||||
int frequency
|
|
||||||
try {
|
|
||||||
frequency = (parts.length > 1 ? parts[1] : "0") as int
|
|
||||||
} catch (Exception ignored) {
|
|
||||||
frequency = -1
|
|
||||||
}
|
|
||||||
|
|
||||||
if (frequency < 0 || frequency > MAX_WORD_FREQUENCY) {
|
String frequencyError = validateFrequency(frequency, maxWordFrequency, dictionaryFile.name, lineNumber)
|
||||||
|
if (frequencyError) {
|
||||||
|
lineHasErrors = true
|
||||||
errorCount++
|
errorCount++
|
||||||
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Found out-of-range word frequency: '${parts[1]}' on line ${lineNumber}. Frequency must be an integer between 0 and ${MAX_WORD_FREQUENCY}.\n"
|
errorMsg += frequencyError
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sortingErrorCount < MAX_SORTING_ERRORS && word.length() < previousWordLength) {
|
def (wordErrorCount, wordErrors) = validateWord(word, VALID_CHARS, isAlphabeticLanguage, lineNumber, "Dictionary '${dictionaryFile.name}' is invalid")
|
||||||
sortingErrorCount++
|
if (wordErrorCount > 0) {
|
||||||
errorCount++
|
|
||||||
|
|
||||||
if (sortingErrorCount == MAX_SORTING_ERRORS) {
|
|
||||||
errorMsg += "Too many sorting errors in '${dictionaryFile.name}'. Disabling sorting check until the end of the file.\n"
|
|
||||||
} else {
|
|
||||||
errorMsg += "Dictionary '${dictionaryFile.name}' is not sorted. Word: '${word}' on line ${lineNumber} is shorter than the previous one. Ensure all words are sorted by length and sequence.\n"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
previousWordLength = word.length()
|
|
||||||
|
|
||||||
def (wordErrorCount, wordErrors) = validateDictionaryWord(word, lineNumber, VALID_CHARS, "Dictionary '${dictionaryFile.name}' is invalid")
|
|
||||||
errorCount += wordErrorCount
|
errorCount += wordErrorCount
|
||||||
errorMsg += wordErrors
|
errorMsg += wordErrors
|
||||||
|
}
|
||||||
|
|
||||||
String uniqueWordKey = word ==~ GEOGRAPHICAL_NAME ? word : word.toLowerCase(locale)
|
if (uniqueWords.contains(word)) {
|
||||||
if (uniqueWords[uniqueWordKey] != null && uniqueWords[uniqueWordKey] == true) {
|
lineHasErrors = true
|
||||||
errorCount++
|
errorCount++
|
||||||
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Found a repeating word: '${word}' on line ${lineNumber}. Ensure all words appear only once.\n"
|
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Found duplicate word: '${word}' on line ${lineNumber}. Remove all duplicates.\n"
|
||||||
} else {
|
} else {
|
||||||
uniqueWords[uniqueWordKey] = true
|
uniqueWords.add(word)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lineHasErrors) {
|
||||||
|
// the validations below make no sense if the previous ones have failed
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
def transcribedWord = transcription.isEmpty() ? word : transcription
|
||||||
|
DictionaryTools.wordToDigitSequence(locale, transcribedWord, sounds, !transcription.isEmpty())
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
errorCount++
|
||||||
|
errorMsg += "Dictionary '${dictionaryFile.name}' is invalid. Failed generating digit sequence for word '${word}' on line ${lineNumber}. ${e.message}\n"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return [errorMsg, errorCount]
|
return [errorCount, errorMsg]
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////// PARSING ////////////////////
|
||||||
|
|
||||||
|
static def extractAlphabetCharsFromLine(String line) {
|
||||||
|
if (line.contains('PUNCTUATION') || line.contains('SPECIAL') || !isLayoutLine(line)) {
|
||||||
|
return ''
|
||||||
|
}
|
||||||
|
|
||||||
|
return line.replaceFirst('^\\s+- \\[', '').replaceFirst('\\].*', '').replace(',', '').replace(' ', '')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
ext.validateLanguageFiles = { definitionsDir, dictionariesDir, validationDir ->
|
static def extractSoundFromLine(String line) {
|
||||||
|
if (!line.matches('\\s+- \\[\\w+\\s*,\\s*\\d+\\].*')) {
|
||||||
|
return ['', '']
|
||||||
|
}
|
||||||
|
|
||||||
|
def cleanLine = line.replaceFirst('^\\s+- \\[', '').replaceFirst('\\].*', '').replace(' ', '')
|
||||||
|
def parts = cleanLine.split(',')
|
||||||
|
return parts.length > 1 ? [parts[0], parts[1]] : ['', '']
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static def isLayoutLine(String line) {
|
||||||
|
return line.matches('\\s+- \\[.+?\\].*') && !line.find('\\d+]')
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////// VALIDATION ////////////////////
|
||||||
|
|
||||||
|
static def validateNoWhitespace(String line, int lineNumber) {
|
||||||
|
if (line == "") {
|
||||||
|
return "There is no word on line ${lineNumber}. Remove all empty lines.\n"
|
||||||
|
} else if (line.contains(" ")) {
|
||||||
|
return "Found space on line ${lineNumber}. Make sure each word is on a new line. Phrases are not allowed.\n"
|
||||||
|
}
|
||||||
|
|
||||||
|
return ''
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static def validateFrequency(int frequency, int maxFrequency, String dictionaryFileName, int lineNumber) {
|
||||||
|
if (frequency < 0 || frequency > maxFrequency) {
|
||||||
|
return "Dictionary '${dictionaryFileName}' is invalid. Found out-of-range word frequency: '${frequency}' on line ${lineNumber}. Frequency must be an integer between 0 and ${maxFrequency}.\n"
|
||||||
|
}
|
||||||
|
|
||||||
|
return ''
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static def validateWord(String word, String validCharacters, boolean isAlphabeticLanguage, int lineNumber, String errorMsgPrefix) {
|
||||||
int errorCount = 0
|
int errorCount = 0
|
||||||
|
def errors = ''
|
||||||
|
|
||||||
def errorStream = fileTree(definitionsDir).getFiles().parallelStream().map { File languageFile ->
|
if (word.matches("(\\d.+?|.+?\\d|\\d)")) {
|
||||||
def contentHash = languageFile.text.digest("SHA-1")
|
errorCount++
|
||||||
def outputFile = new File("${validationDir}/${languageFile.name.replace(".yml", "")}.txt")
|
errors += "${errorMsgPrefix}. Found numbers on line ${lineNumber}. Remove all numbers.\n"
|
||||||
|
|
||||||
if (outputFile.exists() && outputFile.text == "${contentHash} OK") {
|
|
||||||
return ""
|
|
||||||
}
|
}
|
||||||
|
|
||||||
outputFile.text = ""
|
if (word.matches("^\\P{L}+\$")) {
|
||||||
|
errorCount++
|
||||||
if (errorCount >= MAX_ERRORS) {
|
errors += "${errorMsgPrefix}. Found a garbage word: '${word}' on line ${lineNumber}.\n"
|
||||||
return "Too many errors! Skipping: ${languageFile}\n"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def (alphabet, locale, dictionaryFile, langFileErrorCount, langFileErrorMsg) = parseLanguageFile(languageFile, dictionariesDir)
|
if (isAlphabeticLanguage && word.matches("^(.|\\p{L}\\p{M}?)\$")) {
|
||||||
errorCount += langFileErrorCount
|
errorCount++
|
||||||
if (!langFileErrorMsg.isEmpty()) {
|
errors += "${errorMsgPrefix}. Found a single letter: '${word}' on line ${lineNumber}. Only uppercase single letters are allowed. The rest of the alphabet will be added automatically.\n"
|
||||||
outputFile.text += "${contentHash} INVALID"
|
|
||||||
return langFileErrorMsg
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def (dictionaryErrorMsg, dictionaryErrorCount) = parseDictionaryFile(alphabet, locale, dictionaryFile, MAX_ERRORS, CSV_DELIMITER, MAX_WORD_FREQUENCY)
|
if (errorCount == 0 && isAlphabeticLanguage && !word.matches(validCharacters)) {
|
||||||
errorCount += dictionaryErrorCount
|
errorCount++
|
||||||
if (!dictionaryErrorMsg.isEmpty()) {
|
errors += "${errorMsgPrefix}. Word '${word}' on line ${lineNumber} contains characters outside of the defined alphabet: $validCharacters.\n"
|
||||||
outputFile.text += "${contentHash} INVALID"
|
|
||||||
return dictionaryErrorMsg
|
|
||||||
}
|
}
|
||||||
|
|
||||||
outputFile.text += "${contentHash} OK"
|
return [errorCount, errors]
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
String errorsMsg = errorStream.reduce("", String::concat)
|
|
||||||
if (errorsMsg) {
|
|
||||||
throw new GradleException(errorsMsg)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
BIN
downloads/ar-utf8.zip
Normal file
BIN
downloads/ar-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/bg-utf8.zip
Normal file
BIN
downloads/bg-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/ca-utf8.zip
Normal file
BIN
downloads/ca-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/cz-utf8.zip
Normal file
BIN
downloads/cz-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/da-utf8.zip
Normal file
BIN
downloads/da-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/de-utf8.zip
Normal file
BIN
downloads/de-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/en-utf8.zip
Normal file
BIN
downloads/en-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/es-utf8.zip
Normal file
BIN
downloads/es-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/fi-utf8.zip
Normal file
BIN
downloads/fi-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/fr-utf8.zip
Normal file
BIN
downloads/fr-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/gr-utf8.zip
Normal file
BIN
downloads/gr-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/he-utf8.zip
Normal file
BIN
downloads/he-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/hing-utf8.zip
Normal file
BIN
downloads/hing-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/hr-utf8.zip
Normal file
BIN
downloads/hr-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/hu-utf8.zip
Normal file
BIN
downloads/hu-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/id-utf8.zip
Normal file
BIN
downloads/id-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/it-utf8.zip
Normal file
BIN
downloads/it-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/ji-utf8.zip
Normal file
BIN
downloads/ji-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/lt-utf8.zip
Normal file
BIN
downloads/lt-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/nb-utf8.zip
Normal file
BIN
downloads/nb-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/nl-utf8.zip
Normal file
BIN
downloads/nl-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/pl-utf8.zip
Normal file
BIN
downloads/pl-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/pt-BR-utf8.zip
Normal file
BIN
downloads/pt-BR-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/pt-PT-utf8.zip
Normal file
BIN
downloads/pt-PT-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/ro-utf8.zip
Normal file
BIN
downloads/ro-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/ru-utf8.zip
Normal file
BIN
downloads/ru-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/sv-utf8.zip
Normal file
BIN
downloads/sv-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/sw-utf8.zip
Normal file
BIN
downloads/sw-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/th-utf8.zip
Normal file
BIN
downloads/th-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/tr-utf8.zip
Normal file
BIN
downloads/tr-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/uk-utf8.zip
Normal file
BIN
downloads/uk-utf8.zip
Normal file
Binary file not shown.
BIN
downloads/vi-utf8.zip
Normal file
BIN
downloads/vi-utf8.zip
Normal file
Binary file not shown.
|
|
@ -6,7 +6,7 @@
|
||||||
# http://www.gradle.org/docs/current/userguide/build_environment.html
|
# http://www.gradle.org/docs/current/userguide/build_environment.html
|
||||||
# Specifies the JVM arguments used for the daemon process.
|
# Specifies the JVM arguments used for the daemon process.
|
||||||
# The setting is particularly useful for tweaking memory settings.
|
# The setting is particularly useful for tweaking memory settings.
|
||||||
org.gradle.jvmargs=-Xmx4096m -Dfile.encoding=UTF-8
|
org.gradle.jvmargs=-Xmx6192m -Dfile.encoding=UTF-8
|
||||||
# When configured, Gradle will run in incubating parallel mode.
|
# When configured, Gradle will run in incubating parallel mode.
|
||||||
# This option should only be used with decoupled projects. More details, visit
|
# This option should only be used with decoupled projects. More details, visit
|
||||||
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
|
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue