1
0
Fork 0

New dictionary loader (#89)

* new, simpler (and hopefully, more efficient) dictionary loader

* no more dict.properties

* dictionaries are now validated during the build process

* TraditionalT9Settings code cleanup and code style improvements

* removed repeating English, French, Italian, and Russian words

* removed invalid and repeating German words
This commit is contained in:
Dimo Karaivanov 2022-10-27 14:31:57 +03:00 committed by GitHub
parent 0ac7ec1790
commit 10099f1c37
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
24 changed files with 534 additions and 1855 deletions

View file

@ -2,6 +2,8 @@ const { basename } = require('path');
const { createReadStream, existsSync } = require('fs');
const GEO_NAME = /[A-Z]\w+\-[^\n]+/;
function printHelp() {
console.log(`Usage ${basename(process.argv[1])} LOCALE FILENAME.txt `);
@ -28,17 +30,44 @@ function validateInput() {
/**
 * Builds the key under which a word is stored when de-duplicating.
 *
 * Words matching GEO_NAME (an uppercase letter, word characters, a hyphen and
 * more text) are kept verbatim; every other word is lowercased using the
 * rules of the given locale.
 *
 * @param {string} locale - Locale passed to String.prototype.toLocaleLowerCase.
 * @param {string} word - The word to build a key for.
 * @returns {string} The key, or '' when `word` is not a non-empty string.
 */
function getRegularWordKey(locale, word) {
  const isUsableWord = typeof word === 'string' && word.length > 0;
  if (!isUsableWord) {
    return '';
  }

  if (GEO_NAME.test(word)) {
    // hyphenated capitalized names keep their original spelling
    return word;
  }

  return word.toLocaleLowerCase(locale);
}
/**
 * Builds the de-duplication key for a word while letting capitalized
 * spellings take precedence over an already-stored lowercase spelling.
 *
 * When `word` matches GEO_NAME or differs from its locale-lowercased form,
 * the word itself is the key and a truthy entry for the lowercased form is
 * deleted from `wordMap`. Otherwise the lowercased word is the key and
 * `wordMap` is left untouched.
 *
 * @param {string} locale - Locale passed to String.prototype.toLocaleLowerCase.
 * @param {string} word - The word to build a key for.
 * @param {Object} wordMap - Map of already collected keys; may be mutated.
 * @returns {string} The key, or '' when `word` is not a non-empty string or
 *   `wordMap` is not a non-null object.
 */
function getWordKeyPreservingCaptialization(locale, word, wordMap) {
  // typeof null === 'object', so null has to be rejected explicitly;
  // otherwise the property lookup below would throw a TypeError.
  const isValidMap = typeof wordMap === 'object' && wordMap !== null;
  if (typeof word !== 'string' || word.length === 0 || !isValidMap) {
    return '';
  }

  // Computed once; the value is needed for the comparison, lookup and delete.
  const lowerCaseWord = word.toLocaleLowerCase(locale);
  if (!GEO_NAME.test(word) && lowerCaseWord === word) {
    return lowerCaseWord;
  }

  // The capitalized spelling replaces a lowercase entry collected earlier.
  if (wordMap[lowerCaseWord]) {
    delete wordMap[lowerCaseWord];
  }

  return word;
}
async function removeRepeatingWords({ fileName, locale }) {
const lineReader = require('readline').createInterface({
input: createReadStream(fileName)
});
const geographicalName = /[A-Z]\w+\-[^\n]+/;
const wordMap = {};
for await (const line of lineReader) {
const wordKey = geographicalName.test(line) ? line : line.toLocaleLowerCase(locale);
wordMap[wordKey] = true
wordMap[getWordKeyPreservingCaptialization(locale, line, wordMap)] = true;
}
return Object.keys(wordMap);
@ -57,4 +86,6 @@ function printWords(wordList) {
/** main **/
// Script entry point: validate the input, de-duplicate the words, then print them.
// NOTE(review): the statement below and the chained call after it both invoke
// removeRepeatingWords(validateInput()); this looks like the before/after pair
// of a diff — confirm that only one of them is meant to remain.
// This first form attaches no rejection handler.
removeRepeatingWords(validateInput()).then(words => printWords(words));
// Same pipeline, but a rejected promise is reported through console.error.
removeRepeatingWords(validateInput())
.then(words => printWords(words))
.catch(e => console.error(e));