New dictionary loader (#89)
* new, simpler (and hopefully more efficient) dictionary loader
* no more dict.properties
* dictionaries are now validated during the build process
* TraditionalT9Settings code cleanup and code style improvements
* removed repeating English, French, Italian and Russian words
* removed invalid and repeating German words
This commit is contained in:
parent
0ac7ec1790
commit
10099f1c37
24 changed files with 534 additions and 1855 deletions
|
|
@ -2,6 +2,8 @@ const { basename } = require('path');
|
|||
const { createReadStream, existsSync } = require('fs');
|
||||
|
||||
|
||||
const GEO_NAME = /[A-Z]\w+\-[^\n]+/;
|
||||
|
||||
|
||||
function printHelp() {
|
||||
console.log(`Usage ${basename(process.argv[1])} LOCALE FILENAME.txt `);
|
||||
|
|
@ -28,17 +30,44 @@ function validateInput() {
|
|||
|
||||
|
||||
|
||||
/**
 * Builds the key under which a word is stored when removing repeating words.
 *
 * A word that matches the GEO_NAME pattern is returned unchanged; any other
 * word is lowercased using the rules of the given locale.
 *
 * @param {string} locale - locale passed to String.prototype.toLocaleLowerCase
 * @param {*} word - the word to derive a key for
 * @returns {string} the key, or '' when `word` is not a non-empty string
 */
function getRegularWordKey(locale, word) {
  const isUsableWord = typeof word === 'string' && word.length > 0;
  if (!isUsableWord) {
    return '';
  }

  if (GEO_NAME.test(word)) {
    return word;
  }

  return word.toLocaleLowerCase(locale);
}
|
||||
|
||||
|
||||
|
||||
/**
 * Builds the key for a word while preserving its capitalization.
 *
 * A word that matches the GEO_NAME pattern, or that differs from its
 * locale-lowercased form, is used verbatim as the key. In that case the
 * lowercased variant is deleted from `wordMap` when it is present with a
 * truthy value. A word that is already lowercase is keyed by itself.
 *
 * @param {string} locale - locale passed to String.prototype.toLocaleLowerCase
 * @param {*} word - the word to derive a key for
 * @param {object} wordMap - map of already collected keys; may be mutated
 * @returns {string} the key, or '' when `word` is not a non-empty string or
 *   `wordMap` is not a non-null object
 */
function getWordKeyPreservingCaptialization(locale, word, wordMap) {
  const hasWord = typeof word === 'string' && word.length > 0;
  // typeof null is 'object', so null has to be rejected explicitly;
  // otherwise the property lookup below would throw a TypeError.
  const hasMap = typeof wordMap === 'object' && wordMap !== null;
  if (!hasWord || !hasMap) {
    return '';
  }

  // Lowercase once and reuse the result for the comparison and the lookup.
  const lowerCaseWord = word.toLocaleLowerCase(locale);

  const keepsOriginalForm = GEO_NAME.test(word) || lowerCaseWord !== word;
  if (!keepsOriginalForm) {
    return lowerCaseWord;
  }

  // The capitalized form replaces a previously stored lowercase entry.
  if (wordMap[lowerCaseWord]) {
    delete wordMap[lowerCaseWord];
  }

  return word;
}
|
||||
|
||||
|
||||
|
||||
async function removeRepeatingWords({ fileName, locale }) {
|
||||
const lineReader = require('readline').createInterface({
|
||||
input: createReadStream(fileName)
|
||||
});
|
||||
|
||||
const geographicalName = /[A-Z]\w+\-[^\n]+/;
|
||||
const wordMap = {};
|
||||
|
||||
for await (const line of lineReader) {
|
||||
const wordKey = geographicalName.test(line) ? line : line.toLocaleLowerCase(locale);
|
||||
wordMap[wordKey] = true
|
||||
wordMap[getWordKeyPreservingCaptialization(locale, line, wordMap)] = true;
|
||||
}
|
||||
|
||||
return Object.keys(wordMap);
|
||||
|
|
@ -57,4 +86,6 @@ function printWords(wordList) {
|
|||
|
||||
|
||||
/** main **/
|
||||
// Run the deduplication exactly once and print the resulting words.
// The trailing catch logs any rejection instead of leaving it unhandled.
removeRepeatingWords(validateInput())
  .then(words => printWords(words))
  .catch(e => console.error(e));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue