1
0
Fork 0

Dictionaries update (#80)

* added missing words to the Bulgarian dictionary

* English dictionary update

* removed repeating words from the Italian and Bulgarian dictionaries

* fixed incorrectly broken words and removed repeating ones from the Ukrainian dictionary

* Russian dictionary update

* documentation update

* made it possible to type words with apostrophes (Dutch, English and Ukrainian)
This commit is contained in:
Dimo Karaivanov 2022-10-24 13:32:31 +03:00 committed by GitHub
parent 6c19edc8a3
commit 8b67929a07
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
23 changed files with 187613 additions and 57933 deletions

View file

@ -0,0 +1,60 @@
const { createReadStream, existsSync } = require('fs');
const { basename } = require('path');
const { createInterface } = require('readline');
/**
 * Prints a short usage message for this script to standard output.
 *
 * The script name shown is the base name of `process.argv[1]`.
 */
function printHelp() {
  console.log(`Usage ${basename(process.argv[1])} LOCALE FILENAME.txt `);
  console.log('Removes repeating words from a word list');
  console.log('\nLocale could be any valid JS locale, for example: en, en-US, etc...');
}
/**
 * Validates the command-line arguments and returns them as named options.
 *
 * Expects `process.argv[2]` to be the locale and `process.argv[3]` the path
 * of the word list. Terminates the process with exit code 1 (after printing
 * the help text) when fewer than two arguments were given, and with exit
 * code 2 when the word list file does not exist.
 *
 * @returns {{ fileName: string, locale: string }} the word list path and the locale.
 */
function validateInput() {
  if (process.argv.length < 4) {
    printHelp();
    process.exit(1);
  }

  const [, , locale, fileName] = process.argv;

  if (!existsSync(fileName)) {
    // The period goes after the closing quote so the reported name is exactly what was typed.
    console.error(`Failure! Could not find file "${fileName}".`);
    process.exit(2);
  }

  return { fileName, locale };
}
/**
 * Reads a word list line by line and returns its unique entries in the order
 * in which they first appear.
 *
 * A line matching `geographicalName` (an ASCII capital letter, one or more
 * word characters, a hyphen and further text) keeps its original casing.
 * Every other line is lower-cased with `locale` before being compared, so
 * "Word" and "word" collapse into the single entry "word".
 *
 * @param {{ fileName: string, locale: string }} options
 *   `fileName` is the path of the word list; `locale` is passed to
 *   `String.prototype.toLocaleLowerCase`.
 * @returns {Promise<string[]>} the de-duplicated lines.
 */
async function removeRepeatingWords({ fileName, locale }) {
  const lineReader = createInterface({
    input: createReadStream(fileName),
    // Always treat "\r\n" as one line break, regardless of how the stream is chunked.
    crlfDelay: Infinity,
  });

  // NOTE(review): `[A-Z]` and `\w` only cover ASCII, so capitalised hyphenated
  // names in non-Latin scripts are lower-cased like ordinary words - confirm intended.
  const geographicalName = /[A-Z]\w+\-[^\n]+/;

  // A Set keeps first-seen order for every entry. A plain object used as a map
  // would list integer-like keys ("2", "10") first and would lose "__proto__".
  const uniqueWords = new Set();

  for await (const line of lineReader) {
    const wordKey = geographicalName.test(line) ? line : line.toLocaleLowerCase(locale);
    uniqueWords.add(wordKey);
  }

  return [...uniqueWords];
}
/**
 * Writes every entry of a word list to standard output, one per line.
 *
 * Anything that is not an array is ignored and nothing is printed.
 *
 * @param {string[]} wordList - the words to print.
 * @returns {void}
 */
function printWords(wordList) {
  if (Array.isArray(wordList)) {
    wordList.forEach((word) => {
      console.log(word);
    });
  }
}
/** main **/
// Entry point: validate the arguments, de-duplicate the word list and print it.
// The chain ends in a rejection handler so that a failure while reading the
// file or lower-casing a line (for example an invalid locale) is reported as a
// short message with a non-zero exit code instead of an unhandled rejection.
removeRepeatingWords(validateInput())
  .then(words => printWords(words))
  .catch((error) => {
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`Failure! ${reason}`);
    // 1 and 2 are already used by validateInput(); 3 marks a processing failure.
    process.exitCode = 3;
  });