112 lines
No EOL
2.3 KiB
JavaScript
112 lines
No EOL
2.3 KiB
JavaScript
const { basename } = require('path');
|
|
const { createReadStream, existsSync } = require('fs');
|
|
const { createInterface } = require('readline');
|
|
|
|
|
|
/**
 * Prints the command-line usage text to standard output.
 *
 * The script name shown in the usage line is the base name of
 * `process.argv[1]`, i.e. the script file Node.js was started with.
 *
 * @returns {void}
 */
function printHelp() {
  const scriptName = basename(process.argv[1]);
  const helpLines = [
    `Usage ${scriptName} LOCALE word-list.txt`,
    'Searches for compound words whose parts also exist as separate words and removes the compound variants.',
    'For example, "fly-by" will be removed, if the word list contains both "fly" and "by".',
    'LOCALE could be any valid JS locale, for example: en, en-US, etc...',
  ];

  for (const line of helpLines) {
    console.log(line);
  }
}
|
|
|
|
|
|
/**
 * Validates the command-line arguments and turns them into the options
 * object consumed by the rest of the script.
 *
 * Expected invocation: `node <script> LOCALE word-list.txt`, so the locale is
 * `process.argv[2]` and the word list path is `process.argv[3]`.
 *
 * Terminates the process with exit code 1 (after printing the help text) when
 * fewer than two arguments were given, and with exit code 2 when the word
 * list file does not exist.
 *
 * @returns {{fileName: string, locale: string, separator: string}} The word
 *   list path, the locale, and the compound-word separator (always '-').
 */
function validateInput() {
  const [, , locale, fileName] = process.argv;

  if (process.argv.length < 4) {
    printHelp();
    process.exit(1);
  }

  if (!existsSync(fileName)) {
    // The period closes the sentence; it must stay outside the quoted name.
    console.error(`Failure! Could not find word list file "${fileName}".`);
    process.exit(2);
  }

  return { fileName, locale, separator: '-' };
}
|
|
|
|
|
|
/**
 * Writes every word of the given set to standard output, one per line, in
 * the set's iteration (insertion) order.
 *
 * @param {Set<string>} wordList - Words to print. Anything that is not a
 *   `Set` is ignored and nothing is printed.
 * @returns {void}
 */
function printWords(wordList) {
  if (!(wordList instanceof Set)) {
    return;
  }

  for (const word of wordList) {
    console.log(word);
  }
}
|
|
|
|
|
|
/**
 * Reads a word list file line by line and collects the lines into a set.
 *
 * Each non-empty line becomes one entry; duplicates collapse because the
 * result is a `Set`. Empty lines are skipped so that a blank line in the
 * file does not end up as an empty "word" in the result.
 *
 * @param {string} fileName - Path of the word list file. A falsy value
 *   yields an empty set without touching the file system.
 * @returns {Promise<Set<string>>} The words in file order.
 */
async function readWords(fileName) {
  const words = new Set();

  if (!fileName) {
    return words;
  }

  const lines = createInterface({
    input: createReadStream(fileName),
    // Always treat "\r\n" as a single line break, even when the two
    // characters arrive in separate chunks.
    crlfDelay: Infinity,
  });

  for await (const line of lines) {
    if (line.length > 0) {
      words.add(line);
    }
  }

  return words;
}
|
|
|
|
|
|
/**
 * Returns the words of `words` without those compound words whose parts all
 * exist as separate words.
 *
 * A word is "compound" when it contains `separator`. It is split on the
 * separator, empty parts (from leading, trailing or doubled separators) are
 * ignored, and each remaining part is lower-cased with `locale` and looked up
 * in `lowerCaseWords`. The compound is dropped only when every part is found.
 * A word that consists of separators only has no parts to look up and is
 * therefore kept.
 *
 * @param {string|string[]} locale - Locale passed to `toLocaleLowerCase`.
 * @param {Set<string>} words - The word list. Anything that is not a `Set`
 *   yields an empty result.
 * @param {Set<string>} [lowerCaseWords] - Lower-cased lookup set for `words`.
 *   When it is not a `Set`, the lookup set is derived from `words`.
 * @param {string} [separator='-'] - Separator between compound parts.
 * @returns {Set<string>} A new set, in the iteration order of `words`.
 */
function removeCompoundWords(locale, words, lowerCaseWords, separator = '-') {
  if (!(words instanceof Set)) {
    return new Set();
  }

  const knownWords = lowerCaseWords instanceof Set
    ? lowerCaseWords
    : new Set([...words].map((word) => word.toLocaleLowerCase(locale)));

  const isKnown = (part) => knownWords.has(part.toLocaleLowerCase(locale));

  const uniqueWords = new Set();
  for (const word of words) {
    // Simple words are always kept.
    if (!word.includes(separator)) {
      uniqueWords.add(word);
      continue;
    }

    // Compound words: keep unless every non-empty part is a known word.
    const parts = word.split(separator).filter((part) => part.length > 0);
    if (parts.length === 0 || !parts.every(isKnown)) {
      uniqueWords.add(word);
    }
  }

  return uniqueWords;
}
|
|
|
|
|
|
/**
 * Builds a set containing the locale-aware lower-case form of every word.
 *
 * @param {string|string[]} locale - Locale passed to `toLocaleLowerCase`.
 * @param {Set<string>} words - Words to convert. Anything that is not a
 *   `Set` yields an empty result.
 * @returns {Set<string>} A new set of lower-cased words; words that differ
 *   only in case collapse into one entry.
 */
function wordsToLowerCase(locale, words) {
  const lowerWords = new Set();

  if (!(words instanceof Set)) {
    return lowerWords;
  }

  for (const word of words) {
    lowerWords.add(word.toLocaleLowerCase(locale));
  }

  return lowerWords;
}
|
|
|
|
|
|
/**
 * Runs the whole pipeline: reads the word list, builds its lower-case lookup
 * set, and removes the compound words whose parts exist on their own.
 *
 * @param {{fileName: string, locale: string, separator: string}} options -
 *   Word list path, locale used for lower-casing, and compound separator.
 * @returns {Promise<Set<string>>} The filtered word list.
 */
async function work({ fileName, locale, separator }) {
  const words = await readWords(fileName);
  const lowerCaseWords = wordsToLowerCase(locale, words);

  return removeCompoundWords(locale, words, lowerCaseWords, separator);
}
|
|
|
|
|
|
|
|
/** main **/
// Validate the arguments, run the pipeline and print the remaining words.
work(validateInput())
  .then((words) => printWords(words))
  .catch((e) => {
    console.error(e);
    // Report the failure through the exit status as well, so that shells and
    // scripts calling this tool can detect it.
    process.exitCode = 1;
  });