1
0
Fork 0
tt9/scripts/normalize-frequencies.js

63 lines
1.5 KiB
JavaScript

const { basename } = require('path');
const { createReadStream, existsSync } = require('fs');
const { createInterface } = require('readline');
const { print, printError, printWordsWithFrequencies } = require('./_printers.js');
/**
 * Prints the command-line usage text: one usage line built from the name of
 * the running script (the base name of process.argv[1]), followed by a
 * one-line description of what the script does.
 */
function printHelp() {
  const scriptName = basename(process.argv[1]);
  const helpLines = [
    `Usage ${scriptName} WORD-LIST.txt`,
    'Normalizes dictionary frequencies up to 255.',
  ];

  // Pass exactly one argument per call, in order, just like two direct calls.
  for (const helpLine of helpLines) {
    print(helpLine);
  }
}
/**
 * Validates the command-line arguments and builds the options for the
 * normalization run.
 *
 * Exits the process with code 1 (after printing the usage text) when no word
 * list argument was given, and with code 2 when the given word list file does
 * not exist.
 *
 * @returns {{fileName: string, maxAllowedFrequency: number}} the path of the
 *   word list taken from process.argv[2], and the upper bound (255) for
 *   normalized frequencies.
 */
function validateInput() {
  if (process.argv.length < 3) {
    printHelp();
    process.exit(1);
  }

  const fileName = process.argv[2];

  if (!existsSync(fileName)) {
    // Report the path that was actually checked (argv[2]); the message used
    // to interpolate argv[3], which is normally undefined.
    printError(`Failure! Could not find word list file "${fileName}".`);
    process.exit(2);
  }

  return {
    fileName,
    maxAllowedFrequency: 255,
  };
}
/**
 * Reads a tab-separated word list and rescales the word frequencies onto a
 * logarithmic scale whose top value is `maxAllowedFrequency`.
 *
 * Every line is split on tab characters; the first field is the word and the
 * second one its frequency. A missing or non-numeric frequency counts as 0.
 * After reading, each positive frequency `f` becomes
 * `ceil(ln(f) * maxAllowedFrequency / ln(maxWordFrequency))`, so the most
 * frequent word maps to `maxAllowedFrequency` and a frequency of 1 maps to 0.
 * Zero and negative frequencies become 0.
 *
 * @param {{fileName: string, maxAllowedFrequency: number}} options
 *   `fileName` is the path of the word list; when it is empty, an empty list
 *   is returned without reading anything.
 * @returns {Promise<Array<{word: string, frequency: number}>>} the words in
 *   file order, each with its normalized frequency.
 */
async function normalize({ fileName, maxAllowedFrequency }) {
  const words = [];
  if (!fileName) {
    return words;
  }

  let maxWordFrequency = 0;
  const lines = createInterface({ input: createReadStream(fileName) });

  for await (const line of lines) {
    const [word, rawFrequency] = line.split('\t');
    const parsedFrequency = Number.parseInt(rawFrequency, 10);
    const frequency = Number.isNaN(parsedFrequency) ? 0 : parsedFrequency;

    maxWordFrequency = Math.max(maxWordFrequency, frequency);
    words.push({ word, frequency });
  }

  // ln(1) is 0 and ln(0) is -Infinity, so a maximum of 1 or less would yield an
  // infinite or negative-zero ratio and NaN results. No frequency can exceed 1
  // in that case, and 1 maps to 0 on this scale anyway, so a ratio of 0 is
  // the consistent choice.
  const normalizationRatio = maxWordFrequency > 1
    ? maxAllowedFrequency / Math.log(maxWordFrequency)
    : 0;

  for (const entry of words) {
    // Clamp: floating-point rounding can push the top value a hair above the
    // limit, which Math.ceil would then turn into maxAllowedFrequency + 1.
    entry.frequency = entry.frequency > 0
      ? Math.min(maxAllowedFrequency, Math.ceil(Math.log(entry.frequency) * normalizationRatio))
      : 0;
  }

  return words;
}
/** main **/
// Validate the command line, normalize the given word list and print the
// result. Any failure while reading or printing is reported through
// printError().
normalize(validateInput())
  .then(words => printWordsWithFrequencies(words))
  .catch(e => {
    printError(e);
    // Report the failure to the caller as well. validateInput() already uses
    // exit codes 1 and 2, so a runtime failure gets its own non-zero code.
    // Setting exitCode (rather than calling exit) lets pending output flush.
    process.exitCode = 3;
  });