NOTE: line breaks and indentation of this patch were reconstructed from a
whitespace-collapsed copy. The indentation of context and removed lines is a
best guess, so apply it with `git apply --ignore-whitespace`. The whitespace-only
change in the third hunk of sort-dictionary.js could not be recovered, and the
"index" hashes are the ones recorded in the original patch.

diff --git a/scripts/_printers.js b/scripts/_printers.js
index d8ca1471..ecf1fef6 100644
--- a/scripts/_printers.js
+++ b/scripts/_printers.js
@@ -3,5 +3,18 @@ exports.print = function(str) {
 };
 
 exports.printError = function(str) {
-  process.stderr.write(`${str}\n`);
-};
\ No newline at end of file
+  // Errors are printed with their stack trace; any other value is stringified.
+  process.stderr.write(`${str instanceof Error ? str.stack : str}\n`);
+};
+
+/**
+ * Prints each word on its own line via exports.print(), followed by a tab and
+ * its frequency when the frequency is truthy. Non-array input prints nothing.
+ *
+ * @param {Array<{word: string, frequency: number}>} words
+ */
+exports.printWordsWithFrequencies = function(words) {
+  if (Array.isArray(words)) {
+    words.forEach(w => exports.print(`${w.word}${w.frequency ? '\t' + w.frequency : ''}`));
+  }
+};
diff --git a/scripts/normalize-frequencies.js b/scripts/normalize-frequencies.js
new file mode 100644
index 00000000..e3b47d7d
--- /dev/null
+++ b/scripts/normalize-frequencies.js
@@ -0,0 +1,83 @@
+const { basename } = require('path');
+const { createReadStream, existsSync } = require('fs');
+const { createInterface } = require('readline');
+const { print, printError, printWordsWithFrequencies } = require('./_printers.js');
+
+
+/**
+ * Prints the command-line usage of this script.
+ */
+function printHelp() {
+  print(`Usage ${basename(process.argv[1])} WORD-LIST.txt`);
+  print('Normalizes dictionary frequencies up to 255.');
+}
+
+
+/**
+ * Validates the command-line arguments. Exits with code 1 when the word list
+ * argument is missing and with code 2 when the file does not exist.
+ *
+ * @returns {{fileName: string, maxAllowedFrequency: number}}
+ */
+function validateInput() {
+  if (process.argv.length < 3) {
+    printHelp();
+    process.exit(1);
+  }
+
+  if (!existsSync(process.argv[2])) {
+    printError(`Failure! Could not find word list file "${process.argv[2]}".`);
+    process.exit(2);
+  }
+
+  return {
+    fileName: process.argv[2],
+    maxAllowedFrequency: 255
+  };
+}
+
+
+/**
+ * Reads "WORD<TAB>FREQUENCY" lines from the given file and rescales the
+ * frequencies, so that the highest one becomes maxAllowedFrequency.
+ * Missing or non-numeric frequencies are treated as 0.
+ *
+ * @param {{fileName: string, maxAllowedFrequency: number}} options
+ * @returns {Promise<Array<{word: string, frequency: number}>>} empty when fileName is falsy
+ */
+async function normalize({ fileName, maxAllowedFrequency }) {
+  const words = [];
+
+  if (!fileName) {
+    return words;
+  }
+
+  let maxWordFrequency = 0;
+
+  for await (const line of createInterface({ input: createReadStream(fileName) })) {
+    const [word, rawFrequency] = line.split("\t");
+
+    const parsedFrequency = Number.parseInt(rawFrequency, 10);
+    const frequency = Number.isNaN(parsedFrequency) ? 0 : parsedFrequency;
+    maxWordFrequency = Math.max(maxWordFrequency, frequency);
+
+    words.push({word, frequency});
+  }
+
+  for (const entry of words) {
+    // Multiply before dividing: a precomputed ratio can carry a rounding error
+    // that Math.ceil() would turn into an off-by-one. When no word has a positive
+    // frequency, there is nothing to scale (and dividing by 0 would yield NaN).
+    entry.frequency = maxWordFrequency > 0
+      ? Math.ceil(entry.frequency * maxAllowedFrequency / maxWordFrequency)
+      : 0;
+  }
+
+  return words;
+}
+
+
+/** main **/
+normalize(validateInput())
+  .then(words => printWordsWithFrequencies(words))
+  .catch(e => printError(e));
diff --git a/scripts/sort-dictionary.js b/scripts/sort-dictionary.js
index 158f9b17..71f25c04 100644
--- a/scripts/sort-dictionary.js
+++ b/scripts/sort-dictionary.js
@@ -1,7 +1,7 @@
 const { basename } = require('path');
 const { createReadStream, existsSync } = require('fs');
 const { createInterface } = require('readline');
-const { print, printError } = require('./_printers.js');
+const { print, printError, printWordsWithFrequencies } = require('./_printers.js');
 
 
 function printHelp() {
@@ -35,13 +35,6 @@ function validateInput() {
 }
 
 
-function printWords(wordList) {
-  if (Array.isArray(wordList)) {
-    wordList.forEach(w => print(`${w.word}${w.frequency ? '\t' + w.frequency : ''}`));
-  }
-}
-
-
 async function readWords(fileName) {
   const words = [];
 
@@ -51,9 +44,9 @@ async function readWords(fileName) {
 
   for await (const line of createInterface({ input: createReadStream(fileName) })) {
     const [word, frequency] = line.split("\t");
-    words.push({
-      word,
-      frequency: Number.isNaN(Number.parseInt(frequency)) ? 0 : Number.parseInt(frequency)
+    words.push({
+      word,
+      frequency: Number.isNaN(Number.parseInt(frequency)) ? 0 : Number.parseInt(frequency)
     });
   }
 
@@ -122,5 +115,5 @@ async function work({ definitionFile, wordsFile, locale }) {
 
 /** main **/
 work(validateInput())
-  .then(words => printWords(words))
-  .catch(e => printError(e));
\ No newline at end of file
+  .then(words => printWordsWithFrequencies(words))
+  .catch(e => printError(e));