1
0
Fork 0
tt9/scripts/aosp2tt9.js
sspanak 8a971ea15e JavaScript code cleanup and optimizations
* removed all console statements

  * sorting script: no longer creating functions within the loop

  * aosp2tt9: fixed incorrect usage of a global variable

  * injest-words: fixed a non-strict comparison

  * remove-repeating-words: commented out the case-sensitive search function, as it may be going out of use
2024-03-28 09:59:27 +02:00

78 lines
No EOL
1.9 KiB
JavaScript

const { basename } = require('path');
const { createReadStream, existsSync } = require('fs');
const { createInterface } = require('readline');
const { print, printError } = require('./_printers.js');
/**
 * Prints the usage line, built from the name of the running script, followed
 * by a short description of what the script does and of its optional
 * arguments.
 */
function printHelp() {
  const scriptName = basename(process.argv[1]);
  const helpLines = [
    `Usage ${scriptName} aosp-dictionary-file.txt [minimum-frequency] [--no-freq]`,
    'Converts an AOSP dictionary to TT9 compatible format. The second parameter must be an integer and allows for filtering words with frequency less than the given number. If "--no-freq" is set, only words without frequencies will be listed.',
  ];

  for (const helpLine of helpLines) {
    print(helpLine);
  }
}
/**
 * Validates the command-line arguments and turns them into conversion options.
 *
 * Prints the usage text and exits with code 1 when no dictionary file is
 * given; prints an error and exits with code 2 when the given file does not
 * exist.
 *
 * @returns {{fileName: string, minFrequency: number, noFrequencies: boolean}}
 *   the dictionary path, the minimum frequency (0 when absent or not an
 *   integer) and whether "--no-freq" was passed
 */
function validateInput() {
  if (process.argv.length < 3) {
    printHelp();
    process.exit(1);
  }

  const fileName = process.argv[2];
  if (!existsSync(fileName)) {
    printError(`Failure! Could not find dictionary file "${fileName}".`);
    process.exit(2);
  }

  // Parse once, with an explicit radix; a missing or non-numeric argument
  // yields NaN and falls back to 0 below.
  const parsedFrequency = Number.parseInt(process.argv[3], 10);

  // The usage text marks the minimum frequency as optional, so "--no-freq" may
  // directly follow the file name. Look for it among all trailing arguments
  // instead of only at a fixed position.
  const trailingArgs = process.argv.slice(3);

  return {
    fileName,
    minFrequency: Number.isNaN(parsedFrequency) ? 0 : parsedFrequency,
    noFrequencies: trailingArgs.includes('--no-freq'),
  };
}
/**
 * Prints each entry of the given list with a separate `print` call.
 * A value that is not an array is ignored.
 *
 * @param {*} wordList - the entries to print
 */
function printWords(wordList) {
  if (!Array.isArray(wordList)) {
    return;
  }

  wordList.forEach((entry) => {
    print(entry);
  });
}
// Patterns for the dictionary lines the converter recognises. They are
// defined once here instead of being re-created for every line read.
const AOSP_HEADER_LINE = /^dictionary=main.+$/;
const AOSP_LEADING_WHITESPACE = /^\s+/;
const AOSP_SHORTCUT_LINE = /^shortcut=.+/;
const AOSP_WORD_LINE = /^word=([^,]+),f=(\d+)($|,.+$)/;

/**
 * Converts a single dictionary line into an output entry.
 *
 * @param {string} line - one raw line of the dictionary file
 * @param {number} minFrequency - when truthy, entries with a missing or lower frequency are dropped
 * @param {boolean} noFrequencies - when truthy, only the part before the first tab is kept
 * @returns {string} the entry ("word<TAB>frequency" or just "word"), or '' when the line yields nothing
 */
function convertAospLine(line, minFrequency, noFrequencies) {
  // The order matters: the header is matched before the indentation is
  // stripped, while shortcut and word lines are matched after it.
  const entry = line
    .replace(AOSP_HEADER_LINE, '')
    .replace(AOSP_LEADING_WHITESPACE, '')
    .replace(AOSP_SHORTCUT_LINE, '')
    .replace(AOSP_WORD_LINE, '$1\t$2');

  if (entry === '') {
    return '';
  }

  const [text, frequency] = entry.split('\t');

  // A missing frequency parses to NaN, which never satisfies ">=", so entries
  // without a frequency are dropped whenever a minimum is requested.
  if (minFrequency && !(Number.parseInt(frequency, 10) >= minFrequency)) {
    return '';
  }

  return noFrequencies ? text : entry;
}

/**
 * Reads the given dictionary file line by line and collects the converted
 * entries in file order.
 *
 * @param {{fileName: string, minFrequency: number, noFrequencies: boolean}} options
 *   the file to read, the minimum frequency filter (0 disables it) and whether
 *   frequencies are stripped from the output
 * @returns {Promise<string[]>} the non-empty converted entries
 */
async function convert({ fileName, minFrequency, noFrequencies }) {
  const words = [];
  const lineReader = createInterface({ input: createReadStream(fileName) });

  for await (const line of lineReader) {
    const entry = convertAospLine(line, minFrequency, noFrequencies);
    if (entry !== '') {
      words.push(entry);
    }
  }

  return words;
}
/** main **/
// The arguments are validated synchronously, before any asynchronous work
// starts; failures while converting or printing are reported via printError.
const cliOptions = validateInput();

(async () => {
  try {
    printWords(await convert(cliOptions));
  } catch (failure) {
    printError(failure);
  }
})();