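Small improvements to the word-processing scripts: inject-dictionary-frequencies.js now takes LOCALE as its first argument and stores frequencies in a Map, and the dictionary has tabs and digits stripped before duplicate removal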
This commit is contained in:
parent
acfc54ae3f
commit
ae85de128e
3 changed files with 22 additions and 30 deletions
|
|
@ -38,7 +38,7 @@ sed -E 's/[\t0-9]+//g' $DICTIONARY_FILE > $WORK_DIR/_TT9_base.txt \
|
||||||
&& echo >> $WORK_DIR/_TT9_combined.txt \
|
&& echo >> $WORK_DIR/_TT9_combined.txt \
|
||||||
&& cat $WORK_DIR/_TT9_2.txt >> $WORK_DIR/_TT9_combined.txt \
|
&& cat $WORK_DIR/_TT9_2.txt >> $WORK_DIR/_TT9_combined.txt \
|
||||||
&& node scripts/remove-dictionary-repeating-words.js $LOCALE $WORK_DIR/_TT9_combined.txt > $WORK_DIR/_TT9_clean.txt \
|
&& node scripts/remove-dictionary-repeating-words.js $LOCALE $WORK_DIR/_TT9_combined.txt > $WORK_DIR/_TT9_clean.txt \
|
||||||
&& node scripts/inject-dictionary-frequencies.js $WORK_DIR/_TT9_clean.txt $FREQUENCY_FILE $LOCALE > $WORK_DIR/_TT9_output.txt \
|
&& node scripts/inject-dictionary-frequencies.js $LOCALE $WORK_DIR/_TT9_clean.txt $FREQUENCY_FILE > $WORK_DIR/_TT9_output.txt \
|
||||||
&& cat $WORK_DIR/_TT9_output.txt
|
&& cat $WORK_DIR/_TT9_output.txt
|
||||||
|
|
||||||
rm -rf $WORK_DIR
|
rm -rf $WORK_DIR
|
||||||
|
|
|
||||||
|
|
@ -29,8 +29,9 @@ FREQUENCY_FILE=$4
|
||||||
WORK_DIR="/tmp/TT9_$(uuidgen)"
|
WORK_DIR="/tmp/TT9_$(uuidgen)"
|
||||||
|
|
||||||
mkdir -p $WORK_DIR \
|
mkdir -p $WORK_DIR \
|
||||||
&& node scripts/remove-dictionary-repeating-words.js $LOCALE $DICTIONARY_FILE > $WORK_DIR/clean.txt \
|
&& sed -E 's/[\t0-9]+//g' $DICTIONARY_FILE > $WORK_DIR/nofreq.txt \
|
||||||
&& node scripts/inject-dictionary-frequencies.js $WORK_DIR/clean.txt $FREQUENCY_FILE $LOCALE > $WORK_DIR/freqz.txt \
|
&& node scripts/remove-dictionary-repeating-words.js $LOCALE $WORK_DIR/nofreq.txt > $WORK_DIR/clean.txt \
|
||||||
|
&& node scripts/inject-dictionary-frequencies.js $LOCALE $WORK_DIR/clean.txt $FREQUENCY_FILE > $WORK_DIR/freqz.txt \
|
||||||
&& node scripts/sort-dictionary.js $LOCALE $WORK_DIR/freqz.txt $DEFINITION_FILE
|
&& node scripts/sort-dictionary.js $LOCALE $WORK_DIR/freqz.txt $DEFINITION_FILE
|
||||||
|
|
||||||
rm -rf $WORK_DIR
|
rm -rf $WORK_DIR
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ const DELIMITER = ' ';
|
||||||
|
|
||||||
|
|
||||||
function printHelp() {
|
function printHelp() {
|
||||||
console.log(`Usage ${basename(process.argv[1])} DICTIONARY-FILE-NAME.txt WORDS-WITH-FREQUENCIES.txt LOCALE`);
|
console.log(`Usage ${basename(process.argv[1])} LOCALE DICTIONARY-FILE-NAME.txt WORDS-WITH-FREQUENCIES.txt`);
|
||||||
console.log('Matches up the words from DICTIONARY-FILE-NAME with the frequencies in WORDS-WITH-FREQUENCIES file.');
|
console.log('Matches up the words from DICTIONARY-FILE-NAME with the frequencies in WORDS-WITH-FREQUENCIES file.');
|
||||||
console.log('LOCALE could be any valid JS locale, for exmaple: en, en-US, etc...');
|
console.log('LOCALE could be any valid JS locale, for exmaple: en, en-US, etc...');
|
||||||
}
|
}
|
||||||
|
|
@ -19,18 +19,22 @@ function validateInput() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (!existsSync(process.argv[4])) {
|
||||||
|
console.error(`Failure! Could not find the WORDS-WITH-FREQUENCIES file "${process.argv[4]}."`);
|
||||||
|
process.exit(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if (!existsSync(process.argv[3])) {
|
if (!existsSync(process.argv[3])) {
|
||||||
console.error(`Failure! Could not find the WORDS-WITH-FREQUENCIES file "${process.argv[3]}."`);
|
console.error(`Failure! Could not find dictionary file "${process.argv[3]}."`);
|
||||||
process.exit(2);
|
process.exit(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
if (!existsSync(process.argv[2])) {
|
locale: process.argv[2],
|
||||||
console.error(`Failure! Could not find dictionary file "${process.argv[2]}."`);
|
dictionaryFileName: process.argv[3],
|
||||||
process.exit(2);
|
wordsWithFrequenciesFileName: process.argv[4]
|
||||||
}
|
};
|
||||||
|
|
||||||
return { wordsWithFrequenciesFileName: process.argv[3], dictionaryFileName: process.argv[2], locale: process.argv[4] };
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -41,7 +45,7 @@ async function inject({ wordsWithFrequenciesFileName, dictionaryFileName, locale
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
const frequencies = {};
|
const frequencies = new Map();
|
||||||
for await (const line of lineReader) {
|
for await (const line of lineReader) {
|
||||||
if (!line.includes(DELIMITER)) {
|
if (!line.includes(DELIMITER)) {
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -54,7 +58,7 @@ async function inject({ wordsWithFrequenciesFileName, dictionaryFileName, locale
|
||||||
frequency = 0;
|
frequency = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
frequencies[word] = frequency;
|
frequencies.set(word, frequency)
|
||||||
}
|
}
|
||||||
|
|
||||||
// read the dictionary words
|
// read the dictionary words
|
||||||
|
|
@ -66,11 +70,7 @@ async function inject({ wordsWithFrequenciesFileName, dictionaryFileName, locale
|
||||||
const outputWords = [];
|
const outputWords = [];
|
||||||
for await (const word of lineReader) {
|
for await (const word of lineReader) {
|
||||||
const lowercaseWord = word.toLocaleLowerCase(locale);
|
const lowercaseWord = word.toLocaleLowerCase(locale);
|
||||||
|
outputWords.push(`${word}${ (frequencies.get(lowercaseWord) || 0) > 0 ? DELIMITER + frequencies.get(lowercaseWord) : '' }`);
|
||||||
outputWords.push({
|
|
||||||
w: `${word}`,
|
|
||||||
f: frequencies[lowercaseWord] || 0
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return outputWords;
|
return outputWords;
|
||||||
|
|
@ -78,18 +78,9 @@ async function inject({ wordsWithFrequenciesFileName, dictionaryFileName, locale
|
||||||
|
|
||||||
|
|
||||||
function printWords(wordList) {
|
function printWords(wordList) {
|
||||||
if (!Array.isArray(wordList)) {
|
if (Array.isArray(wordList)) {
|
||||||
return;
|
wordList.forEach(w => console.log(w));
|
||||||
}
|
}
|
||||||
|
|
||||||
wordList.forEach(w => {
|
|
||||||
let out = w.w;
|
|
||||||
if (w.f) {
|
|
||||||
out += `${DELIMITER}${w.f}`;
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(out);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue