optimized the word ingestion scripts for speed and allowed running multiple instances
This commit is contained in:
parent
93e3e0aec4
commit
b6b8d5bed0
2 changed files with 35 additions and 33 deletions
|
|
@@ -26,14 +26,17 @@ LOCALE=$1
|
|||
DICTIONARY_FILE=$2
|
||||
NEW_WORDS_FILE=$3
|
||||
FREQUENCY_FILE=$4
|
||||
WORK_DIR="/tmp/TT9_$(uuidgen)"
|
||||
|
||||
sed -E 's/[\t0-9]+//g' $DICTIONARY_FILE > /tmp/_TT9_base.txt \
|
||||
&& node scripts/injest-words.js $NEW_WORDS_FILE > /tmp/_TT9_1.txt \
|
||||
&& node scripts/remove-foreign-words.js $LOCALE /tmp/_TT9_1.txt $LOCALE /tmp/_TT9_base.txt > /tmp/_TT9_2.txt \
|
||||
&& cp /tmp/_TT9_base.txt /tmp/_TT9_combined.txt \
|
||||
&& cat /tmp/_TT9_2.txt >> /tmp/_TT9_combined.txt \
|
||||
&& node scripts/remove-dictionary-repeating-words.js $LOCALE /tmp/_TT9_combined.txt > /tmp/_TT9_clean.txt \
|
||||
&& node scripts/inject-dictionary-frequencies.js /tmp/_TT9_clean.txt $FREQUENCY_FILE $LOCALE > /tmp/_TT9_output.txt \
|
||||
&& cat /tmp/_TT9_output.txt
|
||||
mkdir -p $WORK_DIR && \
|
||||
sed -E 's/[\t0-9]+//g' $DICTIONARY_FILE > $WORK_DIR/_TT9_base.txt \
|
||||
&& node scripts/injest-words.js $NEW_WORDS_FILE > $WORK_DIR/_TT9_1.txt \
|
||||
&& node scripts/remove-foreign-words.js $LOCALE $WORK_DIR/_TT9_1.txt $LOCALE $WORK_DIR/_TT9_base.txt > $WORK_DIR/_TT9_2.txt \
|
||||
&& cp $WORK_DIR/_TT9_base.txt $WORK_DIR/_TT9_combined.txt \
|
||||
&& echo >> $WORK_DIR/_TT9_combined.txt \
|
||||
&& cat $WORK_DIR/_TT9_2.txt >> $WORK_DIR/_TT9_combined.txt \
|
||||
&& node scripts/remove-dictionary-repeating-words.js $LOCALE $WORK_DIR/_TT9_combined.txt > $WORK_DIR/_TT9_clean.txt \
|
||||
&& node scripts/inject-dictionary-frequencies.js $WORK_DIR/_TT9_clean.txt $FREQUENCY_FILE $LOCALE > $WORK_DIR/_TT9_output.txt \
|
||||
&& cat $WORK_DIR/_TT9_output.txt
|
||||
|
||||
rm -f /tmp/_TT9*
|
||||
rm -rf $WORK_DIR
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue