language processing scripts improvements
* remove-foreign-words script now supports blacklist and whitelist mode
* fixed .domain sorting
* clean'n'sort script
This commit is contained in:
parent
071a4c29c2
commit
ef2f92c0ac
4 changed files with 71 additions and 46 deletions
|
|
@ -33,7 +33,7 @@ WORK_DIR="/tmp/TT9_$(uuidgen)"
|
|||
mkdir -p $WORK_DIR && \
|
||||
sed -E 's/[\t0-9]+//g' $DICTIONARY_FILE > $WORK_DIR/_TT9_base.txt \
|
||||
&& node scripts/injest-words.js $NEW_WORDS_FILE $IGNORE_SPLIT_LIST_FILE > $WORK_DIR/_TT9_1.txt \
|
||||
&& node scripts/remove-foreign-words.js $LOCALE $WORK_DIR/_TT9_1.txt $LOCALE $WORK_DIR/_TT9_base.txt > $WORK_DIR/_TT9_2.txt \
|
||||
&& node scripts/remove-foreign-words.js --blacklist $LOCALE $WORK_DIR/_TT9_1.txt $LOCALE $WORK_DIR/_TT9_base.txt > $WORK_DIR/_TT9_2.txt \
|
||||
&& cp $WORK_DIR/_TT9_base.txt $WORK_DIR/_TT9_combined.txt \
|
||||
&& echo >> $WORK_DIR/_TT9_combined.txt \
|
||||
&& cat $WORK_DIR/_TT9_2.txt >> $WORK_DIR/_TT9_combined.txt \
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue