changed the injest-words script to break dashed words only up to the first dash
This commit is contained in:
parent
b6b8d5bed0
commit
cffa66899d
1 changed file with 21 additions and 12 deletions
|
|
@@ -54,26 +54,35 @@ function splitDashedWords(inputWords) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
const wordsSet = new Set();
|
const dashedRoots = new Set();
|
||||||
|
const repeatingDashedRoots = new Set();
|
||||||
|
|
||||||
for (const word of inputWords) {
|
for (const word of inputWords) {
|
||||||
if (!word.includes('-')) {
|
const [root, ...others] = word.split('-');
|
||||||
wordsSet.add(word);
|
if (root === undefined || others.length != 1) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const parts = word.split('-');
|
if (dashedRoots.has(root)) {
|
||||||
let root = '';
|
repeatingDashedRoots.add(root);
|
||||||
|
} else {
|
||||||
for (let i = 0; i < parts.length - 1; i++) {
|
dashedRoots.add(root);
|
||||||
root += `${parts[i]}-`;
|
|
||||||
wordsSet.add(root);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
wordsSet.add(parts[parts.length - 1]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return Array.from(wordsSet);
|
const outputWords = new Set();
|
||||||
|
|
||||||
|
for (const word of inputWords) {
|
||||||
|
const [root, ...others] = word.split('-');
|
||||||
|
if (root && others.length === 1 && repeatingDashedRoots.has(root)) {
|
||||||
|
outputWords.add(`${root}-`);
|
||||||
|
outputWords.add(others.join('-'));
|
||||||
|
} else {
|
||||||
|
outputWords.add(word);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Array.from(outputWords);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue