1
0
Fork 0

Changed the injest-words script to split dashed words only at the first dash

This commit is contained in:
sspanak 2024-01-07 18:00:20 +02:00 committed by Dimo Karaivanov
parent b6b8d5bed0
commit cffa66899d

View file

@ -54,26 +54,35 @@ function splitDashedWords(inputWords) {
return []; return [];
} }
const wordsSet = new Set(); const dashedRoots = new Set();
const repeatingDashedRoots = new Set();
for (const word of inputWords) { for (const word of inputWords) {
if (!word.includes('-')) { const [root, ...others] = word.split('-');
wordsSet.add(word); if (root === undefined || others.length != 1) {
continue; continue;
} }
const parts = word.split('-'); if (dashedRoots.has(root)) {
let root = ''; repeatingDashedRoots.add(root);
} else {
for (let i = 0; i < parts.length - 1; i++) { dashedRoots.add(root);
root += `${parts[i]}-`; }
wordsSet.add(root);
} }
wordsSet.add(parts[parts.length - 1]); const outputWords = new Set();
for (const word of inputWords) {
const [root, ...others] = word.split('-');
if (root && others.length === 1 && repeatingDashedRoots.has(root)) {
outputWords.add(`${root}-`);
outputWords.add(others.join('-'));
} else {
outputWords.add(word);
}
} }
return Array.from(wordsSet); return Array.from(outputWords);
} }