Gujarati
This commit is contained in:
parent
240e5c444a
commit
e3d0bac90f
13 changed files with 1380245 additions and 18 deletions
|
|
@ -30,7 +30,7 @@ _If your phone does not have Google Play and it does not allow installing custom
|
||||||
## ⚙️ System Requirements
|
## ⚙️ System Requirements
|
||||||
- Android 4.4 or higher.
|
- Android 4.4 or higher.
|
||||||
- A hardware keypad or a keyboard. For touchscreen-only devices, an on-screen keypad can be activated from the Settings.
|
- A hardware keypad or a keyboard. For touchscreen-only devices, an on-screen keypad can be activated from the Settings.
|
||||||
- Storage space depends on the Android version. The lite version requires at least 4-7 Mb and the full version requires at least 120 Mb.
|
- Storage space depends on the Android version. The lite version requires at least 4-7 Mb and the full version requires at least 130 Mb.
|
||||||
- Extra storage space is necessary for language dictionaries in Predictive Mode.
|
- Extra storage space is necessary for language dictionaries in Predictive Mode.
|
||||||
- Languages with character predictions only (Korean): ~1 Mb per language
|
- Languages with character predictions only (Korean): ~1 Mb per language
|
||||||
- Very small languages (< 100k words; Kiswahili, Indonesian, Vietnamese): 5-6 Mb per language.
|
- Very small languages (< 100k words; Kiswahili, Indonesian, Vietnamese): 5-6 Mb per language.
|
||||||
|
|
|
||||||
15
app/languages/definitions/Gujarati.yml
Normal file
15
app/languages/definitions/Gujarati.yml
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
locale: gu-IN
|
||||||
|
dictionaryFile: gu-utf8.csv
|
||||||
|
abcString: કખગ
|
||||||
|
hasUpperCase: no
|
||||||
|
layout:
|
||||||
|
- [SPECIAL] # 0
|
||||||
|
- [્, ઼, ઃ, ઽ, ૐ, PUNCTUATION_IN] # 1 ==> [halant, nuqta, visarga, avagraha, om]
|
||||||
|
- [અ, આ, બ, ભ, ચ, છ, ા] # 2 ==> [a, aa, b, bh, c, ch, aa (m)]
|
||||||
|
- [દ, ધ, ડ, ઢ, એ, ઐ, ઍ, ફ, ે, ૈ, ૅ] # 3 ==> [d, dh, ḍ (dd), ḍh (ddh), e, ai, ae, ph, e (m), ai (m), ae (m)]
|
||||||
|
- [ગ, ઘ, હ, ઇ, ઈ, િ, ી] # 4 ==> [g, gh, h, i, ii, i (m), ii (m) ]
|
||||||
|
- [જ, ઝ, ક, ખ, લ, ળ, ઌ, ૢ] # 5 ==> [j, jh, k, kh, l, ḷ (ll), l̥, l̥ (m)]
|
||||||
|
- [મ, ઙ, ઞ, ણ, ન, ઓ, ઑ, ઔ, ો, ૉ, ૌ, ં, ઁ] # 6 ==> [m, ṅ (ng), ñ, ṇ (nn), n, o, ŏ, au, o (m), ŏ (m), au (m), anusvara, m̐ (m-combining with vowel, e.g. "am", "em")]
|
||||||
|
- [પ, ર, ઋ, સ, શ, ષ, ૃ, ૄ] # 7 ==> [p, r, r̥, s, sh, ss, r̥ (m), rr (m)]
|
||||||
|
- [ત, થ, ટ, ઠ, ઉ, ઊ, વ, ુ, ૂ] # 8 ==> [t, th, ṭ (tt), ṭh, u, uu, v, u (m), uu (m)]
|
||||||
|
- [ય] # 9 ==> [y]
|
||||||
1380080
app/languages/dictionaries/gu-utf8.csv
Normal file
1380080
app/languages/dictionaries/gu-utf8.csv
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -17,11 +17,7 @@ class LocaleWordsSorter {
|
||||||
|
|
||||||
|
|
||||||
LocaleWordsSorter(@Nullable Language language) {
|
LocaleWordsSorter(@Nullable Language language) {
|
||||||
if (LanguageKind.isHindi(language)) {
|
sortingPattern = LanguageKind.isIndic(language) ? Pattern.compile("\\p{L}\\p{M}+") : null;
|
||||||
sortingPattern = Pattern.compile("[\\u0904-\\u0939\\u0958-\\u0961][\\u0900-\\u0904\\u093A-\\u094F\\u0962\\u0963]+");
|
|
||||||
} else {
|
|
||||||
sortingPattern = null;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,17 +5,19 @@ import java.util.Locale;
|
||||||
public class LanguageKind {
|
public class LanguageKind {
|
||||||
public static final int KOREAN = 601579;
|
public static final int KOREAN = 601579;
|
||||||
|
|
||||||
public static boolean isArabic(Language language) { return language != null && language.getId() == 502337; }
|
|
||||||
public static boolean isCyrillic(Language language) { return language != null && language.getKeyCharacters(2).contains("а"); }
|
public static boolean isCyrillic(Language language) { return language != null && language.getKeyCharacters(2).contains("а"); }
|
||||||
|
public static boolean isLatinBased(Language language) { return language != null && language.getKeyCharacters(2).contains("a"); }
|
||||||
|
public static boolean isRTL(Language language) { return isArabic(language) || isHebrew(language); }
|
||||||
|
|
||||||
|
public static boolean isArabic(Language language) { return language != null && language.getId() == 502337; }
|
||||||
public static boolean isEnglish(Language language) { return language != null && language.getLocale().equals(Locale.ENGLISH); }
|
public static boolean isEnglish(Language language) { return language != null && language.getLocale().equals(Locale.ENGLISH); }
|
||||||
public static boolean isFrench(Language language) { return language != null && language.getId() == 596550; }
|
public static boolean isFrench(Language language) { return language != null && language.getId() == 596550; }
|
||||||
public static boolean isGreek(Language language) { return language != null && language.getId() == 597381; }
|
public static boolean isGreek(Language language) { return language != null && language.getId() == 597381; }
|
||||||
|
public static boolean isGujarati(Language language) { return language != null && language.getId() == 468647; }
|
||||||
public static boolean isHebrew(Language language) { return language != null && (language.getId() == 305450 || language.getId() == 403177); }
|
public static boolean isHebrew(Language language) { return language != null && (language.getId() == 305450 || language.getId() == 403177); }
|
||||||
public static boolean isHindi(Language language) { return language != null && language.getId() == 468264; }
|
public static boolean isHindi(Language language) { return language != null && language.getId() == 468264; }
|
||||||
public static boolean isIndic(Language language) { return isHindi(language); }
|
|
||||||
public static boolean isHinglish(Language language) { return language != null && language.getId() == 468421; }
|
public static boolean isHinglish(Language language) { return language != null && language.getId() == 468421; }
|
||||||
|
public static boolean isIndic(Language language) { return isGujarati(language) || isHindi(language); }
|
||||||
public static boolean isKorean(Language language) { return language != null && language.getId() == KOREAN; }
|
public static boolean isKorean(Language language) { return language != null && language.getId() == KOREAN; }
|
||||||
public static boolean isLatinBased(Language language) { return language != null && language.getKeyCharacters(2).contains("a"); }
|
|
||||||
public static boolean isRTL(Language language) { return isArabic(language) || isHebrew(language); }
|
|
||||||
public static boolean isUkrainian(Language language) { return language != null && language.getId() == 54645; }
|
public static boolean isUkrainian(Language language) { return language != null && language.getId() == 54645; }
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -211,9 +211,12 @@ public class SoftKeyNumber extends SoftKey {
|
||||||
// Greek diacritics and ending sigma
|
// Greek diacritics and ending sigma
|
||||||
|| currentLetter == 'ς'
|
|| currentLetter == 'ς'
|
||||||
|| (isGreek && (currentLetter < 'α' || currentLetter > 'ω'))
|
|| (isGreek && (currentLetter < 'α' || currentLetter > 'ω'))
|
||||||
// Hindi matras
|
// Hindi combining
|
||||||
|| (currentLetter >= 0x0900 && currentLetter <= 0x0903) || (currentLetter >= 0x093A && currentLetter <= 0x094F)
|
|| (currentLetter >= 0x0900 && currentLetter <= 0x0903) || (currentLetter >= 0x093A && currentLetter <= 0x094F)
|
||||||
|| (currentLetter >= 0x0951 && currentLetter <= 0x0957) || currentLetter == 0x0962 || currentLetter == 0x0963
|
|| (currentLetter >= 0x0951 && currentLetter <= 0x0957) || currentLetter == 0x0962 || currentLetter == 0x0963
|
||||||
|
// Gujarati combining
|
||||||
|
|| (currentLetter >= 0x0A81 && currentLetter <= 0x0A83) || (currentLetter >= 0xABC && currentLetter <= 0x0ACD)
|
||||||
|
|| currentLetter == 0x0AE2 || currentLetter == 0x0AE3
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,10 +25,13 @@ class Punctuation {
|
||||||
',', '-', '\'', ':', ';', '!', '?', '.'
|
',', '-', '\'', ':', ';', '!', '?', '.'
|
||||||
));
|
));
|
||||||
|
|
||||||
final private static ArrayList<Character> CombiningPunctuationIndic = new ArrayList<>(Arrays.asList(
|
final private static ArrayList<Character> CombiningPunctuationGujarati = new ArrayList<>(Arrays.asList(
|
||||||
'्', '़', 'ऽ', 'ः', '।', '॰', '॥'
|
'્', '઼', 'ઽ', 'ઃ', '।', '॰', '॥' // Indic combining chars look the same, but have different Unicode values
|
||||||
));
|
));
|
||||||
|
|
||||||
|
final private static ArrayList<Character> CombiningPunctuationHindi = new ArrayList<>(Arrays.asList(
|
||||||
|
'्', '़', 'ऽ', 'ः', '।', '॰', '॥' // Indic combining chars look the same, but have different Unicode values
|
||||||
|
));
|
||||||
|
|
||||||
final private static ArrayList<Character> CombiningPunctuationHebrew = new ArrayList<>(Arrays.asList(
|
final private static ArrayList<Character> CombiningPunctuationHebrew = new ArrayList<>(Arrays.asList(
|
||||||
',' , '-', '\'', ':', ';', '!', '?', '.', '"'
|
',' , '-', '\'', ':', ';', '!', '?', '.', '"'
|
||||||
|
|
@ -65,11 +68,16 @@ class Punctuation {
|
||||||
public static boolean isCombiningPunctuation(Language language, char ch) {
|
public static boolean isCombiningPunctuation(Language language, char ch) {
|
||||||
return
|
return
|
||||||
CombiningPunctuation.contains(ch)
|
CombiningPunctuation.contains(ch)
|
||||||
|| (LanguageKind.isIndic(language) && CombiningPunctuationIndic.contains(ch))
|
|| (LanguageKind.isGujarati(language) && CombiningPunctuationGujarati.contains(ch))
|
||||||
|
|| (LanguageKind.isHindi(language) && CombiningPunctuationHindi.contains(ch))
|
||||||
|| (LanguageKind.isHebrew(language) && CombiningPunctuationHebrew.contains(ch));
|
|| (LanguageKind.isHebrew(language) && CombiningPunctuationHebrew.contains(ch));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isCombiningPunctuation(char ch) {
|
public static boolean isCombiningPunctuation(char ch) {
|
||||||
return CombiningPunctuation.contains(ch) || CombiningPunctuationIndic.contains(ch) || CombiningPunctuationHebrew.contains(ch);
|
return
|
||||||
|
CombiningPunctuation.contains(ch)
|
||||||
|
|| CombiningPunctuationGujarati.contains(ch)
|
||||||
|
|| CombiningPunctuationHindi.contains(ch)
|
||||||
|
|| CombiningPunctuationHebrew.contains(ch);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
16
docs/dictionaries/guWordlistReadme.txt
Normal file
16
docs/dictionaries/guWordlistReadme.txt
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
Gujarati word list 1 from Stardict, adapted by Docbroke
|
||||||
|
Source: https://github.com/sspanak/tt9/issues/577#issuecomment-2515314462
|
||||||
|
License: Public Domain; permission to use in the link
|
||||||
|
|
||||||
|
Conjunct consonants list obtained from Wikipedia
|
||||||
|
Version: 2024-12-30
|
||||||
|
Sources: https://en.wikipedia.org/wiki/Gujarati_script
|
||||||
|
License: Creative Commons Attribution-ShareAlike 4.0 License
|
||||||
|
|
||||||
|
Gujarati word list and frequencies by: CC-100
|
||||||
|
Version: 2020
|
||||||
|
Source: https://data.statmt.org/cc-100/
|
||||||
|
References (PDF links are available in the source URL):
|
||||||
|
- Unsupervised Cross-lingual Representation Learning at Scale, Alexis Conneau, Kartikay Khandelwal, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco Guzmán, Edouard Grave, Myle Ott, Luke Zettlemoyer, Veselin Stoyanov, Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics (ACL), p. 8440-8451, July 2020.
|
||||||
|
- CCNet: Extracting High Quality Monolingual Datasets from Web Crawl Data, Guillaume Wenzek, Marie-Anne Lachaux, Alexis Conneau, Vishrav Chaudhary, Francisco Guzmán, Armand Joulin, Edouard Grave, Proceedings of the 12th Language Resources and Evaluation Conference (LREC), p. 4003-4012, May 2020.
|
||||||
|
Remark: Used all words that appear at least twice, and the words that appear once and are shorter than 10 characters.
|
||||||
BIN
downloads/gu-utf8.zip
Normal file
BIN
downloads/gu-utf8.zip
Normal file
Binary file not shown.
|
|
@ -1,6 +1,6 @@
|
||||||
TT9 е 12-клавишна Т9 клавиатура за устройства с физически бутони. Поддържа подскаващ текст на повече от 30 езика и бързи клавиши, а виртуалната клавиатура може да превърне всеки смартфон в Нокия от 2000 година. И най-хубавото — не Ви шпионира!
|
TT9 е 12-клавишна Т9 клавиатура за устройства с физически бутони. Поддържа подскаващ текст на повече от 30 езика и бързи клавиши, а виртуалната клавиатура може да превърне всеки смартфон в Нокия от 2000 година. И най-хубавото — не Ви шпионира!
|
||||||
|
|
||||||
Поддържани езици: английски, арабски, български, виетнамски, гръцки, датски, идиш, иврит, индонезийски, испански, италиански, каталонски, кисуахили, корейски, немски, норвежки, полски, португалски (европейски и бразилски), румънски, руски, тайски, марокански тамазихт (латиница и тифинаг), унгарски, украински, финландски, френски, хинглиш, хинди (фонетичен), холандски, хърватски, чешки, шведски, турски.
|
Поддържани езици: английски, арабски, български, виетнамски, гръцки, гуджарати (фонетичен), датски, идиш, иврит, индонезийски, испански, италиански, каталонски, кисуахили, корейски, немски, норвежки, полски, португалски (европейски и бразилски), румънски, руски, тайски, марокански тамазихт (латиница и тифинаг), унгарски, украински, финландски, френски, хинглиш, хинди (фонетичен), холандски, хърватски, чешки, шведски, турски.
|
||||||
|
|
||||||
Философия и защита на личните данни:
|
Философия и защита на личните данни:
|
||||||
- Без реклами, специални или платени функции. Всичко е напълно безплатно.
|
- Без реклами, специални или платени функции. Всичко е напълно безплатно.
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
TT9 is a 12-key T9 keyboard for devices with a hardware numpad. It supports predictive text typing in 30+ languages, configurable hotkeys, and an on-screen keypad that can turn your smartphone into a Nokia from the 2000s. And, best of all, it doesn't spy on you!
|
TT9 is a 12-key T9 keyboard for devices with a hardware numpad. It supports predictive text typing in 30+ languages, configurable hotkeys, and an on-screen keypad that can turn your smartphone into a Nokia from the 2000s. And, best of all, it doesn't spy on you!
|
||||||
|
|
||||||
Supported languages: Arabic, Bulgarian, Catalan, Croatian, Czech, Danish, Dutch, English, Finnish, French, German, Greek, Hebrew, Hindi (phonetic), Hinglish, Hungarian, Indonesian, Italian, Kiswahili, Korean, Norwegian, Polish, Portuguese (European and Brazilian), Romanian, Russian, Spanish, Swedish, Moroccan Tamazight (Latin and Tifinagh), Thai, Turkish, Ukrainian, Vietnamese, Yiddish.
|
Supported languages: Arabic, Bulgarian, Catalan, Croatian, Czech, Danish, Dutch, English, Finnish, French, German, Greek, Gujarati (phonetic), Hebrew, Hindi (phonetic), Hinglish, Hungarian, Indonesian, Italian, Kiswahili, Korean, Norwegian, Polish, Portuguese (European and Brazilian), Romanian, Russian, Spanish, Swedish, Moroccan Tamazight (Latin and Tifinagh), Thai, Turkish, Ukrainian, Vietnamese, Yiddish.
|
||||||
|
|
||||||
Philosophy and Privacy Policy:
|
Philosophy and Privacy Policy:
|
||||||
- No ads, no premium or paid features. It's all free.
|
- No ads, no premium or paid features. It's all free.
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
const { basename } = require('path');
|
const { basename } = require('path');
|
||||||
const { existsSync, readFileSync } = require('fs');
|
const { existsSync, readFileSync } = require('fs');
|
||||||
const { print, printError, printWordsWithFrequencies } = require('./_printers.js');
|
const { print, printError, printWordsWithFrequencies } = require('../_printers.js');
|
||||||
|
|
||||||
|
|
||||||
function printHelp() {
|
function printHelp() {
|
||||||
107
scripts/indic/validate-gujarati.js
Normal file
107
scripts/indic/validate-gujarati.js
Normal file
|
|
@ -0,0 +1,107 @@
|
||||||
|
const { basename } = require('path');
|
||||||
|
const { existsSync, readFileSync } = require('fs');
|
||||||
|
const { print, printError, printWordsWithFrequencies } = require('../_printers.js');
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Prints a one-line usage message that names the currently running script.
 */
function printHelp() {
  const scriptName = basename(process.argv[1]);
  print(`Usage: node ${scriptName} <file>`);
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Checks the command-line arguments of the script.
 *
 * Exits with code 1 (after printing the usage text) when no file argument was
 * given, and with code 2 when the given file does not exist.
 *
 * @returns {{file: string}} the path of the input file, taken from process.argv[2]
 */
function validateInput() {
  const hasFileArgument = process.argv.length >= 3;
  if (!hasFileArgument) {
    printHelp();
    process.exit(1);
  }

  const inputFile = process.argv[2];
  const inputFileExists = existsSync(inputFile);
  if (!inputFileExists) {
    printError(`Failure! Could not find the input file "${inputFile}".`);
    process.exit(2);
  }

  return { file: inputFile };
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Reads a UTF-8 text file and collects its distinct non-empty lines.
 *
 * Every line is trimmed before it is stored; lines that are empty after
 * trimming are dropped.
 *
 * @param {string} filename - path of the word list to read
 * @returns {Set<string>} the unique words, in order of first appearance
 */
function getWordsFromFile(filename) {
  const uniqueWords = new Set();

  for (const line of readFileSync(filename, 'utf8').split('\n')) {
    const word = line.trim();
    if (word.length > 0) {
      uniqueWords.add(word);
    }
  }

  return uniqueWords;
}
|
||||||
|
|
||||||
|
// Regex source fragments describing groups of characters in the Gujarati
// Unicode block (U+0A80-U+0AFF). FOREIGN_CHARS is a complete bracket
// expression; all the others are meant to be placed inside [...] by the caller.
const FOREIGN_CHARS = '[^\\u{A80}-\\u{AFF}]'; // anything outside U+0A80-U+0AFF
const UNWANTED_CHARS = 'ૠ\\u{AC4}\\u{AE3}-\\u{AFF}\\u{AD1}-\\u{ADF}\\u{A80}\\u{A84}\\u{A8E}\\u{A92}\\u{AA9}\\u{AB1}\\u{AB4}\\u{ABA}\\u{ABB}\\u{AC6}\\u{ACA}\\u{ACE}\\u{ACF}';
const NUMBERS = '\\u{0AE6}-\\u{0AEF}';
const VOWELS = '\\u{0A85}-\\u{0A94}\\u{0AE0}\\u{0AE1}ૐ';
const CONSONANTS = '\\u{0A95}-\\u{0AB9}';
const VOWEL_MATRAS = '\\u{0ABE}-\\u{0AC5}\\u{0AC7}-\\u{0AC9}\\u{0ACB}-\\u{0ACC}\\u{0AE2}\\u{0AE3}';
const NASALIZATIONS = '\\u{0A81}-\\u{0A83}';
const HALANT = '\\u{0ACD}';
const NUQTA = '\\u{0ABC}';
const AVAGRAHA = '\\u{0ABD}';
const ZWJ = '\\u{200D}';

// One pattern per rejection rule. They are compiled once, at load time, instead
// of once per validated word. None of them uses the "g" or "y" flag, so calling
// .test() repeatedly on the same RegExp object is stateless.
const INVALID_WORD_PATTERNS = [
  new RegExp(`(\\p{L}\\p{M}?)(?!${AVAGRAHA})\\1{2,}`, 'u'), // too many repeated letters
  new RegExp(`^[${VOWEL_MATRAS}${NASALIZATIONS}${HALANT}${NUQTA}${AVAGRAHA}]`, 'u'), // starts with a combining character
  new RegExp(`[${VOWELS}][${VOWEL_MATRAS}${NUQTA}${HALANT}]`, 'u'), // vowel + matra, nuqta or halant
  new RegExp(`[${CONSONANTS}]${HALANT}[${VOWEL_MATRAS}]`, 'u'), // consonant + halant + matra
  new RegExp(`[${NASALIZATIONS}${VOWELS}${VOWEL_MATRAS}${AVAGRAHA}]${ZWJ}`, 'u'), // invalid ZWJ
  new RegExp(`([${VOWEL_MATRAS}]{2}|[${NASALIZATIONS}]{2}|${HALANT}{2}|${NUQTA}{2})`, 'u'), // multiple combining
  new RegExp(`([${VOWEL_MATRAS}][${NASALIZATIONS}]|[${NASALIZATIONS}][${VOWEL_MATRAS}])[${VOWEL_MATRAS}${NASALIZATIONS}]`, 'u'), // multiple matra nasalizations
  new RegExp(`[${NASALIZATIONS}${HALANT}][${VOWEL_MATRAS}]`, 'u'), // modifier + matra
  new RegExp(`[^${CONSONANTS}][${NUQTA}]`, 'u'), // non-consonant + nukta
  new RegExp(`[${UNWANTED_CHARS}]`, 'u'), // characters from the unwanted list
  new RegExp(`${FOREIGN_CHARS}`, 'u'), // characters outside the Gujarati block
  new RegExp(`[${NUMBERS}]`, 'u'), // Gujarati digits
];

/**
 * Rejection rules, in the same order as INVALID_WORD_PATTERNS. Each entry takes
 * a word and returns true when the word breaks that rule.
 */
const INVALIDATORS = INVALID_WORD_PATTERNS.map((pattern) => (word) => pattern.test(word));
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * isValid
 *
 * Tells whether a word passes every rule in INVALIDATORS. Evaluation stops at
 * the first rule that rejects the word.
 *
 * Most validation rules are based on the comments here: https://github.com/harfbuzz/harfbuzz/issues/2803.
 *
 * @param {string} word - the word to check
 * @returns {boolean} false when at least one invalidator matches, true otherwise
 */
function isValid(word) {
  const isRejected = INVALIDATORS.some((invalidates) => invalidates(word));
  return !isRejected;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Placeholder for nuqta normalization; currently returns the word unchanged.
 *
 * The only transformation this function ever carried was a disabled,
 * commented-out replacement of the Devanagari letter U+0934 with the sequence
 * U+0933 U+093C.
 *
 * @param {string} word - the word to normalize
 * @returns {string} the same word, untouched
 */
function fixNuqta(word) {
  const normalized = word;
  return normalized;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Collects the anusvara spelling of every word that contains a candrabindu.
 *
 * Each word holding at least one U+0A81 is rewritten with all occurrences
 * replaced by U+0A82, and the rewritten form is added to the result. Words
 * without U+0A81 contribute nothing.
 *
 * @param {string[]|Set<string>} allWords - the words to scan
 * @returns {Set<string>} the rewritten (anusvara) forms
 */
function getWordsWithObsoleteCandrabinduInGujarati(allWords) {
  const anusvaraForms = new Set();

  allWords.forEach((candidate) => {
    if (!candidate.includes('\u{A81}')) {
      return;
    }

    anusvaraForms.add(candidate.replaceAll('\u{A81}', '\u{A82}'));
  });

  return anusvaraForms;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Reads the word list and prints every word that passes validation.
 *
 * @param {{file: string}} options - `file` is the path of the word list
 */
function work({ file }) {
  const words = [...getWordsFromFile(file)];

  // Detect obsolete candrabindu instead of anusvara in Gujarati
  const anusvaraForms = getWordsWithObsoleteCandrabinduInGujarati(words);

  for (const original of words) {
    // NOTE(review): entries of anusvaraForms never contain U+0A81, so this
    // replacement cannot change a word that is found in the set. Confirm
    // whether the lookup was meant to go the other way around.
    let candidate = original;
    if (anusvaraForms.has(original)) {
      candidate = original.replaceAll('\u{A81}', '\u{A82}');
    }

    if (!isValid(candidate)) {
      continue;
    }

    print(fixNuqta(candidate));
  }
}
|
||||||
|
|
||||||
|
// Script entry point: validate the command-line arguments, then process the file.
const scriptOptions = validateInput();
work(scriptOptions);
|
||||||
Loading…
Add table
Add a link
Reference in a new issue