Farsi language
This commit is contained in:
parent
ffae563b95
commit
2098b1a994
9 changed files with 562357 additions and 2 deletions
17
app/languages/definitions/Farsi.yml
Normal file
17
app/languages/definitions/Farsi.yml
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
locale: fa-IR
|
||||||
|
currency: ﷼
|
||||||
|
dictionaryFile: fa-utf8.csv
|
||||||
|
abcString: أﺏﺕ
|
||||||
|
hasUpperCase: no
|
||||||
|
numerals: [۰,۱,۲,۳,۴,۵,۶,۷,۸,۹]
|
||||||
|
layout:
|
||||||
|
- [SPECIAL] # 0
|
||||||
|
- [ً,PUNCTUATION_FA] # 1
|
||||||
|
- [ب,پ,ت,ث,ة] # 2
|
||||||
|
- [ا,أ,إ,آ,ى,ؤ,ئ,ء] # 3
|
||||||
|
- [س,ش,ص,ض] # 4
|
||||||
|
- [د,ذ,ر,ز,ژ] # 5
|
||||||
|
- [ج,چ,ح,خ] # 6
|
||||||
|
- [ن,ه,و,ي,ی] # 7
|
||||||
|
- [ف,ق,ك,ک,گ,ل,م] # 8
|
||||||
|
- [ط,ظ,ع,غ] # 9
|
||||||
562295
app/languages/dictionaries/fa-utf8.csv
Normal file
562295
app/languages/dictionaries/fa-utf8.csv
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -7,10 +7,11 @@ public class LanguageKind {
|
||||||
|
|
||||||
public static boolean isCyrillic(Language language) { return language != null && language.getKeyCharacters(2).contains("а"); }
|
public static boolean isCyrillic(Language language) { return language != null && language.getKeyCharacters(2).contains("а"); }
|
||||||
public static boolean isLatinBased(Language language) { return language != null && language.getKeyCharacters(2).contains("a"); }
|
public static boolean isLatinBased(Language language) { return language != null && language.getKeyCharacters(2).contains("a"); }
|
||||||
public static boolean isRTL(Language language) { return isArabic(language) || isHebrew(language); }
|
public static boolean isRTL(Language language) { return isArabic(language) || isFarsi(language) || isHebrew(language); }
|
||||||
|
|
||||||
public static boolean isArabic(Language language) { return language != null && language.getId() == 502337; }
|
public static boolean isArabic(Language language) { return language != null && language.getId() == 502337; }
|
||||||
public static boolean isEnglish(Language language) { return language != null && language.getLocale().equals(Locale.ENGLISH); }
|
public static boolean isEnglish(Language language) { return language != null && language.getLocale().equals(Locale.ENGLISH); }
|
||||||
|
public static boolean isFarsi(Language language) { return language != null && language.getId() == 599078; }
|
||||||
public static boolean isFrench(Language language) { return language != null && language.getId() == 596550; }
|
public static boolean isFrench(Language language) { return language != null && language.getId() == 596550; }
|
||||||
public static boolean isGreek(Language language) { return language != null && language.getId() == 597381; }
|
public static boolean isGreek(Language language) { return language != null && language.getId() == 597381; }
|
||||||
public static boolean isGujarati(Language language) { return language != null && language.getId() == 468647; }
|
public static boolean isGujarati(Language language) { return language != null && language.getId() == 468647; }
|
||||||
|
|
|
||||||
|
|
@ -86,6 +86,7 @@ public class NaturalLanguage extends Language implements Comparable<NaturalLangu
|
||||||
specialChars.put(SPECIAL_CHARS_PLACEHOLDER, Characters.Special);
|
specialChars.put(SPECIAL_CHARS_PLACEHOLDER, Characters.Special);
|
||||||
specialChars.put(PUNCTUATION_PLACEHOLDER, Characters.PunctuationEnglish);
|
specialChars.put(PUNCTUATION_PLACEHOLDER, Characters.PunctuationEnglish);
|
||||||
specialChars.put(PUNCTUATION_PLACEHOLDER + "_AR", Characters.PunctuationArabic);
|
specialChars.put(PUNCTUATION_PLACEHOLDER + "_AR", Characters.PunctuationArabic);
|
||||||
|
specialChars.put(PUNCTUATION_PLACEHOLDER + "_FA", Characters.PunctuationFarsi);
|
||||||
specialChars.put(PUNCTUATION_PLACEHOLDER + "_FR", Characters.PunctuationFrench);
|
specialChars.put(PUNCTUATION_PLACEHOLDER + "_FR", Characters.PunctuationFrench);
|
||||||
specialChars.put(PUNCTUATION_PLACEHOLDER + "_DE", Characters.PunctuationGerman);
|
specialChars.put(PUNCTUATION_PLACEHOLDER + "_DE", Characters.PunctuationGerman);
|
||||||
specialChars.put(PUNCTUATION_PLACEHOLDER + "_GR", Characters.PunctuationGreek);
|
specialChars.put(PUNCTUATION_PLACEHOLDER + "_GR", Characters.PunctuationGreek);
|
||||||
|
|
|
||||||
|
|
@ -229,6 +229,7 @@ public class SuggestionsBar {
|
||||||
char firstChar = trimmedSuggestion.charAt(0);
|
char firstChar = trimmedSuggestion.charAt(0);
|
||||||
|
|
||||||
String prefix = Character.isAlphabetic(firstChar) && !Characters.isCombiningPunctuation(firstChar) ? STEM_VARIATION_PREFIX : STEM_PUNCTUATION_VARIATION_PREFIX;
|
String prefix = Character.isAlphabetic(firstChar) && !Characters.isCombiningPunctuation(firstChar) ? STEM_VARIATION_PREFIX : STEM_PUNCTUATION_VARIATION_PREFIX;
|
||||||
|
prefix = Characters.isFathatan(firstChar) ? " " : prefix; // Fix incorrect display of fathatan without a base character. It is a combining character, but since it is a letter, we must include a base character not to break it, with a "..." prefix
|
||||||
suggestions.add(prefix + formatUnreadableSuggestion(trimmedSuggestion));
|
suggestions.add(prefix + formatUnreadableSuggestion(trimmedSuggestion));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -251,7 +252,6 @@ public class SuggestionsBar {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
private void setSuggestionsOnScreen() {
|
private void setSuggestionsOnScreen() {
|
||||||
if (mView != null) {
|
if (mView != null) {
|
||||||
mSuggestionsAdapter.resetItems(selectedIndex);
|
mSuggestionsAdapter.resetItems(selectedIndex);
|
||||||
|
|
|
||||||
|
|
@ -48,4 +48,8 @@ public class Characters extends Emoji {
|
||||||
public static boolean isCurrency(Language language, String c) {
|
public static boolean isCurrency(Language language, String c) {
|
||||||
return Currency.contains(c) || (language != null && language.getCurrency().equals(c));
|
return Currency.contains(c) || (language != null && language.getCurrency().equals(c));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static boolean isFathatan(char ch) {
|
||||||
|
return ch == 0x064B;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,10 @@ class Punctuation {
|
||||||
',', '-', '\'', ':', ';', '!', '?', '.'
|
',', '-', '\'', ':', ';', '!', '?', '.'
|
||||||
));
|
));
|
||||||
|
|
||||||
|
final public static ArrayList<Character> CombiningPunctuationFarsi = new ArrayList<>(Arrays.asList(
|
||||||
|
'،', ZWNJ.charAt(0), '-', '\'', ':', '؛', '!', '؟', '.'
|
||||||
|
));
|
||||||
|
|
||||||
final private static ArrayList<Character> CombiningPunctuationGujarati = new ArrayList<>(Arrays.asList(
|
final private static ArrayList<Character> CombiningPunctuationGujarati = new ArrayList<>(Arrays.asList(
|
||||||
'્', '઼', 'ઽ', 'ઃ', '।', '॰', '॥' // Indic combining chars look the same, but have different Unicode values
|
'્', '઼', 'ઽ', 'ઃ', '।', '॰', '॥' // Indic combining chars look the same, but have different Unicode values
|
||||||
));
|
));
|
||||||
|
|
@ -41,6 +45,11 @@ class Punctuation {
|
||||||
",", ".", "-", "(", ")", "&", "~", "`", ";", ":", "'", "\"", "!", "?"
|
",", ".", "-", "(", ")", "&", "~", "`", ";", ":", "'", "\"", "!", "?"
|
||||||
));
|
));
|
||||||
|
|
||||||
|
// Same as the Arabic punctuation list, with the zero-width non-joiner (ZWNJ) added
|
||||||
|
final public static ArrayList<String> PunctuationFarsi = new ArrayList<>(Arrays.asList(
|
||||||
|
"،", ".", "-", ZWNJ, "(", ")", "&", "~", "`", "'", "\"", "؛", ":", "!", "؟"
|
||||||
|
));
|
||||||
|
|
||||||
final public static ArrayList<String> PunctuationFrench = new ArrayList<>(Arrays.asList(
|
final public static ArrayList<String> PunctuationFrench = new ArrayList<>(Arrays.asList(
|
||||||
",", ".", "-", "«", "»", "(", ")", "&", "`", "~", ";", ":", "'", "\"", "!", "?"
|
",", ".", "-", "«", "»", "(", ")", "&", "`", "~", ";", ":", "'", "\"", "!", "?"
|
||||||
));
|
));
|
||||||
|
|
@ -64,6 +73,7 @@ class Punctuation {
|
||||||
public static boolean isCombiningPunctuation(Language language, char ch) {
|
public static boolean isCombiningPunctuation(Language language, char ch) {
|
||||||
return
|
return
|
||||||
CombiningPunctuation.contains(ch)
|
CombiningPunctuation.contains(ch)
|
||||||
|
|| (LanguageKind.isFarsi(language) && CombiningPunctuationFarsi.contains(ch))
|
||||||
|| (LanguageKind.isGujarati(language) && CombiningPunctuationGujarati.contains(ch))
|
|| (LanguageKind.isGujarati(language) && CombiningPunctuationGujarati.contains(ch))
|
||||||
|| (LanguageKind.isHindi(language) && CombiningPunctuationHindi.contains(ch))
|
|| (LanguageKind.isHindi(language) && CombiningPunctuationHindi.contains(ch))
|
||||||
|| (LanguageKind.isHebrew(language) && CombiningPunctuationHebrew.contains(ch));
|
|| (LanguageKind.isHebrew(language) && CombiningPunctuationHebrew.contains(ch));
|
||||||
|
|
@ -72,6 +82,7 @@ class Punctuation {
|
||||||
public static boolean isCombiningPunctuation(char ch) {
|
public static boolean isCombiningPunctuation(char ch) {
|
||||||
return
|
return
|
||||||
CombiningPunctuation.contains(ch)
|
CombiningPunctuation.contains(ch)
|
||||||
|
|| CombiningPunctuationFarsi.contains(ch)
|
||||||
|| CombiningPunctuationGujarati.contains(ch)
|
|| CombiningPunctuationGujarati.contains(ch)
|
||||||
|| CombiningPunctuationHindi.contains(ch)
|
|| CombiningPunctuationHindi.contains(ch)
|
||||||
|| CombiningPunctuationHebrew.contains(ch);
|
|| CombiningPunctuationHebrew.contains(ch);
|
||||||
|
|
|
||||||
|
|
@ -263,6 +263,11 @@ static def extractAlphabetExtraCharsFromLine(String languageName, String line) {
|
||||||
.replace(',', '')
|
.replace(',', '')
|
||||||
.replace(' ', '')
|
.replace(' ', '')
|
||||||
|
|
||||||
|
// Append the zero-width non-joiner (U+200C) when the layout line uses PUNCTUATION_FA or PUNCTUATION_IN
|
||||||
|
if (line.contains("PUNCTUATION_FA") || line.contains("PUNCTUATION_IN")) {
|
||||||
|
allChars += '\u200C'
|
||||||
|
}
|
||||||
|
|
||||||
return DEFAULT + allChars
|
return DEFAULT + allChars
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
21
docs/dictionaries/faWordlistReadme.txt
Normal file
21
docs/dictionaries/faWordlistReadme.txt
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
Farsi wordlist 1 by: shahind
|
||||||
|
Version: 1a9c953f6edaebaff5d434b319e1dd4b700a5fc3 (2022-03-04)
|
||||||
|
Source: https://github.com/shahind/Persian-Words-Database
|
||||||
|
License: Public Domain
|
||||||
|
|
||||||
|
Farsi wordlist 2 by: Semnan University
|
||||||
|
Version: 48bcda68e1b89d6d259e326e2c5985e9e87d5493 (2022-08-15)
|
||||||
|
Source: https://github.com/semnan-university-ai/English-Persian-Word-Database
|
||||||
|
License: (Apache 2.0) https://github.com/semnan-university-ai/English-Persian-Word-Database/blob/master/LICENSE
|
||||||
|
|
||||||
|
Farsi wordlist 3 by: AnySoft Keyboard
|
||||||
|
Version: f7b0b74c043259dc69b037f71f5109974cf15d59 (2020-04-20)
|
||||||
|
Source: https://github.com/AnySoftKeyboard/AnySoftKeyboard
|
||||||
|
License: (Apache 2.0) https://github.com/AnySoftKeyboard/AnySoftKeyboard/blob/main/LICENSE
|
||||||
|
|
||||||
|
All wordlists were manually cleaned of duplicate and invalid words (as far as possible).
|
||||||
|
|
||||||
|
Word frequencies by: Hermit Dave
|
||||||
|
Version: 525f9b560de45753a5ea01069454e72e9aa541c6 (2022-02-07)
|
||||||
|
Source: https://github.com/hermitdave/FrequencyWords
|
||||||
|
License: (MIT) https://github.com/hermitdave/FrequencyWords/blob/master/LICENSE
|
||||||
Loading…
Add table
Add a link
Reference in a new issue