Bulgarian update (#268)
* fixed Bulgarian layout: moved 'ь' to the 8-key
* added a migration for removing all Bulgarian words, since the digit sequences are no longer compatible with the new layout
* fixed incorrect text case of some words
* removed some nonsense words
* added new Bulgarian words
This commit is contained in:
parent
c4a78c1931
commit
0aa934cebd
4 changed files with 251186 additions and 230906 deletions
|
|
@ -10,5 +10,5 @@ layout:
|
||||||
- [м, н, о, п] # 5
|
- [м, н, о, п] # 5
|
||||||
- [р, с, т, у] # 6
|
- [р, с, т, у] # 6
|
||||||
- [ф, х, ц, ч] # 7
|
- [ф, х, ц, ч] # 7
|
||||||
- [ш, щ, ъ] # 8
|
- [ш, щ, ъ, ь] # 8
|
||||||
- [ь, ю, я] # 9
|
- [ю, я] # 9
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,13 +1,18 @@
|
||||||
Bulgarian wordlist by: Miglen Georgiev
|
Bulgarian wordlist 1 by Miglen Georgiev
|
||||||
Version: f46eff1 (2022-04-26)
|
Version: f46eff1 (2022-04-26)
|
||||||
Source: https://github.com/miglen/bulgarian-wordlists/blob/master/wordlists/bg-words-validated-cyrillic.txt
|
Source: https://github.com/miglen/bulgarian-wordlists/blob/master/wordlists/bg-words-validated-cyrillic.txt
|
||||||
License: https://github.com/miglen/bulgarian-wordlists/blob/master/LICENSE
|
License: https://github.com/miglen/bulgarian-wordlists/blob/master/LICENSE
|
||||||
|
|
||||||
Additionally cleaned up repeating words and added some missing ones.
|
Bulgarian wordlist 2 by michmech
|
||||||
|
Version: 9c91fe4
|
||||||
|
Source: https://github.com/michmech/lemmatization-lists/blob/master/lemmatization-bg.txt
|
||||||
|
License: https://github.com/michmech/lemmatization-lists/blob/master/LICENCE
|
||||||
|
|
||||||
Also used wooorm's Hunspell-compatible dictionary to determine which words need to start with a capital letter.
|
Also used wooorm's Hunspell-compatible dictionary to determine which words need to start with a capital letter.
|
||||||
Link: https://github.com/wooorm/dictionaries/tree/main/dictionaries/bg
|
Link: https://github.com/wooorm/dictionaries/tree/main/dictionaries/bg
|
||||||
Git commit: 13 Apr 2022 [0c78cc810c8aafb2e6f5140bb6dcd4026b247eb8]
|
Git commit: 13 Apr 2022 [0c78cc810c8aafb2e6f5140bb6dcd4026b247eb8]
|
||||||
|
|
||||||
|
Additionally, cleaned up repeated words and manually added some missing ones.
|
||||||
|
|
||||||
Word frequencies obtained from the "General" word frequency dictionary by the Department of Computational Linguistics of the Bulgarian Academy of Sciences.
|
Word frequencies obtained from the "General" word frequency dictionary by the Department of Computational Linguistics of the Bulgarian Academy of Sciences.
|
||||||
Link: https://dcl.bas.bg/frequency.html
|
Link: https://dcl.bas.bg/frequency.html
|
||||||
|
|
@ -33,7 +33,8 @@ public class DB11 {
|
||||||
assert Dutch != null;
|
assert Dutch != null;
|
||||||
|
|
||||||
database.beginTransaction();
|
database.beginTransaction();
|
||||||
database.execSQL(getDeleteEnglishSwordsQuery(English));
|
database.execSQL(getTruncateBulgarianQuery());
|
||||||
|
database.execSQL(getDeleteEnglishSwordsQuery());
|
||||||
database.execSQL(getDeleteWordsQuery(English.getId(), enWords));
|
database.execSQL(getDeleteWordsQuery(English.getId(), enWords));
|
||||||
database.execSQL(getDeleteWordsQuery(Dutch.getId(), nlWords));
|
database.execSQL(getDeleteWordsQuery(Dutch.getId(), nlWords));
|
||||||
database.setTransactionSuccessful();
|
database.setTransactionSuccessful();
|
||||||
|
|
@ -45,11 +46,19 @@ public class DB11 {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
private String getDeleteEnglishSwordsQuery(Language English) {
|
private String getDeleteEnglishSwordsQuery() {
|
||||||
|
Language English = LanguageCollection.getByLocale(ctx, Locale.ENGLISH.toString());
|
||||||
|
assert English != null;
|
||||||
return "DELETE FROM words WHERE lang=" + English.getId() + " AND word LIKE '%''s'";
|
return "DELETE FROM words WHERE lang=" + English.getId() + " AND word LIKE '%''s'";
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getDeleteWordsQuery(int langId, String wordList) {
|
private String getDeleteWordsQuery(int langId, String wordList) {
|
||||||
return "DELETE FROM words WHERE lang=" + langId + " AND word IN(" + wordList + ")";
|
return "DELETE FROM words WHERE lang=" + langId + " AND word IN(" + wordList + ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String getTruncateBulgarianQuery() {
|
||||||
|
Language Bulgarian = LanguageCollection.getByLocale(ctx, "bg_BG");
|
||||||
|
assert Bulgarian != null;
|
||||||
|
return "DELETE FROM words WHERE lang=" + Bulgarian.getId();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue