-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathlanguage_detector.py
More file actions
66 lines (65 loc) · 1.42 KB
/
language_detector.py
File metadata and controls
66 lines (65 loc) · 1.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from langdetect import detect
language_dict = {
"af": "Afrikaans",
"ar": "Arabic",
"bg": "Bulgarian",
"bn": "Bengali",
"ca": "Catalan",
"cs": "Czech",
"cy": "Welsh",
"da": "Danish",
"de": "German",
"el": "Greek",
"en": "English",
"es": "Spanish",
"et": "Estonian",
"fa": "Persian",
"fi": "Finnish",
"fr": "French",
"gu": "Gujarati",
"he": "Hebrew",
"hi": "Hindi",
"hr": "Croatian",
"hu": "Hungarian",
"id": "Indonesian",
"it": "Italian",
"ja": "Japanese",
"kn": "Kannada",
"ko": "Korean",
"lt": "Lithuanian",
"lv": "Latvian",
"mk": "Macedonian",
"ml": "Malayalam",
"mr": "Marathi",
"ne": "Nepali",
"nl": "Dutch",
"no": "Norwegian",
"pa": "Punjabi",
"pl": "Polish",
"pt": "Portuguese",
"ro": "Romanian",
"ru": "Russian",
"sk": "Slovak",
"sl": "Slovenian",
"so": "Somali",
"sq": "Albanian",
"sv": "Swedish",
"sw": "Swahili",
"ta": "Tamil",
"te": "Telugu",
"th": "Thai",
"tl": "Tagalog",
"tr": "Turkish",
"uk": "Ukrainian",
"ur": "Urdu",
"vi": "Vietnamese",
"zh-cn": "Simplified Chinese",
"zh-tw": "Traditional Chinese"
}
def detect_language(text: str) -> str|None:
try:
language = detect(text)
return language_dict.get(language)
except:
print("Failed to detect language of: ", text)
return None