Sat, 04 Nov 2017 20:12:11 +0100
basic EIT parser crap added
ISO639.py | file | annotate | diff | comparison | revisions | |
eit.py | file | annotate | diff | comparison | revisions |
# -*- coding: utf-8 -*-
# ISO639.py - ISO 639 language code table.
#
# ``LanguageCodes`` maps every known ISO 639-1 (two-letter) and ISO 639-2
# (three-letter, bibliographic and terminologic) code to one shared
# ``(english_name, language_family)`` tuple.  Codes that denote the same
# language are bound to the *same* tuple object by a chained assignment, so
# aliases can be found by comparing values.  ``language_family`` is an empty
# string where the table carries no family.
#
# The non-ASCII names (Bokmål, Provençal, Volapük) are written as UTF-8 to
# match the coding declaration above.

LanguageCodes = {}

LanguageCodes["aar"] = LanguageCodes["aa"] = ("Afar", "Hamitic")
LanguageCodes["abk"] = LanguageCodes["ab"] = ("Abkhazian", "Ibero-caucasian")
LanguageCodes["ace"] = ("Achinese", "")
LanguageCodes["ach"] = ("Acoli", "")
LanguageCodes["ada"] = ("Adangme", "")
LanguageCodes["afa"] = ("Afro-Asiatic (Other)", "")
LanguageCodes["afh"] = ("Afrihili", "")
LanguageCodes["afr"] = LanguageCodes["af"] = ("Afrikaans", "Germanic")
LanguageCodes["aka"] = ("Akan", "")
LanguageCodes["akk"] = ("Akkadian", "")
LanguageCodes["ale"] = ("Aleut", "")
LanguageCodes["alg"] = ("Algonquian languages", "")
LanguageCodes["amh"] = LanguageCodes["am"] = ("Amharic", "Semitic")
LanguageCodes["ang"] = ("English, Old (ca. 450-1100)", "")
LanguageCodes["apa"] = ("Apache languages", "")
LanguageCodes["ara"] = LanguageCodes["ar"] = ("Arabic", "Semitic")
LanguageCodes["arc"] = ("Aramaic", "")
LanguageCodes["arn"] = ("Araucanian", "")
LanguageCodes["arp"] = ("Arapaho", "")
LanguageCodes["art"] = ("Artificial (Other)", "")
LanguageCodes["arw"] = ("Arawak", "")
LanguageCodes["asm"] = LanguageCodes["as"] = ("Assamese", "Indian")
LanguageCodes["ath"] = ("Athapascan languages", "")
LanguageCodes["aus"] = ("Australian languages", "")
LanguageCodes["ava"] = ("Avaric", "")
LanguageCodes["ave"] = LanguageCodes["ae"] = ("Avestan", "")
LanguageCodes["awa"] = ("Awadhi", "")
LanguageCodes["aym"] = LanguageCodes["ay"] = ("Aymara", "Amerindian")
LanguageCodes["aze"] = LanguageCodes["az"] = ("Azerbaijani", "Turkic/altaic")
LanguageCodes["bad"] = ("Banda", "")
LanguageCodes["bai"] = ("Bamileke languages", "")
LanguageCodes["bak"] = LanguageCodes["ba"] = ("Bashkir", "Turkic/altaic")
LanguageCodes["bal"] = ("Baluchi", "")
LanguageCodes["bam"] = ("Bambara", "")
LanguageCodes["ban"] = ("Balinese", "")
LanguageCodes["bas"] = ("Basa", "")
LanguageCodes["bat"] = ("Baltic (Other)", "")
LanguageCodes["bej"] = ("Beja", "")
LanguageCodes["bel"] = LanguageCodes["be"] = ("Belarusian", "Slavic")
LanguageCodes["bem"] = ("Bemba", "")
LanguageCodes["ben"] = LanguageCodes["bn"] = ("Bengali", "Indian")
LanguageCodes["ber"] = ("Berber (Other)", "")
LanguageCodes["bho"] = ("Bhojpuri", "")
LanguageCodes["bih"] = LanguageCodes["bh"] = ("Bihari", "Indian")
LanguageCodes["bik"] = ("Bikol", "")
LanguageCodes["bin"] = ("Bini", "")
LanguageCodes["bis"] = LanguageCodes["bi"] = ("Bislama", "")
LanguageCodes["bla"] = ("Siksika", "")
LanguageCodes["bnt"] = ("Bantu (Other)", "")
LanguageCodes["bod"] = LanguageCodes["tib"] = LanguageCodes["bo"] = ("Tibetan", "Asian")
LanguageCodes["bos"] = LanguageCodes["bs"] = ("Bosnian", "")
LanguageCodes["bra"] = ("Braj", "")
LanguageCodes["bre"] = LanguageCodes["br"] = ("Breton", "Celtic")
LanguageCodes["btk"] = ("Batak (Indonesia)", "")
LanguageCodes["bua"] = ("Buriat", "")
LanguageCodes["bug"] = ("Buginese", "")
LanguageCodes["bul"] = LanguageCodes["bg"] = ("Bulgarian", "Slavic")
LanguageCodes["cad"] = ("Caddo", "")
LanguageCodes["cai"] = ("Central American Indian (Other)", "")
LanguageCodes["car"] = ("Carib", "")
LanguageCodes["cat"] = LanguageCodes["ca"] = ("Catalan", "Romance")
LanguageCodes["cau"] = ("Caucasian (Other)", "")
LanguageCodes["ceb"] = ("Cebuano", "")
LanguageCodes["cel"] = ("Celtic (Other)", "")
LanguageCodes["ces"] = LanguageCodes["cze"] = LanguageCodes["cs"] = ("Czech", "Slavic")
LanguageCodes["cha"] = LanguageCodes["ch"] = ("Chamorro", "")
LanguageCodes["chb"] = ("Chibcha", "")
LanguageCodes["che"] = LanguageCodes["ce"] = ("Chechen", "")
LanguageCodes["chg"] = ("Chagatai", "")
LanguageCodes["chk"] = ("Chuukese", "")
LanguageCodes["chm"] = ("Mari", "")
LanguageCodes["chn"] = ("Chinook jargon", "")
LanguageCodes["cho"] = ("Choctaw", "")
LanguageCodes["chp"] = ("Chipewyan", "")
LanguageCodes["chr"] = ("Cherokee", "")
LanguageCodes["chu"] = LanguageCodes["cu"] = ("Church Slavic", "")
LanguageCodes["chv"] = LanguageCodes["cv"] = ("Chuvash", "")
LanguageCodes["chy"] = ("Cheyenne", "")
LanguageCodes["cmc"] = ("Chamic languages", "")
LanguageCodes["cop"] = ("Coptic", "")
LanguageCodes["cor"] = LanguageCodes["kw"] = ("Cornish", "")
LanguageCodes["cos"] = LanguageCodes["co"] = ("Corsican", "Romance")
LanguageCodes["cpe"] = ("Creoles and pidgins, English based (Other)", "")
LanguageCodes["cpf"] = ("Creoles and pidgins, French-based (Other)", "")
LanguageCodes["cpp"] = ("Creoles and pidgins, Portuguese-based (Other)", "")
LanguageCodes["cre"] = ("Cree", "")
LanguageCodes["crp"] = ("Creoles and pidgins (Other)", "")
LanguageCodes["cus"] = ("Cushitic (Other)", "")
LanguageCodes["cym"] = LanguageCodes["wel"] = LanguageCodes["cy"] = ("Welsh", "Celtic")
LanguageCodes["dak"] = ("Dakota", "")
LanguageCodes["dan"] = LanguageCodes["da"] = ("Danish", "Germanic")
LanguageCodes["day"] = ("Dayak", "")
LanguageCodes["del"] = ("Delaware", "")
LanguageCodes["den"] = ("Slave (Athapascan)", "")
LanguageCodes["deu"] = LanguageCodes["ger"] = LanguageCodes["de"] = ("German", "Germanic")
LanguageCodes["dgr"] = ("Dogrib", "")
LanguageCodes["din"] = ("Dinka", "")
LanguageCodes["div"] = ("Divehi", "")
LanguageCodes["doi"] = ("Dogri", "")
LanguageCodes["dra"] = ("Dravidian (Other)", "")
LanguageCodes["dua"] = ("Duala", "")
LanguageCodes["dum"] = ("Dutch, Middle (ca. 1050-1350)", "")
LanguageCodes["dyu"] = ("Dyula", "")
LanguageCodes["dzo"] = LanguageCodes["dz"] = ("Dzongkha", "Asian")
LanguageCodes["efi"] = ("Efik", "")
LanguageCodes["egy"] = ("Egyptian (Ancient)", "")
LanguageCodes["eka"] = ("Ekajuk", "")
LanguageCodes["ell"] = LanguageCodes["gre"] = LanguageCodes["el"] = ("Greek, Modern (1453-)", "Latin/greek")
LanguageCodes["elx"] = ("Elamite", "")
LanguageCodes["eng"] = LanguageCodes["en"] = ("English", "Germanic")
LanguageCodes["enm"] = ("English, Middle (1100-1500)", "")
LanguageCodes["epo"] = LanguageCodes["eo"] = ("Esperanto", "International aux.")
LanguageCodes["est"] = LanguageCodes["et"] = ("Estonian", "Finno-ugric")
LanguageCodes["eus"] = LanguageCodes["baq"] = LanguageCodes["eu"] = ("Basque", "Basque")
LanguageCodes["ewe"] = ("Ewe", "")
LanguageCodes["ewo"] = ("Ewondo", "")
LanguageCodes["fan"] = ("Fang", "")
LanguageCodes["fao"] = LanguageCodes["fo"] = ("Faroese", "Germanic")
LanguageCodes["fas"] = LanguageCodes["per"] = LanguageCodes["fa"] = ("Persian", "")
LanguageCodes["fat"] = ("Fanti", "")
LanguageCodes["fij"] = LanguageCodes["fj"] = ("Fijian", "Oceanic/indonesian")
LanguageCodes["fin"] = LanguageCodes["fi"] = ("Finnish", "Finno-ugric")
LanguageCodes["fiu"] = ("Finno-Ugrian (Other)", "")
LanguageCodes["fon"] = ("Fon", "")
LanguageCodes["fra"] = LanguageCodes["fre"] = LanguageCodes["fr"] = ("French", "Romance")
LanguageCodes["frm"] = ("French, Middle (ca. 1400-1600)", "")
LanguageCodes["fro"] = ("French, Old (842-ca. 1400)", "")
LanguageCodes["fry"] = LanguageCodes["fy"] = ("Frisian", "Germanic")
LanguageCodes["ful"] = ("Fulah", "")
LanguageCodes["fur"] = ("Friulian", "")
LanguageCodes["gaa"] = ("Ga", "")
LanguageCodes["gay"] = ("Gayo", "")
LanguageCodes["gba"] = ("Gbaya", "")
LanguageCodes["gem"] = ("Germanic (Other)", "")
LanguageCodes["gez"] = ("Geez", "")
LanguageCodes["gil"] = ("Gilbertese", "")
LanguageCodes["gla"] = LanguageCodes["gd"] = ("Gaelic (Scots)", "Celtic")
LanguageCodes["gle"] = LanguageCodes["ga"] = ("Irish", "Celtic")
LanguageCodes["glg"] = LanguageCodes["gl"] = ("Gallegan", "Romance")
LanguageCodes["glv"] = LanguageCodes["gv"] = ("Manx", "")
LanguageCodes["gmh"] = ("German, Middle High (ca. 1050-1500)", "")
LanguageCodes["goh"] = ("German, Old High (ca. 750-1050)", "")
LanguageCodes["gon"] = ("Gondi", "")
LanguageCodes["gor"] = ("Gorontalo", "")
LanguageCodes["got"] = ("Gothic", "")
LanguageCodes["grb"] = ("Grebo", "")
LanguageCodes["grc"] = ("Greek, Ancient (to 1453)", "")
LanguageCodes["grn"] = LanguageCodes["gn"] = ("Guarani", "Amerindian")
LanguageCodes["guj"] = LanguageCodes["gu"] = ("Gujarati", "Indian")
LanguageCodes["gwi"] = ("Gwich'in", "")
LanguageCodes["hai"] = ("Haida", "")
LanguageCodes["hau"] = LanguageCodes["ha"] = ("Hausa", "Negro-african")
LanguageCodes["haw"] = ("Hawaiian", "")
LanguageCodes["heb"] = LanguageCodes["he"] = ("Hebrew", "")
LanguageCodes["her"] = LanguageCodes["hz"] = ("Herero", "")
LanguageCodes["hil"] = ("Hiligaynon", "")
LanguageCodes["him"] = ("Himachali", "")
LanguageCodes["hin"] = LanguageCodes["hi"] = ("Hindi", "Indian")
LanguageCodes["hit"] = ("Hittite", "")
LanguageCodes["hmn"] = ("Hmong", "")
LanguageCodes["hmo"] = LanguageCodes["ho"] = ("Hiri Motu", "")
LanguageCodes["hrv"] = LanguageCodes["scr"] = LanguageCodes["hr"] = ("Croatian", "Slavic")
LanguageCodes["hun"] = LanguageCodes["hu"] = ("Hungarian", "Finno-ugric")
LanguageCodes["hup"] = ("Hupa", "")
LanguageCodes["hye"] = LanguageCodes["arm"] = LanguageCodes["hy"] = ("Armenian", "Indo-european (other)")
LanguageCodes["iba"] = ("Iban", "")
LanguageCodes["ibo"] = ("Igbo", "")
LanguageCodes["ijo"] = ("Ijo", "")
LanguageCodes["iku"] = LanguageCodes["iu"] = ("Inuktitut", "")
LanguageCodes["ile"] = LanguageCodes["ie"] = ("Interlingue", "International aux.")
LanguageCodes["ilo"] = ("Iloko", "")
LanguageCodes["ina"] = LanguageCodes["ia"] = ("Interlingua (International Auxiliary Language Association)", "International aux.")
LanguageCodes["inc"] = ("Indic (Other)", "")
LanguageCodes["ind"] = LanguageCodes["id"] = ("Indonesian", "")
LanguageCodes["ine"] = ("Indo-European (Other)", "")
LanguageCodes["ipk"] = LanguageCodes["ik"] = ("Inupiaq", "Eskimo")
LanguageCodes["ira"] = ("Iranian (Other)", "")
LanguageCodes["iro"] = ("Iroquoian languages", "")
LanguageCodes["isl"] = LanguageCodes["ice"] = LanguageCodes["is"] = ("Icelandic", "Germanic")
LanguageCodes["ita"] = LanguageCodes["it"] = ("Italian", "Romance")
LanguageCodes["jaw"] = LanguageCodes["jav"] = LanguageCodes["jw"] = ("Javanese", "")
LanguageCodes["jpn"] = LanguageCodes["ja"] = ("Japanese", "Asian")
LanguageCodes["jpr"] = ("Judeo-Persian", "")
LanguageCodes["kaa"] = ("Kara-Kalpak", "")
LanguageCodes["kab"] = ("Kabyle", "")
LanguageCodes["kac"] = ("Kachin", "")
LanguageCodes["kal"] = LanguageCodes["kl"] = ("Kalaallisut", "Eskimo")
LanguageCodes["kam"] = ("Kamba", "")
LanguageCodes["kan"] = LanguageCodes["kn"] = ("Kannada", "Dravidian")
LanguageCodes["kar"] = ("Karen", "")
LanguageCodes["kas"] = LanguageCodes["ks"] = ("Kashmiri", "Indian")
LanguageCodes["kat"] = LanguageCodes["geo"] = LanguageCodes["ka"] = ("Georgian", "Ibero-caucasian")
LanguageCodes["kau"] = ("Kanuri", "")
LanguageCodes["kaw"] = ("Kawi", "")
LanguageCodes["kaz"] = LanguageCodes["kk"] = ("Kazakh", "Turkic/altaic")
LanguageCodes["kha"] = ("Khasi", "")
LanguageCodes["khi"] = ("Khoisan (Other)", "")
LanguageCodes["khm"] = LanguageCodes["km"] = ("Khmer", "Asian")
LanguageCodes["kho"] = ("Khotanese", "")
LanguageCodes["kik"] = LanguageCodes["ki"] = ("Kikuyu", "")
LanguageCodes["kin"] = LanguageCodes["rw"] = ("Kinyarwanda", "Negro-african")
LanguageCodes["kir"] = LanguageCodes["ky"] = ("Kirghiz", "Turkic/altaic")
LanguageCodes["kmb"] = ("Kimbundu", "")
LanguageCodes["kok"] = ("Konkani", "")
LanguageCodes["kom"] = LanguageCodes["kv"] = ("Komi", "")
LanguageCodes["kon"] = ("Kongo", "")
LanguageCodes["kor"] = LanguageCodes["ko"] = ("Korean", "Asian")
LanguageCodes["kos"] = ("Kosraean", "")
LanguageCodes["kpe"] = ("Kpelle", "")
LanguageCodes["kro"] = ("Kru", "")
LanguageCodes["kru"] = ("Kurukh", "")
LanguageCodes["kum"] = ("Kumyk", "")
LanguageCodes["kur"] = LanguageCodes["ku"] = ("Kurdish", "Iranian")
LanguageCodes["kut"] = ("Kutenai", "")
LanguageCodes["lad"] = ("Ladino", "")
LanguageCodes["lah"] = ("Lahnda", "")
LanguageCodes["lam"] = ("Lamba", "")
LanguageCodes["lao"] = LanguageCodes["lo"] = ("Lao", "Asian")
LanguageCodes["lat"] = LanguageCodes["la"] = ("Latin", "Latin/greek")
LanguageCodes["lav"] = LanguageCodes["lv"] = ("Latvian", "Baltic")
LanguageCodes["lez"] = ("Lezghian", "")
LanguageCodes["lin"] = LanguageCodes["ln"] = ("Lingala", "Negro-african")
LanguageCodes["lit"] = LanguageCodes["lt"] = ("Lithuanian", "Baltic")
LanguageCodes["lol"] = ("Mongo", "")
LanguageCodes["loz"] = ("Lozi", "")
LanguageCodes["ltz"] = LanguageCodes["lb"] = ("Letzeburgesch", "")
LanguageCodes["lua"] = ("Luba-Lulua", "")
LanguageCodes["lub"] = ("Luba-Katanga", "")
LanguageCodes["lug"] = ("Ganda", "")
LanguageCodes["lui"] = ("Luiseno", "")
LanguageCodes["lun"] = ("Lunda", "")
LanguageCodes["luo"] = ("Luo (Kenya and Tanzania)", "")
LanguageCodes["lus"] = ("lushai", "")
LanguageCodes["mad"] = ("Madurese", "")
LanguageCodes["mag"] = ("Magahi", "")
LanguageCodes["mah"] = LanguageCodes["mh"] = ("Marshall", "")
LanguageCodes["mai"] = ("Maithili", "")
LanguageCodes["mak"] = ("Makasar", "")
LanguageCodes["mal"] = LanguageCodes["ml"] = ("Malayalam", "Dravidian")
LanguageCodes["man"] = ("Mandingo", "")
LanguageCodes["map"] = ("Austronesian (Other)", "")
LanguageCodes["mar"] = LanguageCodes["mr"] = ("Marathi", "Indian")
LanguageCodes["mas"] = ("Masai", "")
LanguageCodes["mdr"] = ("Mandar", "")
LanguageCodes["men"] = ("Mende", "")
LanguageCodes["mga"] = ("Irish, Middle (900-1200)", "")
LanguageCodes["mic"] = ("Micmac", "")
LanguageCodes["min"] = ("Minangkabau", "")
LanguageCodes["mis"] = ("Miscellaneous languages", "")
LanguageCodes["mkd"] = LanguageCodes["mac"] = LanguageCodes["mk"] = ("Macedonian", "Slavic")
LanguageCodes["mkh"] = ("Mon-Khmer (Other)", "")
LanguageCodes["mlg"] = LanguageCodes["mg"] = ("Malagasy", "Oceanic/indonesian")
LanguageCodes["mlt"] = LanguageCodes["mt"] = ("Maltese", "Semitic")
LanguageCodes["mnc"] = ("Manchu", "")
LanguageCodes["mni"] = ("Manipuri", "")
LanguageCodes["mno"] = ("Manobo languages", "")
LanguageCodes["moh"] = ("Mohawk", "")
LanguageCodes["mol"] = LanguageCodes["mo"] = ("Moldavian", "Romance")
LanguageCodes["mon"] = LanguageCodes["mn"] = ("Mongolian", "")
LanguageCodes["mos"] = ("Mossi", "")
LanguageCodes["mri"] = LanguageCodes["mao"] = LanguageCodes["mi"] = ("Maori", "Oceanic/indonesian")
LanguageCodes["msa"] = LanguageCodes["may"] = LanguageCodes["ms"] = ("Malay", "Oceanic/indonesian")
LanguageCodes["mul"] = ("Multiple languages", "")
LanguageCodes["mun"] = ("Munda languages", "")
LanguageCodes["mus"] = ("Creek", "")
LanguageCodes["mwr"] = ("Marwari", "")
LanguageCodes["mya"] = LanguageCodes["bur"] = LanguageCodes["my"] = ("Burmese", "Asian")
LanguageCodes["myn"] = ("Mayan languages", "")
LanguageCodes["nah"] = ("Nahuatl", "")
LanguageCodes["nai"] = ("North American Indian", "")
LanguageCodes["nau"] = LanguageCodes["na"] = ("Nauru", "")
LanguageCodes["nav"] = LanguageCodes["nv"] = ("Navajo", "")
LanguageCodes["nbl"] = LanguageCodes["nr"] = ("Ndebele, South", "")
LanguageCodes["nde"] = LanguageCodes["nd"] = ("Ndebele, North", "")
LanguageCodes["ndo"] = LanguageCodes["ng"] = ("Ndonga", "")
LanguageCodes["nds"] = ("Low German; Low Saxon; German, Low; Saxon, Low", "")
LanguageCodes["nep"] = LanguageCodes["ne"] = ("Nepali", "Indian")
LanguageCodes["new"] = ("Newari", "")
LanguageCodes["nia"] = ("Nias", "")
LanguageCodes["nic"] = ("Niger-Kordofanian (Other)", "")
LanguageCodes["niu"] = ("Niuean", "")
LanguageCodes["nld"] = LanguageCodes["dut"] = LanguageCodes["nl"] = ("Dutch", "Germanic")
LanguageCodes["nno"] = LanguageCodes["nn"] = ("Norwegian Nynorsk", "")
LanguageCodes["nob"] = LanguageCodes["nb"] = ("Norwegian Bokmål", "")
LanguageCodes["non"] = ("Norse, Old", "")
LanguageCodes["nor"] = LanguageCodes["no"] = ("Norwegian", "Germanic")
LanguageCodes["nso"] = ("Sotho, Northern", "")
LanguageCodes["nub"] = ("Nubian languages", "")
LanguageCodes["nya"] = LanguageCodes["ny"] = ("Chichewa; Nyanja", "")
LanguageCodes["nym"] = ("Nyamwezi", "")
LanguageCodes["nyn"] = ("Nyankole", "")
LanguageCodes["nyo"] = ("Nyoro", "")
LanguageCodes["nzi"] = ("Nzima", "")
LanguageCodes["oci"] = LanguageCodes["oc"] = ("Occitan (post 1500); Provençal", "Romance")
LanguageCodes["oji"] = ("Ojibwa", "")
LanguageCodes["ori"] = LanguageCodes["or"] = ("Oriya", "Indian")
LanguageCodes["orm"] = LanguageCodes["om"] = ("Oromo", "Hamitic")
LanguageCodes["osa"] = ("Osage", "")
LanguageCodes["oss"] = LanguageCodes["os"] = ("Ossetian; Ossetic", "")
LanguageCodes["ota"] = ("Turkish, Ottoman (1500-1928)", "")
LanguageCodes["oto"] = ("Otomian languages", "")
LanguageCodes["paa"] = ("Papuan (Other)", "")
LanguageCodes["pag"] = ("Pangasinan", "")
LanguageCodes["pal"] = ("Pahlavi", "")
LanguageCodes["pam"] = ("Pampanga", "")
LanguageCodes["pan"] = LanguageCodes["pa"] = ("Panjabi", "Indian")
LanguageCodes["pap"] = ("Papiamento", "")
LanguageCodes["pau"] = ("Palauan", "")
LanguageCodes["peo"] = ("Persian, Old (ca. 600-400 b.c.)", "")
LanguageCodes["phi"] = ("Philippine (Other)", "")
LanguageCodes["pli"] = LanguageCodes["pi"] = ("Pali", "")
LanguageCodes["pol"] = LanguageCodes["pl"] = ("Polish", "Slavic")
LanguageCodes["pon"] = ("Pohnpeian", "")
LanguageCodes["por"] = LanguageCodes["pt"] = ("Portuguese", "Romance")
LanguageCodes["pra"] = ("Prakrit languages", "")
LanguageCodes["pro"] = ("Provençal, Old (to 1500)", "")
LanguageCodes["pus"] = LanguageCodes["ps"] = ("Pushto", "Iranian")
LanguageCodes["que"] = LanguageCodes["qu"] = ("Quechua", "Amerindian")
LanguageCodes["raj"] = ("Rajasthani", "")
LanguageCodes["rap"] = ("Rapanui", "")
LanguageCodes["rar"] = ("Rarotongan", "")
LanguageCodes["roa"] = ("Romance (Other)", "")
LanguageCodes["rom"] = ("Romany", "")
LanguageCodes["ron"] = LanguageCodes["rum"] = LanguageCodes["ro"] = ("Romanian", "Romance")
LanguageCodes["run"] = LanguageCodes["rn"] = ("Rundi", "Negro-african")
LanguageCodes["rus"] = LanguageCodes["ru"] = ("Russian", "Slavic")
LanguageCodes["sad"] = ("Sandawe", "")
LanguageCodes["sag"] = LanguageCodes["sg"] = ("Sango", "Negro-african")
LanguageCodes["sah"] = ("Yakut", "")
LanguageCodes["sai"] = ("South American Indian (Other)", "")
LanguageCodes["sal"] = ("Salishan languages", "")
LanguageCodes["sam"] = ("Samaritan Aramaic", "")
LanguageCodes["san"] = LanguageCodes["sa"] = ("Sanskrit", "Indian")
LanguageCodes["sas"] = ("Sasak", "")
LanguageCodes["sat"] = ("Santali", "")
LanguageCodes["sco"] = ("Scots", "")
LanguageCodes["sel"] = ("Selkup", "")
LanguageCodes["sem"] = ("Semitic (Other)", "")
LanguageCodes["sga"] = ("Irish, Old (to 900)", "")
LanguageCodes["sgn"] = ("Sign Languages", "")
LanguageCodes["shn"] = ("Shan", "")
LanguageCodes["sid"] = ("Sidamo", "")
LanguageCodes["sin"] = LanguageCodes["si"] = ("Sinhalese", "Indian")
LanguageCodes["sio"] = ("Siouan languages", "")
LanguageCodes["sit"] = ("Sino-Tibetan (Other)", "")
LanguageCodes["sla"] = ("Slavic (Other)", "")
LanguageCodes["slk"] = LanguageCodes["slo"] = LanguageCodes["sk"] = ("Slovak", "Slavic")
LanguageCodes["slv"] = LanguageCodes["sl"] = ("Slovenian", "Slavic")
LanguageCodes["sme"] = LanguageCodes["se"] = ("Northern Sami", "")
LanguageCodes["smi"] = ("Sami languages (Other)", "")
LanguageCodes["smo"] = LanguageCodes["sm"] = ("Samoan", "Oceanic/indonesian")
LanguageCodes["sna"] = LanguageCodes["sn"] = ("Shona", "Negro-african")
LanguageCodes["snd"] = LanguageCodes["sd"] = ("Sindhi", "Indian")
LanguageCodes["snk"] = ("Soninke", "")
LanguageCodes["sog"] = ("Sogdian", "")
LanguageCodes["som"] = LanguageCodes["so"] = ("Somali", "Hamitic")
LanguageCodes["son"] = ("Songhai", "")
LanguageCodes["sot"] = LanguageCodes["st"] = ("Sotho, Southern", "Negro-african")
LanguageCodes["esl"] = LanguageCodes["spa"] = LanguageCodes["es"] = ("Spanish", "Romance")
LanguageCodes["sqi"] = LanguageCodes["alb"] = LanguageCodes["sq"] = ("Albanian", "Indo-european (other)")
LanguageCodes["srd"] = LanguageCodes["sc"] = ("Sardinian", "")
LanguageCodes["srp"] = LanguageCodes["scc"] = LanguageCodes["sr"] = ("Serbian", "Slavic")
LanguageCodes["srr"] = ("Serer", "")
LanguageCodes["ssa"] = ("Nilo-Saharan (Other)", "")
LanguageCodes["ssw"] = LanguageCodes["ss"] = ("Swati", "Negro-african")
LanguageCodes["suk"] = ("Sukuma", "")
LanguageCodes["sun"] = LanguageCodes["su"] = ("Sundanese", "Oceanic/indonesian")
LanguageCodes["sus"] = ("Susu", "")
LanguageCodes["sux"] = ("Sumerian", "")
LanguageCodes["swa"] = LanguageCodes["sw"] = ("Swahili", "Negro-african")
LanguageCodes["swe"] = LanguageCodes["sv"] = ("Swedish", "Germanic")
LanguageCodes["syr"] = ("Syriac", "")
LanguageCodes["tah"] = LanguageCodes["ty"] = ("Tahitian", "")
LanguageCodes["tai"] = ("Tai (Other)", "")
LanguageCodes["tam"] = LanguageCodes["ta"] = ("Tamil", "Dravidian")
LanguageCodes["tat"] = LanguageCodes["tt"] = ("Tatar", "Turkic/altaic")
LanguageCodes["tel"] = LanguageCodes["te"] = ("Telugu", "Dravidian")
LanguageCodes["tem"] = ("Timne", "")
LanguageCodes["ter"] = ("Tereno", "")
LanguageCodes["tet"] = ("Tetum", "")
LanguageCodes["tgk"] = LanguageCodes["tg"] = ("Tajik", "Iranian")
LanguageCodes["tgl"] = LanguageCodes["tl"] = ("Tagalog", "Oceanic/indonesian")
LanguageCodes["tha"] = LanguageCodes["th"] = ("Thai", "Asian")
LanguageCodes["tig"] = ("Tigre", "")
LanguageCodes["tir"] = LanguageCodes["ti"] = ("Tigrinya", "Semitic")
LanguageCodes["tiv"] = ("Tiv", "")
LanguageCodes["tkl"] = ("Tokelau", "")
LanguageCodes["tli"] = ("Tlingit", "")
LanguageCodes["tmh"] = ("Tamashek", "")
LanguageCodes["tog"] = ("Tonga (Nyasa)", "")
LanguageCodes["ton"] = LanguageCodes["to"] = ("Tonga (Tonga Islands)", "Oceanic/indonesian")
LanguageCodes["tpi"] = ("Tok Pisin", "")
LanguageCodes["tsi"] = ("Tsimshian", "")
LanguageCodes["tsn"] = LanguageCodes["tn"] = ("Tswana", "Negro-african")
LanguageCodes["tso"] = LanguageCodes["ts"] = ("Tsonga", "Negro-african")
LanguageCodes["tuk"] = LanguageCodes["tk"] = ("Turkmen", "Turkic/altaic")
LanguageCodes["tum"] = ("Tumbuka", "")
LanguageCodes["tur"] = LanguageCodes["tr"] = ("Turkish", "Turkic/altaic")
LanguageCodes["tut"] = ("Altaic (Other)", "")
LanguageCodes["tvl"] = ("Tuvalu", "")
LanguageCodes["twi"] = LanguageCodes["tw"] = ("Twi", "Negro-african")
LanguageCodes["tyv"] = ("Tuvinian", "")
LanguageCodes["uga"] = ("Ugaritic", "")
LanguageCodes["uig"] = LanguageCodes["ug"] = ("Uighur", "")
LanguageCodes["ukr"] = LanguageCodes["uk"] = ("Ukrainian", "Slavic")
LanguageCodes["umb"] = ("Umbundu", "")
LanguageCodes["und"] = ("Undetermined", "")
LanguageCodes["urd"] = LanguageCodes["ur"] = ("Urdu", "Indian")
LanguageCodes["uzb"] = LanguageCodes["uz"] = ("Uzbek", "Turkic/altaic")
LanguageCodes["vai"] = ("Vai", "")
LanguageCodes["ven"] = ("Venda", "")
LanguageCodes["vie"] = LanguageCodes["vi"] = ("Vietnamese", "Asian")
LanguageCodes["vol"] = LanguageCodes["vo"] = ("Volapük", "International aux.")
LanguageCodes["vot"] = ("Votic", "")
LanguageCodes["wak"] = ("Wakashan languages", "")
LanguageCodes["wal"] = ("Walamo", "")
LanguageCodes["war"] = ("Waray", "")
LanguageCodes["was"] = ("Washo", "")
LanguageCodes["wen"] = ("Sorbian languages", "")
LanguageCodes["wol"] = LanguageCodes["wo"] = ("Wolof", "Negro-african")
LanguageCodes["xho"] = LanguageCodes["xh"] = ("Xhosa", "Negro-african")
LanguageCodes["yao"] = ("Yao", "")
LanguageCodes["yap"] = ("Yapese", "")
LanguageCodes["yid"] = LanguageCodes["yi"] = ("Yiddish", "")
LanguageCodes["yor"] = LanguageCodes["yo"] = ("Yoruba", "Negro-african")
LanguageCodes["ypk"] = ("Yupik languages", "")
LanguageCodes["zap"] = ("Zapotec", "")
LanguageCodes["zen"] = ("Zenaga", "")
LanguageCodes["zha"] = LanguageCodes["za"] = ("Zhuang", "")
LanguageCodes["zho"] = LanguageCodes["chi"] = LanguageCodes["zh"] = ("Chinese", "Asian")
LanguageCodes["znd"] = ("Zande", "")
LanguageCodes["zul"] = LanguageCodes["zu"] = ("Zulu", "Negro-african")
LanguageCodes["zun"] = ("Zuni", "")
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/eit.py Sat Nov 04 20:12:11 2017 +0100 @@ -0,0 +1,377 @@ +#!/usr/bin/python +# -*- coding: iso-8859-2 -*- +# encoding: utf-8 +# +# EitSupport +# Copyright (C) 2011 betonme +# Copyright (C) 2016 Wolfgang Fahl +# +# This EITParser is based on: +# https://github.com/betonme/e2openplugin-EnhancedMovieCenter/blob/master/src/EitSupport.py +# +# In case of reuse of this source code please do not remove this copyright. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# For more information on the GNU General Public License see: +# <http://www.gnu.org/licenses/>. 
"""Read Enigma2 ``.eit`` files and show the event information they hold.

Usage: eit.py [-h|--help] FILE.eit [FILE.eit ...]
"""

import getopt
import os
import struct
import sys
import time

from datetime import datetime

try:
    from ISO639 import LanguageCodes
except ImportError:
    # Keep the parser usable as a standalone script: without the table no
    # language aliases are known and every descriptor language is accepted.
    LanguageCodes = {}

# Fixed 12-byte event header: event_id, start date (MJD), start time
# (3 BCD bytes), duration (3 BCD bytes) and a 16-bit word holding
# running_status (3 bits), free_CA_mode (1 bit), descriptors length (12 bits).
_EIT_HEADER = struct.Struct(">HHBBBBBBH")

# Descriptor tags handled by the parser.
_TAG_LINKAGE = 0x4A
_TAG_SHORT_EVENT = 0x4D
_TAG_EXTENDED_EVENT = 0x4E
_TAG_COMPONENT = 0x50
_TAG_CONTENT = 0x54
_TAG_PARENTAL_RATING = 0x55

# Known descriptors whose payload is not evaluated; they are skipped silently.
_IGNORED_TAGS = frozenset((_TAG_LINKAGE, _TAG_COMPONENT, _TAG_CONTENT,
                           _TAG_PARENTAL_RATING))

# Bytes dropped from extended event text, and the byte turned into a newline.
_EXTENDED_SKIP_BYTES = frozenset((0x00, 0x02, 0x10))
_EXTENDED_NEWLINE_BYTE = 0x8A

# Character fix-up table used for Croatian ("hr") texts.  All non-ASCII
# characters are written as escapes so the table does not depend on the
# source file encoding.
# NOTE(review): the U+0106 and U+010C entries map a character to itself,
# exactly as in the original table - confirm against upstream.
decoding_charSpecHR = {
    u'\u0106': u'\u0106', u'\u00e6': u'\u0107', u'\u00ae': u'\u017D',
    u'\u00be': u'\u017E', u'\u00a9': u'\u0160', u'\u00b9': u'\u0161',
    u'\u010c': u'\u010C', u'\u00e8': u'\u010D', u'\u00f0': u'\u0111',
}

# Fix-up table used for Czech/Slovak ("cs"/"sk") texts: a two-character
# sequence (accent mark followed by base letter) becomes one accented letter.
decoding_charSpecCZSK = {
    # U+00CF + letter -> letter with caron
    u'\u00cf' + u'C': u'\u010c', u'\u00cf' + u'E': u'\u011a',
    u'\u00cf' + u'L': u'\u013d', u'\u00cf' + u'N': u'\u0147',
    u'\u00cf' + u'R': u'\u0158', u'\u00cf' + u'S': u'\u0160',
    u'\u00cf' + u'T': u'\u0164', u'\u00cf' + u'Z': u'\u017d',
    u'\u00cf' + u'c': u'\u010d', u'\u00cf' + u'd': u'\u010f',
    u'\u00cf' + u'e': u'\u011b', u'\u00cf' + u'l': u'\u013e',
    u'\u00cf' + u'n': u'\u0148', u'\u00cf' + u'r': u'\u0159',
    u'\u00cf' + u's': u'\u0161', u'\u00cf' + u't': u'\u0165',
    u'\u00cf' + u'z': u'\u017e', u'\u00cf' + u'D': u'\u010e',
    # U+00C2 + letter -> letter with acute
    u'\u00c2' + u'A': u'\u00c1', u'\u00c2' + u'E': u'\u00c9',
    u'\u00c2' + u'I': u'\u00cd', u'\u00c2' + u'O': u'\u00d3',
    u'\u00c2' + u'U': u'\u00da', u'\u00c2' + u'a': u'\u00e1',
    u'\u00c2' + u'e': u'\u00e9', u'\u00c2' + u'i': u'\u00ed',
    u'\u00c2' + u'o': u'\u00f3', u'\u00c2' + u'u': u'\u00fa',
    u'\u00c2' + u'y': u'\u00fd',
    # U+00C3 + letter -> letter with circumflex
    u'\u00c3' + u'o': u'\u00f4', u'\u00c3' + u'O': u'\u00d4',
    # U+00CA + letter -> letter with ring above
    u'\u00ca' + u'u': u'\u016f', u'\u00ca' + u'U': u'\u016e',
    # U+00C8 + letter -> letter with diaeresis
    u'\u00c8' + u'A': u'\u00c4', u'\u00c8' + u'E': u'\u00cb',
    u'\u00c8' + u'I': u'\u00cf', u'\u00c8' + u'O': u'\u00d6',
    u'\u00c8' + u'U': u'\u00dc', u'\u00c8' + u'Y': u'\u0178',
    u'\u00c8' + u'a': u'\u00e4', u'\u00c8' + u'e': u'\u00eb',
    u'\u00c8' + u'i': u'\u00ef', u'\u00c8' + u'o': u'\u00f6',
    u'\u00c8' + u'u': u'\u00fc', u'\u00c8' + u'y': u'\u00ff',
}


def convertCharSpecHR(text):
    """Return *text* with every key of ``decoding_charSpecHR`` replaced."""
    for old, new in decoding_charSpecHR.items():
        text = text.replace(old, new)
    return text


def convertCharSpecCZSK(text):
    """Return *text* with every key of ``decoding_charSpecCZSK`` replaced."""
    for old, new in decoding_charSpecCZSK.items():
        text = text.replace(old, new)
    return text


def parseMJD(MJD):
    """Convert a 16 bit Modified Julian Date into ``(year, month, day)``.

    Uses the conversion formula of the DVB-SI specification.
    """
    YY = int((MJD - 15078.2) / 365.25)
    MM = int((MJD - 14956.1 - int(YY * 365.25)) / 30.6001)
    D = MJD - 14956 - int(YY * 365.25) - int(MM * 30.6001)
    # Months 14 and 15 are January and February of the following year.
    K = 1 if MM in (14, 15) else 0
    return (1900 + YY + K), (MM - 1 - K * 12), D


def unBCD(byte):
    """Decode one packed BCD byte (two decimal digits) into an int."""
    return (byte >> 4) * 10 + (byte & 0xf)


def language_iso639_2to3(alpha2):
    """Return a three-letter ISO 639 code for the language of *alpha2*.

    The first three-letter key of ``LanguageCodes`` that shares the entry of
    *alpha2* is returned; which one that is when a language has several
    three-letter codes depends on the dictionary iteration order.  *alpha2*
    is returned unchanged when it is unknown or has no three-letter code.
    """
    language = LanguageCodes.get(alpha2)
    if language is None:
        return alpha2
    for alpha, name in LanguageCodes.items():
        if name == language and len(alpha) == 3:
            return alpha
    return alpha2


def _language_aliases(code):
    """Return the set of all ISO 639 codes naming the same language as *code*."""
    aliases = set([code])
    entry = LanguageCodes.get(code)
    if entry is not None:
        aliases.update(alpha for alpha, name in LanguageCodes.items()
                       if name == entry)
    return aliases


def _clean_extended_text(raw):
    """Return extended event text bytes with control bytes filtered.

    Byte 0x8A becomes a newline; bytes 0x00, 0x02 and 0x10 are dropped.
    """
    cleaned = bytearray()
    for byte in bytearray(raw):
        if byte == _EXTENDED_NEWLINE_BYTE:
            cleaned.append(0x0A)
        elif byte not in _EXTENDED_SKIP_BYTES:
            cleaned.append(byte)
    return bytes(cleaned)


def _collect_event_texts(data, start, preferred_codes):
    """Walk the descriptor loop of *data* beginning at offset *start*.

    Returns ``(names, all_names, texts, all_texts)``: the event names and
    extended texts (as byte strings) whose language code is contained in
    *preferred_codes*, each followed by the list for all languages.
    """
    buf = bytearray(data)  # yields ints on indexing under Python 2 and 3
    end = len(buf)
    names, all_names, texts, all_texts = [], [], [], []
    pos = start
    while pos + 2 <= end:
        tag = buf[pos]
        length = buf[pos + 1] + 2  # payload plus tag and length byte
        if tag == _TAG_SHORT_EVENT:
            # tag, length, 3 byte language code, name length, name, ...
            # A truncated descriptor without a name length byte is skipped.
            if pos + 5 < end:
                code = data[pos + 2:pos + 5].decode("ascii", "replace").lower()
                name = data[pos + 6:pos + 6 + buf[pos + 5]]
                if code in preferred_codes:
                    names.append(name)
                all_names.append(name)
        elif tag == _TAG_EXTENDED_EVENT:
            # tag, length, descriptor numbers, 3 byte language code,
            # items length, text length, text.  The text is taken from
            # offset 8, i.e. an empty item list is assumed.
            code = data[pos + 3:pos + 6].decode("ascii", "replace").lower()
            # Slicing (instead of indexing) tolerates truncated files.
            text = _clean_extended_text(data[pos + 8:pos + length])
            if code in preferred_codes:
                texts.append(text)
            all_texts.append(text)
        elif tag not in _IGNORED_TAGS:
            print("unsopported descriptor: %x %x" % (tag, pos + 12))
        pos += length
    return names, all_names, texts, all_texts


def _join_event_text(preferred, everything, language):
    """Join descriptor texts and convert them to the native ``str`` type.

    *preferred* is used when it is not empty, *everything* otherwise.  The
    bytes are interpreted as UTF-8 and, failing that, as cp1252 (files can
    come in both encodings).  For the languages "cs"/"sk" and "hr" the
    matching character fix-up table is applied.  Returns ``None`` when there
    is no text; under Python 2 the result is a UTF-8 encoded byte string.
    """
    raw = b"".join(preferred or everything)
    if not raw:
        return None
    text = None
    for codec in ("utf-8", "cp1252"):
        try:
            text = raw.decode(codec)
            break
        except UnicodeDecodeError:
            continue
    if text is None:
        # Neither encoding fits (e.g. cyrillic): keep the data untouched.
        return raw if str is bytes else raw.decode("utf-8", "replace")
    if language in ("cs", "sk"):
        text = convertCharSpecCZSK(text)
    if language == "hr":
        text = convertCharSpecHR(text)
    return text.encode("utf-8") if str is bytes else text


class EitList(object):
    """Eit File support class

    Reads one event from an ``.eit`` file and offers its start date, start
    time, duration, name and description through the ``getEit*`` methods.

    Description
    http://de.wikipedia.org/wiki/Event_Information_Table
    """
    EIT_SHORT_EVENT_DESCRIPTOR = _TAG_SHORT_EVENT
    EIT_EXTENDED_EVENT_DESCRIPOR = _TAG_EXTENDED_EVENT  # historical spelling
    EIT_EXTENDED_EVENT_DESCRIPTOR = _TAG_EXTENDED_EVENT

    def __init__(self, path=None, language="de"):
        """Parse the file *path*, preferring texts in *language*.

        *language* is an ISO 639 code; descriptors in any code that
        ``LanguageCodes`` lists for the same language are preferred.  When
        no descriptor matches, the texts of all languages are used.
        """
        self.eit_file = None

        self.eit = {}
        self.iso = None
        self.language = language

        self.__newPath(path)
        self.__readEitFile()

    def __newPath(self, path):
        """Remember *path* as the file to read, ignoring empty values."""
        if path and self.eit_file != path:
            self.eit_file = path

    def __mk_int(self, s):
        """Return ``int(s)``, or 0 for any falsy *s*."""
        return int(s) if s else 0

    def __toDate(self, d, t):
        """Build a ``datetime`` (minute resolution) from date/time tuples.

        Returns ``None`` when a tuple is missing or the values are invalid.
        """
        if not (d and t):
            return None
        try:
            return datetime(int(d[0]), int(d[1]), int(d[2]),
                            int(t[0]), int(t[1]))
        except ValueError:
            return None

    def getEitsid(self):
        return self.eit.get('service', "")  # TODO

    def getEitTsId(self):
        return self.eit.get('transportstream', "")  # TODO

    def getEitWhen(self):
        """Return "NOW", "NEXT" or "" depending on the running status."""
        return self.eit.get('when', "")

    def getEitStartDate(self):
        """Return the start date as ``(year, month, day)`` or ""."""
        return self.eit.get('startdate', "")

    def getEitStartTime(self):
        """Return the start time as ``(hours, minutes, seconds)`` or ""."""
        return self.eit.get('starttime', "")

    def getEitDuration(self):
        """Return the duration as ``(hours, minutes, seconds)`` or ""."""
        return self.eit.get('duration', "")

    def getEitName(self):
        """Return the event name with surrounding whitespace removed."""
        return self.eit.get('name', "").strip()

    def getEitDescription(self):
        """Return the extended description, whitespace stripped."""
        return self.eit.get('description', "").strip()

    # Wrapper
    def getEitShortDescription(self):
        return self.getEitName()

    def getEitExtendedDescription(self):
        return self.getEitDescription()

    def getEitLengthInSeconds(self):
        """Return the duration in seconds, 0 when no duration is known."""
        length = self.eit.get('duration', "")
        if len(length) > 2:
            return self.__mk_int((length[0] * 60 + length[1]) * 60 + length[2])
        elif len(length) > 1:
            return self.__mk_int(length[0] * 60 + length[1])
        else:
            return self.__mk_int(length)

    def getEitDate(self):
        """Return the start of the event as ``datetime`` or ``None``."""
        return self.__toDate(self.getEitStartDate(), self.getEitStartTime())

    def dumpEit(self):
        """Print the raw dictionary of parsed values."""
        print(self.eit)

    ##############################################################################
    ## File IO Functions
    def __readEitFile(self):
        """Read ``self.eit_file`` and fill ``self.eit`` from its content.

        A missing path leaves ``self.eit`` untouched; a file shorter than
        the 12-byte header (or an unreadable one) resets it to ``{}``.
        """
        path = self.eit_file
        if not (path and os.path.exists(path)):
            return

        data = b""
        try:
            with open(path, 'rb') as eit_file:
                data = eit_file.read()
        except (IOError, OSError) as e:
            sys.stderr.write("[META] Exception in readEitFile: %s\n" % (e,))

        if len(data) < _EIT_HEADER.size:
            # No data: clear all
            self.eit = {}
            return

        (_event_id, mjd, start_h, start_m, start_s,
         dur_h, dur_m, dur_s, flags) = _EIT_HEADER.unpack_from(data)

        running_status = (flags & 0xe000) >> 13
        if running_status in (1, 2):
            self.eit['when'] = "NEXT"
        elif running_status in (3, 4):
            self.eit['when'] = "NOW"

        self.eit['startdate'] = parseMJD(mjd)  # Y, M, D
        self.eit['starttime'] = unBCD(start_h), unBCD(start_m), unBCD(start_s)
        self.eit['duration'] = unBCD(dur_h), unBCD(dur_m), unBCD(dur_s)

        names, all_names, texts, all_texts = _collect_event_texts(
            data, _EIT_HEADER.size, _language_aliases(self.language))

        name = _join_event_text(names, all_names, self.language)
        if name:
            self.eit['name'] = name

        description = _join_event_text(texts, all_texts, self.language)
        if description:
            self.eit['description'] = description


def readeit(eitfile):
    """Parse *eitfile* and print its main fields to standard output."""
    eitlist = EitList(eitfile)
    print("Name:  %s" % (eitlist.getEitName(),))
    print("StartDate:  %s" % (eitlist.getEitStartDate(),))
    print("Description:  %s" % (eitlist.getEitDescription(),))
    print("Duration:  %s" % (eitlist.getEitDuration(),))
    print("Seconds:  %s" % (eitlist.getEitLengthInSeconds(),))


def main():
    """Command line entry point: show every ``.eit`` file given as argument."""
    # parse command line options
    try:
        opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
    except getopt.error as msg:
        print(msg)
        print("for help use --help")
        sys.exit(2)
    # process options
    for o, _a in opts:
        if o in ("-h", "--help"):
            print(__doc__)
            sys.exit(0)
    # process arguments
    for arg in args:
        readeit(arg)


if __name__ == "__main__":
    main()