finished EIT parser

2017-11-04

author
mdd
date
Sat, 04 Nov 2017 22:30:19 +0100 (2017-11-04)
changeset 2
319f8c3fd394
parent 1
88b1fefb7c92
child 3
569fa9a431b9

finished EIT parser

eit.py file | annotate | diff | comparison | revisions
--- a/eit.py	Sat Nov 04 20:56:03 2017 +0100
+++ b/eit.py	Sat Nov 04 22:30:19 2017 +0100
@@ -1,11 +1,12 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 # iso-8859-2
-#
-# EitSupport
-# Copyright (C) 2011 betonme
-# Copyright (C) 2016 Wolfgang Fahl
-#
+"""
+EitSupport
+Copyright (C) 2011 betonme
+Copyright (C) 2016 Wolfgang Fahl
+Cleanup 2017 by mdd
+"""
 # This EITParser is based on:
 # https://github.com/betonme/e2openplugin-EnhancedMovieCenter/blob/master/src/EitSupport.py
 #
@@ -25,6 +26,9 @@
 #   <http://www.gnu.org/licenses/>.
 #
 
+# page 36, contents of the for loop!
+# https://www.dvb.org/resources/public/standards/a38_dvb-si_specification.pdf
+
 import os
 import struct
 import sys
@@ -47,12 +51,15 @@
 #           crc = crc & 0xffffffffL
 #   return crc
 
-decoding_charSpecHR = {
+EIT_SHORT_EVENT_DESCRIPTOR = 0x4d
+EIT_EXTENDED_EVENT_DESCRIPOR = 0x4e
+
+CHARSPEC_HR = {
     u'Ć': u'\u0106', u'æ': u'\u0107', u'®': u'\u017D', u'¾': u'\u017E',
     u'©': u'\u0160', u'¹': u'\u0161', u'Č': u'\u010C', u'è': u'\u010D', u'ð': u'\u0111'
 }
 
-decoding_charSpecCZSK = {
+CHARSPEC_CZSK = {
     u'Ï'+u'C': u'Č', u'Ï'+u'E': u'Ě', u'Ï'+u'L': u'Ľ', u'Ï'+u'N': u'Ň', u'Ï'+u'R': u'Ř',
     u'Ï'+u'S': u'Š', u'Ï'+u'T': u'Ť', u'Ï'+u'Z': u'Ž', u'Ï'+u'c': u'č', u'Ï'+u'd': u'ď',
     u'Ï'+u'e': u'ě', u'Ï'+u'l': u'ľ', u'Ï'+u'n': u'ň', u'Ï'+u'r': u'ř', u'Ï'+u's': u'š',
@@ -64,26 +71,23 @@
     u'È'+u'e': u'ë', u'È'+u'i': u'ï', u'È'+u'o': u'ö', u'È'+u'u': u'ü', u'È'+u'y': u'ÿ'
 }
 
-def emcDebugOut(msg):
-    print msg
-
-def convertCharSpecHR(text):
-    for i, j in decoding_charSpecHR.iteritems():
+def convert_charspec_hr(text):
+    for i, j in CHARSPEC_HR.iteritems():
         text = text.replace(i, j)
     return text
 
-def convertCharSpecCZSK(text):
-    for i, j in decoding_charSpecCZSK.iteritems():
+def convert_charspec_czsk(text):
+    for i, j in CHARSPEC_CZSK.iteritems():
         text = text.replace(i, j)
     return text
 
-def parseMJD(mjd):
+def parse_mjd(mjd):
     """Parse 16 bit unsigned int containing Modified Julian Date,
     as per DVB-SI spec
     returning year,month,day"""
     year = int((mjd - 15078.2) / 365.25)
     month = int((mjd - 14956.1 - int(year * 365.25)) / 30.6001)
-    day  = mjd - 14956 - int(year * 365.25) - int(month * 30.6001)
+    day = mjd - 14956 - int(year * 365.25) - int(month * 30.6001)
     correction = 0
     if month == 14 or month == 15:
         correction = 1
@@ -92,6 +96,34 @@
 def bcd2dec(byte):
     return (byte >> 4) * 10 + (byte & 0xf)
 
+
+def mkint(data):
+    """
+    Convert string to Integer
+    """
+    return int(data) if data else 0
+
+def todate(sdate, stime):
+    """
+    Convert date and time tuples to a datetime object (None if missing or invalid)
+    """
+    if sdate and stime:
+        try:
+            return datetime(
+                int(sdate[0]), int(sdate[1]), int(sdate[2]),
+                int(stime[0]), int(stime[1]))
+        except ValueError:
+            return None
+    else:
+        return None
+
+def cleanstring(data):
+    """remove nonprintable chars from short desc
+    """
+    for char in ['\x10', '\x00', '\x02', '\x15']:
+        data = data.replace(char, '')
+    return data
+
 def language_iso639_2to3(alpha2):
     ret = alpha2
     if alpha2 in LanguageCodes:
@@ -102,96 +134,67 @@
                     return alpha
     return ret
 
-class EitList():
+class EitList(object):
     """Eit File support class
     Description
     http://de.wikipedia.org/wiki/Event_Information_Table
     """
-    EIT_SHORT_EVENT_DESCRIPTOR = 0x4d
-    EIT_EXTENDED_EVENT_DESCRIPOR = 0x4e
-
     def __init__(self, path=None):
         self.eit_file = None
 
         self.eit = {}
         self.iso = None
 
-        self.__newPath(path)
-        self.__readEitFile()
+        self.load(path)
 
-    def __newPath(self, path):
-        name = None
+    def load(self, path):
         if path:
-            if self.eit_file != path:
-                self.eit_file = path
-
-    def __mk_int(self, s):
-        return int(s) if s else 0
+            self.eit_file = path
+            self._read_file()
 
-    def __toDate(self, d, t):
-        if d and t:
-            try:
-                return datetime(
-                    int(d[0]), int(d[1]), int(d[2]),
-                    int(t[0]), int(t[1]))
-            except ValueError:
-                return None
-        else:
-            return None
+    def get_genre(self):
+        return self.eit.get('genre', "")
 
-    def getEitsid(self):
-        return self.eit.get('service', "") #TODO
+    def get_components(self):
+        return self.eit.get('components', "")
 
-    def getEitTsId(self):
-        return self.eit.get('transportstream', "") #TODO
-
-    def getEitWhen(self):
-        return self.eit.get('when', "")
-
-    def getEitStartDate(self):
+    def get_startdate(self):
         return self.eit.get('startdate', "")
 
-    def getEitStartTime(self):
+    def get_starttime(self):
         return self.eit.get('starttime', "")
 
-    def getEitDuration(self):
+    def get_duration(self):
         return self.eit.get('duration', "")
 
-    def getEitName(self):
+    def get_name(self):
         return self.eit.get('name', "").strip()
 
-    def getEitDescription(self):
+    def get_description(self):
         return self.eit.get('description', "").strip()
 
-    # Wrapper
-    def getEitShortDescription(self):
-        return self.getEitName()
-
-    def getEitExtendedDescription(self):
-        return self.getEitDescription()
-
-    def getEitLengthInSeconds(self):
+    def get_duration_seconds(self):
         length = self.eit.get('duration', "")
         if len(length) > 2:
-            return self.__mk_int((length[0] * 60 + length[1]) * 60 + length[2])
+            return mkint((length[0] * 60 + length[1]) * 60 + length[2])
         elif len(length) > 1:
-            return self.__mk_int(length[0] * 60 + length[1])
+            return mkint(length[0] * 60 + length[1])
         else:
-            return self.__mk_int(length)
+            return mkint(length)
 
-    def getEitDate(self):
-        return self.__toDate(self.getEitStartDate(), self.getEitStartTime())
+    def get_date(self):
+        return todate(self.get_startdate(), self.get_starttime())
 
     def dumpEit(self):
         print self.eit
 
     ##############################################################################
     ## File IO Functions
-    def __readEitFile(self):
+    def _read_file(self):
         data = ""
         path = self.eit_file
 
-        lang = language_iso639_2to3( "de" )
+        lang = language_iso639_2to3("de")
 
         if path and os.path.exists(path):
             print "Reading Event Information Table " + str(path)
@@ -214,17 +217,17 @@
                 pos = 0
                 e = struct.unpack(">HHBBBBBBH", data[pos:pos + 12])
                 event_id = e[0]
-                date = parseMJD(e[1]) # Y, M, D
+                date = parse_mjd(e[1]) # Y, M, D
                 time = bcd2dec(e[2]), bcd2dec(e[3]), bcd2dec(e[4]) # HH, MM, SS
                 duration = bcd2dec(e[5]), bcd2dec(e[6]), bcd2dec(e[7]) # HH, MM, SS
-                running_status = (e[8] & 0xe000) >> 13
-                free_CA_mode = e[8] & 0x1000
+                #running_status = (e[8] & 0xe000) >> 13
+                #free_CA_mode = e[8] & 0x1000
                 descriptors_len = e[8] & 0x0fff
 
-                if running_status in [1, 2]:
-                    self.eit['when'] = "NEXT"
-                elif running_status in [3, 4]:
-                    self.eit['when'] = "NOW"
+                #if running_status in [1, 2]:
+                #    self.eit['when'] = "NEXT"
+                #elif running_status in [3, 4]:
+                #    self.eit['when'] = "NOW"
 
                 self.eit['startdate'] = date
                 self.eit['starttime'] = time
@@ -248,7 +251,11 @@
                         descriptor_length = ord(data[pos + 2])
                         ISO_639_language_code = str(data[pos + 3:pos + 5])
                         event_name_length = ord(data[pos + 5])
-                        short_event_description = data[pos + 6:pos + 6 + event_name_length]
+                        short_event_description = cleanstring(data[pos + 6:pos + 6 + event_name_length])
+
+                        tmp_length = ord(data[pos + 6 + event_name_length])
+                        self.eit['genre'] = cleanstring(data[pos + 7 + event_name_length:pos + 7 + tmp_length + event_name_length])
+
                         if ISO_639_language_code == lang:
                             short_event_descriptor.append(short_event_description)
                         short_event_descriptor_multi.append(short_event_description)
@@ -256,33 +263,36 @@
                         ISO_639_language_code = str(data[pos + 3:pos + 5])
                         extended_event_description = ""
                         extended_event_description_multi = ""
-                        for i in range (pos+8,pos+length):
-                            if str(ord(data[i]))=="138":
+                        for i in range(pos + 8, pos + length):
+                            if str(ord(data[i])) == "138":
                                 extended_event_description += '\n'
                                 extended_event_description_multi += '\n'
-                            else:
-                                if data[i]=='\x10' or data[i]=='\x00' or data[i]=='\x02':
-                                    pass
-                                else:
-                                    extended_event_description += data[i]
-                                    extended_event_description_multi += data[i]
+                            elif data[i] not in ['\x10', '\x00', '\x02', '\x15']:
+                                extended_event_description += data[i]
+                                extended_event_description_multi += data[i]
                         if ISO_639_language_code == lang:
                             extended_event_descriptor.append(extended_event_description)
                         extended_event_descriptor_multi.append(extended_event_description)
                     elif rec == 0x50:
-                        component_descriptor.append(data[pos + 8:pos + length])
+                        #tmp_type = ord(data[pos + 3:pos + 4])
+                        #print "type: %x" % tmp_type
+                        component_descriptor.append(cleanstring(data[pos + 8:pos + length]))
                     elif rec == 0x54:
-                        content_descriptor.append(data[pos + 8:pos + length])
+                        content_descriptor.append(cleanstring(data[pos + 8:pos + length]))
                     elif rec == 0x4A:
-                        linkage_descriptor.append(data[pos + 8:pos + length])
+                        linkage_descriptor.append(cleanstring(data[pos + 8:pos + length]))
                     elif rec == 0x55:
-                        parental_rating_descriptor.append(data[pos + 2:pos + length])
+                        parental_rating_descriptor.append(cleanstring(data[pos + 2:pos + length]))
                     else:
                         print "unsupported descriptor: %x %x" % (rec, pos + 12)
-                        #print data[pos:pos+length]
-                        pass
+                        print data[pos:pos + length]
+
                     pos += length
 
+                self.eit['components'] = ", ".join(component_descriptor)
+
+
+
                 # Very bad but there can be both encodings
                 # User files can be in cp1252
                 # Is there no other way?
@@ -305,9 +315,9 @@
                             #short_event_descriptor = short_event_descriptor.decode("iso-8859-1").encode("utf-8")
                             pass
                         if (lang == "cs") or (lang == "sk"):
-                            short_event_descriptor = str(convertCharSpecCZSK(short_event_descriptor))
+                            short_event_descriptor = str(convert_charspec_czsk(short_event_descriptor))
                         if lang == "hr":
-                            short_event_descriptor = str(convertCharSpecHR(short_event_descriptor))
+                            short_event_descriptor = str(convert_charspec_hr(short_event_descriptor))
                 self.eit['name'] = short_event_descriptor
 
                 # Very bad but there can be both encodings
@@ -332,9 +342,9 @@
                             #extended_event_descriptor = extended_event_descriptor.decode("iso-8859-1").encode("utf-8")
                             pass
                         if (lang == "cs") or (lang == "sk"):
-                            extended_event_descriptor = str(convertCharSpecCZSK(extended_event_descriptor))
+                            extended_event_descriptor = str(convert_charspec_czsk(extended_event_descriptor))
                         if lang == "hr":
-                            extended_event_descriptor = str(convertCharSpecHR(extended_event_descriptor))
+                            extended_event_descriptor = str(convert_charspec_hr(extended_event_descriptor))
                 self.eit['description'] = extended_event_descriptor
 
             else:
@@ -347,11 +357,13 @@
     Read Eit File and show the information.
     """
     eitlist = EitList(eitfile)
-    print "Name: ", eitlist.getEitName()
-    print "StartDate: ", eitlist.getEitStartDate()
-    print "Description: ", eitlist.getEitDescription()
-    print "Duration: ", eitlist.getEitDuration()
-    print "Seconds: ", eitlist.getEitLengthInSeconds()
+    print "Name: ", eitlist.get_name()
+    print "Genre: ", eitlist.get_genre()
+    print "Components: ", eitlist.get_components()
+    print "StartDate: ", eitlist.get_date()
+    print "Description: ", eitlist.get_description()
+    print "Duration: ", eitlist.get_duration()
+    print "Minutes: ", eitlist.get_duration_seconds() / 60
 
     #eitlist.dumpEit()
 

mercurial