#!/usr/bin/env python
# ts2mkv.py
"""
DVB-TS to MKV kung-fu
2017 by mdd

Toolkit / executable to automagically convert DVB recordings to h264 mkv.
Automatic audio stream selection (deu/eng)
Automatic crop detection to remove cinematic bars
"""

import os
import re
import shlex
import subprocess

from eit import readeit, eitinfo

def filter_lines(data, search):
    """
    Keep only the lines of a text that contain a given substring.

    input: data = newline separated string, search = substring to look for
    output: newline separated string made of every line of data in which
            search occurs, in original order (empty string if none match)
    """
    matching = [row for row in data.split("\n") if search in row]
    return "\n".join(matching)

def run_command(command):
    """
    Run command as blocking subprocess and echo its stdout line by line.

    input: command = command line string, split into arguments with
           shlex.split (no shell is involved)
    output: exit code of the finished process

    Only stdout is piped and echoed; stderr of the child is inherited.
    """
    process = subprocess.Popen(shlex.split(command), \
        stdout=subprocess.PIPE)
    try:
        while True:
            output = process.stdout.readline()
            # readline() returns an empty (byte) string only at EOF; testing
            # truthiness works for str and bytes alike and avoids spinning
            # on an already closed pipe while the child is still exiting
            if not output:
                break
            if not isinstance(output, str):
                output = output.decode("utf-8", "replace")
            print(output.strip())
    finally:
        process.stdout.close()
    # the pipe is drained, now wait for the exit status
    return process.wait()

def ffmpeg_filename(filename):
    """
    Escape filename path contents for ffmpeg shell command

    The command strings built in this file are split with shlex.split
    before execution, so every character that shlex treats specially
    outside of quotes (backslash, single quote, double quote, space, tab)
    is prefixed with a backslash.

    input: filename = raw path
    output: escaped path that shlex.split turns back into the raw path
    """
    special = "\\'\" \t"
    return "".join("\\" + ch if ch in special else ch for ch in filename)

class ts2mkv(object):
    """
    Main worker class, contains all the magic & ffmpeg voodoo

    Usage: load(filename) analyzes a recording and prepares the ffmpeg
    command, convert() writes the info text file and runs ffmpeg.

    Attributes:
        msg_prepare: log text describing the selections made during load()
        msg_eit: result of readeit() for the .eit file next to the input
        msg_ffmpeg: captured output of the ffmpeg analysis run
        command: list of ffmpeg command strings, None if load() failed
        filename: input file given to load()
        outfilebase: output path without extension (.mkv/.txt get appended)
    """
    def __init__(self, crf=19, tune='film', scaleto_720p=True, rename=False):
        """
        input: crf = x264 constant rate factor
               tune = x264 tune preset name
               scaleto_720p = limit output width to 1280 pixels
               rename = build output name from EIT name and genre
        """
        self.msg_prepare = ""
        self.msg_eit = ""
        self.msg_ffmpeg = ""
        self.command = None
        self.filename = None
        self.outfilebase = None

        self.scaleto_720p = scaleto_720p
        self.rename = rename

        self.video_options = [
            "-c:v libx264",
            "-preset faster", # slow
            "-tune %s" % tune, # film / animation
            "-crf %i" % crf, # 21, better 19
            ]
        self.audio_options = [
            "-c:a copy",
            ]

    @staticmethod
    def _as_text(data):
        """
        Return subprocess output as str (decodes byte strings if needed)
        """
        if isinstance(data, str):
            return data
        return data.decode("utf-8", "replace")

    @staticmethod
    def _as_bytes(data):
        """
        Return data as byte string for writing to a binary file
        (None is treated as empty)
        """
        if not data:
            return b""
        if isinstance(data, bytes):
            return data
        return data.encode("utf-8")

    def get_stream_index(self, data):
        """
        input: ffmpeg stream info string
        output: ffmpeg stream mapping part ("file:stream", e.g. "0:12") of
                the first "Stream #" entry, empty string if there is none
        """
        # match the complete index; a fixed width slice would cut "0:10"
        # down to "0:1" and select the wrong stream
        match = re.search(r"Stream #(\d+:\d+)", data)
        if match is None:
            return ""
        self.msg_prepare += "Selecting: %s\n" % data
        return match.group(1)

    def get_movie_description(self):
        """
        looks for eit file with same basename of current filename
        parse the eit file for txt infofile and optional build new
        output filename base with movie name and genre

        output: nothing, manipulates internal variables
        """
        if not self.filename:
            return
        # read the EIT file
        filename = os.path.splitext(self.filename)[0] + ".eit"
        self.msg_eit = readeit(filename)
        if not self.rename or not self.msg_eit:
            return
        info = eitinfo(filename)
        # info.eit is used like a dict; .get returns None for missing keys
        name = info.eit.get("name")
        if not name:
            # cancel rename, no movie title found!
            return
        genre = info.eit.get("genre")
        if genre:
            name = "%s (%s)" % (name, genre)
        # build new filename: replace or drop characters that are
        # problematic in file names (order matters for the colon rules)
        replacements = (
            (' : ', ' - '),
            (': ', ' - '),
            (':', '-'),
            ('/', ''),
            ('\\', ''),
            ('?', ''),
            ('*', ''),
            ('"', '\''),
            )
        for old, new in replacements:
            name = name.replace(old, new)
        if not name.strip():
            # nothing usable left, keep the default output name
            return

        self.outfilebase = os.path.join(
            os.path.dirname(filename),
            name
            )

    def get_crop_option(self):
        """
        parse the ffmpeg analyze output cropdetect lines
        returns None or valid crop string for ffmpeg video filter

        None is returned when no crop value was reported or when the
        reported values differ between the analyzed frames.
        """
        option = None
        for line in self.msg_ffmpeg.split("\n"):
            if "[Parsed_cropdetect" not in line:
                continue
            pos = line.find(" crop=")
            if pos == -1:
                # cropdetect line without a crop value, nothing to compare
                continue
            tmp = line[pos:].strip()
            if option is None:
                option = tmp
            elif option != tmp:
                self.msg_prepare += "WARNING: cropdetect inconsistent over scan time, disabling autocrop\n"
                return None
        if option is None:
            return None
        self.msg_prepare += "Crop detected: %s\n" % option
        return option

    def __get_audiomap(self, info):
        """
        Select the wanted german and english audio streams from ffmpeg info

        Preference: german ac3 stream, else the first german stream; an
        english ac3 stream is added in any case if present.

        output: mapping list
        """
        audiomap = []
        audioall = filter_lines(info, "Audio:")
        audio = filter_lines(audioall, "(deu):")
        aidx = self.get_stream_index(
            filter_lines(audio, "ac3"))
        if aidx == "":
            print(audioall)
            print("No AC3 german audio stream found")
            # try to find the first german audio stream
            aidx = self.get_stream_index(audio.split("\n")[0])
            if aidx == "":
                print("No other german audio streams, trying english...")
            else:
                print("Selecting first german stream.")
                audiomap.append(aidx)
        else:
            audiomap.append(aidx)

        audio = filter_lines(audioall, "(eng):")
        aidx = self.get_stream_index(
            filter_lines(audio, "ac3"))
        if aidx != "":
            # append english audio too!
            print("Selecting english ac3 stream.")
            audiomap.append(aidx)
        return audiomap

    def _find_split_parts(self):
        """
        Find the numbered continuation files (<filename>.000 - .999) of the
        current input file.

        output: list of paths sorted by name, so that the parts are joined
                in recording order
        """
        dirname, basename = os.path.split(self.filename)
        prefix = basename + "."
        try:
            entries = os.listdir(dirname or os.curdir)
        except OSError:
            return []
        parts = []
        for entry in sorted(entries):
            if not entry.startswith(prefix):
                continue
            suffix = entry[len(prefix):]
            if len(suffix) == 3 and all(ch in "0123456789" for ch in suffix):
                parts.append(os.path.join(dirname, entry))
        return parts

    def get_ffmpeg_command(self):
        """
        Analyze the input file with ffmpeg (stream list, cropdetect) and
        build the conversion command from the result.

        output: list with the ffmpeg command string, or None on error
                (no filename, output file exists, no video or no suitable
                audio stream found)
        """
        if not self.filename:
            return None

        commands = []
        fn = ffmpeg_filename(self.filename)
        outfn = self.outfilebase + ".mkv"
        # double-check: pull the kill switch and exit if outfile exists already!
        # we do not want to overwrite files in accident (caused by automatic file naming)
        # (plain existence test: the name must not be treated as a pattern)
        if os.path.lexists(outfn):
            print("Output file exists: %s" % outfn)
            print("NOT overwriting it!")
            return None
        outfn = ffmpeg_filename(outfn)

        cmd = [
            "ffmpeg", "-hide_banner",
            "-ss 00:05:00", "-t 1", # search to 5 minutes, analyze 1 second
            "-i %s" % fn,
            "-vf \"cropdetect=24:2:0\"", # detect black bar crop on top and bottom
            "-f null", "-" # no output file
            ]
        p = subprocess.Popen(shlex.split(" ".join(cmd)), \
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = p.communicate()
        self.msg_ffmpeg = self._as_text(out) + "\n" + self._as_text(err)
        # drop everything before the input description; keep the complete
        # output if the marker is missing (e.g. ffmpeg error message)
        start = self.msg_ffmpeg.find("Input #0")
        if start != -1:
            self.msg_ffmpeg = self.msg_ffmpeg[start:]

        # find "Stream #0:" lines
        info = filter_lines(self.msg_ffmpeg, "Stream #0:")

        v = self.get_stream_index(
            filter_lines(info, "Video:"))
        if v == "":
            print("No video stream found")
            return None

        # TODO: copy ALL subtitle streams if present!
        # Stream #0:0[0x20](deu): Subtitle: dvb_teletext ([6][0][0][0] / 0x0006), 492x250
        submap = []
        for tmp in filter_lines(info, "Subtitle: dvb_teletext").split("\n"):
            sidx = self.get_stream_index(tmp)
            if sidx:
                submap.append(sidx)

        # select audio streams
        audiomap = self.__get_audiomap(info)
        if len(audiomap) == 0:
            print("No suitable audio stream found, aborting.")
            return None

        # Old dreambox images did a file split: .ts .ts.001 .ts.002 etc.
        # Find all these files and join them!
        inputs = [fn]
        raw_inputs = [self.filename]
        if os.path.splitext(self.filename)[1].lower() == '.ts':
            for fpart in self._find_split_parts():
                raw_inputs.append(fpart)
                inputs.append(ffmpeg_filename(fpart))

        if len(inputs) > 1:
            # use ffmpeg input concat function
            # attention, ffmpeg doesnt like escape sequences: use the raw
            # names inside one double quoted argument, where only backslash
            # and double quote need escaping for shlex
            concat = "concat:" + "|".join(raw_inputs)
            fn = "\"" + \
                concat.replace("\\", "\\\\").replace("\"", "\\\"") + \
                "\""

        for idx, tmp in enumerate(inputs):
            self.msg_prepare += "Input file #%i: %s\n" % (
                idx, os.path.basename(tmp))

        cmd = [
            "ffmpeg", "-hide_banner",
            "-i %s" % fn,
            ]

        for tmp in submap:
            self.msg_prepare += "Subtitle Stream selected: Stream #%s\n" % tmp
            cmd.append("-map %s" % tmp)

        cmd.append("-map %s" % v)
        self.msg_prepare += "Video Stream selected: Stream #%s\n" % v

        flt = []
        crop = self.get_crop_option()
        if crop:
            flt.append(crop)
        if self.scaleto_720p:
            # -2 ensures division by two for codec
            flt.append("scale='min(1280,iw)':-2")
            self.msg_prepare += "Scaling output stream to 720p if width >1280\n"
        if len(flt) > 0:
            # append video filters
            cmd.append('-filter:v "%s"' % ",".join(flt))
        for tmp in audiomap:
            self.msg_prepare += "Audio Stream selected: Stream #%s\n" % tmp
            cmd.append("-map %s" % tmp)
        if len(submap) > 0:
            cmd.append("-c:s dvdsub")
        cmd.extend(self.video_options)
        cmd.extend(self.audio_options)
        cmd.append(outfn)

        commands.append(" ".join(cmd))
        return commands

    def load(self, filename):
        """
        First step: setup, analyze & prepare for conversion

        input: filename = recording to convert
        output: nothing, self.command holds the prepared command list
                (None if the preparation failed)
        """
        self.msg_prepare = ""
        self.msg_eit = ""
        self.msg_ffmpeg = ""

        self.filename = filename
        self.outfilebase = os.path.splitext(filename)[0]
        self.get_movie_description()
        self.command = self.get_ffmpeg_command()

    def convert(self):
        """
        Second step: write info text file and start ffmpeg conversion
        requires successful load as first step
        returns ffmpeg conversion exit status (None without prepared
        command); stops at the first command that fails
        """
        if not self.command:
            return None
        with open(self.outfilebase + ".txt", "wb") as fd:
            fd.write(self._as_bytes(self.msg_eit))
            fd.write(b"\n\n# ---DEBUG---\n\n")
            fd.write(self._as_bytes(self.msg_prepare))
            fd.write(self._as_bytes(self.msg_ffmpeg))

        rc = None
        for cmd in self.command:
            print("Executing ffmpeg:\n%s\n" % cmd)
            rc = run_command(cmd)
            if rc != 0:
                break
        return rc


if __name__ == "__main__":
    # Command line entry point: convert every file matching the given
    # glob patterns with one shared ts2mkv instance.
    import argparse
    import glob

    parser = argparse.ArgumentParser(description='DVB-TS to MKV kung-fu')
    parser.add_argument('--crf', type=int, default=19, \
        help='h264 crf (default 19)')
    parser.add_argument('--tune', default='film', \
        help='ffmpeg tune preset [film, animation] (default is film)')
    parser.add_argument('--ns', action='store_true', default=False, \
        help='no rescaling (default is scale to 720p)')
    parser.add_argument('--rename', action='store_true', default=False, \
        help='rename file basename to name and genre from EIT file if present')
    parser.add_argument('input', metavar='input', nargs='+', \
        help='one or more files, glob style syntax')

    args = parser.parse_args()
    processor = ts2mkv(crf=args.crf, tune=args.tune, scaleto_720p=(not args.ns), \
        rename=args.rename)

    for srcstr in args.input:
        src = sorted(glob.glob(srcstr))
        if not src and os.path.exists(srcstr):
            # literal file name containing glob metacharacters like [ or ]
            src = [srcstr]
        if not src:
            print("No files match: %s" % srcstr)
            continue
        for srcfile in src:
            print("Processing: %s" % srcfile)
            processor.load(srcfile)
            rc = processor.convert()
            # convert() returns None when load() could not prepare a
            # command, otherwise the ffmpeg exit status
            if rc is None:
                print("Skipped: %s" % srcfile)
            elif rc != 0:
                print("ffmpeg failed with exit code %s: %s" % (rc, srcfile))