# Mercurial > hg-public > dreambox_tools / file revision

#!/usr/bin/env python
"""
DVB-TS to MKV kung-fu
2017 by mdd

Toolkit / executable to automagically convert DVB recordings to h264 mkv.
Automatic audio stream selection (deu/eng)
Automatic crop detection to remove cinematic bars
"""

import subprocess
import pexpect
from eit import readeit, eitinfo
import os, shlex, sys, time

def filter_lines(data, search):
    """
    input: data = newline separated string, search = substring to look for
    output: newline separated string made of all lines of data that
            contain search (empty string if no line matches)
    """
    matching = [row for row in data.split("\n") if search in row]
    return "\n".join(matching)

def run_command(command):
    """
    run command as blocking subprocess and echo its stdout line by line

    input: command = command line string, split with shlex (no shell involved)
    output: exit code of the finished process
    """
    process = subprocess.Popen(shlex.split(command), \
        stdout=subprocess.PIPE)
    try:
        # readline() only returns an empty string at EOF, so the loop ends
        # exactly when the child closes its stdout (no polling / busy waiting)
        for output in iter(process.stdout.readline, b''):
            print(output.strip())
    finally:
        process.stdout.close()
    # the child may outlive its stdout, block until it is really gone
    return process.wait()

def run_ffmpeg_watch(command, frames_total = 0):
    """
    run command as blocking pexpect child and rewrite the ffmpeg status output

    input: command = complete command line string
           frames_total = expected number of frames, 0 disables percent / ETA
    output: exit status of the child as reported by pexpect after close()

    every "frame= N" match is replaced by a progress line with percent done
    and ETA in minutes, all other output of the child is passed through
    """
    thread = pexpect.spawn(command)
    cpl = thread.compile_pattern_list([
        pexpect.EOF,
        r"frame= *(\d+)",
        '(.+)'
    ])
    time_start = time.time() - 0.1 # start in the past
    while True:
        i = thread.expect_list(cpl, timeout=None)
        if i == 0: # EOF
            print("the sub process exited")
            break
        if i == 1:
            # the pattern guarantees digits only, int() can not fail here
            frame_number = int(thread.match.group(1))
            percent = 0
            eta = 0
            elapsed = time.time() - time_start
            # no estimate without a known total, before the first frame
            # or without elapsed time (would divide by zero)
            if frames_total > 0 and frame_number > 0 and elapsed > 0:
                percent = frame_number * 100.00 / frames_total
                frames_per_second = frame_number / elapsed
                eta = (frames_total - frame_number) / frames_per_second / 60
            sys.stdout.write("\rFrame %i of %i, %.1f%% done, ETA %.0f minutes, " % (
                frame_number, frames_total, percent, eta
            ))
        else:
            # anything else is passed through unchanged
            sys.stdout.write(thread.match.group(0))
        sys.stdout.flush()
    # close() after EOF collects the child, this fills in exitstatus
    thread.close()
    return thread.exitstatus

def ffmpeg_filename(filename):
    """
    Escape filename path contents for ffmpeg shell command

    input: filename = raw path
    output: path with backslashes, quotes and spaces backslash-escaped, so
            splitting the command line (shlex.split) yields exactly one
            argument that is identical to the raw path
    """
    fn = filename
    # the backslash has to be first, otherwise the backslashes added for the
    # other characters would get escaped again
    for char in ("\\", "'", "\"", " "):
        fn = fn.replace(char, "\\" + char)
    return fn

class ts2mkv(object):
    """
    Main worker class, contains all the magic & ffmpeg voodoo

    usage: load(filename) analyzes one recording and prepares the ffmpeg
    command, convert() writes the info text file and runs the conversion.
    Set the attribute overwrite to True to allow replacing existing output.
    """
    def __init__(self, crf=19, tune='film', scaleto_720p=True, rename=False):
        """
        input: crf = value for the x264 -crf option
               tune = value for the x264 -tune option (film / animation)
               scaleto_720p = scale video down to 1280 pixels width if wider
               rename = build output name from EIT name and genre if present
        """
        self.msg_prepare = ""
        self.msg_eit = ""
        self.msg_ffmpeg = ""
        self.command = None
        self.filename = None
        self.outfilebase = None
        self.fps = 0
        self.frames_total = 0
        self.overwrite = False

        self.scaleto_720p = scaleto_720p
        self.rename = rename

        self.video_options = [
            "-c:v libx264",
            "-preset faster", # slow
            "-tune %s" % tune, # film / animation
            "-crf %i" % crf, # 21, better 19
            ]
        self.audio_options = [
            "-c:a copy",
            ]


    def get_stream_index(self, data):
        """
        input: ffmpeg stream info string
        output: ffmpeg stream mapping part ("file:stream", e.g. "0:1"),
                empty string if data contains no stream
        """
        idx = data.find("Stream #")
        if idx == -1:
            return ""
        idx += 8
        # read "<digits>:<digits>" instead of a fixed width of 3 characters,
        # stream numbers can have more than one digit (e.g. "0:12")
        end = idx
        while data[end:end + 1].isdigit():
            end += 1
        if data[end:end + 1] == ":":
            end += 1
            while data[end:end + 1].isdigit():
                end += 1
        if end == idx:
            return ""
        self.msg_prepare += "Selecting: %s\n" % data
        return data[idx:end]

    def get_movie_description(self):
        """
        looks for eit file with same basename of current filename
        parse the eit file for txt infofile and optional build new
        output filename base with movie name and genre

        output: nothing, manipulates internal variables
        """
        if not self.filename:
            return
        # read the EIT file
        filename = os.path.splitext(self.filename)[0] + ".eit"
        self.msg_eit = readeit(filename)
        if not self.rename or not self.msg_eit:
            return
        info = eitinfo(filename)
        name = info.eit.get("name")
        if not name:
            # cancel rename, no movie title found!
            return
        genre = info.eit.get("genre")
        if genre:
            name = "%s (%s)" % (name, genre)
        # build new filename, order matters: the longer colon variants
        # have to be replaced before the single colon
        for old, new in (
                (' : ', ' - '),
                (': ', ' - '),
                (':', '-'),
                ('/', ''),
                ('\\', ''),
                ('?', ''),
                ('*', ''),
                ('\"', '\''),
            ):
            name = name.replace(old, new)
        if not name.strip():
            # nothing usable left after cleanup, keep the original basename
            return

        self.outfilebase = os.path.join(
            os.path.dirname(filename),
            name
            )


    def get_crop_option(self):
        """
        parse the ffmpeg analyze output cropdetect lines
        returns None or valid crop string for ffmpeg video filter
        """
        option = None
        failcount = 0
        for line in filter_lines(self.msg_ffmpeg, "[Parsed_cropdetect").split("\n"):
            pos = line.find(" crop=")
            if pos == -1:
                # no crop result in this line (or no analyze output at all)
                continue
            tmp = line[pos:].strip()
            if not option:
                # the first result is the reference for all following lines
                option = tmp
            elif option != tmp:
                failcount += 1
                if failcount > 12:
                    print("!!! Crop detect is inconsistent")
                    self.msg_prepare += "WARNING: cropdetect >50% inconsistent over scan time, disabling autocrop\n"
                    return None
        self.msg_prepare += "Crop detected: %s\n" % option
        return option

    def __get_audiomap(self, info):
        """
        Select the wanted german and english audio streams from ffmpeg info
        input: ffmpeg "Stream #" info lines
        output: mapping list, german stream first (ac3 preferred), then the
                english ac3 stream if it has channels; empty if nothing found
        """
        audiomap = []
        audioall = filter_lines(info, "Audio:")

        german = filter_lines(audioall, "(deu):")
        aidx = self.get_stream_index(
            filter_lines(german, "ac3"))
        if aidx == "":
            print(audioall)
            print("No AC3 german audio stream found")
            # try to find the first german audio stream
            aidx = self.get_stream_index(german.split("\n")[0])
            if aidx == "":
                print("No other german audio streams, trying english...")
            else:
                print("Selecting first german stream.")
        if aidx != "":
            audiomap.append(aidx)

        english_ac3 = filter_lines(filter_lines(audioall, "(eng):"), "ac3")
        aidx = self.get_stream_index(english_ac3)
        if aidx != "" and " 0 channels " not in english_ac3:
            # append english audio too!
            print("Selecting english ac3 stream.")
            audiomap.append(aidx)
        return audiomap

    def get_ffmpeg_command(self):
        """
        Analyze the input with a short ffmpeg cropdetect run and build the
        conversion command from the result:
        stream selection, total frame count for ETA, joining of split
        recordings (.ts.001 ...), crop and scale filters

        output: list with the ffmpeg command line string, None on error
        """
        if not self.filename:
            return None

        fn = ffmpeg_filename(self.filename)
        outfn = self.outfilebase + ".mkv"
        # double-check: pull the kill switch and exit if outfile exists already!
        # we do not want to overwrite files in accident (caused by automatic file naming)
        # plain existence check, the name must not be treated as glob pattern
        if not self.overwrite and os.path.lexists(outfn):
            print("Output file exists: %s" % outfn)
            print("NOT overwriting it!")
            return None
        outfn = ffmpeg_filename(outfn)

        cmd = [
            "ffmpeg", "-hide_banner",
            "-ss 00:05:00", "-t 2", # search to 5 minutes, analyze 2 seconds
            "-i %s" % fn,
            "-vf \"cropdetect=24:2:0\"", # detect black bar crop on top and bottom
            "-f null", "-" # no output file
            ]
        p = subprocess.Popen(shlex.split(" ".join(cmd)), \
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = p.communicate()
        self.msg_ffmpeg = out + "\n" + err
        # drop everything before the input description, but keep the whole
        # output if ffmpeg did not get that far
        start = self.msg_ffmpeg.find("Input #0")
        if start != -1:
            self.msg_ffmpeg = self.msg_ffmpeg[start:]

        # find "Stream #0:" lines
        info = filter_lines(self.msg_ffmpeg, "Stream #0:")

        v = self.get_stream_index(
            filter_lines(info, "Video:"))
        if v == "":
            print("No video stream found")
            return None

        # get total duration and fps from input stream
        # Input #0, mpegts, from '/srv/storage0/DREAMBOX/Action/Transporter/20101201 0630 - Sky Action HD - Transporter 3.ts':
        #  Duration: 01:39:59.88, start: 93674.825111, bitrate: 9365 kb/s
        #  Stream #0:1[0x1ff]: Video: h264 (High) ([27][0][0][0] / 0x001B), yuv420p(tv, bt709), 1920x1080 [SAR 1:1 DAR 16:9], 25 fps, 50 tbr, 90k tbn, 50 tbc
        duration = filter_lines(self.msg_ffmpeg, "Duration:").strip()[10:]
        duration = duration.split(",")[0].strip()
        print("Input duration: %s" % duration)
        try:
            hours, minutes, seconds = duration.split(":")
            # fractions of a second are cut off
            seconds_total = int(hours) * 3600 + \
                int(minutes) * 60 + int(float(seconds))
        except ValueError:
            # no or unknown duration (e.g. "N/A"), disables the ETA
            seconds_total = 0

        for part in filter_lines(info, "Video:").split(","):
            part = part.strip()
            if part.endswith('fps'):
                try:
                    self.fps = float(part.split(' ')[0])
                except ValueError:
                    self.fps = 0
                break
        self.frames_total = round(seconds_total * self.fps, 0)
        print("Input framerate: %f fps" % self.fps)
        print("Total frames of input file: %i" % (self.frames_total))

        # Subtitles disabled, that doesnt work as expected, dreambox crashes on copied subtitle stream
        # (candidates would be the "Subtitle: dvb_teletext" stream lines)
        submap = []

        # select audio streams
        audiomap = self.__get_audiomap(info)
        if len(audiomap) == 0:
            print("No suitable audio stream found, aborting.")
            return None

        # Old dreambox images did a file split: .ts .ts.001 .ts.002 etc.
        # Find all these files and join them!
        inputs = [fn]
        if os.path.splitext(self.filename)[1].lower() == '.ts':
            directory = os.path.dirname(self.filename)
            prefix = os.path.basename(self.filename) + "."
            # sorted, the parts have to be joined in recording order
            for entry in sorted(os.listdir(directory or os.curdir)):
                suffix = entry[len(prefix):]
                if entry.startswith(prefix) and len(suffix) == 3 and suffix.isdigit():
                    inputs.append(
                        ffmpeg_filename(os.path.join(directory, entry)))

        if len(inputs) > 1:
            # use ffmpeg input concat function
            # attention, ffmpeg doesnt like escape sequences
            # the list is wrapped in double quotes, so spaces and single
            # quotes must not be escaped in there
            fn = "\"concat:" + \
                "|".join(inputs)\
                .replace("\\ ", " ")\
                .replace("\\'", "'")\
                + "\""
            # no ETA calculation possible since we have only the length of first file
            # TODO: we COULD estimate by multiplying with factor generated by input file sizes
            print("NO ETA POSSIBLE")
            self.frames_total = 0

        for idx, tmp in enumerate(inputs):
            self.msg_prepare += "Input file #%i: %s\n" % (
                idx, os.path.basename(tmp))

        cmd = [
            "ffmpeg", "-hide_banner",
            "-i %s" % fn,
            ]

        if self.overwrite:
            cmd.append("-y")

        for tmp in submap:
            self.msg_prepare += "Subtitle Stream selected: Stream #%s\n" % tmp
            cmd.append("-map %s" % tmp)

        cmd.append("-map %s" % v)
        self.msg_prepare += "Video Stream selected: Stream #%s\n" % v

        flt = []
        crop = self.get_crop_option()
        if crop:
            flt.append(crop)
        if self.scaleto_720p:
            # -2 ensures division by two for codec
            flt.append("scale='min(1280,iw)':-2")
            self.msg_prepare += "Scaling output stream to 720p if width >1280\n"
        if len(flt) > 0:
            # append video filters
            cmd.append('-filter:v "%s"' % ",".join(flt))
        for tmp in audiomap:
            self.msg_prepare += "Audio Stream selected: Stream #%s\n" % tmp
            cmd.append("-map %s" % tmp)
        if len(submap) > 0:
            cmd.append("-c:s dvdsub")
        cmd.extend(self.video_options)
        cmd.extend(self.audio_options)
        cmd.append(outfn)

        return [" ".join(cmd)]

    def load(self, filename):
        """
        First step: setup, analyze & prepare for conversion
        input: filename = path of the recording
        output: nothing, self.command is None if the preparation failed
        """
        self.msg_prepare = ""
        self.msg_eit = ""
        self.msg_ffmpeg = ""
        self.fps = 0
        self.frames_total = 0

        self.filename = filename
        self.outfilebase = os.path.splitext(filename)[0]
        self.get_movie_description()
        self.command = self.get_ffmpeg_command()

    def convert(self):
        """
        Second step: write info text file and start ffmpeg conversion
        requires successful load as first step
        returns ffmpeg conversion exit status (None without prepared command)
        """
        if not self.command:
            return None
        with open(self.outfilebase + ".txt", "wb") as fd:
            # there may be no EIT text at all
            fd.write(self.msg_eit or "")
            fd.write("\n\n# ---DEBUG---\n\n")
            fd.write(self.msg_prepare)
            fd.write(self.msg_ffmpeg)

        status = None
        for cmd in self.command:
            print("Executing ffmpeg:\n%s\n" % cmd)
            status = run_ffmpeg_watch(cmd, frames_total=self.frames_total)
            if status:
                # do not continue after a failed command
                break
        return status


if __name__ == "__main__":
    # command line entry point: convert every file matching the given patterns
    import argparse
    import glob

    parser = argparse.ArgumentParser(description='DVB-TS to MKV kung-fu')
    parser.add_argument('--crf', type=int, default=19, \
        help='h264 crf (default 19)')
    parser.add_argument('--tune', default='film', \
        help='ffmpeg tune preset [film, animation] (default is film)')
    parser.add_argument('--ns', action='store_true', default=False, \
        help='no rescaling (default is scale to 720p)')
    parser.add_argument('--rename', action='store_true', default=False, \
        help='rename file basename to name and genre from EIT file if present')
    parser.add_argument('input', metavar='input', nargs='+', \
        help='one or more files, glob style syntax')
    parser.add_argument('-f', action='store_true', default=False, \
        help='force overwrite of existing file')

    args = parser.parse_args()
    processor = ts2mkv(crf=args.crf, tune=args.tune, scaleto_720p=(not args.ns), \
        rename=args.rename)
    processor.overwrite = args.f

    for srcstr in args.input:
        # sorted for a reproducible processing order
        src = sorted(glob.glob(srcstr))
        if not src and os.path.exists(srcstr):
            # existing file whose name contains glob characters like [ or ]
            src = [srcstr]
        if not src:
            print("No input file matches: %s" % srcstr)
            continue
        for srcfile in src:
            print("Processing: %s" % srcfile)
            processor.load(srcfile)
            processor.convert()