# Mercurial > hg-public > dreambox_tools / file revision

#!/usr/bin/env python
"""
DVB-TS to MKV kung-fu
2017 by mdd

Toolkit / executable to automagically convert DVB recordings to h264 mkv.
Automatic audio stream selection
    deu: ac3, otherwise fallback to first german stream
    eng: ac3, no fallback
Automatic crop detection to remove cinematic bars
percentage + ETA for ffmpeg conversion subprocess
"""
#pylint: disable=line-too-long
#pylint: disable=invalid-name


import subprocess
import pexpect
from eit import eitinfo
import os, shlex, sys, time

def filter_lines(data, search):
    """
    Keep only those lines of a newline-separated string that contain a substring.

    data: text, lines separated by a newline character
    search: substring every returned line has to contain
    returns: the matching lines in their original order, joined by newlines
             again (empty string if no line matches)
    """
    matching = [entry for entry in data.split("\n") if search in entry]
    return "\n".join(matching)

def run_command(command):
    """
    Run a command as blocking subprocess and echo its stdout line by line.

    command: command line string; it is split with shlex, no shell is involved
    returns: exit code of the subprocess
    """
    process = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE)
    try:
        # readline() returns an empty (byte) string only at EOF. b"" equals ''
        # on Python 2 and is the real EOF marker on Python 3, where a comparison
        # against '' would never match and the loop would never end.
        for output in iter(process.stdout.readline, b""):
            line = output.strip()
            if not isinstance(line, str):
                # Python 3 delivers bytes, decode them for a readable echo
                line = line.decode("utf-8", "replace")
            print(line)
    finally:
        process.stdout.close()
    # stdout is at EOF, wait for the process to terminate and fetch its code
    return process.wait()

def run_ffmpeg_watch(command, frames_total=0):
    """
    Run ffmpeg as blocking subprocess via pexpect and show a progress line.

    Every "frame= N" status update of ffmpeg is replaced by one self-overwriting
    progress line. Output matching the "complete line" pattern is ignored, any
    other output is echoed unchanged.

    command: complete command line string, handed to pexpect.spawn
    frames_total: expected number of frames of the conversion; if > 0 the
                  progress line contains percentage and ETA, otherwise both
                  are shown as 0
    returns: exit status of the subprocess (pexpect "exitstatus" attribute)
    """
    #pylint: disable=maybe-no-member

    thread = pexpect.spawn(command)
    cpl = thread.compile_pattern_list([
        pexpect.EOF,
        r"frame= *(\d+)",
        "(.+)\n",
        '(.+)'
    ])
    percent = 0
    eta = 0
    time_start = time.time() - 0.1 # start in the past, elapsed time is never 0
    while True:
        i = thread.expect_list(cpl, timeout=None)
        if i == 0: # EOF
            print("\nffmpeg subprocess finished!")
            break
        elif i == 1:
            # ffmpeg status line, group 1 is the current frame number
            try:
                frame_number = int(thread.match.group(1))
                if frames_total > 0:
                    percent = frame_number * 100.00 / frames_total
                    # average encoding speed (frames per second) so far
                    fps = frame_number / (time.time() - time_start)
                    if fps == 0:
                        # no frame encoded yet, avoid division by zero
                        fps = 1
                    # remaining frames / speed = seconds, shown as minutes
                    eta = (frames_total - frame_number) / fps / 60
                sys.stdout.write("\rFrame %i of %i, %.1f%% done, ETA %.0f minutes, " % (
                    frame_number, frames_total, percent, eta
                ))
            except ValueError:
                sys.stdout.write(thread.match.group(0))
            sys.stdout.flush()
        # i == 2: normal newline terminated line, just ignore them...
        elif i == 3:
            unknown_line = thread.match.group(0)
            sys.stdout.write(unknown_line)
            sys.stdout.flush()
    # the subprocess is closed exactly once, after EOF was seen
    thread.close()
    return thread.exitstatus

def ffmpeg_filename(filename):
    """
    Prepare a path for use inside the ffmpeg command line string.

    filename: raw path
    returns: the path with every single quote and every blank prefixed by a
             backslash; all other characters are left untouched
    """
    escaped = filename
    for char in ("'", " "):
        escaped = escaped.replace(char, "\\" + char)
    return escaped

class ts2mkv(object):
    """
    Main worker class, contains all the magic & ffmpeg voodoo

    Usage: adjust self.config as needed, call load(filename) to analyze the
    input file and build the ffmpeg command, then call convert() to run it.
    """
    def __init__(self, crf=19, tune='film'):
        """
        crf: x264 constant rate factor, handed to ffmpeg as "-crf"
        tune: x264 tune preset, handed to ffmpeg as "-tune"
        """
        self.command = None
        self.filename = None
        self.outfilebase = None
        self.info = {}
        self.__reset()

        self.config = {
            "overwrite": False,
            "scaledown": True,
            "rename": True,
            # read by __get_audiomap() and convert(); defaults are required
            # here, otherwise using the class as a library raises KeyError
            "firstaudio": False,
            "dryrun": False,
            "video": [
                "-c:v libx264",
                "-preset faster", # slow
                "-tune %s" % tune, # film / animation
                "-crf %i" % crf, # 21, better 19
                ],
            "audio": [
                "-c:a copy",
                ]
            }

    def __reset(self):
        """
        Reset internal stuff before loading new task
        """
        self.info = {
            "msg_prepare": "",
            "msg_eit": "",
            "msg_ffmpeg": "",
            "fps": 0,
            "frames_total": 0
        }
        self.command = None
        self.filename = None
        self.outfilebase = None

    def get_stream_index(self, data):
        """
        Extract the stream specifier of the first "Stream #" entry.

        input: ffmpeg stream info string (one or more lines)
        output: ffmpeg stream mapping part like "0:1" or "0:10",
                empty string if no stream specifier is found
        side effect: appends the inspected data to the "msg_prepare" log
        """
        # function scope import: the module header does not import re
        import re

        # match all digits of the index, a fixed 3 character slice would
        # turn "0:10" into "0:1" and silently select the wrong stream
        match = re.search(r"Stream #(\d+[:.]\d+)", data)
        if match is None:
            return ""
        self.info["msg_prepare"] += "GetStreamIndex: %s\n" % data.strip()
        return match.group(1)

    def __get_movie_description(self):
        """
        looks for eit file with same basename of current filename
        parse the eit file for txt infofile and optional build new
        output filename base with movie name and genre

        output: nothing, manipulates internal variables
                (self.info["msg_eit"] and, on rename, self.outfilebase)
        """
        if not self.filename:
            return
        # read the EIT file
        filename = os.path.splitext(self.filename)[0] + ".eit"
        info = eitinfo(filename)
        self.info["msg_eit"] = info.dump()
        if not self.config["rename"] or not self.info["msg_eit"]:
            return
        # NOTE(review): info.eit is used like a dict here, get() may
        # return None for missing keys - treated like an empty value
        name = info.eit.get("name")
        if not name:
            # cancel rename, no movie title found!
            return
        genre = info.eit.get("genre")
        if genre:
            name = "%s (%s)" % (name, genre)
        # build new filename, replace characters that are problematic in
        # file names; the order of the colon replacements matters
        for old, new in (
                (' : ', ' - '),
                (': ', ' - '),
                (':', '-'),
                ('/', ''),
                ('\\', ''),
                ('?', ''),
                ('*', ''),
                ('"', '\''),
            ):
            name = name.replace(old, new)

        self.outfilebase = os.path.join(
            os.path.dirname(filename),
            name
            )


    def get_crop_option(self):
        """
        parse the ffmpeg analyze output cropdetect lines
        returns None or valid crop string for ffmpeg video filter

        Autocrop is refused (None) if a crop value is malformed, if a
        horizontal (X) crop is suggested or if more than 6 lines differ
        from the first detected crop value.
        """
        lines = filter_lines(self.info["msg_ffmpeg"], "[Parsed_cropdetect").split("\n")
        option = None
        failcount = 0
        for line in lines:
            pos = line.find(" crop=")
            if pos == -1:
                # no crop value in this line; this is also the single empty
                # line we get when there is no cropdetect output at all
                continue
            tmp = line[pos:].strip()
            # crop=1920:804:0:138
            if len(tmp.split(":")) != 4:
                print("Warning, invalid cropdetect: %s" % tmp)
                return None
            if tmp.split(":")[2] != "0":
                print("!!! X crop detected, disabling autocrop (%s)" % tmp)
                self.info["msg_prepare"] += "WARNING: cropdetect suggested X crop, disabling autocrop\n"
                return None
            if not option:
                option = tmp
            elif option != tmp:
                failcount += 1
                if failcount > 6:
                    print("!!! Crop detect is inconsistent")
                    self.info["msg_prepare"] += "WARNING: cropdetect inconsistent, disabling autocrop\n"
                    return None
        if not option:
            print("Warning, no cropdetect information found")
            return None
        self.info["msg_prepare"] += "Crop detected: %s\n" % option
        return option

    def __get_audiomap(self, info):
        """
        Select the wanted german and english audio streams from ffmpeg info

        input: the "Stream #0:" lines of the ffmpeg input information
        output: mapping list (stream specifiers), may be empty
            deu: ac3 stream, otherwise the first german stream
            eng: ac3 stream only, skipped if it reports 0 channels
            fallback: first audio stream if nothing was selected and
                      config "firstaudio" is set
        """
        audiomap = []
        audioall = filter_lines(info, "Audio:")
        audio = filter_lines(audioall, "(deu):")
        aidx = self.get_stream_index(
            filter_lines(audio, "ac3"))
        if aidx == "":
            print(audioall)
            print("No AC3 german audio stream found")
            # try to find the first german audio stream
            aidx = self.get_stream_index(audio.split("\n")[0])
            if aidx == "":
                print("No other german audio streams, trying english...")
            else:
                print("Selecting first german stream.")
                audiomap.append(aidx)
        else:
            audiomap.append(aidx)

        audio = filter_lines(audioall, "(eng):")
        english_ac3 = filter_lines(audio, "ac3")
        aidx = self.get_stream_index(english_ac3)
        if aidx != "":
            if " 0 channels" in english_ac3:
                print("Skipping english stream with 0 channels")
            else:
                # append english audio too!
                print("Selecting english ac3 stream.")
                audiomap.append(aidx)
        if not audiomap and self.config.get("firstaudio"):
            # append first audio stream as forced fallback
            aidx = self.get_stream_index(audioall)
            if aidx != "":
                print("Forcing first found audio stream: %s" % aidx)
                audiomap.append(aidx)
        return audiomap

    def __parse_info(self):
        """
        get total duration and fps from input stream
        output: sets self.info["fps"] and self.info["frames_total"]
            #  Duration: 01:39:59.88, start: 93674.825111, bitrate: 9365 kb/s
            #  Stream #0:1[0x1ff]: Video: h264 (High) ([27][0][0][0] / 0x001B), yuv420p(tv, bt709), 1920x1080 [SAR 1:1 DAR 16:9], 25 fps, 50 tbr, 90k tbn, 50 tbc
        """
        # cut off the leading "Duration: " and everything after the first comma
        tmp = filter_lines(self.info["msg_ffmpeg"], "Duration:").strip()[10:]
        tmp = tmp[0:tmp.find(",")].strip()
        print("Input duration: %s" % tmp)
        try:
            # HH:MM:SS.xx -> seconds, fractions are ignored
            self.info["frames_total"] = int(tmp[0:2]) * 3600 + \
                int(tmp[3:5]) * 60 + int(tmp[6:8])
        except ValueError:
            self.info["frames_total"] = 0

        tmp = filter_lines(self.info["msg_ffmpeg"], "Stream #0:")
        tmp = filter_lines(tmp, "Video:").split(",")
        for fps in tmp:
            if fps.strip().endswith('fps'):
                try:
                    self.info["fps"] = float(fps.strip().split(' ')[0])
                except ValueError:
                    self.info["fps"] = 0
                break
        # up to here frames_total holds the duration in seconds
        self.info["frames_total"] = round(self.info["frames_total"] * self.info["fps"], 0)
        print("Input framerate: %f fps" % self.info["fps"])
        print("Total frames of input file: %i" % (self.info["frames_total"]))


    def __get_ffmpeg_input_info(self, filename, crop_minute = 5):
        """
        Run ffmpeg for cropdetect and general input information

        filename: input file, already escaped for the command line
        crop_minute: position (in minutes) where one second is analyzed
        output: stores stdout + stderr of ffmpeg in self.info["msg_ffmpeg"],
                beginning at the "Input #0" line if there is one
        """
        cmd = [
            "ffmpeg", "-hide_banner",
            "-ss 00:%02i:00" % crop_minute, "-t 1", # seek to crop_minute, analyze 1 second
            "-i %s" % filename,
            "-vf \"cropdetect=24:2:0\"", # detect black bar crop on top and bottom
            "-f null", "-" # no output file
            ]
        p = subprocess.Popen(shlex.split(" ".join(cmd)), \
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = p.communicate()
        msg = out + "\n" + err
        start = msg.find("Input #0")
        if start != -1:
            # drop everything before the input description; if ffmpeg failed
            # and there is no such line keep the complete output for the log
            msg = msg[start:]
        self.info["msg_ffmpeg"] = msg

    def get_ffmpeg_command(self):
        """
        Analyze the input file and build the ffmpeg conversion command:
        stream selection, joining of split recordings, autocrop, scaling.

        output: list with one ffmpeg command line string,
                None on error (no filename loaded, output file exists,
                no video stream, no suitable audio stream)
        """
        # glob is only imported by the __main__ section of this file, import
        # it here so this method also works when the module is imported
        import glob

        if not self.filename:
            return None

        fn = {
            "in": ffmpeg_filename(self.filename),
            "out": self.outfilebase + ".mkv"
        }

        # double-check: pull the kill switch and exit if outfile exists already!
        # we do not want to overwrite files in accident (caused by automatic file naming)
        # (no glob here: it would interpret [ ] * ? inside the file name)
        if not self.config["overwrite"] and os.path.exists(fn["out"]):
            print("Output file exists: %s" % fn["out"])
            print("NOT overwriting it!")
            return None

        # load input file to get informations about
        self.__get_ffmpeg_input_info(fn["in"])

        # find "Stream #0:" lines
        info = filter_lines(self.info["msg_ffmpeg"], "Stream #0:")

        v = self.get_stream_index(
            filter_lines(info, "Video:"))
        if v == "":
            print("No video stream found")
            return None

        self.__parse_info()

        # find ALL subtitle streams if present!
        # Stream #0:0[0x20](deu): Subtitle: dvb_teletext ([6][0][0][0] / 0x0006), 492x250
        submap = []
        for tmp in filter_lines(info, "Subtitle: dvb_teletext").split("\n"):
            sidx = self.get_stream_index(tmp)
            if sidx:
                submap.append(sidx)
        # Subtitles disabled, that doesnt work as expected, dreambox crashes on copied subtitle stream
        submap = []

        # select audio streams
        audiomap = self.__get_audiomap(info)
        if not audiomap:
            print("No suitable audio stream found, aborting.")
            return None

        # Old dreambox images did a file split: .ts .ts.001 .ts.002 etc.
        # Find all these files and join them!
        inputs = [fn["in"]]
        if os.path.splitext(self.filename)[1].lower() == '.ts':
            # glob returns the matches in arbitrary order, the parts have to
            # be joined in ascending order
            for tmp in sorted(glob.glob(self.filename + '.' + ('[0-9]' * 3))):
                inputs.append(ffmpeg_filename(tmp))

        if len(inputs) > 1:
            # use ffmpeg input concat function
            # attention, ffmpeg concat protocol doesnt like escape sequences
            inputs = [tmp.replace(r"\ ", " ").replace(r"\'", "'") for tmp in inputs]

            fn["in"] = "\"concat:" + "|".join(inputs) + "\""
            # we only know the length of the first file, so estimate the total
            # frame count by scaling with the ratio of the input file sizes
            totalbytes = 0.0
            for tmp in inputs:
                totalbytes += os.path.getsize(tmp)
            print("estimating total frames for ETA based on file sizes (we have multiple inputs here)")
            firstbytes = os.path.getsize(inputs[0])
            if firstbytes > 0:
                self.info["frames_total"] *= totalbytes / firstbytes

        for idx, tmp in enumerate(inputs):
            self.info["msg_prepare"] += "Input file #%i: %s\n" % (
                idx, os.path.basename(tmp))

        cmd = [
            "ffmpeg", "-hide_banner",
            "-i %s" % fn["in"],
            ]

        if self.config["overwrite"]:
            cmd.append("-y")

        for tmp in submap:
            self.info["msg_prepare"] += "Subtitle Stream selected: Stream #%s\n" % tmp
            cmd.append("-map %s" % tmp)

        cmd.append("-map %s" % v)
        self.info["msg_prepare"] += "Video Stream selected: Stream #%s\n" % v

        flt = []
        crop = self.get_crop_option()
        if not crop:
            # load input file to get informations about
            # scan to other position and try again
            print("Scanning again for autocrop...")
            self.info["msg_prepare"] += "Rescan autocrop on other position in input stream...\n"
            self.__get_ffmpeg_input_info(fn["in"], 9)
            crop = self.get_crop_option()

        if crop:
            flt.append(crop)
        if self.config["scaledown"]:
            # -2 ensures division by two for codec
            # (the quotes protect the comma inside min() from the filter parser)
            flt.append("scale='min(1280,iw)':-2")
            self.info["msg_prepare"] += "Scaling output stream to 720p if width >1280\n"
        if flt:
            # append video filters
            cmd.append('-filter:v "%s"' % ",".join(flt))

        for tmp in audiomap:
            self.info["msg_prepare"] += "Audio Stream selected: Stream #%s\n" % tmp
            cmd.append("-map %s" % tmp)
        if submap:
            cmd.append("-c:s dvdsub")
        cmd.extend(self.config["video"])
        cmd.extend(self.config["audio"])
        cmd.append(ffmpeg_filename(fn["out"]))

        return [" ".join(cmd)]

    def load(self, filename):
        """
        First step: setup, analyze & prepare for conversion

        filename: path of the recording to convert
        output: nothing, self.command holds the ffmpeg command list
                afterwards (None if the preparation failed)
        """
        self.__reset()

        self.filename = filename
        self.outfilebase = os.path.splitext(filename)[0]
        self.__get_movie_description()
        self.command = self.get_ffmpeg_command()

    def convert(self):
        """
        Second step: write info text file and start ffmpeg conversion
        requires successful load as first step
        returns ffmpeg conversion exit status
            (None if there is nothing to convert, 0 on dry-run)
        """
        if not self.command:
            return None
        if not self.info["msg_eit"]:
            self.info["msg_eit"] = "No EIT file found, sorry - no description"
        dryrun = self.config.get("dryrun", False)
        if not dryrun:
            # description + debug information next to the output file
            with open(self.outfilebase + ".txt", "wb") as fd:
                fd.write(self.info["msg_eit"])
                fd.write("\n\n# ---DEBUG---\n\n")
                fd.write(self.info["msg_prepare"])
                fd.write(self.info["msg_ffmpeg"])

        # the command list holds a single entry, the exit status of the
        # first command is returned immediately
        for cmd in self.command:
            print("Executing ffmpeg:\n%s\n" % cmd)
            if dryrun:
                return 0
            return run_ffmpeg_watch(cmd, frames_total=self.info["frames_total"])


if __name__ == "__main__":
    # Command line interface: convert every given recording, optionally
    # move the processed source files to another base directory afterwards.
    import argparse
    import glob

    parser = argparse.ArgumentParser(description='DVB-TS to MKV kung-fu')
    parser.add_argument('--crf', type=int, default=19,
        help='h264 crf (default 19)')
    parser.add_argument('--tune', default='film',
        help='ffmpeg tune preset [film, animation] (default is film)')
    parser.add_argument('--ns', action='store_true', default=False,
        help='no rescaling (default is scale to 720p)')
    parser.add_argument('-f', action='store_true', default=False,
        help='force overwrite of existing file')
    parser.add_argument('--fa', action='store_true', default=False,
        help='force use first audio stream')
    parser.add_argument('--rename', action='store_true', default=False,
        help='rename file basename to name and genre from EIT file if present')
    parser.add_argument('--moveto', default='',
        help='specify base directory to move processed files to')
    parser.add_argument('--dryrun', action='store_true', default=False,
        help='Dry-run, dont touch anything')
    parser.add_argument('input', metavar='input', nargs='+',
        help='one or more files, glob style syntax')

    args = parser.parse_args()
    processor = ts2mkv(crf=args.crf, tune=args.tune)
    processor.config["scaledown"] = not args.ns
    processor.config["rename"] = args.rename
    processor.config["overwrite"] = args.f
    processor.config["firstaudio"] = args.fa
    processor.config["dryrun"] = args.dryrun

    # expand the glob patterns ourselves, the shell may not have done it
    src = []
    for srcstr in args.input:
        src.extend(glob.glob(srcstr))

    for idx, srcfile in enumerate(src, 1):
        print("\nProcessing file %i/%i: %s" % (idx, len(src), srcfile))
        processor.load(srcfile)
        exitcode = processor.convert()
        if exitcode is None:
            # load() produced no command (see messages above), ffmpeg never ran
            print("Skipped, nothing converted.")
            continue
        if exitcode != 0:
            print("ERROR while executing ffmpeg!")
            continue

        print("Successful conversion.")
        if not args.moveto:
            continue
        # move all files sharing the basename of the source file, keeping
        # the relative directory below the --moveto base directory
        mvlist = glob.glob(os.path.splitext(srcfile)[0] + ".*")
        mvsource = os.path.dirname(srcfile)
        mvtarget = os.path.join(
            args.moveto, mvsource.replace('../', ''))
        print("Moving processed files from %s to %s" % (
            mvsource, mvtarget))
        # the target directory already exists from the second file of the
        # same directory on, os.makedirs would raise OSError then
        if not args.dryrun and not os.path.isdir(mvtarget):
            os.makedirs(mvtarget)
        for mvsrc in mvlist:
            mvfn = os.path.basename(mvsrc)
            # conversion results stay where they are
            if os.path.splitext(mvfn)[1] in ['.txt', '.mkv', '.nfo']:
                continue
            print(mvfn)
            if not args.dryrun:
                os.rename(
                    os.path.join(mvsource, mvfn),
                    os.path.join(mvtarget, mvfn))