Thu, 30 Nov 2017 00:20:52 +0100
Division-by-zero bug when frame errors occur at the beginning of the video stream
#!/usr/bin/env python
"""
DVB-TS to MKV kung-fu
2017 by mdd

Toolkit / executable to automagically convert DVB recordings to h264 mkv.

Automatic audio stream selection
    deu: ac3, otherwise fallback to first german stream
    eng: ac3, no fallback
Automatic crop detection to remove cinematic bars
percentage + ETA for ffmpeg conversion subprocess
"""

#pylint: disable=line-too-long
#pylint: disable=invalid-name

import argparse
import glob
import os
import re
import shlex
import subprocess
import sys
import time

import pexpect

from eit import eitinfo

# Matches the "<file>:<stream>" specifier in ffmpeg info lines such as
# "Stream #0:1[0x1ff]: Video: ...". Stream numbers may have several digits.
_STREAM_INDEX_RE = re.compile(r"Stream #(\d+:\d+)")


def _glob_escape(pathname):
    """ Wrap glob wildcards (* ? [) in brackets so pathname is matched literally """
    return re.sub(r"([*?\[])", r"[\1]", pathname)


def filter_lines(data, search):
    """
    input: data = newline separated string
    output: all lines where search is found, joined by newlines again
    """
    return "\n".join(line for line in data.split("\n") if search in line)


def run_command(command):
    """
    Run command as blocking subprocess and echo its stdout line by line.
    input: command = shell-like command string (split with shlex)
    output: exit code of the subprocess
    """
    process = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE)
    while True:
        output = process.stdout.readline()
        # an empty read means stdout is closed; stop once the process is gone too
        if not output and process.poll() is not None:
            break
        if output:
            print(output.strip())
    return process.poll()


def run_ffmpeg_watch(command, frames_total=0):
    """
    Run command via pexpect as blocking subprocess and mirror its output.
    If frames_total > 0 the ffmpeg "frame= N" status is parsed and replaced
    by a progress line with percentage and ETA.
    input: command = command string passed to pexpect.spawn
           frames_total = expected number of frames (0 disables the ETA)
    output: exit status of the subprocess
            (None if pexpect reports no exit status, e.g. killed by signal)
    """
    #pylint: disable=maybe-no-member
    thread = pexpect.spawn(command)
    cpl = thread.compile_pattern_list([
        pexpect.EOF,
        r"frame= *(\d+)",
        "(.+)\n",
        '(.+)'
    ])
    percent = 0
    eta = 0
    time_start = time.time() - 0.1  # start in the past, elapsed time is never zero
    while True:
        i = thread.expect_list(cpl, timeout=None)
        if i == 0:  # EOF
            print("\nffmpeg subprocess finished!")
            break
        elif i == 1:
            try:
                frame_number = int(thread.match.group(1))
            except ValueError:
                sys.stdout.write(thread.match.group(0))
            else:
                if frames_total > 0:
                    percent = frame_number * 100.00 / frames_total
                    elapsed = max(time.time() - time_start, 0.1)
                    rate = frame_number / elapsed  # frames per second so far
                    if rate == 0:
                        # no frame encoded yet (e.g. frame errors at stream
                        # start): avoid the division by zero below
                        rate = 1
                    # never show a negative ETA if the frame estimate was too low
                    eta = max(0, (frames_total - frame_number) / rate / 60)
                sys.stdout.write("\rFrame %i of %i, %.1f%% done, ETA %.0f minutes, " % (
                    frame_number, frames_total, percent, eta
                ))
            sys.stdout.flush()
        # i == 2: normal newline terminated line, just ignore them...
        elif i == 3:
            sys.stdout.write(thread.match.group(0))
            sys.stdout.flush()
    # closing after EOF makes pexpect collect the exit status of the child
    thread.close()
    return thread.exitstatus


def ffmpeg_filename(filename):
    """
    Escape filename path contents for the ffmpeg command string.
    The command string is tokenized with shlex later on, so backslashes,
    quotes and spaces have to be backslash-escaped to survive as one token.
    """
    fn = filename.replace("\\", "\\\\")  # must be first, the others add backslashes
    fn = fn.replace("'", r"\'")
    fn = fn.replace('"', r'\"')
    fn = fn.replace(" ", r"\ ")
    return fn


class ts2mkv(object):
    """
    Main worker class, contains all the magic & ffmpeg voodoo
    Usage: load(filename) to analyze the input, then convert() to run ffmpeg.
    """
    def __init__(self, crf=19, tune='film'):
        """
        input: crf = x264 constant rate factor, tune = x264 tune preset
        """
        self.command = None
        self.filename = None
        self.outfilebase = None
        self.info = {}
        self.__reset()
        self.config = {
            "overwrite": False,
            "scaledown": True,
            "rename": True,
            "firstaudio": False,  # force first audio stream if nothing else matches
            "video": [
                "-c:v libx264",
                "-preset faster",  # slow
                "-tune %s" % tune,  # film / animation
                "-crf %i" % crf,  # 21, better 19
            ],
            "audio": [
                "-c:a copy",
            ]
        }

    def __reset(self):
        """
        Reset internal stuff before loading new task
        """
        self.info = {
            "msg_prepare": "",
            "msg_eit": "",
            "msg_ffmpeg": "",
            "fps": 0,
            "frames_total": 0
        }
        self.command = None
        self.filename = None
        self.outfilebase = None

    def get_stream_index(self, data):
        """
        input: ffmpeg stream info string (first "Stream #" entry is used)
        output: ffmpeg stream mapping part like "0:1", empty string if none found
        Every successful lookup is appended to the msg_prepare debug log.
        """
        match = _STREAM_INDEX_RE.search(data)
        if match is None:
            return ""
        self.info["msg_prepare"] += "GetStreamIndex: %s\n" % data.strip()
        return match.group(1)

    def __get_movie_description(self):
        """
        looks for eit file with same basename of current filename
        parse the eit file for txt infofile and optional build new
        output filename base with movie name and genre
        output: nothing, manipulates internal variables
        """
        if not self.filename:
            return
        # read the EIT file
        # NOTE(review): eitinfo is a project helper; this code relies on it
        # yielding an empty dump when there is no usable .eit file - confirm
        filename = os.path.splitext(self.filename)[0] + ".eit"
        info = eitinfo(filename)
        self.info["msg_eit"] = info.dump()
        if not self.config["rename"] or not self.info["msg_eit"]:
            return
        name = info.eit.get("name")
        if not name:
            # cancel rename, no movie title found!
            return
        genre = info.eit.get("genre")
        if genre:
            name = "%s (%s)" % (name, genre)
        # build new filename: replace / strip characters that are awkward in
        # file names, the order matters for the colon variants
        for old, new in (
                (' : ', ' - '),
                (': ', ' - '),
                (':', '-'),
                ('/', ''),
                ('\\', ''),
                ('?', ''),
                ('*', ''),
                ('"', '\''),
        ):
            name = name.replace(old, new)
        self.outfilebase = os.path.join(os.path.dirname(filename), name)

    def get_crop_option(self):
        """
        parse the ffmpeg analyze output cropdetect lines
        returns None or valid crop string for ffmpeg video filter
        The first detected crop value is the reference; autocrop is disabled
        if more than 12 later lines disagree with it.
        """
        lines = filter_lines(self.info["msg_ffmpeg"], "[Parsed_cropdetect").split("\n")
        option = None
        failcount = 0
        for line in lines:
            pos = line.find(" crop=")
            if pos == -1:
                # no crop value on this line (or no cropdetect output at all)
                continue
            tmp = line[pos:].strip()
            if not option:
                option = tmp
            elif option != tmp:
                failcount += 1
        if failcount > 12:
            print("!!! Crop detect is inconsistent")
            self.info["msg_prepare"] += "WARNING: cropdetect >50% inconsistent over scan time, disabling autocrop\n"
            return None
        if not option:
            return None
        self.info["msg_prepare"] += "Crop detected: %s\n" % option
        return option

    def __get_audiomap(self, info):
        """
        Select the wanted german and english audio streams from ffmpeg info
        input: ffmpeg "Stream #0:" info lines
        output: mapping list (may be empty)
        """
        audiomap = []
        audioall = filter_lines(info, "Audio:")

        # german: prefer ac3, otherwise take the first german stream
        audio = filter_lines(audioall, "(deu):")
        aidx = self.get_stream_index(filter_lines(audio, "ac3"))
        if aidx == "":
            print(audioall)
            print("No AC3 german audio stream found")
            # try to find the first german audio stream
            aidx = self.get_stream_index(audio.split("\n")[0])
            if aidx == "":
                print("No other german audio streams, trying english...")
            else:
                print("Selecting first german stream.")
                audiomap.append(aidx)
        else:
            audiomap.append(aidx)

        # english: ac3 only, and only a stream that really carries channels
        audio = filter_lines(audioall, "(eng):")
        eng_ac3 = [line for line in filter_lines(audio, "ac3").split("\n") if line]
        usable = [line for line in eng_ac3 if " 0 channels" not in line]
        aidx = self.get_stream_index(usable[0]) if usable else ""
        if aidx != "":
            # append english audio too!
            print("Selecting english ac3 stream.")
            audiomap.append(aidx)
        elif eng_ac3 and not usable:
            print("Skipping english stream with 0 channels")

        if len(audiomap) == 0 and self.config.get("firstaudio"):
            # append first audio stream as forced fallback
            aidx = self.get_stream_index(audioall)
            if aidx != "":
                print("Forcing first found audio stream: %s" % aidx)
                audiomap.append(aidx)
        return audiomap

    def __parse_info(self):
        """
        get total duration and fps from input stream
        output: sets self.info["fps"] and self.info["frames_total"]

        #  Duration: 01:39:59.88, start: 93674.825111, bitrate: 9365 kb/s
        #    Stream #0:1[0x1ff]: Video: h264 (High) ([27][0][0][0] / 0x001B), yuv420p(tv, bt709), 1920x1080 [SAR 1:1 DAR 16:9], 25 fps, 50 tbr, 90k tbn, 50 tbc
        """
        # cut off the leading "Duration: " and everything after the first comma
        duration = filter_lines(self.info["msg_ffmpeg"], "Duration:").strip()[10:]
        duration = duration.partition(",")[0].strip()
        print("Input duration: %s" % duration)
        try:
            seconds = int(duration[0:2]) * 3600 + \
                int(duration[3:5]) * 60 + int(duration[6:8])
        except ValueError:
            # e.g. "N/A" or no Duration line at all
            seconds = 0

        video = filter_lines(self.info["msg_ffmpeg"], "Stream #0:")
        video = filter_lines(video, "Video:")
        for part in video.split(","):
            part = part.strip()
            if part.endswith('fps'):
                try:
                    self.info["fps"] = float(part.split(' ')[0])
                except ValueError:
                    self.info["fps"] = 0
                break

        self.info["frames_total"] = round(seconds * self.info["fps"], 0)
        print("Input framerate: %f fps" % self.info["fps"])
        print("Total frames of input file: %i" % (self.info["frames_total"]))

    def __get_ffmpeg_input_info(self, filename):
        """
        Run ffmpeg for cropdetect and general input information
        input: filename, already escaped for the command string
        output: stores the ffmpeg output (from "Input #0" on) in self.info["msg_ffmpeg"]
        """
        cmd = [
            "ffmpeg", "-hide_banner",
            "-ss 00:05:00", "-t 2",  # search to 5 minutes, analyze 2 seconds
            "-i %s" % filename,
            "-vf \"cropdetect=24:2:0\"",  # detect black bar crop on top and bottom
            "-f null", "-"  # no output file
        ]
        p = subprocess.Popen(shlex.split(" ".join(cmd)),
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = p.communicate()
        msg = out + "\n" + err
        self.info["msg_ffmpeg"] = msg[msg.find("Input #0"):]

    def get_ffmpeg_command(self):
        """
        Too complex to describe, this does all the magic:
        analyze the input, select video / audio streams, collect split files,
        set up crop / scale filters and build the ffmpeg command string.
        output: list with one ffmpeg command string, None on error
        """
        if not self.filename:
            return None

        fn = {
            "in": ffmpeg_filename(self.filename),
            "out": self.outfilebase + ".mkv"
        }

        # double-check: pull the kill switch and exit if outfile exists already!
        # we do not want to overwrite files in accident (caused by automatic file naming)
        if not self.config["overwrite"] and os.path.exists(fn["out"]):
            print("Output file exists: %s" % fn["out"])
            print("NOT overwriting it!")
            return None

        # load input file to get informations about
        self.__get_ffmpeg_input_info(fn["in"])

        # find "Stream #0:" lines
        info = filter_lines(self.info["msg_ffmpeg"], "Stream #0:")

        v = self.get_stream_index(filter_lines(info, "Video:"))
        if v == "":
            print("No video stream found")
            return None

        self.__parse_info()

        # Subtitle streams would look like this:
        # Stream #0:0[0x20](deu): Subtitle: dvb_teletext ([6][0][0][0] / 0x0006), 492x250
        # Subtitles disabled, that doesnt work as expected, dreambox crashes on copied subtitle stream
        submap = []

        # select audio streams
        audiomap = self.__get_audiomap(info)
        if len(audiomap) == 0:
            print("No suitable audio stream found, aborting.")
            return None

        # Old dreambox images did a file split: .ts .ts.001 .ts.002 etc.
        # Find all these files and join them!
        # glob returns matches in arbitrary order, sort to keep the parts in sequence
        inputs = [self.filename]
        if os.path.splitext(self.filename)[1].lower() == '.ts':
            pattern = _glob_escape(self.filename) + '.' + ('[0-9]' * 3)
            inputs.extend(sorted(glob.glob(pattern)))

        if len(inputs) > 1:
            # use ffmpeg input concat function
            # attention, ffmpeg concat protocol doesnt like escape sequences,
            # so the raw paths go into one double quoted token
            # (only backslash and double quote need escaping in there)
            joined = "|".join(inputs).replace("\\", "\\\\").replace('"', '\\"')
            fn["in"] = "\"concat:" + joined + "\""
            # no ETA calculation possible since we have only the length of first file
            # we could estimate by multiplying with factor generated by input file sizes
            first_size = os.path.getsize(inputs[0])
            if first_size > 0:
                totalbytes = float(sum(os.path.getsize(path) for path in inputs))
                print("estimating total frames for ETA based on file sizes (we have multiple inputs here)")
                self.info["frames_total"] *= totalbytes / first_size

        for idx, path in enumerate(inputs):
            self.info["msg_prepare"] += "Input file #%i: %s\n" % (
                idx, os.path.basename(path))

        cmd = [
            "ffmpeg", "-hide_banner",
            "-i %s" % fn["in"],
        ]

        if self.config["overwrite"]:
            cmd.append("-y")

        for tmp in submap:
            self.info["msg_prepare"] += "Subtitle Stream selected: Stream #%s\n" % tmp
            cmd.append("-map %s" % tmp)

        cmd.append("-map %s" % v)
        self.info["msg_prepare"] += "Video Stream selected: Stream #%s\n" % v

        flt = []
        crop = self.get_crop_option()
        if crop:
            flt.append(crop)
        if self.config["scaledown"]:
            # -2 ensures division by two for codec
            flt.append("scale='min(1280,iw)':-2")
            self.info["msg_prepare"] += "Scaling output stream to 720p if width >1280\n"
        if len(flt) > 0:
            # append video filters
            cmd.append('-filter:v "%s"' % ",".join(flt))

        for tmp in audiomap:
            self.info["msg_prepare"] += "Audio Stream selected: Stream #%s\n" % tmp
            cmd.append("-map %s" % tmp)

        if len(submap) > 0:
            cmd.append("-c:s dvdsub")
        cmd.extend(self.config["video"])
        cmd.extend(self.config["audio"])
        cmd.append(ffmpeg_filename(fn["out"]))

        return [" ".join(cmd)]

    def load(self, filename):
        """
        First step: setup, analyze & prepare for conversion
        input: filename of the recording
        output: nothing, self.command stays None if preparation failed
        """
        self.__reset()
        self.filename = filename
        self.outfilebase = os.path.splitext(filename)[0]
        self.__get_movie_description()
        self.command = self.get_ffmpeg_command()

    def convert(self):
        """
        Second step: write info text file and start ffmpeg conversion
        requires successful load as first step
        returns ffmpeg conversion exit status, None if nothing was loaded
        """
        if not self.command:
            return None
        if not self.info["msg_eit"]:
            self.info["msg_eit"] = "No EIT file found, sorry - no description"
        with open(self.outfilebase + ".txt", "wb") as fd:
            fd.write(self.info["msg_eit"])
            fd.write("\n\n# ---DEBUG---\n\n")
            fd.write(self.info["msg_prepare"])
            fd.write(self.info["msg_ffmpeg"])

        status = None
        for cmd in self.command:
            print("Executing ffmpeg:\n%s\n" % cmd)
            status = run_ffmpeg_watch(cmd, frames_total=self.info["frames_total"])
            if status != 0:
                # do not continue with further commands after a failure
                break
        return status


def main():
    """
    Parse command line options and convert all matching input files
    """
    parser = argparse.ArgumentParser(description='DVB-TS to MKV kung-fu')
    parser.add_argument('--crf', type=int, default=19,
                        help='h264 crf (default 19)')
    parser.add_argument('--tune', default='film',
                        help='ffmpeg tune preset [film, animation] (default is film)')
    parser.add_argument('--ns', action='store_true', default=False,
                        help='no rescaling (default is scale to 720p)')
    parser.add_argument('--rename', action='store_true', default=False,
                        help='rename file basename to name and genre from EIT file if present')
    parser.add_argument('input', metavar='input', nargs='+',
                        help='one or more files, glob style syntax')
    parser.add_argument('-f', action='store_true', default=False,
                        help='force overwrite of existing file')
    parser.add_argument('--fa', action='store_true', default=False,
                        help='use first audio stream found')

    args = parser.parse_args()
    processor = ts2mkv(crf=args.crf, tune=args.tune)
    processor.config["scaledown"] = not args.ns
    processor.config["rename"] = args.rename
    processor.config["overwrite"] = args.f
    processor.config["firstaudio"] = args.fa

    for srcstr in args.input:
        for srcfile in glob.glob(srcstr):
            print("Processing: %s" % srcfile)
            processor.load(srcfile)
            processor.convert()


if __name__ == "__main__":
    main()