Tue, 28 Nov 2017 19:02:01 +0100
some changes, also implemented ffmpeg progress info and added force overwrite mode
#!/usr/bin/env python
"""
DVB-TS to MKV kung-fu
2017 by mdd

Toolkit / executable to automagically convert DVB recordings to h264 mkv.
Automatic audio stream selection (deu/eng)
Automatic crop detection to remove cinematic bars
"""

import argparse
import glob
import os
import re
import shlex
import subprocess
import sys
import time

import pexpect

from eit import readeit, eitinfo


# Stream specifier inside an ffmpeg info line such as "Stream #0:12[0x1ff]: ...".
_STREAM_RE = re.compile(r"Stream #(\d+:\d+)")
# Raw duration token of the "Duration: 01:39:59.88, start: ..." line.
_DURATION_RE = re.compile(r"Duration:\s*([^,\s]+)")
# Leading HH:MM:SS part of a duration token.
_HMS_RE = re.compile(r"(\d+):(\d+):(\d+)")
# Frame rate of a video stream line, e.g. "..., 25 fps, 50 tbr, ...".
_FPS_RE = re.compile(r"(\d+(?:\.\d+)?)\s+fps\b")
# Characters that must be backslash-escaped so that both shlex.split() and
# pexpect's command line splitter keep a path together as one argument.
_ESCAPE_RE = re.compile(r"""([\\'"\s])""")


def _to_text(data):
    """Return data as native str; bytes read from pipes (Python 3) are decoded."""
    if isinstance(data, str):
        return data
    return data.decode("utf-8", "replace")


def _to_bytes(text):
    """Return text as bytes, suitable for a file opened in binary mode."""
    if isinstance(text, bytes):
        return text
    return text.encode("utf-8")


def filter_lines(data, search):
    """
    input: data = \\n separated string
    output: \\n separated string containing all lines where search is found
            (empty string if no line matches)
    """
    return "\n".join(line for line in data.split("\n") if search in line)


def run_command(command):
    """
    run command as blocking subprocess and echo its stdout line by line
    returns the exit code of the process
    """
    process = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE)
    # readline() returns an empty (byte) string only at EOF
    for line in iter(process.stdout.readline, b""):
        print(_to_text(line).strip())
    process.stdout.close()
    return process.wait()


def _format_progress(frame_number, frames_total, elapsed):
    """
    build the status line prefix that replaces ffmpeg's "frame= N" output
    requires frames_total > 0, frame_number > 0 and elapsed > 0
    """
    percent = frame_number * 100.0 / frames_total
    frames_per_second = frame_number / float(elapsed)
    # the frame total is only an estimate, never show a negative ETA
    frames_left = max(frames_total - frame_number, 0)
    eta_minutes = frames_left / frames_per_second / 60
    return "\rFrame %i of %i, %.1f%% done, ETA %.0f minutes, " % (
        frame_number, frames_total, percent, eta_minutes)


def run_ffmpeg_watch(command, frames_total=0):
    """
    run command as blocking subprocess in a pty and echo its output
    if frames_total > 0 parse ffmpeg status line and insert ETA at line start
    before output, otherwise the output is passed through unchanged
    returns the exit status of the process (None if it was killed by a signal)
    """
    child = pexpect.spawn(command)
    patterns = child.compile_pattern_list([
        pexpect.EOF,
        r"frame= *(\d+)",
        r"(.+)",
    ])
    time_start = time.time() - 0.1  # start in the past, avoids elapsed == 0
    while True:
        index = child.expect_list(patterns, timeout=None)
        if index == 0:  # EOF
            print("the sub process exited")
            break
        text = _to_text(child.match.group(0))
        if index == 1 and frames_total > 0:
            frame_number = int(child.match.group(1))
            # no speed estimate is possible before the first encoded frame
            if frame_number > 0:
                text = _format_progress(
                    frame_number, frames_total, time.time() - time_start)
        sys.stdout.write(text)
        sys.stdout.flush()
    # close() reaps the child; only afterwards is exitstatus populated
    child.close()
    return child.exitstatus


def ffmpeg_filename(filename):
    """
    Escape filename path contents for ffmpeg shell command
    Backslashes, quotes and whitespace get a leading backslash, so the path
    survives command line splitting as a single argument.
    """
    return _ESCAPE_RE.sub(r"\\\1", filename)


class ts2mkv(object):
    """
    Main worker class, contains all the magic & ffmpeg voodoo
    """

    # Copying dvb_teletext subtitle streams is disabled by default: that
    # doesnt work as expected, dreambox crashes on copied subtitle stream.
    copy_subtitles = False

    # Characters that are replaced when a file name is built from EIT data,
    # applied in this order.
    _name_replacements = (
        (' : ', ' - '),
        (': ', ' - '),
        (':', '-'),
        ('/', ''),
        ('\\', ''),
        ('?', ''),
        ('*', ''),
        ('\"', '\''),
    )

    def __init__(self, crf=19, tune='film', scaleto_720p=True, rename=False):
        """
        crf: x264 constant rate factor
        tune: x264 tune preset (film / animation)
        scaleto_720p: limit the output width to 1280 pixels
        rename: name the output after movie name and genre from the EIT file
        """
        self.msg_prepare = ""
        self.msg_eit = ""
        self.msg_ffmpeg = ""
        self.command = None
        self.filename = None
        self.outfilebase = None
        self.fps = 0
        self.frames_total = 0
        self.overwrite = False
        self.scaleto_720p = scaleto_720p
        self.rename = rename

        self.video_options = [
            "-c:v libx264",
            "-preset faster",  # slow
            "-tune %s" % tune,  # film / animation
            "-crf %i" % crf,  # 21, better 19
        ]
        self.audio_options = [
            "-c:a copy",
        ]

    def get_stream_index(self, data):
        """
        input: ffmpeg stream info string
        output: ffmpeg stream mapping part (like "0:1") of the first stream
                found in data, empty string if there is none
        """
        match = _STREAM_RE.search(data)
        if match is None:
            return ""
        self.msg_prepare += "Selecting: %s\n" % data
        return match.group(1)

    def get_movie_description(self):
        """
        looks for eit file with same basename of current filename
        parse the eit file for txt infofile and optional
        build new output filename base with movie name and genre

        output: nothing, manipulates internal variables
        """
        if not self.filename:
            return
        # read the EIT file
        filename = os.path.splitext(self.filename)[0] + ".eit"
        self.msg_eit = readeit(filename)
        if not self.rename or not self.msg_eit:
            return
        info = eitinfo(filename)
        name = info.eit.get("name")
        if not name:
            # cancel rename, no movie title found!
            return
        genre = info.eit.get("genre")
        if genre:
            name = "%s (%s)" % (name, genre)
        # build new filename
        for old, new in self._name_replacements:
            name = name.replace(old, new)
        self.outfilebase = os.path.join(os.path.dirname(filename), name)

    def get_crop_option(self):
        """
        parse the ffmpeg analyze output cropdetect lines
        returns None or valid crop string for ffmpeg video filter
        """
        lines = filter_lines(self.msg_ffmpeg, "[Parsed_cropdetect").split("\n")
        option = None
        failcount = 0
        for line in lines:
            pos = line.find(" crop=")
            if pos == -1:
                # not a cropdetect result line
                continue
            candidate = line[pos:].strip()
            if option is None:
                option = candidate
            elif candidate != option:
                failcount += 1
                if failcount > 12:
                    print("!!! Crop detect is inconsistent")
                    self.msg_prepare += "WARNING: cropdetect >50% inconsistent over scan time, disabling autocrop\n"
                    return None
        if option is None:
            return None
        self.msg_prepare += "Crop detected: %s\n" % option
        return option

    def __get_audiomap(self, info):
        """
        Select the wanted german and english audio streams from ffmpeg info
        output: mapping list
        """
        audiomap = []
        audioall = filter_lines(info, "Audio:")

        audio = filter_lines(audioall, "(deu):")
        aidx = self.get_stream_index(filter_lines(audio, "ac3"))
        if aidx:
            audiomap.append(aidx)
        else:
            print(audioall)
            print("No AC3 german audio stream found")
            # try to find the first german audio stream
            aidx = self.get_stream_index(audio.split("\n")[0])
            if aidx:
                print("Selecting first german stream.")
                audiomap.append(aidx)
            else:
                print("No other german audio streams, trying english...")

        audio = filter_lines(audioall, "(eng):")
        english_ac3 = filter_lines(audio, "ac3")
        aidx = self.get_stream_index(english_ac3)
        if aidx and " 0 channels " not in english_ac3:
            # append english audio too!
            print("Selecting english ac3 stream.")
            audiomap.append(aidx)
        return audiomap

    def _analyze_input(self):
        """
        run a short ffmpeg cropdetect pass over the input file
        returns the ffmpeg output starting at the "Input #0" line
        """
        cmd = [
            "ffmpeg", "-hide_banner",
            "-ss", "00:05:00", "-t", "2",  # search to 5 minutes, analyze 2 seconds
            "-i", self.filename,
            "-vf", "cropdetect=24:2:0",  # detect black bar crop on top and bottom
            "-f", "null", "-",  # no output file
        ]
        process = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = process.communicate()
        text = _to_text(out) + "\n" + _to_text(err)
        start = text.find("Input #0")
        # keep everything if the marker is missing, it is still useful as log
        return text[start:] if start != -1 else text

    def _parse_total_frames(self, info):
        """
        get total duration and fps from input stream info, example:

        Duration: 01:39:59.88, start: 93674.825111, bitrate: 9365 kb/s
        Stream #0:1[0x1ff]: Video: h264 (High) (...), 1920x1080 [...], 25 fps, 50 tbr, 90k tbn, 50 tbc

        output: nothing, sets self.fps and self.frames_total (0 if unknown)
        """
        match = _DURATION_RE.search(self.msg_ffmpeg)
        duration = match.group(1) if match else ""
        print("Input duration: %s" % duration)
        seconds = 0
        hms = _HMS_RE.match(duration)
        if hms:
            hours, minutes, secs = (int(part) for part in hms.groups())
            seconds = hours * 3600 + minutes * 60 + secs

        fps = _FPS_RE.search(filter_lines(info, "Video:"))
        if fps:
            self.fps = float(fps.group(1))
        self.frames_total = round(seconds * self.fps, 0)
        print("Input framerate: %f fps" % self.fps)
        print("Total frames of input file: %i" % self.frames_total)

    def _find_split_parts(self):
        """
        Old dreambox images did a file split: .ts .ts.001 .ts.002 etc.
        returns the paths of all these continuation files in playback order
        """
        dirname, basename = os.path.split(self.filename)
        part_re = re.compile(re.escape(basename) + r"\.[0-9]{3}\Z")
        try:
            names = os.listdir(dirname or os.curdir)
        except OSError:
            return []
        return [os.path.join(dirname, name)
                for name in sorted(names) if part_re.match(name)]

    def get_ffmpeg_command(self):
        """
        Too complex to describe, this does all the magic:
        analyze the input, select video / audio streams, detect crop,
        join split recordings and assemble the conversion command line
        output: list of ffmpeg command strings, None on error
        """
        if not self.filename:
            return None

        outfn = self.outfilebase + ".mkv"
        # double-check: pull the kill switch and exit if outfile exists already!
        # we do not want to overwrite files in accident (caused by automatic file naming)
        if not self.overwrite and os.path.exists(outfn):
            print("Output file exists: %s" % outfn)
            print("NOT overwriting it!")
            return None

        self.msg_ffmpeg = self._analyze_input()
        # find "Stream #0:" lines
        info = filter_lines(self.msg_ffmpeg, "Stream #0:")

        v = self.get_stream_index(filter_lines(info, "Video:"))
        if v == "":
            print("No video stream found")
            return None

        self._parse_total_frames(info)

        # copy ALL subtitle streams if present (only when explicitly enabled)
        # Stream #0:0[0x20](deu): Subtitle: dvb_teletext ([6][0][0][0] / 0x0006), 492x250
        submap = []
        if self.copy_subtitles:
            for line in filter_lines(info, "Subtitle: dvb_teletext").split("\n"):
                sidx = self.get_stream_index(line)
                if sidx:
                    submap.append(sidx)

        # select audio streams
        audiomap = self.__get_audiomap(info)
        if not audiomap:
            print("No suitable audio stream found, aborting.")
            return None

        # Find all split files of the recording and join them!
        inputs = [self.filename]
        if os.path.splitext(self.filename)[1].lower() == '.ts':
            inputs.extend(self._find_split_parts())

        if len(inputs) > 1:
            # use ffmpeg input concat function
            source = "concat:" + "|".join(inputs)
            # no ETA calculation possible since we have only the length of first file
            # TODO: we COULD estimate by multiplying with factor generated by input file sizes
            print("NO ETA POSSIBLE")
            self.frames_total = 0
        else:
            source = self.filename

        for idx, path in enumerate(inputs):
            self.msg_prepare += "Input file #%i: %s\n" % (
                idx, os.path.basename(path))

        cmd = [
            "ffmpeg", "-hide_banner",
            "-i %s" % ffmpeg_filename(source),
        ]
        if self.overwrite:
            cmd.append("-y")

        for sidx in submap:
            self.msg_prepare += "Subtitle Stream selected: Stream #%s\n" % sidx
            cmd.append("-map %s" % sidx)

        cmd.append("-map %s" % v)
        self.msg_prepare += "Video Stream selected: Stream #%s\n" % v

        flt = []
        crop = self.get_crop_option()
        if crop:
            flt.append(crop)
        if self.scaleto_720p:
            # -2 ensures division by two for codec
            flt.append("scale='min(1280,iw)':-2")
            self.msg_prepare += "Scaling output stream to 720p if width >1280\n"
        if flt:
            # append video filters
            cmd.append('-filter:v "%s"' % ",".join(flt))

        for aidx in audiomap:
            self.msg_prepare += "Audio Stream selected: Stream #%s\n" % aidx
            cmd.append("-map %s" % aidx)
        if submap:
            cmd.append("-c:s dvdsub")
        cmd.extend(self.video_options)
        cmd.extend(self.audio_options)
        cmd.append(ffmpeg_filename(outfn))

        return [" ".join(cmd)]

    def load(self, filename):
        """
        First step: setup, analyze & prepare for conversion
        """
        self.msg_prepare = ""
        self.msg_eit = ""
        self.msg_ffmpeg = ""
        self.fps = 0
        self.frames_total = 0
        self.filename = filename
        self.outfilebase = os.path.splitext(filename)[0]
        self.get_movie_description()
        self.command = self.get_ffmpeg_command()

    def convert(self):
        """
        Second step: write info text file and start ffmpeg conversion
        requires successful load as first step
        returns ffmpeg conversion exit status (status of the first failing
        command if there are several), None if there is nothing to convert
        """
        if not self.command:
            return None
        with open(self.outfilebase + ".txt", "wb") as fd:
            # NOTE(review): readeit() is not visible here and may return
            # None when there is no EIT file - confirm
            fd.write(_to_bytes(self.msg_eit or ""))
            fd.write(b"\n\n# ---DEBUG---\n\n")
            fd.write(_to_bytes(self.msg_prepare))
            fd.write(_to_bytes(self.msg_ffmpeg))

        status = None
        for cmd in self.command:
            print("Executing ffmpeg:\n%s\n" % cmd)
            status = run_ffmpeg_watch(cmd, frames_total=self.frames_total)
            if status:
                # do not continue after a failed conversion step
                break
        return status


def main(argv=None):
    """
    command line entry point
    argv: argument list without program name, defaults to sys.argv[1:]
    """
    # parse command line options
    parser = argparse.ArgumentParser(description='DVB-TS to MKV kung-fu')
    parser.add_argument('--crf', type=int, default=19,
                        help='h264 crf (default 19)')
    parser.add_argument('--tune', default='film',
                        help='ffmpeg tune preset [film, animation] (default is film)')
    parser.add_argument('--ns', action='store_true', default=False,
                        help='no rescaling (default is scale to 720p)')
    parser.add_argument('--rename', action='store_true', default=False,
                        help='rename file basename to name and genre from EIT file if present')
    parser.add_argument('input', metavar='input', nargs='+',
                        help='one or more files, glob style syntax')
    parser.add_argument('-f', action='store_true', default=False,
                        help='force overwrite of existing file')

    args = parser.parse_args(argv)
    processor = ts2mkv(crf=args.crf, tune=args.tune,
                       scaleto_720p=(not args.ns), rename=args.rename)
    processor.overwrite = args.f

    for srcstr in args.input:
        if os.path.exists(srcstr):
            # an existing path is taken literally, so that names containing
            # glob characters like "[" are not misread as patterns
            sources = [srcstr]
        else:
            sources = sorted(glob.glob(srcstr))
        for srcfile in sources:
            print("Processing: %s" % srcfile)
            processor.load(srcfile)
            processor.convert()


if __name__ == "__main__":
    main()