Sat, 25 Nov 2017 16:51:08 +0100
added support for dvb_teletext subtitles
#!/usr/bin/env python
"""
DVB-TS to MKV kung-fu
2017 by mdd

Toolkit / executable to automagically convert DVB recordings to h264 mkv.
Automatic audio stream selection (deu/eng)
Automatic crop detection to remove cinematic bars
"""
7 | 10 | |
import glob
import os
import re
import shlex
import subprocess

from eit import readeit, eitinfo
14 | ||
def filter_lines(data, search):
    """
    input: data = newline separated string, search = substring to look for
    output: newline separated string made of all lines of data in which
            search is found (original order), "" if no line matches
    """
    # the result stays a string (not a list) because callers feed it
    # straight back into filter_lines() or split it themselves
    return "\n".join(line for line in data.split("\n") if search in line)
26 | ||
def run_command(command):
    """
    run command as blocking subprocess, returns exit code

    input: command = command line string, split into arguments by shlex
    The stdout of the subprocess is echoed line by line (stripped) while it
    runs; stderr is not captured and goes to the stderr of this process.
    """
    process = subprocess.Popen(shlex.split(command), \
        stdout=subprocess.PIPE)
    try:
        # readline() returns an empty string only at EOF, so the loop blocks
        # on the pipe instead of spinning on poll() after stdout was closed
        for output in iter(process.stdout.readline, b""):
            print(output.strip())
    finally:
        process.stdout.close()
    # stdout is drained, now wait for the process itself to terminate
    return process.wait()
41 | ||
def ffmpeg_filename(filename):
    """
    Escape filename path contents for ffmpeg shell command

    input: filename = raw path
    output: path with backslash, single quote, double quote and space
            prefixed by a backslash, so that shlex.split() turns it back
            into exactly one argument holding the raw path
    """
    fn = filename
    # the backslash has to be handled first, otherwise the escapes added for
    # the other characters would be escaped a second time
    for char in ("\\", "'", '"', " "):
        fn = fn.replace(char, "\\" + char)
    return fn
49 | ||
class ts2mkv(object):
    """
    Main worker class, contains all the magic & ffmpeg voodoo

    Usage: load(filename) analyzes a recording and prepares the ffmpeg
    command, convert() writes the info text file and runs the conversion.
    """
    def __init__(self, crf=19, tune='film', scaleto_720p=True, rename=False):
        """
        input: crf = value for the ffmpeg "-crf" option
               tune = value for the ffmpeg "-tune" option (film / animation)
               scaleto_720p = limit the output width to 1280 pixels
               rename = name the output after name and genre of the EIT file
        """
        # status texts, written to the info text file by convert()
        self.msg_prepare = ""
        self.msg_eit = ""
        self.msg_ffmpeg = ""
        # list of ffmpeg command line strings, None until load() succeeded
        self.command = None
        self.filename = None
        self.outfilebase = None

        self.scaleto_720p = scaleto_720p
        self.rename = rename

        self.video_options = [
            "-c:v libx264",
            "-preset faster", # slow
            "-tune %s" % tune, # film / animation
            "-crf %i" % crf, # 21, better 19
        ]
        self.audio_options = [
            "-c:a copy",
        ]

    def get_stream_index(self, data):
        """
        input: ffmpeg stream info string
        output: ffmpeg stream mapping part (e.g. "0:1") of the first stream
                found in data, "" if there is none
        """
        # match the complete "file:stream" pair, stream numbers may have
        # more than one digit
        match = re.search(r"Stream #(\d+:\d+)", data)
        if match is None:
            return ""
        self.msg_prepare += "Selecting: %s\n" % data
        return match.group(1)

    def get_movie_description(self):
        """
        looks for eit file with same basename of current filename
        parse the eit file for txt infofile and optional build new
        output filename base with movie name and genre

        output: nothing, manipulates internal variables
        """
        if not self.filename:
            return
        # read the EIT file
        filename = os.path.splitext(self.filename)[0] + ".eit"
        self.msg_eit = readeit(filename)
        if not self.rename or not self.msg_eit:
            return
        info = eitinfo(filename)
        name = info.eit.get("name")
        if not name:
            # cancel rename, no movie title found!
            return
        genre = info.eit.get("genre")
        if genre:
            name = "%s (%s)" % (name, genre)
        # build new filename: replace or drop characters that are not wanted
        # in file names, the order of the colon rules matters
        replacements = (
            (' : ', ' - '),
            (': ', ' - '),
            (':', '-'),
            ('/', ''),
            ('\\', ''),
            ('?', ''),
            ('*', ''),
            ('"', '\''),
        )
        for old, new in replacements:
            name = name.replace(old, new)

        self.outfilebase = os.path.join(
            os.path.dirname(filename),
            name
        )

    def get_crop_option(self):
        """
        parse the ffmpeg analyze output cropdetect lines
        returns None or valid crop string for ffmpeg video filter
        """
        option = None
        for line in self.msg_ffmpeg.split("\n"):
            if "[Parsed_cropdetect" not in line:
                continue
            pos = line.find(" crop=")
            if pos == -1:
                # cropdetect line without a crop value, nothing to compare
                continue
            tmp = line[pos:].strip()
            if option is None:
                option = tmp
            elif option != tmp:
                self.msg_prepare += "WARNING: cropdetect inconsistent over scan time, disabling autocrop\n"
                return None
        if option is None:
            return None
        self.msg_prepare += "Crop detected: %s\n" % option
        return option

    def __get_audiomap(self, info):
        """
        Select the wanted german and english audio streams from ffmpeg info
        input: ffmpeg stream info lines
        output: mapping list, german stream (ac3 preferred) first, followed
                by the english ac3 stream if present
        """
        audiomap = []
        audioall = filter_lines(info, "Audio:")

        audio = filter_lines(audioall, "(deu):")
        aidx = self.get_stream_index(filter_lines(audio, "ac3"))
        if aidx == "":
            print(audioall)
            print("No AC3 german audio stream found")
            # try to find the first german audio stream
            aidx = self.get_stream_index(audio.split("\n")[0])
            if aidx == "":
                print("No other german audio streams, trying english...")
            else:
                print("Selecting first german stream.")
        if aidx != "":
            audiomap.append(aidx)

        audio = filter_lines(audioall, "(eng):")
        aidx = self.get_stream_index(filter_lines(audio, "ac3"))
        if aidx != "":
            # append english audio too!
            print("Selecting english ac3 stream.")
            audiomap.append(aidx)
        return audiomap

    def _collect_inputs(self):
        """
        Old dreambox images did a file split: .ts .ts.001 .ts.002 etc.
        output: list of unescaped paths, the current filename followed by
                all of its split parts in ascending order
        """
        inputs = [self.filename]
        if os.path.splitext(self.filename)[1].lower() == '.ts':
            # neutralise glob metacharacters of the real file name, only the
            # three digit suffix is meant to be a pattern
            pattern = re.sub(r"([*?[])", r"[\1]", self.filename)
            # glob does not guarantee any order, concat needs the parts sorted
            inputs.extend(sorted(glob.glob(pattern + '.' + ('[0-9]' * 3))))
        return inputs

    def get_ffmpeg_command(self):
        """
        Analyzes the current file with ffmpeg (stream list and cropdetect),
        selects the video, audio and teletext subtitle streams and builds
        the ffmpeg conversion command line.

        output: list of ffmpeg command line strings, None on error (no
                filename loaded, output file exists, no video stream or no
                suitable audio stream)
        """
        if not self.filename:
            return None

        fn = ffmpeg_filename(self.filename)
        outfn = self.outfilebase + ".mkv"
        # double-check: pull the kill switch and exit if outfile exists already!
        # we do not want to overwrite files in accident (caused by automatic file naming)
        if os.path.lexists(outfn):
            print("Output file exists: %s" % outfn)
            print("NOT overwriting it!")
            return None
        outfn = ffmpeg_filename(outfn)

        cmd = [
            "ffmpeg", "-hide_banner",
            "-ss 00:05:00", "-t 1", # search to 5 minutes, analyze 1 second
            "-i %s" % fn,
            "-vf \"cropdetect=24:2:0\"", # detect black bar crop on top and bottom
            "-f null", "-" # no output file
        ]
        p = subprocess.Popen(shlex.split(" ".join(cmd)), \
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = p.communicate()
        self.msg_ffmpeg = out + "\n" + err
        # drop everything in front of the input description, keep the whole
        # output if ffmpeg did not print one
        start = self.msg_ffmpeg.find("Input #0")
        if start != -1:
            self.msg_ffmpeg = self.msg_ffmpeg[start:]

        # find "Stream #0:" lines
        info = filter_lines(self.msg_ffmpeg, "Stream #0:")

        v = self.get_stream_index(
            filter_lines(info, "Video:"))
        if v == "":
            print("No video stream found")
            return None

        # TODO: copy ALL subtitle streams if present!
        # Stream #0:0[0x20](deu): Subtitle: dvb_teletext ([6][0][0][0] / 0x0006), 492x250
        submap = []
        for tmp in filter_lines(info, "Subtitle: dvb_teletext").split("\n"):
            sidx = self.get_stream_index(tmp)
            if sidx:
                submap.append(sidx)

        # select audio streams
        audiomap = self.__get_audiomap(info)
        if len(audiomap) == 0:
            print("No suitable audio stream found, aborting.")
            return None

        # Find all split parts of the recording and join them!
        inputs = self._collect_inputs()
        if len(inputs) > 1:
            # use ffmpeg input concat function
            # attention, ffmpeg doesnt like escape sequences: the whole URL
            # is passed inside double quotes, where shlex only treats
            # backslash and double quote as special
            joined = "|".join(inputs)
            joined = joined.replace("\\", "\\\\").replace('"', '\\"')
            fn = "\"concat:" + joined + "\""

        for idx, tmp in enumerate(inputs):
            self.msg_prepare += "Input file #%i: %s\n" % (
                idx, os.path.basename(tmp))

        cmd = [
            "ffmpeg", "-hide_banner",
            "-i %s" % fn,
        ]

        for tmp in submap:
            self.msg_prepare += "Subtitle Stream selected: Stream #%s\n" % tmp
            cmd.append("-map %s" % tmp)

        cmd.append("-map %s" % v)
        self.msg_prepare += "Video Stream selected: Stream #%s\n" % v

        flt = []
        crop = self.get_crop_option()
        if crop:
            flt.append(crop)
        if self.scaleto_720p:
            # -2 ensures division by two for codec
            flt.append("scale='min(1280,iw)':-2")
            self.msg_prepare += "Scaling output stream to 720p if width >1280\n"
        if len(flt) > 0:
            # append video filters
            cmd.append('-filter:v "%s"' % ",".join(flt))
        for tmp in audiomap:
            self.msg_prepare += "Audio Stream selected: Stream #%s\n" % tmp
            cmd.append("-map %s" % tmp)
        if len(submap) > 0:
            cmd.append("-c:s dvdsub")
        cmd.extend(self.video_options)
        cmd.extend(self.audio_options)
        cmd.append(outfn)

        return [" ".join(cmd)]

    def load(self, filename):
        """
        First step: setup, analyze & prepare for conversion

        input: filename = path of the recording
        output: nothing, self.command holds the ffmpeg command list
                afterwards (None if the preparation failed)
        """
        # cleanup status messages of a previously processed file
        self.msg_prepare = ""
        self.msg_eit = ""
        self.msg_ffmpeg = ""

        self.filename = filename
        self.outfilebase = os.path.splitext(filename)[0]
        self.get_movie_description()
        self.command = self.get_ffmpeg_command()

    def convert(self):
        """
        Second step: write info text file and start ffmpeg conversion
        requires successful load as first step
        returns ffmpeg conversion exit status (None without prepared
        command); stops at the first command with a non-zero exit status
        """
        if not self.command:
            return None
        # the with block closes the info file even if a write fails
        with open(self.outfilebase + ".txt", "wb") as fd:
            # NOTE(review): presumably readeit() may return None when there
            # is no EIT file - confirm against the eit module
            fd.write(self.msg_eit or "")
            fd.write("\n\n# ---DEBUG---\n\n")
            fd.write(self.msg_prepare)
            fd.write(self.msg_ffmpeg)

        rc = None
        for cmd in self.command:
            print("Executing ffmpeg:\n%s\n" % cmd)
            rc = run_command(cmd)
            if rc != 0:
                break
        return rc
325 | ||
326 | ||
327 | ||
if __name__ == "__main__":
    # parse command line options
    import argparse
    import glob

    parser = argparse.ArgumentParser(description='DVB-TS to MKV kung-fu')
    parser.add_argument('--crf', type=int, default=19, \
        help='h264 crf (default 19)')
    parser.add_argument('--tune', default='film', \
        help='ffmpeg tune preset [film, animation] (default is film)')
    parser.add_argument('--ns', action='store_true', default=False, \
        help='no rescaling (default is scale to 720p)')
    parser.add_argument('--rename', action='store_true', default=False, \
        help='rename file basename to name and genre from EIT file if present')
    parser.add_argument('input', metavar='input', nargs='+', \
        help='one or more files, glob style syntax')

    args = parser.parse_args()
    # one processor is reused for all files, load() resets its state
    processor = ts2mkv(crf=args.crf, tune=args.tune, scaleto_720p=(not args.ns), \
        rename=args.rename)

    for srcstr in args.input:
        if os.path.exists(srcstr):
            # an existing path is taken literally, glob would drop names
            # that contain pattern characters like [ ] * ?
            src = [srcstr]
        else:
            src = glob.glob(srcstr)
        if not src:
            print("No input files match: %s" % srcstr)
        for srcfile in src:
            print("Processing: %s" % srcfile)
            processor.load(srcfile)
            processor.convert()
9 | 354 | |
355 |