Tue, 28 Nov 2017 22:35:35 +0100
brainfuck
#!/usr/bin/env python
"""Report video files below BASEDIR whose titles look like duplicates.

Every ``.ts``, ``.mkv`` and ``.mp4`` file found under BASEDIR is reduced to a
lower-cased title.  Titles are then compared pairwise with
``difflib.SequenceMatcher``; files whose similarity ratio exceeds
SIMILARITY_THRESHOLD are grouped and printed as one duplicate set.
"""

from __future__ import print_function
import difflib
import os
import sys

BASEDIR = "../DREAMBOX"

# A pair of titles counts as a duplicate when the ratio is strictly above this.
SIMILARITY_THRESHOLD = 0.85

# Filled by main(): [title, filename, directory] for every recognised file.
FILELIST = []
# Filled by main(): path of the first file of a set -> paths of its duplicates.
DUPLICATES = {}


def extract_title(filename):
    """Return the lower-cased comparison title for *filename*.

    ``.ts`` names are split on " - " and the first two fields are dropped
    (presumably timestamp and channel of the recording -- confirm against
    the real file names).  ``.mkv`` and ``.mp4`` names only lose their
    extension.  Returns None for any other file name.
    """
    if filename.endswith(".ts"):
        stem = filename[:-3]
        # Names with fewer than three fields would leave an empty title, and
        # two empty titles compare as a perfect match.  Fall back to the
        # whole stem so such files are only matched on their real name.
        title = " - ".join(stem.split(" - ")[2:]) or stem
        return title.lower()
    if filename.endswith((".mkv", ".mp4")):
        return filename[:-4].lower()
    return None


def collect_files(basedir):
    """Walk *basedir* and return ``[title, filename, directory]`` entries."""
    entries = []
    for root, _subdirs, files in os.walk(basedir):
        for filename in files:
            title = extract_title(filename)
            if title is not None:
                entries.append([title, filename, root])
    return entries


def find_duplicates(filelist, threshold=SIMILARITY_THRESHOLD,
                    show_progress=False):
    """Group the entries of *filelist* whose titles are nearly identical.

    *filelist* holds ``[title, filename, directory]`` entries and is not
    modified.  The result maps the full path of the first file of each set
    to the list of full paths that matched it.  A file that was matched is
    not used as the base of a further set.  With *show_progress* the entry
    currently being compared is written to stdout on a single, rewritten
    line.
    """
    pending = list(filelist)  # work on a copy; matched slots become None
    duplicates = {}
    matcher = difflib.SequenceMatcher()
    for idx, base in enumerate(pending):
        if not base:
            continue
        if show_progress:
            print("\r%d %s\033[K" % (idx, base[0]),
                  end='')
            sys.stdout.flush()
        matcher.set_seq1(base[0])
        for idx2 in range(idx + 1, len(pending)):
            other = pending[idx2]
            if not other:
                continue
            matcher.set_seq2(other[0])
            # real_quick_ratio() and quick_ratio() are cheap upper bounds of
            # ratio(), so they can only reject pairs ratio() would reject too.
            if (matcher.real_quick_ratio() > threshold
                    and matcher.quick_ratio() > threshold
                    and matcher.ratio() > threshold):
                key = os.path.join(base[2], base[1])
                duplicates.setdefault(key, []).append(
                    os.path.join(other[2], other[1]))
                # unset the found duplicate, so that this will not be
                # scanned again
                pending[idx2] = None
    return duplicates


def print_report(duplicates):
    """Print every duplicate set: a numbered header, the base file, its
    duplicates and a blank line."""
    for number, base in enumerate(duplicates, 1):
        print("Duplicate file set #%i" % number)
        print(base)
        for dup in duplicates[base]:
            print(dup)
        print()


def main():
    """Scan BASEDIR, fill FILELIST and DUPLICATES, and print the report."""
    print("Reading files...")
    FILELIST.extend(collect_files(BASEDIR))
    print("%i files found, running duplicate testing loop" % len(FILELIST))

    DUPLICATES.update(find_duplicates(FILELIST, show_progress=True))

    print("\n\n\n")
    print_report(DUPLICATES)


if __name__ == "__main__":
    main()